diff options
Diffstat (limited to 'vendor/regex-automata/src/nfa')
-rw-r--r-- | vendor/regex-automata/src/nfa/compiler.rs | 1193 | ||||
-rw-r--r-- | vendor/regex-automata/src/nfa/mod.rs | 253 | ||||
-rw-r--r-- | vendor/regex-automata/src/nfa/thompson/compiler.rs | 1713 | ||||
-rw-r--r-- | vendor/regex-automata/src/nfa/thompson/error.rs | 145 | ||||
-rw-r--r-- | vendor/regex-automata/src/nfa/thompson/map.rs (renamed from vendor/regex-automata/src/nfa/map.rs) | 24 | ||||
-rw-r--r-- | vendor/regex-automata/src/nfa/thompson/mod.rs | 1555 | ||||
-rw-r--r-- | vendor/regex-automata/src/nfa/thompson/pikevm.rs | 554 | ||||
-rw-r--r-- | vendor/regex-automata/src/nfa/thompson/range_trie.rs (renamed from vendor/regex-automata/src/nfa/range_trie.rs) | 49 |
8 files changed, 4010 insertions, 1476 deletions
diff --git a/vendor/regex-automata/src/nfa/compiler.rs b/vendor/regex-automata/src/nfa/compiler.rs deleted file mode 100644 index d9b3945b3..000000000 --- a/vendor/regex-automata/src/nfa/compiler.rs +++ /dev/null @@ -1,1193 +0,0 @@ -// This module provides an NFA compiler using Thompson's construction -// algorithm. The compiler takes a regex-syntax::Hir as input and emits an NFA -// graph as output. The NFA graph is structured in a way that permits it to be -// executed by a virtual machine and also used to efficiently build a DFA. -// -// The compiler deals with a slightly expanded set of NFA states that notably -// includes an empty node that has exactly one epsilon transition to the next -// state. In other words, it's a "goto" instruction if one views Thompson's NFA -// as a set of bytecode instructions. These goto instructions are removed in -// a subsequent phase before returning the NFA to the caller. The purpose of -// these empty nodes is that they make the construction algorithm substantially -// simpler to implement. We remove them before returning to the caller because -// they can represent substantial overhead when traversing the NFA graph -// (either while searching using the NFA directly or while building a DFA). -// -// In the future, it would be nice to provide a Glushkov compiler as well, -// as it would work well as a bit-parallel NFA for smaller regexes. But -// the Thompson construction is one I'm more familiar with and seems more -// straight-forward to deal with when it comes to large Unicode character -// classes. -// -// Internally, the compiler uses interior mutability to improve composition -// in the face of the borrow checker. In particular, we'd really like to be -// able to write things like this: -// -// self.c_concat(exprs.iter().map(|e| self.c(e))) -// -// Which elegantly uses iterators to build up a sequence of compiled regex -// sub-expressions and then hands it off to the concatenating compiler -// routine. 
Without interior mutability, the borrow checker won't let us -// borrow `self` mutably both inside and outside the closure at the same -// time. - -use std::cell::RefCell; -use std::mem; - -use regex_syntax::hir::{self, Hir, HirKind}; -use regex_syntax::utf8::{Utf8Range, Utf8Sequences}; - -use classes::ByteClassSet; -use error::{Error, Result}; -use nfa::map::{Utf8BoundedMap, Utf8SuffixKey, Utf8SuffixMap}; -use nfa::range_trie::RangeTrie; -use nfa::{State, StateID, Transition, NFA}; - -/// Config knobs for the NFA compiler. See the builder's methods for more -/// docs on each one. -#[derive(Clone, Copy, Debug)] -struct Config { - anchored: bool, - allow_invalid_utf8: bool, - reverse: bool, - shrink: bool, -} - -impl Default for Config { - fn default() -> Config { - Config { - anchored: false, - allow_invalid_utf8: false, - reverse: false, - shrink: true, - } - } -} - -/// A builder for compiling an NFA. -#[derive(Clone, Debug)] -pub struct Builder { - config: Config, -} - -impl Builder { - /// Create a new NFA builder with its default configuration. - pub fn new() -> Builder { - Builder { config: Config::default() } - } - - /// Compile the given high level intermediate representation of a regular - /// expression into an NFA. - /// - /// If there was a problem building the NFA, then an error is returned. - /// For example, if the regex uses unsupported features (such as zero-width - /// assertions), then an error is returned. - pub fn build(&self, expr: &Hir) -> Result<NFA> { - let mut nfa = NFA::always_match(); - self.build_with(&mut Compiler::new(), &mut nfa, expr)?; - Ok(nfa) - } - - /// Compile the given high level intermediate representation of a regular - /// expression into the NFA given using the given compiler. Callers may - /// prefer this over `build` if they would like to reuse allocations while - /// compiling many regular expressions. - /// - /// On success, the given NFA is completely overwritten with the NFA - /// produced by the compiler. 
- /// - /// If there was a problem building the NFA, then an error is returned. For - /// example, if the regex uses unsupported features (such as zero-width - /// assertions), then an error is returned. When an error is returned, - /// the contents of `nfa` are unspecified and should not be relied upon. - /// However, it can still be reused in subsequent calls to this method. - pub fn build_with( - &self, - compiler: &mut Compiler, - nfa: &mut NFA, - expr: &Hir, - ) -> Result<()> { - compiler.clear(); - compiler.configure(self.config); - compiler.compile(nfa, expr) - } - - /// Set whether matching must be anchored at the beginning of the input. - /// - /// When enabled, a match must begin at the start of the input. When - /// disabled, the NFA will act as if the pattern started with a `.*?`, - /// which enables a match to appear anywhere. - /// - /// By default this is disabled. - pub fn anchored(&mut self, yes: bool) -> &mut Builder { - self.config.anchored = yes; - self - } - - /// When enabled, the builder will permit the construction of an NFA that - /// may match invalid UTF-8. - /// - /// When disabled (the default), the builder is guaranteed to produce a - /// regex that will only ever match valid UTF-8 (otherwise, the builder - /// will return an error). - pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut Builder { - self.config.allow_invalid_utf8 = yes; - self - } - - /// Reverse the NFA. - /// - /// A NFA reversal is performed by reversing all of the concatenated - /// sub-expressions in the original pattern, recursively. The resulting - /// NFA can be used to match the pattern starting from the end of a string - /// instead of the beginning of a string. - /// - /// Reversing the NFA is useful for building a reverse DFA, which is most - /// useful for finding the start of a match. 
- pub fn reverse(&mut self, yes: bool) -> &mut Builder { - self.config.reverse = yes; - self - } - - /// Apply best effort heuristics to shrink the NFA at the expense of more - /// time/memory. - /// - /// This is enabled by default. Generally speaking, if one is using an NFA - /// to compile DFA, then the extra time used to shrink the NFA will be - /// more than made up for during DFA construction (potentially by a lot). - /// In other words, enabling this can substantially decrease the overall - /// amount of time it takes to build a DFA. - /// - /// The only reason to disable this if you want to compile an NFA and start - /// using it as quickly as possible without needing to build a DFA. - pub fn shrink(&mut self, yes: bool) -> &mut Builder { - self.config.shrink = yes; - self - } -} - -/// A compiler that converts a regex abstract syntax to an NFA via Thompson's -/// construction. Namely, this compiler permits epsilon transitions between -/// states. -/// -/// Users of this crate cannot use a compiler directly. Instead, all one can -/// do is create one and use it via the -/// [`Builder::build_with`](struct.Builder.html#method.build_with) -/// method. This permits callers to reuse compilers in order to amortize -/// allocations. -#[derive(Clone, Debug)] -pub struct Compiler { - /// The set of compiled NFA states. Once a state is compiled, it is - /// assigned a state ID equivalent to its index in this list. Subsequent - /// compilation can modify previous states by adding new transitions. - states: RefCell<Vec<CState>>, - /// The configuration from the builder. - config: Config, - /// State used for compiling character classes to UTF-8 byte automata. - /// State is not retained between character class compilations. This just - /// serves to amortize allocation to the extent possible. - utf8_state: RefCell<Utf8State>, - /// State used for arranging character classes in reverse into a trie. 
- trie_state: RefCell<RangeTrie>, - /// State used for caching common suffixes when compiling reverse UTF-8 - /// automata (for Unicode character classes). - utf8_suffix: RefCell<Utf8SuffixMap>, - /// A map used to re-map state IDs when translating the compiler's internal - /// NFA state representation to the external NFA representation. - remap: RefCell<Vec<StateID>>, - /// A set of compiler internal state IDs that correspond to states that are - /// exclusively epsilon transitions, i.e., goto instructions, combined with - /// the state that they point to. This is used to record said states while - /// transforming the compiler's internal NFA representation to the external - /// form. - empties: RefCell<Vec<(StateID, StateID)>>, -} - -/// A compiler intermediate state representation for an NFA that is only used -/// during compilation. Once compilation is done, `CState`s are converted to -/// `State`s, which have a much simpler representation. -#[derive(Clone, Debug, Eq, PartialEq)] -enum CState { - /// An empty state whose only purpose is to forward the automaton to - /// another state via en epsilon transition. These are useful during - /// compilation but are otherwise removed at the end. - Empty { next: StateID }, - /// A state that only transitions to `next` if the current input byte is - /// in the range `[start, end]` (inclusive on both ends). - Range { range: Transition }, - /// A state with possibly many transitions, represented in a sparse - /// fashion. Transitions are ordered lexicographically by input range. - /// As such, this may only be used when every transition has equal - /// priority. (In practice, this is only used for encoding large UTF-8 - /// automata.) - Sparse { ranges: Vec<Transition> }, - /// An alternation such that there exists an epsilon transition to all - /// states in `alternates`, where matches found via earlier transitions - /// are preferred over later transitions. 
- Union { alternates: Vec<StateID> }, - /// An alternation such that there exists an epsilon transition to all - /// states in `alternates`, where matches found via later transitions - /// are preferred over earlier transitions. - /// - /// This "reverse" state exists for convenience during compilation that - /// permits easy construction of non-greedy combinations of NFA states. - /// At the end of compilation, Union and UnionReverse states are merged - /// into one Union type of state, where the latter has its epsilon - /// transitions reversed to reflect the priority inversion. - UnionReverse { alternates: Vec<StateID> }, - /// A match state. There is exactly one such occurrence of this state in - /// an NFA. - Match, -} - -/// A value that represents the result of compiling a sub-expression of a -/// regex's HIR. Specifically, this represents a sub-graph of the NFA that -/// has an initial state at `start` and a final state at `end`. -#[derive(Clone, Copy, Debug)] -pub struct ThompsonRef { - start: StateID, - end: StateID, -} - -impl Compiler { - /// Create a new compiler. - pub fn new() -> Compiler { - Compiler { - states: RefCell::new(vec![]), - config: Config::default(), - utf8_state: RefCell::new(Utf8State::new()), - trie_state: RefCell::new(RangeTrie::new()), - utf8_suffix: RefCell::new(Utf8SuffixMap::new(1000)), - remap: RefCell::new(vec![]), - empties: RefCell::new(vec![]), - } - } - - /// Clear any memory used by this compiler such that it is ready to compile - /// a new regex. - /// - /// It is preferrable to reuse a compiler if possible in order to reuse - /// allocations. - fn clear(&self) { - self.states.borrow_mut().clear(); - // We don't need to clear anything else since they are cleared on - // their own and only when they are used. - } - - /// Configure this compiler from the builder's knobs. - /// - /// The compiler is always reconfigured by the builder before using it to - /// build an NFA. 
- fn configure(&mut self, config: Config) { - self.config = config; - } - - /// Convert the current intermediate NFA to its final compiled form. - fn compile(&self, nfa: &mut NFA, expr: &Hir) -> Result<()> { - nfa.anchored = self.config.anchored; - - let mut start = self.add_empty(); - if !nfa.anchored { - let compiled = if self.config.allow_invalid_utf8 { - self.c_unanchored_prefix_invalid_utf8()? - } else { - self.c_unanchored_prefix_valid_utf8()? - }; - self.patch(start, compiled.start); - start = compiled.end; - } - let compiled = self.c(&expr)?; - let match_id = self.add_match(); - self.patch(start, compiled.start); - self.patch(compiled.end, match_id); - self.finish(nfa); - Ok(()) - } - - /// Finishes the compilation process and populates the provide NFA with - /// the final graph. - fn finish(&self, nfa: &mut NFA) { - let mut bstates = self.states.borrow_mut(); - let mut remap = self.remap.borrow_mut(); - remap.resize(bstates.len(), 0); - let mut empties = self.empties.borrow_mut(); - empties.clear(); - - // We don't reuse allocations here becuase this is what we're - // returning. - nfa.states.clear(); - let mut byteset = ByteClassSet::new(); - - // The idea here is to convert our intermediate states to their final - // form. The only real complexity here is the process of converting - // transitions, which are expressed in terms of state IDs. The new - // set of states will be smaller because of partial epsilon removal, - // so the state IDs will not be the same. - for (id, bstate) in bstates.iter_mut().enumerate() { - match *bstate { - CState::Empty { next } => { - // Since we're removing empty states, we need to handle - // them later since we don't yet know which new state this - // empty state will be mapped to. 
- empties.push((id, next)); - } - CState::Range { ref range } => { - remap[id] = nfa.states.len(); - byteset.set_range(range.start, range.end); - nfa.states.push(State::Range { range: range.clone() }); - } - CState::Sparse { ref mut ranges } => { - remap[id] = nfa.states.len(); - - let ranges = mem::replace(ranges, vec![]); - for r in &ranges { - byteset.set_range(r.start, r.end); - } - nfa.states.push(State::Sparse { - ranges: ranges.into_boxed_slice(), - }); - } - CState::Union { ref mut alternates } => { - remap[id] = nfa.states.len(); - - let alternates = mem::replace(alternates, vec![]); - nfa.states.push(State::Union { - alternates: alternates.into_boxed_slice(), - }); - } - CState::UnionReverse { ref mut alternates } => { - remap[id] = nfa.states.len(); - - let mut alternates = mem::replace(alternates, vec![]); - alternates.reverse(); - nfa.states.push(State::Union { - alternates: alternates.into_boxed_slice(), - }); - } - CState::Match => { - remap[id] = nfa.states.len(); - nfa.states.push(State::Match); - } - } - } - for &(empty_id, mut empty_next) in empties.iter() { - // empty states can point to other empty states, forming a chain. - // So we must follow the chain until the end, which must end at - // a non-empty state, and therefore, a state that is correctly - // remapped. We are guaranteed to terminate because our compiler - // never builds a loop among empty states. - while let CState::Empty { next } = bstates[empty_next] { - empty_next = next; - } - remap[empty_id] = remap[empty_next]; - } - for state in &mut nfa.states { - state.remap(&remap); - } - // The compiler always begins the NFA at the first state. 
- nfa.start = remap[0]; - nfa.byte_classes = byteset.byte_classes(); - } - - fn c(&self, expr: &Hir) -> Result<ThompsonRef> { - match *expr.kind() { - HirKind::Empty => { - let id = self.add_empty(); - Ok(ThompsonRef { start: id, end: id }) - } - HirKind::Literal(hir::Literal::Unicode(ch)) => { - let mut buf = [0; 4]; - let it = ch - .encode_utf8(&mut buf) - .as_bytes() - .iter() - .map(|&b| Ok(self.c_range(b, b))); - self.c_concat(it) - } - HirKind::Literal(hir::Literal::Byte(b)) => Ok(self.c_range(b, b)), - HirKind::Class(hir::Class::Bytes(ref cls)) => { - self.c_byte_class(cls) - } - HirKind::Class(hir::Class::Unicode(ref cls)) => { - self.c_unicode_class(cls) - } - HirKind::Repetition(ref rep) => self.c_repetition(rep), - HirKind::Group(ref group) => self.c(&*group.hir), - HirKind::Concat(ref exprs) => { - self.c_concat(exprs.iter().map(|e| self.c(e))) - } - HirKind::Alternation(ref exprs) => { - self.c_alternation(exprs.iter().map(|e| self.c(e))) - } - HirKind::Anchor(_) => Err(Error::unsupported_anchor()), - HirKind::WordBoundary(_) => Err(Error::unsupported_word()), - } - } - - fn c_concat<I>(&self, mut it: I) -> Result<ThompsonRef> - where - I: DoubleEndedIterator<Item = Result<ThompsonRef>>, - { - let first = - if self.config.reverse { it.next_back() } else { it.next() }; - let ThompsonRef { start, mut end } = match first { - Some(result) => result?, - None => return Ok(self.c_empty()), - }; - loop { - let next = - if self.config.reverse { it.next_back() } else { it.next() }; - let compiled = match next { - Some(result) => result?, - None => break, - }; - self.patch(end, compiled.start); - end = compiled.end; - } - Ok(ThompsonRef { start, end }) - } - - fn c_alternation<I>(&self, mut it: I) -> Result<ThompsonRef> - where - I: Iterator<Item = Result<ThompsonRef>>, - { - let first = it.next().expect("alternations must be non-empty")?; - let second = match it.next() { - None => return Ok(first), - Some(result) => result?, - }; - - let union = 
self.add_union(); - let end = self.add_empty(); - self.patch(union, first.start); - self.patch(first.end, end); - self.patch(union, second.start); - self.patch(second.end, end); - for result in it { - let compiled = result?; - self.patch(union, compiled.start); - self.patch(compiled.end, end); - } - Ok(ThompsonRef { start: union, end }) - } - - fn c_repetition(&self, rep: &hir::Repetition) -> Result<ThompsonRef> { - match rep.kind { - hir::RepetitionKind::ZeroOrOne => { - self.c_zero_or_one(&rep.hir, rep.greedy) - } - hir::RepetitionKind::ZeroOrMore => { - self.c_at_least(&rep.hir, rep.greedy, 0) - } - hir::RepetitionKind::OneOrMore => { - self.c_at_least(&rep.hir, rep.greedy, 1) - } - hir::RepetitionKind::Range(ref rng) => match *rng { - hir::RepetitionRange::Exactly(count) => { - self.c_exactly(&rep.hir, count) - } - hir::RepetitionRange::AtLeast(m) => { - self.c_at_least(&rep.hir, rep.greedy, m) - } - hir::RepetitionRange::Bounded(min, max) => { - self.c_bounded(&rep.hir, rep.greedy, min, max) - } - }, - } - } - - fn c_bounded( - &self, - expr: &Hir, - greedy: bool, - min: u32, - max: u32, - ) -> Result<ThompsonRef> { - let prefix = self.c_exactly(expr, min)?; - if min == max { - return Ok(prefix); - } - - // It is tempting here to compile the rest here as a concatenation - // of zero-or-one matches. i.e., for `a{2,5}`, compile it as if it - // were `aaa?a?a?`. The problem here is that it leads to this program: - // - // >000000: 61 => 01 - // 000001: 61 => 02 - // 000002: alt(03, 04) - // 000003: 61 => 04 - // 000004: alt(05, 06) - // 000005: 61 => 06 - // 000006: alt(07, 08) - // 000007: 61 => 08 - // 000008: MATCH - // - // And effectively, once you hit state 2, the epsilon closure will - // include states 3, 5, 5, 6, 7 and 8, which is quite a bit. 
It is - // better to instead compile it like so: - // - // >000000: 61 => 01 - // 000001: 61 => 02 - // 000002: alt(03, 08) - // 000003: 61 => 04 - // 000004: alt(05, 08) - // 000005: 61 => 06 - // 000006: alt(07, 08) - // 000007: 61 => 08 - // 000008: MATCH - // - // So that the epsilon closure of state 2 is now just 3 and 8. - let empty = self.add_empty(); - let mut prev_end = prefix.end; - for _ in min..max { - let union = if greedy { - self.add_union() - } else { - self.add_reverse_union() - }; - let compiled = self.c(expr)?; - self.patch(prev_end, union); - self.patch(union, compiled.start); - self.patch(union, empty); - prev_end = compiled.end; - } - self.patch(prev_end, empty); - Ok(ThompsonRef { start: prefix.start, end: empty }) - } - - fn c_at_least( - &self, - expr: &Hir, - greedy: bool, - n: u32, - ) -> Result<ThompsonRef> { - if n == 0 { - let union = if greedy { - self.add_union() - } else { - self.add_reverse_union() - }; - let compiled = self.c(expr)?; - self.patch(union, compiled.start); - self.patch(compiled.end, union); - Ok(ThompsonRef { start: union, end: union }) - } else if n == 1 { - let compiled = self.c(expr)?; - let union = if greedy { - self.add_union() - } else { - self.add_reverse_union() - }; - self.patch(compiled.end, union); - self.patch(union, compiled.start); - Ok(ThompsonRef { start: compiled.start, end: union }) - } else { - let prefix = self.c_exactly(expr, n - 1)?; - let last = self.c(expr)?; - let union = if greedy { - self.add_union() - } else { - self.add_reverse_union() - }; - self.patch(prefix.end, last.start); - self.patch(last.end, union); - self.patch(union, last.start); - Ok(ThompsonRef { start: prefix.start, end: union }) - } - } - - fn c_zero_or_one(&self, expr: &Hir, greedy: bool) -> Result<ThompsonRef> { - let union = - if greedy { self.add_union() } else { self.add_reverse_union() }; - let compiled = self.c(expr)?; - let empty = self.add_empty(); - self.patch(union, compiled.start); - self.patch(union, empty); - 
self.patch(compiled.end, empty); - Ok(ThompsonRef { start: union, end: empty }) - } - - fn c_exactly(&self, expr: &Hir, n: u32) -> Result<ThompsonRef> { - let it = (0..n).map(|_| self.c(expr)); - self.c_concat(it) - } - - fn c_byte_class(&self, cls: &hir::ClassBytes) -> Result<ThompsonRef> { - let end = self.add_empty(); - let mut trans = Vec::with_capacity(cls.ranges().len()); - for r in cls.iter() { - trans.push(Transition { - start: r.start(), - end: r.end(), - next: end, - }); - } - Ok(ThompsonRef { start: self.add_sparse(trans), end }) - } - - fn c_unicode_class(&self, cls: &hir::ClassUnicode) -> Result<ThompsonRef> { - // If all we have are ASCII ranges wrapped in a Unicode package, then - // there is zero reason to bring out the big guns. We can fit all ASCII - // ranges within a single sparse transition. - if cls.is_all_ascii() { - let end = self.add_empty(); - let mut trans = Vec::with_capacity(cls.ranges().len()); - for r in cls.iter() { - assert!(r.start() <= '\x7F'); - assert!(r.end() <= '\x7F'); - trans.push(Transition { - start: r.start() as u8, - end: r.end() as u8, - next: end, - }); - } - Ok(ThompsonRef { start: self.add_sparse(trans), end }) - } else if self.config.reverse { - if !self.config.shrink { - // When we don't want to spend the extra time shrinking, we - // compile the UTF-8 automaton in reverse using something like - // the "naive" approach, but will attempt to re-use common - // suffixes. - self.c_unicode_class_reverse_with_suffix(cls) - } else { - // When we want to shrink our NFA for reverse UTF-8 automata, - // we cannot feed UTF-8 sequences directly to the UTF-8 - // compiler, since the UTF-8 compiler requires all sequences - // to be lexicographically sorted. Instead, we organize our - // sequences into a range trie, which can then output our - // sequences in the correct order. Unfortunately, building the - // range trie is fairly expensive (but not nearly as expensive - // as building a DFA). 
Hence the reason why the 'shrink' option - // exists, so that this path can be toggled off. - let mut trie = self.trie_state.borrow_mut(); - trie.clear(); - - for rng in cls.iter() { - for mut seq in Utf8Sequences::new(rng.start(), rng.end()) { - seq.reverse(); - trie.insert(seq.as_slice()); - } - } - let mut utf8_state = self.utf8_state.borrow_mut(); - let mut utf8c = Utf8Compiler::new(self, &mut *utf8_state); - trie.iter(|seq| { - utf8c.add(&seq); - }); - Ok(utf8c.finish()) - } - } else { - // In the forward direction, we always shrink our UTF-8 automata - // because we can stream it right into the UTF-8 compiler. There - // is almost no downside (in either memory or time) to using this - // approach. - let mut utf8_state = self.utf8_state.borrow_mut(); - let mut utf8c = Utf8Compiler::new(self, &mut *utf8_state); - for rng in cls.iter() { - for seq in Utf8Sequences::new(rng.start(), rng.end()) { - utf8c.add(seq.as_slice()); - } - } - Ok(utf8c.finish()) - } - - // For reference, the code below is the "naive" version of compiling a - // UTF-8 automaton. It is deliciously simple (and works for both the - // forward and reverse cases), but will unfortunately produce very - // large NFAs. When compiling a forward automaton, the size difference - // can sometimes be an order of magnitude. For example, the '\w' regex - // will generate about ~3000 NFA states using the naive approach below, - // but only 283 states when using the approach above. This is because - // the approach above actually compiles a *minimal* (or near minimal, - // because of the bounded hashmap) UTF-8 automaton. - // - // The code below is kept as a reference point in order to make it - // easier to understand the higher level goal here. 
- /* - let it = cls - .iter() - .flat_map(|rng| Utf8Sequences::new(rng.start(), rng.end())) - .map(|seq| { - let it = seq - .as_slice() - .iter() - .map(|rng| Ok(self.c_range(rng.start, rng.end))); - self.c_concat(it) - }); - self.c_alternation(it); - */ - } - - fn c_unicode_class_reverse_with_suffix( - &self, - cls: &hir::ClassUnicode, - ) -> Result<ThompsonRef> { - // N.B. It would likely be better to cache common *prefixes* in the - // reverse direction, but it's not quite clear how to do that. The - // advantage of caching suffixes is that it does give us a win, and - // has a very small additional overhead. - let mut cache = self.utf8_suffix.borrow_mut(); - cache.clear(); - - let union = self.add_union(); - let alt_end = self.add_empty(); - for urng in cls.iter() { - for seq in Utf8Sequences::new(urng.start(), urng.end()) { - let mut end = alt_end; - for brng in seq.as_slice() { - let key = Utf8SuffixKey { - from: end, - start: brng.start, - end: brng.end, - }; - let hash = cache.hash(&key); - if let Some(id) = cache.get(&key, hash) { - end = id; - continue; - } - - let compiled = self.c_range(brng.start, brng.end); - self.patch(compiled.end, end); - end = compiled.start; - cache.set(key, hash, end); - } - self.patch(union, end); - } - } - Ok(ThompsonRef { start: union, end: alt_end }) - } - - fn c_range(&self, start: u8, end: u8) -> ThompsonRef { - let id = self.add_range(start, end); - ThompsonRef { start: id, end: id } - } - - fn c_empty(&self) -> ThompsonRef { - let id = self.add_empty(); - ThompsonRef { start: id, end: id } - } - - fn c_unanchored_prefix_valid_utf8(&self) -> Result<ThompsonRef> { - self.c(&Hir::repetition(hir::Repetition { - kind: hir::RepetitionKind::ZeroOrMore, - greedy: false, - hir: Box::new(Hir::any(false)), - })) - } - - fn c_unanchored_prefix_invalid_utf8(&self) -> Result<ThompsonRef> { - self.c(&Hir::repetition(hir::Repetition { - kind: hir::RepetitionKind::ZeroOrMore, - greedy: false, - hir: Box::new(Hir::any(true)), - })) - } - 
- fn patch(&self, from: StateID, to: StateID) { - match self.states.borrow_mut()[from] { - CState::Empty { ref mut next } => { - *next = to; - } - CState::Range { ref mut range } => { - range.next = to; - } - CState::Sparse { .. } => { - panic!("cannot patch from a sparse NFA state") - } - CState::Union { ref mut alternates } => { - alternates.push(to); - } - CState::UnionReverse { ref mut alternates } => { - alternates.push(to); - } - CState::Match => {} - } - } - - fn add_empty(&self) -> StateID { - let id = self.states.borrow().len(); - self.states.borrow_mut().push(CState::Empty { next: 0 }); - id - } - - fn add_range(&self, start: u8, end: u8) -> StateID { - let id = self.states.borrow().len(); - let trans = Transition { start, end, next: 0 }; - let state = CState::Range { range: trans }; - self.states.borrow_mut().push(state); - id - } - - fn add_sparse(&self, ranges: Vec<Transition>) -> StateID { - if ranges.len() == 1 { - let id = self.states.borrow().len(); - let state = CState::Range { range: ranges[0] }; - self.states.borrow_mut().push(state); - return id; - } - let id = self.states.borrow().len(); - let state = CState::Sparse { ranges }; - self.states.borrow_mut().push(state); - id - } - - fn add_union(&self) -> StateID { - let id = self.states.borrow().len(); - let state = CState::Union { alternates: vec![] }; - self.states.borrow_mut().push(state); - id - } - - fn add_reverse_union(&self) -> StateID { - let id = self.states.borrow().len(); - let state = CState::UnionReverse { alternates: vec![] }; - self.states.borrow_mut().push(state); - id - } - - fn add_match(&self) -> StateID { - let id = self.states.borrow().len(); - self.states.borrow_mut().push(CState::Match); - id - } -} - -#[derive(Debug)] -struct Utf8Compiler<'a> { - nfac: &'a Compiler, - state: &'a mut Utf8State, - target: StateID, -} - -#[derive(Clone, Debug)] -struct Utf8State { - compiled: Utf8BoundedMap, - uncompiled: Vec<Utf8Node>, -} - -#[derive(Clone, Debug)] -struct Utf8Node { - 
trans: Vec<Transition>, - last: Option<Utf8LastTransition>, -} - -#[derive(Clone, Debug)] -struct Utf8LastTransition { - start: u8, - end: u8, -} - -impl Utf8State { - fn new() -> Utf8State { - Utf8State { compiled: Utf8BoundedMap::new(5000), uncompiled: vec![] } - } - - fn clear(&mut self) { - self.compiled.clear(); - self.uncompiled.clear(); - } -} - -impl<'a> Utf8Compiler<'a> { - fn new(nfac: &'a Compiler, state: &'a mut Utf8State) -> Utf8Compiler<'a> { - let target = nfac.add_empty(); - state.clear(); - let mut utf8c = Utf8Compiler { nfac, state, target }; - utf8c.add_empty(); - utf8c - } - - fn finish(&mut self) -> ThompsonRef { - self.compile_from(0); - let node = self.pop_root(); - let start = self.compile(node); - ThompsonRef { start, end: self.target } - } - - fn add(&mut self, ranges: &[Utf8Range]) { - let prefix_len = ranges - .iter() - .zip(&self.state.uncompiled) - .take_while(|&(range, node)| { - node.last.as_ref().map_or(false, |t| { - (t.start, t.end) == (range.start, range.end) - }) - }) - .count(); - assert!(prefix_len < ranges.len()); - self.compile_from(prefix_len); - self.add_suffix(&ranges[prefix_len..]); - } - - fn compile_from(&mut self, from: usize) { - let mut next = self.target; - while from + 1 < self.state.uncompiled.len() { - let node = self.pop_freeze(next); - next = self.compile(node); - } - self.top_last_freeze(next); - } - - fn compile(&mut self, node: Vec<Transition>) -> StateID { - let hash = self.state.compiled.hash(&node); - if let Some(id) = self.state.compiled.get(&node, hash) { - return id; - } - let id = self.nfac.add_sparse(node.clone()); - self.state.compiled.set(node, hash, id); - id - } - - fn add_suffix(&mut self, ranges: &[Utf8Range]) { - assert!(!ranges.is_empty()); - let last = self - .state - .uncompiled - .len() - .checked_sub(1) - .expect("non-empty nodes"); - assert!(self.state.uncompiled[last].last.is_none()); - self.state.uncompiled[last].last = Some(Utf8LastTransition { - start: ranges[0].start, - end: 
ranges[0].end, - }); - for r in &ranges[1..] { - self.state.uncompiled.push(Utf8Node { - trans: vec![], - last: Some(Utf8LastTransition { start: r.start, end: r.end }), - }); - } - } - - fn add_empty(&mut self) { - self.state.uncompiled.push(Utf8Node { trans: vec![], last: None }); - } - - fn pop_freeze(&mut self, next: StateID) -> Vec<Transition> { - let mut uncompiled = self.state.uncompiled.pop().unwrap(); - uncompiled.set_last_transition(next); - uncompiled.trans - } - - fn pop_root(&mut self) -> Vec<Transition> { - assert_eq!(self.state.uncompiled.len(), 1); - assert!(self.state.uncompiled[0].last.is_none()); - self.state.uncompiled.pop().expect("non-empty nodes").trans - } - - fn top_last_freeze(&mut self, next: StateID) { - let last = self - .state - .uncompiled - .len() - .checked_sub(1) - .expect("non-empty nodes"); - self.state.uncompiled[last].set_last_transition(next); - } -} - -impl Utf8Node { - fn set_last_transition(&mut self, next: StateID) { - if let Some(last) = self.last.take() { - self.trans.push(Transition { - start: last.start, - end: last.end, - next, - }); - } - } -} - -#[cfg(test)] -mod tests { - use regex_syntax::hir::Hir; - use regex_syntax::ParserBuilder; - - use super::{Builder, State, StateID, Transition, NFA}; - - fn parse(pattern: &str) -> Hir { - ParserBuilder::new().build().parse(pattern).unwrap() - } - - fn build(pattern: &str) -> NFA { - Builder::new().anchored(true).build(&parse(pattern)).unwrap() - } - - fn s_byte(byte: u8, next: StateID) -> State { - let trans = Transition { start: byte, end: byte, next }; - State::Range { range: trans } - } - - fn s_range(start: u8, end: u8, next: StateID) -> State { - let trans = Transition { start, end, next }; - State::Range { range: trans } - } - - fn s_sparse(ranges: &[(u8, u8, StateID)]) -> State { - let ranges = ranges - .iter() - .map(|&(start, end, next)| Transition { start, end, next }) - .collect(); - State::Sparse { ranges } - } - - fn s_union(alts: &[StateID]) -> State { - 
State::Union { alternates: alts.to_vec().into_boxed_slice() } - } - - fn s_match() -> State { - State::Match - } - - #[test] - fn errors() { - // unsupported anchors - assert!(Builder::new().build(&parse(r"^")).is_err()); - assert!(Builder::new().build(&parse(r"$")).is_err()); - assert!(Builder::new().build(&parse(r"\A")).is_err()); - assert!(Builder::new().build(&parse(r"\z")).is_err()); - - // unsupported word boundaries - assert!(Builder::new().build(&parse(r"\b")).is_err()); - assert!(Builder::new().build(&parse(r"\B")).is_err()); - assert!(Builder::new().build(&parse(r"(?-u)\b")).is_err()); - } - - // Test that building an unanchored NFA has an appropriate `.*?` prefix. - #[test] - fn compile_unanchored_prefix() { - // When the machine can only match valid UTF-8. - let nfa = Builder::new().anchored(false).build(&parse(r"a")).unwrap(); - // There should be many states since the `.` in `.*?` matches any - // Unicode scalar value. - assert_eq!(11, nfa.len()); - assert_eq!(nfa.states[10], s_match()); - assert_eq!(nfa.states[9], s_byte(b'a', 10)); - - // When the machine can match invalid UTF-8. - let nfa = Builder::new() - .anchored(false) - .allow_invalid_utf8(true) - .build(&parse(r"a")) - .unwrap(); - assert_eq!( - nfa.states, - &[ - s_union(&[2, 1]), - s_range(0, 255, 0), - s_byte(b'a', 3), - s_match(), - ] - ); - } - - #[test] - fn compile_empty() { - assert_eq!(build("").states, &[s_match(),]); - } - - #[test] - fn compile_literal() { - assert_eq!(build("a").states, &[s_byte(b'a', 1), s_match(),]); - assert_eq!( - build("ab").states, - &[s_byte(b'a', 1), s_byte(b'b', 2), s_match(),] - ); - assert_eq!( - build("☃").states, - &[s_byte(0xE2, 1), s_byte(0x98, 2), s_byte(0x83, 3), s_match(),] - ); - - // Check that non-UTF-8 literals work. 
- let hir = ParserBuilder::new() - .allow_invalid_utf8(true) - .build() - .parse(r"(?-u)\xFF") - .unwrap(); - let nfa = Builder::new() - .anchored(true) - .allow_invalid_utf8(true) - .build(&hir) - .unwrap(); - assert_eq!(nfa.states, &[s_byte(b'\xFF', 1), s_match(),]); - } - - #[test] - fn compile_class() { - assert_eq!( - build(r"[a-z]").states, - &[s_range(b'a', b'z', 1), s_match(),] - ); - assert_eq!( - build(r"[x-za-c]").states, - &[s_sparse(&[(b'a', b'c', 1), (b'x', b'z', 1)]), s_match()] - ); - assert_eq!( - build(r"[\u03B1-\u03B4]").states, - &[s_range(0xB1, 0xB4, 2), s_byte(0xCE, 0), s_match()] - ); - assert_eq!( - build(r"[\u03B1-\u03B4\u{1F919}-\u{1F91E}]").states, - &[ - s_range(0xB1, 0xB4, 5), - s_range(0x99, 0x9E, 5), - s_byte(0xA4, 1), - s_byte(0x9F, 2), - s_sparse(&[(0xCE, 0xCE, 0), (0xF0, 0xF0, 3)]), - s_match(), - ] - ); - assert_eq!( - build(r"[a-z☃]").states, - &[ - s_byte(0x83, 3), - s_byte(0x98, 0), - s_sparse(&[(b'a', b'z', 3), (0xE2, 0xE2, 1)]), - s_match(), - ] - ); - } - - #[test] - fn compile_repetition() { - assert_eq!( - build(r"a?").states, - &[s_union(&[1, 2]), s_byte(b'a', 2), s_match(),] - ); - assert_eq!( - build(r"a??").states, - &[s_union(&[2, 1]), s_byte(b'a', 2), s_match(),] - ); - } - - #[test] - fn compile_group() { - assert_eq!( - build(r"ab+").states, - &[s_byte(b'a', 1), s_byte(b'b', 2), s_union(&[1, 3]), s_match(),] - ); - assert_eq!( - build(r"(ab)").states, - &[s_byte(b'a', 1), s_byte(b'b', 2), s_match(),] - ); - assert_eq!( - build(r"(ab)+").states, - &[s_byte(b'a', 1), s_byte(b'b', 2), s_union(&[0, 3]), s_match(),] - ); - } - - #[test] - fn compile_alternation() { - assert_eq!( - build(r"a|b").states, - &[s_byte(b'a', 3), s_byte(b'b', 3), s_union(&[0, 1]), s_match(),] - ); - assert_eq!( - build(r"|b").states, - &[s_byte(b'b', 2), s_union(&[2, 0]), s_match(),] - ); - assert_eq!( - build(r"a|").states, - &[s_byte(b'a', 2), s_union(&[0, 2]), s_match(),] - ); - } -} diff --git a/vendor/regex-automata/src/nfa/mod.rs 
b/vendor/regex-automata/src/nfa/mod.rs index 02d0501de..61ce5ef47 100644 --- a/vendor/regex-automata/src/nfa/mod.rs +++ b/vendor/regex-automata/src/nfa/mod.rs @@ -1,252 +1 @@ -use std::fmt; - -use classes::ByteClasses; -pub use nfa::compiler::Builder; - -mod compiler; -mod map; -mod range_trie; - -/// The representation for an NFA state identifier. -pub type StateID = usize; - -/// A final compiled NFA. -/// -/// The states of the NFA are indexed by state IDs, which are how transitions -/// are expressed. -#[derive(Clone)] -pub struct NFA { - /// Whether this NFA can only match at the beginning of input or not. - /// - /// When true, a match should only be reported if it begins at the 0th - /// index of the haystack. - anchored: bool, - /// The starting state of this NFA. - start: StateID, - /// The state list. This list is guaranteed to be indexable by the starting - /// state ID, and it is also guaranteed to contain exactly one `Match` - /// state. - states: Vec<State>, - /// A mapping from any byte value to its corresponding equivalence class - /// identifier. Two bytes in the same equivalence class cannot discriminate - /// between a match or a non-match. This map can be used to shrink the - /// total size of a DFA's transition table with a small match-time cost. - /// - /// Note that the NFA's transitions are *not* defined in terms of these - /// equivalence classes. The NFA's transitions are defined on the original - /// byte values. For the most part, this is because they wouldn't really - /// help the NFA much since the NFA already uses a sparse representation - /// to represent transitions. Byte classes are most effective in a dense - /// representation. - byte_classes: ByteClasses, -} - -impl NFA { - /// Returns an NFA that always matches at every position. 
- pub fn always_match() -> NFA { - NFA { - anchored: false, - start: 0, - states: vec![State::Match], - byte_classes: ByteClasses::empty(), - } - } - - /// Returns an NFA that never matches at any position. - pub fn never_match() -> NFA { - NFA { - anchored: false, - start: 0, - states: vec![State::Fail], - byte_classes: ByteClasses::empty(), - } - } - - /// Returns true if and only if this NFA is anchored. - pub fn is_anchored(&self) -> bool { - self.anchored - } - - /// Return the number of states in this NFA. - pub fn len(&self) -> usize { - self.states.len() - } - - /// Return the ID of the initial state of this NFA. - pub fn start(&self) -> StateID { - self.start - } - - /// Return the NFA state corresponding to the given ID. - pub fn state(&self, id: StateID) -> &State { - &self.states[id] - } - - /// Return the set of equivalence classes for this NFA. The slice returned - /// always has length 256 and maps each possible byte value to its - /// corresponding equivalence class ID (which is never more than 255). - pub fn byte_classes(&self) -> &ByteClasses { - &self.byte_classes - } -} - -impl fmt::Debug for NFA { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - for (i, state) in self.states.iter().enumerate() { - let status = if i == self.start { '>' } else { ' ' }; - writeln!(f, "{}{:06}: {:?}", status, i, state)?; - } - Ok(()) - } -} - -/// A state in a final compiled NFA. -#[derive(Clone, Eq, PartialEq)] -pub enum State { - /// A state that transitions to `next` if and only if the current input - /// byte is in the range `[start, end]` (inclusive). - /// - /// This is a special case of Sparse in that it encodes only one transition - /// (and therefore avoids the allocation). - Range { range: Transition }, - /// A state with possibly many transitions, represented in a sparse - /// fashion. Transitions are ordered lexicographically by input range. - /// As such, this may only be used when every transition has equal - /// priority. 
(In practice, this is only used for encoding large UTF-8 - /// automata.) - Sparse { ranges: Box<[Transition]> }, - /// An alternation such that there exists an epsilon transition to all - /// states in `alternates`, where matches found via earlier transitions - /// are preferred over later transitions. - Union { alternates: Box<[StateID]> }, - /// A fail state. When encountered, the automaton is guaranteed to never - /// reach a match state. - Fail, - /// A match state. There is exactly one such occurrence of this state in - /// an NFA. - Match, -} - -/// A transition to another state, only if the given byte falls in the -/// inclusive range specified. -#[derive(Clone, Copy, Eq, Hash, PartialEq)] -pub struct Transition { - pub start: u8, - pub end: u8, - pub next: StateID, -} - -impl State { - /// Returns true if and only if this state contains one or more epsilon - /// transitions. - pub fn is_epsilon(&self) -> bool { - match *self { - State::Range { .. } - | State::Sparse { .. } - | State::Fail - | State::Match => false, - State::Union { .. } => true, - } - } - - /// Remap the transitions in this state using the given map. Namely, the - /// given map should be indexed according to the transitions currently - /// in this state. - /// - /// This is used during the final phase of the NFA compiler, which turns - /// its intermediate NFA into the final NFA. 
- fn remap(&mut self, remap: &[StateID]) { - match *self { - State::Range { ref mut range } => range.next = remap[range.next], - State::Sparse { ref mut ranges } => { - for r in ranges.iter_mut() { - r.next = remap[r.next]; - } - } - State::Union { ref mut alternates } => { - for alt in alternates.iter_mut() { - *alt = remap[*alt]; - } - } - State::Fail => {} - State::Match => {} - } - } -} - -impl fmt::Debug for State { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match *self { - State::Range { ref range } => range.fmt(f), - State::Sparse { ref ranges } => { - let rs = ranges - .iter() - .map(|t| format!("{:?}", t)) - .collect::<Vec<String>>() - .join(", "); - write!(f, "sparse({})", rs) - } - State::Union { ref alternates } => { - let alts = alternates - .iter() - .map(|id| format!("{}", id)) - .collect::<Vec<String>>() - .join(", "); - write!(f, "alt({})", alts) - } - State::Fail => write!(f, "FAIL"), - State::Match => write!(f, "MATCH"), - } - } -} - -impl fmt::Debug for Transition { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let Transition { start, end, next } = *self; - if self.start == self.end { - write!(f, "{} => {}", escape(start), next) - } else { - write!(f, "{}-{} => {}", escape(start), escape(end), next) - } - } -} - -/// Return the given byte as its escaped string form. 
-fn escape(b: u8) -> String { - use std::ascii; - - String::from_utf8(ascii::escape_default(b).collect::<Vec<_>>()).unwrap() -} - -#[cfg(test)] -mod tests { - use super::*; - use dense; - use dfa::DFA; - - #[test] - fn always_match() { - let nfa = NFA::always_match(); - let dfa = dense::Builder::new().build_from_nfa::<usize>(&nfa).unwrap(); - - assert_eq!(Some(0), dfa.find_at(b"", 0)); - assert_eq!(Some(0), dfa.find_at(b"a", 0)); - assert_eq!(Some(1), dfa.find_at(b"a", 1)); - assert_eq!(Some(0), dfa.find_at(b"ab", 0)); - assert_eq!(Some(1), dfa.find_at(b"ab", 1)); - assert_eq!(Some(2), dfa.find_at(b"ab", 2)); - } - - #[test] - fn never_match() { - let nfa = NFA::never_match(); - let dfa = dense::Builder::new().build_from_nfa::<usize>(&nfa).unwrap(); - - assert_eq!(None, dfa.find_at(b"", 0)); - assert_eq!(None, dfa.find_at(b"a", 0)); - assert_eq!(None, dfa.find_at(b"a", 1)); - assert_eq!(None, dfa.find_at(b"ab", 0)); - assert_eq!(None, dfa.find_at(b"ab", 1)); - assert_eq!(None, dfa.find_at(b"ab", 2)); - } -} +pub mod thompson; diff --git a/vendor/regex-automata/src/nfa/thompson/compiler.rs b/vendor/regex-automata/src/nfa/thompson/compiler.rs new file mode 100644 index 000000000..301194005 --- /dev/null +++ b/vendor/regex-automata/src/nfa/thompson/compiler.rs @@ -0,0 +1,1713 @@ +/* +This module provides an NFA compiler using Thompson's construction +algorithm. The compiler takes a regex-syntax::Hir as input and emits an NFA +graph as output. The NFA graph is structured in a way that permits it to be +executed by a virtual machine and also used to efficiently build a DFA. + +The compiler deals with a slightly expanded set of NFA states that notably +includes an empty node that has exactly one epsilon transition to the next +state. In other words, it's a "goto" instruction if one views Thompson's NFA +as a set of bytecode instructions. These goto instructions are removed in +a subsequent phase before returning the NFA to the caller. 
The purpose of +these empty nodes is that they make the construction algorithm substantially +simpler to implement. We remove them before returning to the caller because +they can represent substantial overhead when traversing the NFA graph +(either while searching using the NFA directly or while building a DFA). + +In the future, it would be nice to provide a Glushkov compiler as well, +as it would work well as a bit-parallel NFA for smaller regexes. But +the Thompson construction is one I'm more familiar with and seems more +straight-forward to deal with when it comes to large Unicode character +classes. + +Internally, the compiler uses interior mutability to improve composition +in the face of the borrow checker. In particular, we'd really like to be +able to write things like this: + + self.c_concat(exprs.iter().map(|e| self.c(e))) + +Which elegantly uses iterators to build up a sequence of compiled regex +sub-expressions and then hands it off to the concatenating compiler +routine. Without interior mutability, the borrow checker won't let us +borrow `self` mutably both inside and outside the closure at the same +time. +*/ + +use core::{ + borrow::Borrow, + cell::{Cell, RefCell}, + mem, +}; + +use alloc::{sync::Arc, vec, vec::Vec}; + +use regex_syntax::{ + hir::{self, Anchor, Class, Hir, HirKind, Literal, WordBoundary}, + utf8::{Utf8Range, Utf8Sequences}, + ParserBuilder, +}; + +use crate::{ + nfa::thompson::{ + error::Error, + map::{Utf8BoundedMap, Utf8SuffixKey, Utf8SuffixMap}, + range_trie::RangeTrie, + Look, SparseTransitions, State, Transition, NFA, + }, + util::{ + alphabet::ByteClassSet, + id::{IteratorIDExt, PatternID, StateID}, + }, +}; + +/// The configuration used for compiling a Thompson NFA from a regex pattern. 
+#[derive(Clone, Copy, Debug, Default)] +pub struct Config { + reverse: Option<bool>, + utf8: Option<bool>, + nfa_size_limit: Option<Option<usize>>, + shrink: Option<bool>, + captures: Option<bool>, + #[cfg(test)] + unanchored_prefix: Option<bool>, +} + +impl Config { + /// Return a new default Thompson NFA compiler configuration. + pub fn new() -> Config { + Config::default() + } + + /// Reverse the NFA. + /// + /// A NFA reversal is performed by reversing all of the concatenated + /// sub-expressions in the original pattern, recursively. The resulting + /// NFA can be used to match the pattern starting from the end of a string + /// instead of the beginning of a string. + /// + /// Reversing the NFA is useful for building a reverse DFA, which is most + /// useful for finding the start of a match after its ending position has + /// been found. + /// + /// This is disabled by default. + pub fn reverse(mut self, yes: bool) -> Config { + self.reverse = Some(yes); + self + } + + /// Whether to enable UTF-8 mode or not. + /// + /// When UTF-8 mode is enabled (which is the default), unanchored searches + /// will only match through valid UTF-8. If invalid UTF-8 is seen, then + /// an unanchored search will stop at that point. This is equivalent to + /// putting a `(?s:.)*?` at the start of the regex. + /// + /// When UTF-8 mode is disabled, then unanchored searches will match + /// through any arbitrary byte. This is equivalent to putting a + /// `(?s-u:.)*?` at the start of the regex. + /// + /// Generally speaking, UTF-8 mode should only be used when you know you + /// are searching valid UTF-8, such as a Rust `&str`. If UTF-8 mode is used + /// on input that is not valid UTF-8, then the regex is not likely to work + /// as expected. + /// + /// This is enabled by default. + pub fn utf8(mut self, yes: bool) -> Config { + self.utf8 = Some(yes); + self + } + + /// Sets an approximate size limit on the total heap used by the NFA being + /// compiled. 
+ /// + /// This permits imposing constraints on the size of a compiled NFA. This + /// may be useful in contexts where the regex pattern is untrusted and one + /// wants to avoid using too much memory. + /// + /// This size limit does not apply to auxiliary heap used during + /// compilation that is not part of the built NFA. + /// + /// Note that this size limit is applied during compilation in order for + /// the limit to prevent too much heap from being used. However, the + /// implementation may use an intermediate NFA representation that is + /// otherwise slightly bigger than the final public form. Since the size + /// limit may be applied to an intermediate representation, there is not + /// necessarily a precise correspondence between the configured size limit + /// and the heap usage of the final NFA. + /// + /// There is no size limit by default. + /// + /// # Example + /// + /// This example demonstrates how Unicode mode can greatly increase the + /// size of the NFA. + /// + /// ``` + /// use regex_automata::nfa::thompson::NFA; + /// + /// // 300KB isn't enough! + /// NFA::builder() + /// .configure(NFA::config().nfa_size_limit(Some(300_000))) + /// .build(r"\w{20}") + /// .unwrap_err(); + /// + /// // ... but 400KB probably is. + /// let nfa = NFA::builder() + /// .configure(NFA::config().nfa_size_limit(Some(400_000))) + /// .build(r"\w{20}")?; + /// + /// assert_eq!(nfa.pattern_len(), 1); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn nfa_size_limit(mut self, bytes: Option<usize>) -> Config { + self.nfa_size_limit = Some(bytes); + self + } + + /// Apply best effort heuristics to shrink the NFA at the expense of more + /// time/memory. + /// + /// This is enabled by default. Generally speaking, if one is using an NFA + /// to compile a DFA, then the extra time used to shrink the NFA will be + /// more than made up for during DFA construction (potentially by a lot). 
+ /// In other words, enabling this can substantially decrease the overall + /// amount of time it takes to build a DFA. + /// + /// The only reason to disable this if you want to compile an NFA and start + /// using it as quickly as possible without needing to build a DFA. e.g., + /// for an NFA simulation or for a lazy DFA. + /// + /// This is enabled by default. + pub fn shrink(mut self, yes: bool) -> Config { + self.shrink = Some(yes); + self + } + + /// Whether to include 'Capture' states in the NFA. + /// + /// This can only be enabled when compiling a forward NFA. This is + /// always disabled---with no way to override it---when the `reverse` + /// configuration is enabled. + /// + /// This is enabled by default. + pub fn captures(mut self, yes: bool) -> Config { + self.captures = Some(yes); + self + } + + /// Whether to compile an unanchored prefix into this NFA. + /// + /// This is enabled by default. It is made available for tests only to make + /// it easier to unit test the output of the compiler. 
+ #[cfg(test)] + fn unanchored_prefix(mut self, yes: bool) -> Config { + self.unanchored_prefix = Some(yes); + self + } + + pub fn get_reverse(&self) -> bool { + self.reverse.unwrap_or(false) + } + + pub fn get_utf8(&self) -> bool { + self.utf8.unwrap_or(true) + } + + pub fn get_nfa_size_limit(&self) -> Option<usize> { + self.nfa_size_limit.unwrap_or(None) + } + + pub fn get_shrink(&self) -> bool { + self.shrink.unwrap_or(true) + } + + pub fn get_captures(&self) -> bool { + !self.get_reverse() && self.captures.unwrap_or(true) + } + + fn get_unanchored_prefix(&self) -> bool { + #[cfg(test)] + { + self.unanchored_prefix.unwrap_or(true) + } + #[cfg(not(test))] + { + true + } + } + + pub(crate) fn overwrite(self, o: Config) -> Config { + Config { + reverse: o.reverse.or(self.reverse), + utf8: o.utf8.or(self.utf8), + nfa_size_limit: o.nfa_size_limit.or(self.nfa_size_limit), + shrink: o.shrink.or(self.shrink), + captures: o.captures.or(self.captures), + #[cfg(test)] + unanchored_prefix: o.unanchored_prefix.or(self.unanchored_prefix), + } + } +} + +/// A builder for compiling an NFA. +#[derive(Clone, Debug)] +pub struct Builder { + config: Config, + parser: ParserBuilder, +} + +impl Builder { + /// Create a new NFA builder with its default configuration. + pub fn new() -> Builder { + Builder { config: Config::default(), parser: ParserBuilder::new() } + } + + /// Compile the given regular expression into an NFA. + /// + /// If there was a problem parsing the regex, then that error is returned. + /// + /// Otherwise, if there was a problem building the NFA, then an error is + /// returned. The only error that can occur is if the compiled regex would + /// exceed the size limits configured on this builder. 
+ pub fn build(&self, pattern: &str) -> Result<NFA, Error> { + self.build_many(&[pattern]) + } + + pub fn build_many<P: AsRef<str>>( + &self, + patterns: &[P], + ) -> Result<NFA, Error> { + let mut hirs = vec![]; + for p in patterns { + hirs.push( + self.parser + .build() + .parse(p.as_ref()) + .map_err(Error::syntax)?, + ); + log!(log::trace!("parsed: {:?}", p.as_ref())); + } + self.build_many_from_hir(&hirs) + } + + /// Compile the given high level intermediate representation of a regular + /// expression into an NFA. + /// + /// If there was a problem building the NFA, then an error is returned. The + /// only error that can occur is if the compiled regex would exceed the + /// size limits configured on this builder. + pub fn build_from_hir(&self, expr: &Hir) -> Result<NFA, Error> { + self.build_from_hir_with(&mut Compiler::new(), expr) + } + + pub fn build_many_from_hir<H: Borrow<Hir>>( + &self, + exprs: &[H], + ) -> Result<NFA, Error> { + self.build_many_from_hir_with(&mut Compiler::new(), exprs) + } + + /// Compile the given high level intermediate representation of a regular + /// expression into the NFA given using the given compiler. Callers may + /// prefer this over `build` if they would like to reuse allocations while + /// compiling many regular expressions. + /// + /// On success, the given NFA is completely overwritten with the NFA + /// produced by the compiler. + /// + /// If there was a problem building the NFA, then an error is returned. + /// The only error that can occur is if the compiled regex would exceed + /// the size limits configured on this builder. When an error is returned, + /// the contents of `nfa` are unspecified and should not be relied upon. + /// However, it can still be reused in subsequent calls to this method. 
+ fn build_from_hir_with( + &self, + compiler: &mut Compiler, + expr: &Hir, + ) -> Result<NFA, Error> { + self.build_many_from_hir_with(compiler, &[expr]) + } + + fn build_many_from_hir_with<H: Borrow<Hir>>( + &self, + compiler: &mut Compiler, + exprs: &[H], + ) -> Result<NFA, Error> { + compiler.configure(self.config); + compiler.compile(exprs) + } + + /// Apply the given NFA configuration options to this builder. + pub fn configure(&mut self, config: Config) -> &mut Builder { + self.config = self.config.overwrite(config); + self + } + + /// Set the syntax configuration for this builder using + /// [`SyntaxConfig`](../../struct.SyntaxConfig.html). + /// + /// This permits setting things like case insensitivity, Unicode and multi + /// line mode. + /// + /// This syntax configuration generally only applies when an NFA is built + /// directly from a pattern string. If an NFA is built from an HIR, then + /// all syntax settings are ignored. + pub fn syntax( + &mut self, + config: crate::util::syntax::SyntaxConfig, + ) -> &mut Builder { + config.apply(&mut self.parser); + self + } +} + +/// A compiler that converts a regex abstract syntax to an NFA via Thompson's +/// construction. Namely, this compiler permits epsilon transitions between +/// states. +#[derive(Clone, Debug)] +pub struct Compiler { + /// The configuration from the builder. + config: Config, + /// The final NFA that is built. + /// + /// Parts of this NFA are constructed during compilation, but the actual + /// states aren't added until a final "finish" step. This is because the + /// states constructed during compilation have unconditional epsilon + /// transitions, which makes the logic of compilation much simpler. The + /// "finish" step removes these unconditional epsilon transitions and must + /// therefore remap all of the transition state IDs. + nfa: RefCell<NFA>, + /// The set of compiled NFA states. Once a state is compiled, it is + /// assigned a state ID equivalent to its index in this list. 
Subsequent + /// compilation can modify previous states by adding new transitions. + states: RefCell<Vec<CState>>, + /// State used for compiling character classes to UTF-8 byte automata. + /// State is not retained between character class compilations. This just + /// serves to amortize allocation to the extent possible. + utf8_state: RefCell<Utf8State>, + /// State used for arranging character classes in reverse into a trie. + trie_state: RefCell<RangeTrie>, + /// State used for caching common suffixes when compiling reverse UTF-8 + /// automata (for Unicode character classes). + utf8_suffix: RefCell<Utf8SuffixMap>, + /// A map used to re-map state IDs when translating the compiler's internal + /// NFA state representation to the external NFA representation. + remap: RefCell<Vec<StateID>>, + /// A set of compiler internal state IDs that correspond to states that are + /// exclusively epsilon transitions, i.e., goto instructions, combined with + /// the state that they point to. This is used to record said states while + /// transforming the compiler's internal NFA representation to the external + /// form. + empties: RefCell<Vec<(StateID, StateID)>>, + /// The total memory used by each of the 'CState's in 'states'. This only + /// includes heap usage by each state, and not the size of the state + /// itself. + memory_cstates: Cell<usize>, +} + +/// A compiler intermediate state representation for an NFA that is only used +/// during compilation. Once compilation is done, `CState`s are converted +/// to `State`s (defined in the parent module), which have a much simpler +/// representation. +#[derive(Clone, Debug, Eq, PartialEq)] +enum CState { + /// An empty state whose only purpose is to forward the automaton to + /// another state via en epsilon transition. These are useful during + /// compilation but are otherwise removed at the end. + Empty { + next: StateID, + }, + /// An empty state that records a capture location. 
+ /// + /// From the perspective of finite automata, this is precisely equivalent + /// to 'Empty', but serves the purpose of instructing NFA simulations to + /// record additional state when the finite state machine passes through + /// this epsilon transition. + /// + /// These transitions are treated as epsilon transitions with no additional + /// effects in DFAs. + /// + /// 'slot' in this context refers to the specific capture group offset that + /// is being recorded. Each capturing group has two slots corresponding to + /// the start and end of the matching portion of that group. + CaptureStart { + next: StateID, + capture_index: u32, + name: Option<Arc<str>>, + }, + CaptureEnd { + next: StateID, + capture_index: u32, + }, + /// A state that only transitions to `next` if the current input byte is + /// in the range `[start, end]` (inclusive on both ends). + Range { + range: Transition, + }, + /// A state with possibly many transitions, represented in a sparse + /// fashion. Transitions are ordered lexicographically by input range. + /// As such, this may only be used when every transition has equal + /// priority. (In practice, this is only used for encoding large UTF-8 + /// automata.) In contrast, a `Union` state has each alternate in order + /// of priority. Priority is used to implement greedy matching and also + /// alternations themselves, e.g., `abc|a` where `abc` has priority over + /// `a`. + /// + /// To clarify, it is possible to remove `Sparse` and represent all things + /// that `Sparse` is used for via `Union`. But this creates a more bloated + /// NFA with more epsilon transitions than is necessary in the special case + /// of character classes. + Sparse { + ranges: Vec<Transition>, + }, + /// A conditional epsilon transition satisfied via some sort of + /// look-around. 
+ Look { + look: Look, + next: StateID, + }, + /// An alternation such that there exists an epsilon transition to all + /// states in `alternates`, where matches found via earlier transitions + /// are preferred over later transitions. + Union { + alternates: Vec<StateID>, + }, + /// An alternation such that there exists an epsilon transition to all + /// states in `alternates`, where matches found via later transitions are + /// preferred over earlier transitions. + /// + /// This "reverse" state exists for convenience during compilation that + /// permits easy construction of non-greedy combinations of NFA states. At + /// the end of compilation, Union and UnionReverse states are merged into + /// one Union type of state, where the latter has its epsilon transitions + /// reversed to reflect the priority inversion. + /// + /// The "convenience" here arises from the fact that as new states are + /// added to the list of `alternates`, we would like that add operation + /// to be amortized constant time. But if we used a `Union`, we'd need to + /// prepend the state, which takes O(n) time. There are other approaches we + /// could use to solve this, but this seems simple enough. + UnionReverse { + alternates: Vec<StateID>, + }, + /// A match state. There is at most one such occurrence of this state in + /// an NFA for each pattern compiled into the NFA. At time of writing, a + /// match state is always produced for every pattern given, but in theory, + /// if a pattern can never lead to a match, then the match state could be + /// omitted. + /// + /// `id` refers to the ID of the pattern itself, which corresponds to the + /// pattern's index (starting at 0). `start_id` refers to the anchored + /// NFA starting state corresponding to this pattern. + Match { + pattern_id: PatternID, + start_id: StateID, + }, +} + +/// A value that represents the result of compiling a sub-expression of a +/// regex's HIR. 
Specifically, this represents a sub-graph of the NFA that +/// has an initial state at `start` and a final state at `end`. +#[derive(Clone, Copy, Debug)] +pub struct ThompsonRef { + start: StateID, + end: StateID, +} + +impl Compiler { + /// Create a new compiler. + pub fn new() -> Compiler { + Compiler { + config: Config::default(), + nfa: RefCell::new(NFA::empty()), + states: RefCell::new(vec![]), + utf8_state: RefCell::new(Utf8State::new()), + trie_state: RefCell::new(RangeTrie::new()), + utf8_suffix: RefCell::new(Utf8SuffixMap::new(1000)), + remap: RefCell::new(vec![]), + empties: RefCell::new(vec![]), + memory_cstates: Cell::new(0), + } + } + + /// Configure and prepare this compiler from the builder's knobs. + /// + /// The compiler is must always reconfigured by the builder before using it + /// to build an NFA. Namely, this will also clear any latent state in the + /// compiler used during previous compilations. + fn configure(&mut self, config: Config) { + self.config = config; + self.nfa.borrow_mut().clear(); + self.states.borrow_mut().clear(); + self.memory_cstates.set(0); + // We don't need to clear anything else since they are cleared on + // their own and only when they are used. + } + + /// Convert the current intermediate NFA to its final compiled form. + fn compile<H: Borrow<Hir>>(&self, exprs: &[H]) -> Result<NFA, Error> { + if exprs.is_empty() { + return Ok(NFA::never_match()); + } + if exprs.len() > PatternID::LIMIT { + return Err(Error::too_many_patterns(exprs.len())); + } + + // We always add an unanchored prefix unless we were specifically told + // not to (for tests only), or if we know that the regex is anchored + // for all matches. When an unanchored prefix is not added, then the + // NFA's anchored and unanchored start states are equivalent. 
+ let all_anchored = + exprs.iter().all(|e| e.borrow().is_anchored_start()); + let anchored = !self.config.get_unanchored_prefix() || all_anchored; + let unanchored_prefix = if anchored { + self.c_empty()? + } else { + if self.config.get_utf8() { + self.c_unanchored_prefix_valid_utf8()? + } else { + self.c_unanchored_prefix_invalid_utf8()? + } + }; + + let compiled = self.c_alternation( + exprs.iter().with_pattern_ids().map(|(pid, e)| { + let group_kind = hir::GroupKind::CaptureIndex(0); + let one = self.c_group(&group_kind, e.borrow())?; + let match_state_id = self.add_match(pid, one.start)?; + self.patch(one.end, match_state_id)?; + Ok(ThompsonRef { start: one.start, end: match_state_id }) + }), + )?; + self.patch(unanchored_prefix.end, compiled.start)?; + self.finish(compiled.start, unanchored_prefix.start)?; + Ok(self.nfa.replace(NFA::empty())) + } + + /// Finishes the compilation process and populates the NFA attached to this + /// compiler with the final graph. + fn finish( + &self, + start_anchored: StateID, + start_unanchored: StateID, + ) -> Result<(), Error> { + trace!( + "intermediate NFA compilation complete, \ + intermediate NFA size: {} states, {} bytes on heap", + self.states.borrow().len(), + self.nfa_memory_usage(), + ); + let mut nfa = self.nfa.borrow_mut(); + let mut bstates = self.states.borrow_mut(); + let mut remap = self.remap.borrow_mut(); + let mut empties = self.empties.borrow_mut(); + remap.resize(bstates.len(), StateID::ZERO); + empties.clear(); + + // The idea here is to convert our intermediate states to their final + // form. The only real complexity here is the process of converting + // transitions, which are expressed in terms of state IDs. The new + // set of states will be smaller because of partial epsilon removal, + // so the state IDs will not be the same. 
+ for (sid, bstate) in bstates.iter_mut().with_state_ids() { + match *bstate { + CState::Empty { next } => { + // Since we're removing empty states, we need to handle + // them later since we don't yet know which new state this + // empty state will be mapped to. + empties.push((sid, next)); + } + CState::CaptureStart { next, capture_index, ref name } => { + // We can't remove this empty state because of the side + // effect of capturing an offset for this capture slot. + remap[sid] = nfa.add_capture_start( + next, + capture_index, + name.clone(), + )?; + } + CState::CaptureEnd { next, capture_index } => { + // We can't remove this empty state because of the side + // effect of capturing an offset for this capture slot. + remap[sid] = nfa.add_capture_end(next, capture_index)?; + } + CState::Range { range } => { + remap[sid] = nfa.add_range(range)?; + } + CState::Sparse { ref mut ranges } => { + let ranges = + mem::replace(ranges, vec![]).into_boxed_slice(); + remap[sid] = + nfa.add_sparse(SparseTransitions { ranges })?; + } + CState::Look { look, next } => { + remap[sid] = nfa.add_look(next, look)?; + } + CState::Union { ref mut alternates } => { + let alternates = + mem::replace(alternates, vec![]).into_boxed_slice(); + remap[sid] = nfa.add_union(alternates)?; + } + CState::UnionReverse { ref mut alternates } => { + let mut alternates = + mem::replace(alternates, vec![]).into_boxed_slice(); + alternates.reverse(); + remap[sid] = nfa.add_union(alternates)?; + } + CState::Match { start_id, .. } => { + remap[sid] = nfa.add_match()?; + nfa.finish_pattern(start_id)?; + } + } + } + for &(empty_id, mut empty_next) in empties.iter() { + // empty states can point to other empty states, forming a chain. + // So we must follow the chain until the end, which must end at + // a non-empty state, and therefore, a state that is correctly + // remapped. We are guaranteed to terminate because our compiler + // never builds a loop among only empty states. 
+ while let CState::Empty { next } = bstates[empty_next] { + empty_next = next; + } + remap[empty_id] = remap[empty_next]; + } + nfa.set_start_anchored(start_anchored); + nfa.set_start_unanchored(start_unanchored); + nfa.remap(&remap); + trace!( + "final NFA (reverse? {:?}) compilation complete, \ + final NFA size: {} states, {} bytes on heap", + self.config.get_reverse(), + nfa.states().len(), + nfa.memory_usage(), + ); + Ok(()) + } + + fn c(&self, expr: &Hir) -> Result<ThompsonRef, Error> { + match *expr.kind() { + HirKind::Empty => self.c_empty(), + HirKind::Literal(Literal::Unicode(ch)) => self.c_char(ch), + HirKind::Literal(Literal::Byte(b)) => self.c_range(b, b), + HirKind::Class(Class::Bytes(ref c)) => self.c_byte_class(c), + HirKind::Class(Class::Unicode(ref c)) => self.c_unicode_class(c), + HirKind::Anchor(ref anchor) => self.c_anchor(anchor), + HirKind::WordBoundary(ref wb) => self.c_word_boundary(wb), + HirKind::Repetition(ref rep) => self.c_repetition(rep), + HirKind::Group(ref group) => self.c_group(&group.kind, &group.hir), + HirKind::Concat(ref es) => { + self.c_concat(es.iter().map(|e| self.c(e))) + } + HirKind::Alternation(ref es) => { + self.c_alternation(es.iter().map(|e| self.c(e))) + } + } + } + + fn c_concat<I>(&self, mut it: I) -> Result<ThompsonRef, Error> + where + I: DoubleEndedIterator<Item = Result<ThompsonRef, Error>>, + { + let first = if self.is_reverse() { it.next_back() } else { it.next() }; + let ThompsonRef { start, mut end } = match first { + Some(result) => result?, + None => return self.c_empty(), + }; + loop { + let next = + if self.is_reverse() { it.next_back() } else { it.next() }; + let compiled = match next { + Some(result) => result?, + None => break, + }; + self.patch(end, compiled.start)?; + end = compiled.end; + } + Ok(ThompsonRef { start, end }) + } + + fn c_alternation<I>(&self, mut it: I) -> Result<ThompsonRef, Error> + where + I: Iterator<Item = Result<ThompsonRef, Error>>, + { + let first = 
it.next().expect("alternations must be non-empty")?; + let second = match it.next() { + None => return Ok(first), + Some(result) => result?, + }; + + let union = self.add_union()?; + let end = self.add_empty()?; + self.patch(union, first.start)?; + self.patch(first.end, end)?; + self.patch(union, second.start)?; + self.patch(second.end, end)?; + for result in it { + let compiled = result?; + self.patch(union, compiled.start)?; + self.patch(compiled.end, end)?; + } + Ok(ThompsonRef { start: union, end }) + } + + fn c_group( + &self, + kind: &hir::GroupKind, + expr: &Hir, + ) -> Result<ThompsonRef, Error> { + if !self.config.get_captures() { + return self.c(expr); + } + let (capi, name) = match *kind { + hir::GroupKind::NonCapturing => return self.c(expr), + hir::GroupKind::CaptureIndex(index) => (index, None), + hir::GroupKind::CaptureName { ref name, index } => { + (index, Some(Arc::from(&**name))) + } + }; + + let start = self.add_capture_start(capi, name)?; + let inner = self.c(expr)?; + let end = self.add_capture_end(capi)?; + + self.patch(start, inner.start)?; + self.patch(inner.end, end)?; + Ok(ThompsonRef { start, end }) + } + + fn c_repetition( + &self, + rep: &hir::Repetition, + ) -> Result<ThompsonRef, Error> { + match rep.kind { + hir::RepetitionKind::ZeroOrOne => { + self.c_zero_or_one(&rep.hir, rep.greedy) + } + hir::RepetitionKind::ZeroOrMore => { + self.c_at_least(&rep.hir, rep.greedy, 0) + } + hir::RepetitionKind::OneOrMore => { + self.c_at_least(&rep.hir, rep.greedy, 1) + } + hir::RepetitionKind::Range(ref rng) => match *rng { + hir::RepetitionRange::Exactly(count) => { + self.c_exactly(&rep.hir, count) + } + hir::RepetitionRange::AtLeast(m) => { + self.c_at_least(&rep.hir, rep.greedy, m) + } + hir::RepetitionRange::Bounded(min, max) => { + self.c_bounded(&rep.hir, rep.greedy, min, max) + } + }, + } + } + + fn c_bounded( + &self, + expr: &Hir, + greedy: bool, + min: u32, + max: u32, + ) -> Result<ThompsonRef, Error> { + let prefix = 
self.c_exactly(expr, min)?; + if min == max { + return Ok(prefix); + } + + // It is tempting here to compile the rest here as a concatenation + // of zero-or-one matches. i.e., for `a{2,5}`, compile it as if it + // were `aaa?a?a?`. The problem here is that it leads to this program: + // + // >000000: 61 => 01 + // 000001: 61 => 02 + // 000002: union(03, 04) + // 000003: 61 => 04 + // 000004: union(05, 06) + // 000005: 61 => 06 + // 000006: union(07, 08) + // 000007: 61 => 08 + // 000008: MATCH + // + // And effectively, once you hit state 2, the epsilon closure will + // include states 3, 5, 6, 7 and 8, which is quite a bit. It is better + // to instead compile it like so: + // + // >000000: 61 => 01 + // 000001: 61 => 02 + // 000002: union(03, 08) + // 000003: 61 => 04 + // 000004: union(05, 08) + // 000005: 61 => 06 + // 000006: union(07, 08) + // 000007: 61 => 08 + // 000008: MATCH + // + // So that the epsilon closure of state 2 is now just 3 and 8. + let empty = self.add_empty()?; + let mut prev_end = prefix.end; + for _ in min..max { + let union = if greedy { + self.add_union() + } else { + self.add_reverse_union() + }?; + let compiled = self.c(expr)?; + self.patch(prev_end, union)?; + self.patch(union, compiled.start)?; + self.patch(union, empty)?; + prev_end = compiled.end; + } + self.patch(prev_end, empty)?; + Ok(ThompsonRef { start: prefix.start, end: empty }) + } + + fn c_at_least( + &self, + expr: &Hir, + greedy: bool, + n: u32, + ) -> Result<ThompsonRef, Error> { + if n == 0 { + // When the expression cannot match the empty string, then we + // can get away with something much simpler: just one 'alt' + // instruction that optionally repeats itself. But if the expr + // can match the empty string... see below. 
+ if !expr.is_match_empty() { + let union = if greedy { + self.add_union() + } else { + self.add_reverse_union() + }?; + let compiled = self.c(expr)?; + self.patch(union, compiled.start)?; + self.patch(compiled.end, union)?; + return Ok(ThompsonRef { start: union, end: union }); + } + + // What's going on here? Shouldn't x* be simpler than this? It + // turns out that when implementing leftmost-first (Perl-like) + // match semantics, x* results in an incorrect preference order + // when computing the transitive closure of states if and only if + // 'x' can match the empty string. So instead, we compile x* as + // (x+)?, which preserves the correct preference order. + // + // See: https://github.com/rust-lang/regex/issues/779 + let compiled = self.c(expr)?; + let plus = if greedy { + self.add_union() + } else { + self.add_reverse_union() + }?; + self.patch(compiled.end, plus)?; + self.patch(plus, compiled.start)?; + + let question = if greedy { + self.add_union() + } else { + self.add_reverse_union() + }?; + let empty = self.add_empty()?; + self.patch(question, compiled.start)?; + self.patch(question, empty)?; + self.patch(plus, empty)?; + Ok(ThompsonRef { start: question, end: empty }) + } else if n == 1 { + let compiled = self.c(expr)?; + let union = if greedy { + self.add_union() + } else { + self.add_reverse_union() + }?; + self.patch(compiled.end, union)?; + self.patch(union, compiled.start)?; + Ok(ThompsonRef { start: compiled.start, end: union }) + } else { + let prefix = self.c_exactly(expr, n - 1)?; + let last = self.c(expr)?; + let union = if greedy { + self.add_union() + } else { + self.add_reverse_union() + }?; + self.patch(prefix.end, last.start)?; + self.patch(last.end, union)?; + self.patch(union, last.start)?; + Ok(ThompsonRef { start: prefix.start, end: union }) + } + } + + fn c_zero_or_one( + &self, + expr: &Hir, + greedy: bool, + ) -> Result<ThompsonRef, Error> { + let union = + if greedy { self.add_union() } else { self.add_reverse_union() }?; + 
let compiled = self.c(expr)?; + let empty = self.add_empty()?; + self.patch(union, compiled.start)?; + self.patch(union, empty)?; + self.patch(compiled.end, empty)?; + Ok(ThompsonRef { start: union, end: empty }) + } + + fn c_exactly(&self, expr: &Hir, n: u32) -> Result<ThompsonRef, Error> { + let it = (0..n).map(|_| self.c(expr)); + self.c_concat(it) + } + + fn c_byte_class( + &self, + cls: &hir::ClassBytes, + ) -> Result<ThompsonRef, Error> { + let end = self.add_empty()?; + let mut trans = Vec::with_capacity(cls.ranges().len()); + for r in cls.iter() { + trans.push(Transition { + start: r.start(), + end: r.end(), + next: end, + }); + } + Ok(ThompsonRef { start: self.add_sparse(trans)?, end }) + } + + fn c_unicode_class( + &self, + cls: &hir::ClassUnicode, + ) -> Result<ThompsonRef, Error> { + // If all we have are ASCII ranges wrapped in a Unicode package, then + // there is zero reason to bring out the big guns. We can fit all ASCII + // ranges within a single sparse state. + if cls.is_all_ascii() { + let end = self.add_empty()?; + let mut trans = Vec::with_capacity(cls.ranges().len()); + for r in cls.iter() { + assert!(r.start() <= '\x7F'); + assert!(r.end() <= '\x7F'); + trans.push(Transition { + start: r.start() as u8, + end: r.end() as u8, + next: end, + }); + } + Ok(ThompsonRef { start: self.add_sparse(trans)?, end }) + } else if self.is_reverse() { + if !self.config.get_shrink() { + // When we don't want to spend the extra time shrinking, we + // compile the UTF-8 automaton in reverse using something like + // the "naive" approach, but will attempt to re-use common + // suffixes. + self.c_unicode_class_reverse_with_suffix(cls) + } else { + // When we want to shrink our NFA for reverse UTF-8 automata, + // we cannot feed UTF-8 sequences directly to the UTF-8 + // compiler, since the UTF-8 compiler requires all sequences + // to be lexicographically sorted. 
Instead, we organize our + // sequences into a range trie, which can then output our + // sequences in the correct order. Unfortunately, building the + // range trie is fairly expensive (but not nearly as expensive + // as building a DFA). Hence the reason why the 'shrink' option + // exists, so that this path can be toggled off. For example, + // we might want to turn this off if we know we won't be + // compiling a DFA. + let mut trie = self.trie_state.borrow_mut(); + trie.clear(); + + for rng in cls.iter() { + for mut seq in Utf8Sequences::new(rng.start(), rng.end()) { + seq.reverse(); + trie.insert(seq.as_slice()); + } + } + let mut utf8_state = self.utf8_state.borrow_mut(); + let mut utf8c = Utf8Compiler::new(self, &mut *utf8_state)?; + trie.iter(|seq| { + utf8c.add(&seq)?; + Ok(()) + })?; + utf8c.finish() + } + } else { + // In the forward direction, we always shrink our UTF-8 automata + // because we can stream it right into the UTF-8 compiler. There + // is almost no downside (in either memory or time) to using this + // approach. + let mut utf8_state = self.utf8_state.borrow_mut(); + let mut utf8c = Utf8Compiler::new(self, &mut *utf8_state)?; + for rng in cls.iter() { + for seq in Utf8Sequences::new(rng.start(), rng.end()) { + utf8c.add(seq.as_slice())?; + } + } + utf8c.finish() + } + + // For reference, the code below is the "naive" version of compiling a + // UTF-8 automaton. It is deliciously simple (and works for both the + // forward and reverse cases), but will unfortunately produce very + // large NFAs. When compiling a forward automaton, the size difference + // can sometimes be an order of magnitude. For example, the '\w' regex + // will generate about ~3000 NFA states using the naive approach below, + // but only 283 states when using the approach above. This is because + // the approach above actually compiles a *minimal* (or near minimal, + // because of the bounded hashmap for reusing equivalent states) UTF-8 + // automaton. 
+ // + // The code below is kept as a reference point in order to make it + // easier to understand the higher level goal here. Although, it will + // almost certainly bit-rot, so keep that in mind. + /* + let it = cls + .iter() + .flat_map(|rng| Utf8Sequences::new(rng.start(), rng.end())) + .map(|seq| { + let it = seq + .as_slice() + .iter() + .map(|rng| self.c_range(rng.start, rng.end)); + self.c_concat(it) + }); + self.c_alternation(it) + */ + } + + fn c_unicode_class_reverse_with_suffix( + &self, + cls: &hir::ClassUnicode, + ) -> Result<ThompsonRef, Error> { + // N.B. It would likely be better to cache common *prefixes* in the + // reverse direction, but it's not quite clear how to do that. The + // advantage of caching suffixes is that it does give us a win, and + // has a very small additional overhead. + let mut cache = self.utf8_suffix.borrow_mut(); + cache.clear(); + + let union = self.add_union()?; + let alt_end = self.add_empty()?; + for urng in cls.iter() { + for seq in Utf8Sequences::new(urng.start(), urng.end()) { + let mut end = alt_end; + for brng in seq.as_slice() { + let key = Utf8SuffixKey { + from: end, + start: brng.start, + end: brng.end, + }; + let hash = cache.hash(&key); + if let Some(id) = cache.get(&key, hash) { + end = id; + continue; + } + + let compiled = self.c_range(brng.start, brng.end)?; + self.patch(compiled.end, end)?; + end = compiled.start; + cache.set(key, hash, end); + } + self.patch(union, end)?; + } + } + Ok(ThompsonRef { start: union, end: alt_end }) + } + + fn c_anchor(&self, anchor: &Anchor) -> Result<ThompsonRef, Error> { + let look = match *anchor { + Anchor::StartLine => Look::StartLine, + Anchor::EndLine => Look::EndLine, + Anchor::StartText => Look::StartText, + Anchor::EndText => Look::EndText, + }; + let id = self.add_look(look)?; + Ok(ThompsonRef { start: id, end: id }) + } + + fn c_word_boundary( + &self, + wb: &WordBoundary, + ) -> Result<ThompsonRef, Error> { + let look = match *wb { + WordBoundary::Unicode => 
Look::WordBoundaryUnicode, + WordBoundary::UnicodeNegate => Look::WordBoundaryUnicodeNegate, + WordBoundary::Ascii => Look::WordBoundaryAscii, + WordBoundary::AsciiNegate => Look::WordBoundaryAsciiNegate, + }; + let id = self.add_look(look)?; + Ok(ThompsonRef { start: id, end: id }) + } + + fn c_char(&self, ch: char) -> Result<ThompsonRef, Error> { + let mut buf = [0; 4]; + let it = ch + .encode_utf8(&mut buf) + .as_bytes() + .iter() + .map(|&b| self.c_range(b, b)); + self.c_concat(it) + } + + fn c_range(&self, start: u8, end: u8) -> Result<ThompsonRef, Error> { + let id = self.add_range(start, end)?; + Ok(ThompsonRef { start: id, end: id }) + } + + fn c_empty(&self) -> Result<ThompsonRef, Error> { + let id = self.add_empty()?; + Ok(ThompsonRef { start: id, end: id }) + } + + fn c_unanchored_prefix_valid_utf8(&self) -> Result<ThompsonRef, Error> { + self.c_at_least(&Hir::any(false), false, 0) + } + + fn c_unanchored_prefix_invalid_utf8(&self) -> Result<ThompsonRef, Error> { + self.c_at_least(&Hir::any(true), false, 0) + } + + fn patch(&self, from: StateID, to: StateID) -> Result<(), Error> { + let old_memory_cstates = self.memory_cstates.get(); + match self.states.borrow_mut()[from] { + CState::Empty { ref mut next } => { + *next = to; + } + CState::Range { ref mut range } => { + range.next = to; + } + CState::Sparse { .. } => { + panic!("cannot patch from a sparse NFA state") + } + CState::Look { ref mut next, .. } => { + *next = to; + } + CState::Union { ref mut alternates } => { + alternates.push(to); + self.memory_cstates + .set(old_memory_cstates + mem::size_of::<StateID>()); + } + CState::UnionReverse { ref mut alternates } => { + alternates.push(to); + self.memory_cstates + .set(old_memory_cstates + mem::size_of::<StateID>()); + } + CState::CaptureStart { ref mut next, .. } => { + *next = to; + } + CState::CaptureEnd { ref mut next, .. } => { + *next = to; + } + CState::Match { .. 
} => {} + } + if old_memory_cstates != self.memory_cstates.get() { + self.check_nfa_size_limit()?; + } + Ok(()) + } + + fn add_empty(&self) -> Result<StateID, Error> { + self.add_state(CState::Empty { next: StateID::ZERO }) + } + + fn add_capture_start( + &self, + capture_index: u32, + name: Option<Arc<str>>, + ) -> Result<StateID, Error> { + self.add_state(CState::CaptureStart { + next: StateID::ZERO, + capture_index, + name, + }) + } + + fn add_capture_end(&self, capture_index: u32) -> Result<StateID, Error> { + self.add_state(CState::CaptureEnd { + next: StateID::ZERO, + capture_index, + }) + } + + fn add_range(&self, start: u8, end: u8) -> Result<StateID, Error> { + let trans = Transition { start, end, next: StateID::ZERO }; + self.add_state(CState::Range { range: trans }) + } + + fn add_sparse(&self, ranges: Vec<Transition>) -> Result<StateID, Error> { + if ranges.len() == 1 { + self.add_state(CState::Range { range: ranges[0] }) + } else { + self.add_state(CState::Sparse { ranges }) + } + } + + fn add_look(&self, mut look: Look) -> Result<StateID, Error> { + if self.is_reverse() { + look = look.reversed(); + } + self.add_state(CState::Look { look, next: StateID::ZERO }) + } + + fn add_union(&self) -> Result<StateID, Error> { + self.add_state(CState::Union { alternates: vec![] }) + } + + fn add_reverse_union(&self) -> Result<StateID, Error> { + self.add_state(CState::UnionReverse { alternates: vec![] }) + } + + fn add_match( + &self, + pattern_id: PatternID, + start_id: StateID, + ) -> Result<StateID, Error> { + self.add_state(CState::Match { pattern_id, start_id }) + } + + fn add_state(&self, state: CState) -> Result<StateID, Error> { + let mut states = self.states.borrow_mut(); + let id = StateID::new(states.len()) + .map_err(|_| Error::too_many_states(states.len()))?; + self.memory_cstates + .set(self.memory_cstates.get() + state.memory_usage()); + states.push(state); + // If we don't explicitly drop this, then 'nfa_memory_usage' will also + // try to borrow 
it when we check the size limit and hit an error. + drop(states); + self.check_nfa_size_limit()?; + Ok(id) + } + + fn is_reverse(&self) -> bool { + self.config.get_reverse() + } + + /// If an NFA size limit was set, this checks that the NFA compiled so far + /// fits within that limit. If so, then nothing is returned. Otherwise, an + /// error is returned. + /// + /// This should be called after increasing the heap usage of the + /// intermediate NFA. + /// + /// Note that this borrows 'self.states', so callers should ensure there is + /// no mutable borrow of it outstanding. + fn check_nfa_size_limit(&self) -> Result<(), Error> { + if let Some(limit) = self.config.get_nfa_size_limit() { + if self.nfa_memory_usage() > limit { + return Err(Error::exceeded_size_limit(limit)); + } + } + Ok(()) + } + + /// Returns the heap memory usage, in bytes, of the NFA compiled so far. + /// + /// Note that this is an approximation of how big the final NFA will be. + /// In practice, the final NFA will likely be a bit smaller since it uses + /// things like `Box<[T]>` instead of `Vec<T>`. + fn nfa_memory_usage(&self) -> usize { + self.states.borrow().len() * mem::size_of::<CState>() + + self.memory_cstates.get() + } +} + +impl CState { + fn memory_usage(&self) -> usize { + match *self { + CState::Empty { .. } + | CState::Range { .. } + | CState::Look { .. } + | CState::CaptureStart { .. } + | CState::CaptureEnd { .. } + | CState::Match { .. 
} => 0, + CState::Sparse { ref ranges } => { + ranges.len() * mem::size_of::<Transition>() + } + CState::Union { ref alternates } => { + alternates.len() * mem::size_of::<StateID>() + } + CState::UnionReverse { ref alternates } => { + alternates.len() * mem::size_of::<StateID>() + } + } + } +} + +#[derive(Debug)] +struct Utf8Compiler<'a> { + nfac: &'a Compiler, + state: &'a mut Utf8State, + target: StateID, +} + +#[derive(Clone, Debug)] +struct Utf8State { + compiled: Utf8BoundedMap, + uncompiled: Vec<Utf8Node>, +} + +#[derive(Clone, Debug)] +struct Utf8Node { + trans: Vec<Transition>, + last: Option<Utf8LastTransition>, +} + +#[derive(Clone, Debug)] +struct Utf8LastTransition { + start: u8, + end: u8, +} + +impl Utf8State { + fn new() -> Utf8State { + Utf8State { compiled: Utf8BoundedMap::new(10_000), uncompiled: vec![] } + } + + fn clear(&mut self) { + self.compiled.clear(); + self.uncompiled.clear(); + } +} + +impl<'a> Utf8Compiler<'a> { + fn new( + nfac: &'a Compiler, + state: &'a mut Utf8State, + ) -> Result<Utf8Compiler<'a>, Error> { + let target = nfac.add_empty()?; + state.clear(); + let mut utf8c = Utf8Compiler { nfac, state, target }; + utf8c.add_empty(); + Ok(utf8c) + } + + fn finish(&mut self) -> Result<ThompsonRef, Error> { + self.compile_from(0)?; + let node = self.pop_root(); + let start = self.compile(node)?; + Ok(ThompsonRef { start, end: self.target }) + } + + fn add(&mut self, ranges: &[Utf8Range]) -> Result<(), Error> { + let prefix_len = ranges + .iter() + .zip(&self.state.uncompiled) + .take_while(|&(range, node)| { + node.last.as_ref().map_or(false, |t| { + (t.start, t.end) == (range.start, range.end) + }) + }) + .count(); + assert!(prefix_len < ranges.len()); + self.compile_from(prefix_len)?; + self.add_suffix(&ranges[prefix_len..]); + Ok(()) + } + + fn compile_from(&mut self, from: usize) -> Result<(), Error> { + let mut next = self.target; + while from + 1 < self.state.uncompiled.len() { + let node = self.pop_freeze(next); + next = 
self.compile(node)?; + } + self.top_last_freeze(next); + Ok(()) + } + + fn compile(&mut self, node: Vec<Transition>) -> Result<StateID, Error> { + let hash = self.state.compiled.hash(&node); + if let Some(id) = self.state.compiled.get(&node, hash) { + return Ok(id); + } + let id = self.nfac.add_sparse(node.clone())?; + self.state.compiled.set(node, hash, id); + Ok(id) + } + + fn add_suffix(&mut self, ranges: &[Utf8Range]) { + assert!(!ranges.is_empty()); + let last = self + .state + .uncompiled + .len() + .checked_sub(1) + .expect("non-empty nodes"); + assert!(self.state.uncompiled[last].last.is_none()); + self.state.uncompiled[last].last = Some(Utf8LastTransition { + start: ranges[0].start, + end: ranges[0].end, + }); + for r in &ranges[1..] { + self.state.uncompiled.push(Utf8Node { + trans: vec![], + last: Some(Utf8LastTransition { start: r.start, end: r.end }), + }); + } + } + + fn add_empty(&mut self) { + self.state.uncompiled.push(Utf8Node { trans: vec![], last: None }); + } + + fn pop_freeze(&mut self, next: StateID) -> Vec<Transition> { + let mut uncompiled = self.state.uncompiled.pop().unwrap(); + uncompiled.set_last_transition(next); + uncompiled.trans + } + + fn pop_root(&mut self) -> Vec<Transition> { + assert_eq!(self.state.uncompiled.len(), 1); + assert!(self.state.uncompiled[0].last.is_none()); + self.state.uncompiled.pop().expect("non-empty nodes").trans + } + + fn top_last_freeze(&mut self, next: StateID) { + let last = self + .state + .uncompiled + .len() + .checked_sub(1) + .expect("non-empty nodes"); + self.state.uncompiled[last].set_last_transition(next); + } +} + +impl Utf8Node { + fn set_last_transition(&mut self, next: StateID) { + if let Some(last) = self.last.take() { + self.trans.push(Transition { + start: last.start, + end: last.end, + next, + }); + } + } +} + +#[cfg(test)] +mod tests { + use alloc::vec::Vec; + + use super::{ + Builder, Config, PatternID, SparseTransitions, State, StateID, + Transition, NFA, + }; + + fn build(pattern: 
&str) -> NFA { + Builder::new() + .configure(Config::new().captures(false).unanchored_prefix(false)) + .build(pattern) + .unwrap() + } + + fn pid(id: usize) -> PatternID { + PatternID::new(id).unwrap() + } + + fn sid(id: usize) -> StateID { + StateID::new(id).unwrap() + } + + fn s_byte(byte: u8, next: usize) -> State { + let next = sid(next); + let trans = Transition { start: byte, end: byte, next }; + State::Range { range: trans } + } + + fn s_range(start: u8, end: u8, next: usize) -> State { + let next = sid(next); + let trans = Transition { start, end, next }; + State::Range { range: trans } + } + + fn s_sparse(ranges: &[(u8, u8, usize)]) -> State { + let ranges = ranges + .iter() + .map(|&(start, end, next)| Transition { + start, + end, + next: sid(next), + }) + .collect(); + State::Sparse(SparseTransitions { ranges }) + } + + fn s_union(alts: &[usize]) -> State { + State::Union { + alternates: alts + .iter() + .map(|&id| sid(id)) + .collect::<Vec<StateID>>() + .into_boxed_slice(), + } + } + + fn s_match(id: usize) -> State { + State::Match { id: pid(id) } + } + + // Test that building an unanchored NFA has an appropriate `(?s:.)*?` + // prefix. + #[test] + fn compile_unanchored_prefix() { + // When the machine can only match valid UTF-8. + let nfa = Builder::new() + .configure(Config::new().captures(false)) + .build(r"a") + .unwrap(); + // There should be many states since the `.` in `(?s:.)*?` matches any + // Unicode scalar value. + assert_eq!(11, nfa.len()); + assert_eq!(nfa.states[10], s_match(0)); + assert_eq!(nfa.states[9], s_byte(b'a', 10)); + + // When the machine can match through invalid UTF-8. 
+ let nfa = Builder::new() + .configure(Config::new().captures(false).utf8(false)) + .build(r"a") + .unwrap(); + assert_eq!( + nfa.states, + &[ + s_union(&[2, 1]), + s_range(0, 255, 0), + s_byte(b'a', 3), + s_match(0), + ] + ); + } + + #[test] + fn compile_empty() { + assert_eq!(build("").states, &[s_match(0),]); + } + + #[test] + fn compile_literal() { + assert_eq!(build("a").states, &[s_byte(b'a', 1), s_match(0),]); + assert_eq!( + build("ab").states, + &[s_byte(b'a', 1), s_byte(b'b', 2), s_match(0),] + ); + assert_eq!( + build("☃").states, + &[s_byte(0xE2, 1), s_byte(0x98, 2), s_byte(0x83, 3), s_match(0)] + ); + + // Check that non-UTF-8 literals work. + let nfa = Builder::new() + .configure( + Config::new() + .captures(false) + .utf8(false) + .unanchored_prefix(false), + ) + .syntax(crate::SyntaxConfig::new().utf8(false)) + .build(r"(?-u)\xFF") + .unwrap(); + assert_eq!(nfa.states, &[s_byte(b'\xFF', 1), s_match(0),]); + } + + #[test] + fn compile_class() { + assert_eq!( + build(r"[a-z]").states, + &[s_range(b'a', b'z', 1), s_match(0),] + ); + assert_eq!( + build(r"[x-za-c]").states, + &[s_sparse(&[(b'a', b'c', 1), (b'x', b'z', 1)]), s_match(0)] + ); + assert_eq!( + build(r"[\u03B1-\u03B4]").states, + &[s_range(0xB1, 0xB4, 2), s_byte(0xCE, 0), s_match(0)] + ); + assert_eq!( + build(r"[\u03B1-\u03B4\u{1F919}-\u{1F91E}]").states, + &[ + s_range(0xB1, 0xB4, 5), + s_range(0x99, 0x9E, 5), + s_byte(0xA4, 1), + s_byte(0x9F, 2), + s_sparse(&[(0xCE, 0xCE, 0), (0xF0, 0xF0, 3)]), + s_match(0), + ] + ); + assert_eq!( + build(r"[a-z☃]").states, + &[ + s_byte(0x83, 3), + s_byte(0x98, 0), + s_sparse(&[(b'a', b'z', 3), (0xE2, 0xE2, 1)]), + s_match(0), + ] + ); + } + + #[test] + fn compile_repetition() { + assert_eq!( + build(r"a?").states, + &[s_union(&[1, 2]), s_byte(b'a', 2), s_match(0),] + ); + assert_eq!( + build(r"a??").states, + &[s_union(&[2, 1]), s_byte(b'a', 2), s_match(0),] + ); + } + + #[test] + fn compile_group() { + assert_eq!( + build(r"ab+").states, + 
&[s_byte(b'a', 1), s_byte(b'b', 2), s_union(&[1, 3]), s_match(0)]
+        );
+        assert_eq!(
+            build(r"(ab)").states,
+            &[s_byte(b'a', 1), s_byte(b'b', 2), s_match(0)]
+        );
+        assert_eq!(
+            build(r"(ab)+").states,
+            &[s_byte(b'a', 1), s_byte(b'b', 2), s_union(&[0, 3]), s_match(0)]
+        );
+    }
+
+    #[test]
+    fn compile_alternation() {
+        assert_eq!(
+            build(r"a|b").states,
+            &[s_byte(b'a', 3), s_byte(b'b', 3), s_union(&[0, 1]), s_match(0)]
+        );
+        assert_eq!(
+            build(r"|b").states,
+            &[s_byte(b'b', 2), s_union(&[2, 0]), s_match(0)]
+        );
+        assert_eq!(
+            build(r"a|").states,
+            &[s_byte(b'a', 2), s_union(&[0, 2]), s_match(0)]
+        );
+    }
+
+    #[test]
+    fn many_start_pattern() {
+        let nfa = Builder::new()
+            .configure(Config::new().captures(false).unanchored_prefix(false))
+            .build_many(&["a", "b"])
+            .unwrap();
+        assert_eq!(
+            nfa.states,
+            &[
+                s_byte(b'a', 1),
+                s_match(0),
+                s_byte(b'b', 3),
+                s_match(1),
+                s_union(&[0, 2]),
+            ]
+        );
+        assert_eq!(nfa.start_anchored().as_usize(), 4);
+        assert_eq!(nfa.start_unanchored().as_usize(), 4);
+        // Test that the start states for each individual pattern are correct.
+        assert_eq!(nfa.start_pattern(pid(0)), sid(0));
+        assert_eq!(nfa.start_pattern(pid(1)), sid(2));
+    }
+}
diff --git a/vendor/regex-automata/src/nfa/thompson/error.rs b/vendor/regex-automata/src/nfa/thompson/error.rs
new file mode 100644
index 000000000..52f02e888
--- /dev/null
+++ b/vendor/regex-automata/src/nfa/thompson/error.rs
@@ -0,0 +1,145 @@
+use crate::util::id::{PatternID, StateID};
+
+/// An error that can occur during the construction of a thompson NFA.
+///
+/// This error does not provide many introspection capabilities. There are
+/// generally only two things you can do with it:
+///
+/// * Obtain a human readable message via its `std::fmt::Display` impl.
+/// * Access an underlying [`regex_syntax::Error`] type from its `source`
+/// method via the `std::error::Error` trait.
This error only occurs when using
+/// convenience routines for building an NFA directly from a pattern string.
+///
+/// Otherwise, errors typically occur when a limit has been breached. For
+/// example, if the total heap usage of the compiled NFA exceeds the limit
+/// set by [`Config::nfa_size_limit`](crate::nfa::thompson::Config), then
+/// building the NFA will fail.
+#[derive(Clone, Debug)]
+pub struct Error {
+    kind: ErrorKind,
+}
+
+/// The kind of error that occurred during the construction of a thompson NFA.
+#[derive(Clone, Debug)]
+enum ErrorKind {
+    /// An error that occurred while parsing a regular expression. Note that
+    /// this error may be printed over multiple lines, and is generally
+    /// intended to be end user readable on its own.
+    Syntax(regex_syntax::Error),
+    /// An error that occurs if too many patterns were given to the NFA
+    /// compiler.
+    TooManyPatterns {
+        /// The number of patterns given, which exceeds the limit.
+        given: usize,
+        /// The limit on the number of patterns.
+        limit: usize,
+    },
+    /// An error that occurs if too many states are produced while building an NFA.
+    TooManyStates {
+        /// The minimum number of states that are desired, which exceeds the
+        /// limit.
+        given: usize,
+        /// The limit on the number of states.
+        limit: usize,
+    },
+    /// An error that occurs when NFA compilation exceeds a configured heap
+    /// limit.
+    ExceededSizeLimit {
+        /// The configured limit, in bytes.
+        limit: usize,
+    },
+    /// An error that occurs when an invalid capture group index is added to
+    /// the NFA. An "invalid" index can be one that is too big (e.g., results
+    /// in an integer overflow) or one that is discontinuous from previous
+    /// capture group indices added.
+    InvalidCaptureIndex {
+        /// The invalid index that was given.
+        index: usize,
+    },
+    /// An error that occurs when an NFA contains a Unicode word boundary, but
+    /// where the crate was compiled without the necessary data for dealing
+    /// with Unicode word boundaries.
+ UnicodeWordUnavailable, +} + +impl Error { + fn kind(&self) -> &ErrorKind { + &self.kind + } + + pub(crate) fn syntax(err: regex_syntax::Error) -> Error { + Error { kind: ErrorKind::Syntax(err) } + } + + pub(crate) fn too_many_patterns(given: usize) -> Error { + let limit = PatternID::LIMIT; + Error { kind: ErrorKind::TooManyPatterns { given, limit } } + } + + pub(crate) fn too_many_states(given: usize) -> Error { + let limit = StateID::LIMIT; + Error { kind: ErrorKind::TooManyStates { given, limit } } + } + + pub(crate) fn exceeded_size_limit(limit: usize) -> Error { + Error { kind: ErrorKind::ExceededSizeLimit { limit } } + } + + pub(crate) fn invalid_capture_index(index: usize) -> Error { + Error { kind: ErrorKind::InvalidCaptureIndex { index } } + } + + pub(crate) fn unicode_word_unavailable() -> Error { + Error { kind: ErrorKind::UnicodeWordUnavailable } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for Error { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self.kind() { + ErrorKind::Syntax(ref err) => Some(err), + ErrorKind::TooManyPatterns { .. } => None, + ErrorKind::TooManyStates { .. } => None, + ErrorKind::ExceededSizeLimit { .. } => None, + ErrorKind::InvalidCaptureIndex { .. 
} => None, + ErrorKind::UnicodeWordUnavailable => None, + } + } +} + +impl core::fmt::Display for Error { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self.kind() { + ErrorKind::Syntax(_) => write!(f, "error parsing regex"), + ErrorKind::TooManyPatterns { given, limit } => write!( + f, + "attemped to compile {} patterns, \ + which exceeds the limit of {}", + given, limit, + ), + ErrorKind::TooManyStates { given, limit } => write!( + f, + "attemped to compile {} NFA states, \ + which exceeds the limit of {}", + given, limit, + ), + ErrorKind::ExceededSizeLimit { limit } => write!( + f, + "heap usage during NFA compilation exceeded limit of {}", + limit, + ), + ErrorKind::InvalidCaptureIndex { index } => write!( + f, + "capture group index {} is invalid (too big or discontinuous)", + index, + ), + ErrorKind::UnicodeWordUnavailable => write!( + f, + "crate has been compiled without Unicode word boundary \ + support, but the NFA contains Unicode word boundary \ + assertions", + ), + } + } +} diff --git a/vendor/regex-automata/src/nfa/map.rs b/vendor/regex-automata/src/nfa/thompson/map.rs index e636c0dd3..79ff63ca3 100644 --- a/vendor/regex-automata/src/nfa/map.rs +++ b/vendor/regex-automata/src/nfa/thompson/map.rs @@ -7,10 +7,10 @@ // These maps are used in some fairly hot code when generating NFA states for // large Unicode character classes. // -// Instead of exposing a rich hashmap entry API, we just permit the caller -// to produce a hash of the key directly. The hash can then be reused for both -// lookups and insertions at the cost of leaking things a bit. But these are -// for internal use only, so it's fine. +// Instead of exposing a rich hashmap entry API, we just permit the caller to +// produce a hash of the key directly. The hash can then be reused for both +// lookups and insertions at the cost of leaking abstraction a bit. But these +// are for internal use only, so it's fine. 
// // The Utf8BoundedMap is used for Daciuk's algorithm for constructing a // (almost) minimal DFA for large Unicode character classes in linear time. @@ -33,7 +33,9 @@ // could make one generic map, but the machinery didn't seem worth it. They // are simple enough. -use nfa::{StateID, Transition}; +use alloc::{vec, vec::Vec}; + +use crate::{nfa::thompson::Transition, util::id::StateID}; // Basic FNV-1a hash constants as described in: // https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function @@ -57,7 +59,7 @@ const INIT: u64 = 14695981039346656037; /// Specifically, one could observe the difference with std's hashmap via /// something like the following benchmark: /// -/// hyperfine "regex-automata-debug debug -acqr '\w{40} ecurB'" +/// hyperfine "regex-cli debug nfa thompson --quiet --reverse '\w{90} ecurB'" /// /// But to observe that difference, you'd have to modify the code to use /// std's hashmap. @@ -74,6 +76,9 @@ pub struct Utf8BoundedMap { /// The current version of this map. Only entries with matching versions /// are considered during lookups. If an entry is found with a mismatched /// version, then the map behaves as if the entry does not exist. + /// + /// This makes it possible to clear the map by simply incrementing the + /// version number instead of actually deallocating any storage. version: u16, /// The total number of entries this map can store. capacity: usize, @@ -119,6 +124,9 @@ impl Utf8BoundedMap { self.map = vec![Utf8BoundedEntry::default(); self.capacity]; } else { self.version = self.version.wrapping_add(1); + // If we loop back to version 0, then we forcefully clear the + // entire map. Otherwise, it might be possible to incorrectly + // match entries used to generate other NFAs. 
if self.version == 0 { self.map = vec![Utf8BoundedEntry::default(); self.capacity]; } @@ -131,7 +139,7 @@ impl Utf8BoundedMap { for t in key { h = (h ^ (t.start as u64)).wrapping_mul(PRIME); h = (h ^ (t.end as u64)).wrapping_mul(PRIME); - h = (h ^ (t.next as u64)).wrapping_mul(PRIME); + h = (h ^ (t.next.as_usize() as u64)).wrapping_mul(PRIME); } (h as usize) % self.map.len() } @@ -244,7 +252,7 @@ impl Utf8SuffixMap { const INIT: u64 = 14695981039346656037; let mut h = INIT; - h = (h ^ (key.from as u64)).wrapping_mul(PRIME); + h = (h ^ (key.from.as_usize() as u64)).wrapping_mul(PRIME); h = (h ^ (key.start as u64)).wrapping_mul(PRIME); h = (h ^ (key.end as u64)).wrapping_mul(PRIME); (h as usize) % self.map.len() diff --git a/vendor/regex-automata/src/nfa/thompson/mod.rs b/vendor/regex-automata/src/nfa/thompson/mod.rs new file mode 100644 index 000000000..88a438e8e --- /dev/null +++ b/vendor/regex-automata/src/nfa/thompson/mod.rs @@ -0,0 +1,1555 @@ +use core::{convert::TryFrom, fmt, mem, ops::Range}; + +use alloc::{boxed::Box, format, string::String, sync::Arc, vec, vec::Vec}; + +use crate::util::{ + alphabet::{self, ByteClassSet}, + decode_last_utf8, decode_utf8, + id::{IteratorIDExt, PatternID, PatternIDIter, StateID}, + is_word_byte, is_word_char_fwd, is_word_char_rev, +}; + +pub use self::{ + compiler::{Builder, Config}, + error::Error, +}; + +mod compiler; +mod error; +mod map; +pub mod pikevm; +mod range_trie; + +/// A map from capture group name to its corresponding capture index. +/// +/// Since there are always two slots for each capture index, the pair of slots +/// corresponding to the capture index for a pattern ID of 0 are indexed at +/// `map["<name>"] * 2` and `map["<name>"] * 2 + 1`. +/// +/// This type is actually wrapped inside a Vec indexed by pattern ID on the +/// NFA, since multiple patterns may have the same capture group name. 
+///
+/// Note that this is somewhat of a sub-optimal representation, since it
+/// requires a hashmap for each pattern. A better representation would be
+/// HashMap<(PatternID, Arc<str>), usize>, but this makes it difficult to look
+/// up a capture index by name without producing a `Arc<str>`, which requires
+/// an allocation. To fix this, I think we'd need to define our own unsized
+/// type or something?
+#[cfg(feature = "std")]
+type CaptureNameMap = std::collections::HashMap<Arc<str>, usize>;
+#[cfg(not(feature = "std"))]
+type CaptureNameMap = alloc::collections::BTreeMap<Arc<str>, usize>;
+
+// The NFA API below is not something I'm terribly proud of at the moment. In
+// particular, it supports both mutating the NFA and actually using the NFA to
+// perform a search. I think combining these two things muddies the waters a
+// bit too much.
+//
+// I think the issue is that I saw the compiler as the 'builder,' and where
+// the compiler had the ability to manipulate the internal state of the NFA.
+// However, one of my goals was to make it possible for others to build their
+// own NFAs in a way that is *not* coupled to the regex-syntax crate.
+//
+// So I think really, there should be an NFA, a NFABuilder and then the
+// internal compiler which uses the NFABuilder API to build an NFA. Alas, at
+// the time of writing, I kind of ran out of steam.
+
+/// A fully compiled Thompson NFA.
+///
+/// The states of the NFA are indexed by state IDs, which are how transitions
+/// are expressed.
+#[derive(Clone)]
+pub struct NFA {
+    /// The state list. This list is guaranteed to be indexable by all starting
+    /// state IDs, and it is also guaranteed to contain at most one `Match`
+    /// state for each pattern compiled into this NFA. (A pattern may not have
+    /// a corresponding `Match` state if a `Match` state is impossible to
+    /// reach.)
+    states: Vec<State>,
+    /// The anchored starting state of this NFA.
+    start_anchored: StateID,
+    /// The unanchored starting state of this NFA.
+    start_unanchored: StateID,
+    /// The starting states for each individual pattern. Starting at any
+    /// of these states will result in only an anchored search for the
+    /// corresponding pattern. The vec is indexed by pattern ID. When the NFA
+    /// contains a single regex, then `start_pattern[0]` and `start_anchored`
+    /// are always equivalent.
+    start_pattern: Vec<StateID>,
+    /// A map from PatternID to its corresponding range of capture slots. Each
+    /// range is guaranteed to be contiguous with the previous range. The
+    /// end of the last range corresponds to the total number of slots needed
+    /// for this NFA.
+    patterns_to_slots: Vec<Range<usize>>,
+    /// A map from capture name to its corresponding index. So e.g., given
+    /// a single regex like '(\w+) (\w+) (?P<word>\w+)', the capture name
+    /// 'word' for pattern ID=0 would correspond to the index '3'. Its
+    /// corresponding slots would then be '3 * 2 = 6' and '3 * 2 + 1 = 7'.
+    capture_name_to_index: Vec<CaptureNameMap>,
+    /// A map from pattern ID to capture group index to name, if one exists.
+    /// This is effectively the inverse of 'capture_name_to_index'. The outer
+    /// vec is indexed by pattern ID, while the inner vec is indexed by capture
+    /// index offset for the corresponding pattern.
+    ///
+    /// The first capture group for each pattern is always unnamed and is thus
+    /// always None.
+    capture_index_to_name: Vec<Vec<Option<Arc<str>>>>,
+    /// A representation of equivalence classes over the transitions in this
+    /// NFA. Two bytes in the same equivalence class must not discriminate
+    /// between a match or a non-match. This map can be used to shrink the
+    /// total size of a DFA's transition table with a small match-time cost.
+    ///
+    /// Note that the NFA's transitions are *not* defined in terms of these
+    /// equivalence classes. The NFA's transitions are defined on the original
+    /// byte values.
For the most part, this is because they wouldn't really
+    /// help the NFA much since the NFA already uses a sparse representation
+    /// to represent transitions. Byte classes are most effective in a dense
+    /// representation.
+    byte_class_set: ByteClassSet,
+    /// Various facts about this NFA, which can be used to improve failure
+    /// modes (e.g., rejecting DFA construction if an NFA has Unicode word
+    /// boundaries) or for performing optimizations (avoiding an increase in
+    /// states if there are no look-around states).
+    facts: Facts,
+    /// Heap memory used indirectly by NFA states. Since each state might use a
+    /// different amount of heap, we need to keep track of this incrementally.
+    memory_states: usize,
+}
+
+impl NFA {
+    pub fn config() -> Config {
+        Config::new()
+    }
+
+    pub fn builder() -> Builder {
+        Builder::new()
+    }
+
+    /// Returns an NFA with no states. Its match semantics are unspecified.
+    ///
+    /// An empty NFA is useful as a starting point for building one. It is
+    /// itself not intended to be used for matching. For example, its starting
+    /// state identifiers are configured to be `0`, but since it has no states,
+    /// the identifiers are invalid.
+    ///
+    /// If you need an NFA that never matches anything and can be correctly
+    /// used for matching, use [`NFA::never_match`].
+    #[inline]
+    pub fn empty() -> NFA {
+        NFA {
+            states: vec![],
+            start_anchored: StateID::ZERO,
+            start_unanchored: StateID::ZERO,
+            start_pattern: vec![],
+            patterns_to_slots: vec![],
+            capture_name_to_index: vec![],
+            capture_index_to_name: vec![],
+            byte_class_set: ByteClassSet::empty(),
+            facts: Facts::default(),
+            memory_states: 0,
+        }
+    }
+
+    /// Returns an NFA with a single regex that always matches at every
+    /// position.
+    #[inline]
+    pub fn always_match() -> NFA {
+        let mut nfa = NFA::empty();
+        // Since we're only adding one pattern, these are guaranteed to work.
+ let start = nfa.add_match().unwrap(); + assert_eq!(start.as_usize(), 0); + let pid = nfa.finish_pattern(start).unwrap(); + assert_eq!(pid.as_usize(), 0); + nfa + } + + /// Returns an NFA that never matches at any position. It contains no + /// regexes. + #[inline] + pub fn never_match() -> NFA { + let mut nfa = NFA::empty(); + // Since we're only adding one state, this can never fail. + nfa.add_fail().unwrap(); + nfa + } + + /// Return the number of states in this NFA. + /// + /// This is guaranteed to be no bigger than [`StateID::LIMIT`]. + #[inline] + pub fn len(&self) -> usize { + self.states.len() + } + + /// Returns the total number of distinct match states in this NFA. + /// Stated differently, this returns the total number of regex patterns + /// used to build this NFA. + /// + /// This may return zero if the NFA was constructed with no patterns. In + /// this case, and only this case, the NFA can never produce a match for + /// any input. + /// + /// This is guaranteed to be no bigger than [`PatternID::LIMIT`]. + #[inline] + pub fn pattern_len(&self) -> usize { + self.start_pattern.len() + } + + /// Returns the pattern ID of the pattern currently being compiled by this + /// NFA. + fn current_pattern_id(&self) -> PatternID { + // This always works because we never permit more patterns in + // 'start_pattern' than can be addressed by PatternID. Also, we only + // add a new entry to 'start_pattern' once we finish compiling a + // pattern. Thus, the length refers to the ID of the current pattern + // being compiled. + PatternID::new(self.start_pattern.len()).unwrap() + } + + /// Returns the total number of capturing groups in this NFA. + /// + /// This includes the special 0th capture group that is always present and + /// captures the start and end offset of the entire match. + /// + /// This is a convenience routine for `nfa.capture_slot_len() / 2`. 
+ #[inline] + pub fn capture_len(&self) -> usize { + let slots = self.capture_slot_len(); + // This assert is guaranteed to pass since the NFA construction process + // guarantees that it is always true. + assert_eq!(slots % 2, 0, "capture slots must be divisible by 2"); + slots / 2 + } + + /// Returns the total number of capturing slots in this NFA. + /// + /// This value is guaranteed to be a multiple of 2. (Where each capturing + /// group has precisely two capturing slots in the NFA.) + #[inline] + pub fn capture_slot_len(&self) -> usize { + self.patterns_to_slots.last().map_or(0, |r| r.end) + } + + /// Return a range of capture slots for the given pattern. + /// + /// The range returned is guaranteed to be contiguous with ranges for + /// adjacent patterns. + /// + /// This panics if the given pattern ID is greater than or equal to the + /// number of patterns in this NFA. + #[inline] + pub fn pattern_slots(&self, pid: PatternID) -> Range<usize> { + self.patterns_to_slots[pid].clone() + } + + /// Return the capture group index corresponding to the given name in the + /// given pattern. If no such capture group name exists in the given + /// pattern, then this returns `None`. + /// + /// If the given pattern ID is invalid, then this panics. + #[inline] + pub fn capture_name_to_index( + &self, + pid: PatternID, + name: &str, + ) -> Option<usize> { + assert!(pid.as_usize() < self.pattern_len(), "invalid pattern ID"); + self.capture_name_to_index[pid].get(name).cloned() + } + + // TODO: add iterators over capture group names. + // Do we also permit indexing? + + /// Returns an iterator over all pattern IDs in this NFA. + #[inline] + pub fn patterns(&self) -> PatternIter { + PatternIter { + it: PatternID::iter(self.pattern_len()), + _marker: core::marker::PhantomData, + } + } + + /// Return the ID of the initial anchored state of this NFA. 
+ #[inline] + pub fn start_anchored(&self) -> StateID { + self.start_anchored + } + + /// Set the anchored starting state ID for this NFA. + #[inline] + pub fn set_start_anchored(&mut self, id: StateID) { + self.start_anchored = id; + } + + /// Return the ID of the initial unanchored state of this NFA. + #[inline] + pub fn start_unanchored(&self) -> StateID { + self.start_unanchored + } + + /// Set the unanchored starting state ID for this NFA. + #[inline] + pub fn set_start_unanchored(&mut self, id: StateID) { + self.start_unanchored = id; + } + + /// Return the ID of the initial anchored state for the given pattern. + /// + /// If the pattern doesn't exist in this NFA, then this panics. + #[inline] + pub fn start_pattern(&self, pid: PatternID) -> StateID { + self.start_pattern[pid] + } + + /// Get the byte class set for this NFA. + #[inline] + pub fn byte_class_set(&self) -> &ByteClassSet { + &self.byte_class_set + } + + /// Return a reference to the NFA state corresponding to the given ID. + #[inline] + pub fn state(&self, id: StateID) -> &State { + &self.states[id] + } + + /// Returns a slice of all states in this NFA. + /// + /// The slice returned may be indexed by a `StateID` generated by `add`. + #[inline] + pub fn states(&self) -> &[State] { + &self.states + } + + #[inline] + pub fn is_always_start_anchored(&self) -> bool { + self.start_anchored() == self.start_unanchored() + } + + #[inline] + pub fn has_any_look(&self) -> bool { + self.facts.has_any_look() + } + + #[inline] + pub fn has_any_anchor(&self) -> bool { + self.facts.has_any_anchor() + } + + #[inline] + pub fn has_word_boundary(&self) -> bool { + self.has_word_boundary_unicode() || self.has_word_boundary_ascii() + } + + #[inline] + pub fn has_word_boundary_unicode(&self) -> bool { + self.facts.has_word_boundary_unicode() + } + + #[inline] + pub fn has_word_boundary_ascii(&self) -> bool { + self.facts.has_word_boundary_ascii() + } + + /// Returns the memory usage, in bytes, of this NFA. 
+ /// + /// This does **not** include the stack size used up by this NFA. To + /// compute that, use `std::mem::size_of::<NFA>()`. + #[inline] + pub fn memory_usage(&self) -> usize { + self.states.len() * mem::size_of::<State>() + + self.memory_states + + self.start_pattern.len() * mem::size_of::<StateID>() + } + + // Why do we define a bunch of 'add_*' routines below instead of just + // defining a single 'add' routine that accepts a 'State'? Indeed, for most + // of the 'add_*' routines below, such a simple API would be more than + // appropriate. Unfortunately, adding capture states and, to a lesser + // extent, match states, is a bit more complex. Namely, when we add a + // capture state, we *really* want to know the corresponding capture + // group's name and index and what not, so that we can update other state + // inside this NFA. But, e.g., the capture group name is not and should + // not be included in 'State::Capture'. So what are our choices? + // + // 1) Define one 'add' and require some additional optional parameters. + // This feels quite ugly, and adds unnecessary complexity to more common + // and simpler cases. + // + // 2) Do what we do below. The sad thing is that our API is bigger with + // more methods. But each method is very specific and hopefully simple. + // + // 3) Define a new enum, say, 'StateWithInfo', or something that permits + // providing both a State and some extra ancillary info in some cases. This + // doesn't seem too bad to me, but seems slightly worse than (2) because of + // the additional type required. + // + // 4) Abandon the idea that we have to specify things like the capture + // group name when we add the Capture state to the NFA. We would then need + // to add other methods that permit the caller to add this additional state + // "out of band." 
Other than it introducing some additional complexity, I + // decided against this because I wanted the NFA builder API to make it + // as hard as possible to build a bad or invalid NFA. Using the approach + // below, as you'll see, permits us to do a lot of strict checking of our + // inputs and return an error if we see something we don't expect. + + pub fn add_range(&mut self, range: Transition) -> Result<StateID, Error> { + self.byte_class_set.set_range(range.start, range.end); + self.add_state(State::Range { range }) + } + + pub fn add_sparse( + &mut self, + sparse: SparseTransitions, + ) -> Result<StateID, Error> { + for range in sparse.ranges.iter() { + self.byte_class_set.set_range(range.start, range.end); + } + self.add_state(State::Sparse(sparse)) + } + + pub fn add_look( + &mut self, + next: StateID, + look: Look, + ) -> Result<StateID, Error> { + self.facts.set_has_any_look(true); + look.add_to_byteset(&mut self.byte_class_set); + match look { + Look::StartLine + | Look::EndLine + | Look::StartText + | Look::EndText => { + self.facts.set_has_any_anchor(true); + } + Look::WordBoundaryUnicode | Look::WordBoundaryUnicodeNegate => { + self.facts.set_has_word_boundary_unicode(true); + } + Look::WordBoundaryAscii | Look::WordBoundaryAsciiNegate => { + self.facts.set_has_word_boundary_ascii(true); + } + } + self.add_state(State::Look { look, next }) + } + + pub fn add_union( + &mut self, + alternates: Box<[StateID]>, + ) -> Result<StateID, Error> { + self.add_state(State::Union { alternates }) + } + + pub fn add_capture_start( + &mut self, + next_id: StateID, + capture_index: u32, + name: Option<Arc<str>>, + ) -> Result<StateID, Error> { + let pid = self.current_pattern_id(); + let capture_index = match usize::try_from(capture_index) { + Err(_) => { + return Err(Error::invalid_capture_index(core::usize::MAX)) + } + Ok(capture_index) => capture_index, + }; + // Do arithmetic to find our absolute slot index first, to make sure + // the index is at least possibly 
valid (doesn't overflow). + let relative_slot = match capture_index.checked_mul(2) { + Some(relative_slot) => relative_slot, + None => return Err(Error::invalid_capture_index(capture_index)), + }; + let slot = match relative_slot.checked_add(self.capture_slot_len()) { + Some(slot) => slot, + None => return Err(Error::invalid_capture_index(capture_index)), + }; + // Make sure we have space to insert our (pid,index)|-->name mapping. + if pid.as_usize() >= self.capture_index_to_name.len() { + // Note that we require that if you're adding capturing groups, + // then there must be at least one capturing group per pattern. + // Moreover, whenever we expand our space here, it should always + // first be for the first capture group (at index==0). + if pid.as_usize() > self.capture_index_to_name.len() + || capture_index > 0 + { + return Err(Error::invalid_capture_index(capture_index)); + } + self.capture_name_to_index.push(CaptureNameMap::new()); + self.capture_index_to_name.push(vec![]); + } + if capture_index >= self.capture_index_to_name[pid].len() { + // We require that capturing groups are added in correspondence + // to their index. So no discontinuous indices. This is likely + // overly strict, but also makes it simpler to provide guarantees + // about our capturing group data. 
+            if capture_index > self.capture_index_to_name[pid].len() {
+                return Err(Error::invalid_capture_index(capture_index));
+            }
+            self.capture_index_to_name[pid].push(None);
+        }
+        if let Some(ref name) = name {
+            self.capture_name_to_index[pid]
+                .insert(Arc::clone(name), capture_index);
+        }
+        self.capture_index_to_name[pid][capture_index] = name;
+        self.add_state(State::Capture { next: next_id, slot })
+    }
+
+    pub fn add_capture_end(
+        &mut self,
+        next_id: StateID,
+        capture_index: u32,
+    ) -> Result<StateID, Error> {
+        let pid = self.current_pattern_id();
+        let capture_index = match usize::try_from(capture_index) {
+            Err(_) => {
+                return Err(Error::invalid_capture_index(core::usize::MAX))
+            }
+            Ok(capture_index) => capture_index,
+        };
+        // If we haven't already added this capture group via a corresponding
+        // 'add_capture_start' call, then we consider the index given to be
+        // invalid.
+        if pid.as_usize() >= self.capture_index_to_name.len()
+            || capture_index >= self.capture_index_to_name[pid].len()
+        {
+            return Err(Error::invalid_capture_index(capture_index));
+        }
+        // Since we've already confirmed that this capture index is valid
+        // and has a corresponding starting slot, we know the multiplication
+        // has already been done and succeeded.
+        let relative_slot_start = capture_index.checked_mul(2).unwrap();
+        let relative_slot = match relative_slot_start.checked_add(1) {
+            Some(relative_slot) => relative_slot,
+            None => return Err(Error::invalid_capture_index(capture_index)),
+        };
+        let slot = match relative_slot.checked_add(self.capture_slot_len()) {
+            Some(slot) => slot,
+            None => return Err(Error::invalid_capture_index(capture_index)),
+        };
+        self.add_state(State::Capture { next: next_id, slot })
+    }
+
+    pub fn add_fail(&mut self) -> Result<StateID, Error> {
+        self.add_state(State::Fail)
+    }
+
+    /// Add a new match state to this NFA and return its state ID.
+ pub fn add_match(&mut self) -> Result<StateID, Error> { + let pattern_id = self.current_pattern_id(); + let sid = self.add_state(State::Match { id: pattern_id })?; + Ok(sid) + } + + /// Finish compiling the current pattern and return its identifier. The + /// given ID should be the state ID corresponding to the anchored starting + /// state for matching this pattern. + pub fn finish_pattern( + &mut self, + start_id: StateID, + ) -> Result<PatternID, Error> { + // We've gotta make sure that we never permit the user to add more + // patterns than we can identify. So if we're already at the limit, + // then return an error. This is somewhat non-ideal since this won't + // result in an error until trying to complete the compilation of a + // pattern instead of starting it. + if self.start_pattern.len() >= PatternID::LIMIT { + return Err(Error::too_many_patterns( + self.start_pattern.len().saturating_add(1), + )); + } + let pid = self.current_pattern_id(); + self.start_pattern.push(start_id); + // Add the number of new slots created by this pattern. This is always + // equivalent to '2 * caps.len()', where 'caps.len()' is the number of + // new capturing groups introduced by the pattern we're finishing. + let new_cap_groups = self + .capture_index_to_name + .get(pid.as_usize()) + .map_or(0, |caps| caps.len()); + let new_slots = match new_cap_groups.checked_mul(2) { + Some(new_slots) => new_slots, + None => { + // Just return the biggest index that we know exists. 
+ let index = new_cap_groups.saturating_sub(1); + return Err(Error::invalid_capture_index(index)); + } + }; + let slot_start = self.capture_slot_len(); + self.patterns_to_slots.push(slot_start..(slot_start + new_slots)); + Ok(pid) + } + + fn add_state(&mut self, state: State) -> Result<StateID, Error> { + let id = StateID::new(self.states.len()) + .map_err(|_| Error::too_many_states(self.states.len()))?; + self.memory_states += state.memory_usage(); + self.states.push(state); + Ok(id) + } + + /// Remap the transitions in every state of this NFA using the given map. + /// The given map should be indexed according to state ID namespace used by + /// the transitions of the states currently in this NFA. + /// + /// This may be used during the final phases of an NFA compiler, which + /// turns its intermediate NFA into the final NFA. Remapping may be + /// required to bring the state pointers from the intermediate NFA to the + /// final NFA. + pub fn remap(&mut self, old_to_new: &[StateID]) { + for state in &mut self.states { + state.remap(old_to_new); + } + self.start_anchored = old_to_new[self.start_anchored]; + self.start_unanchored = old_to_new[self.start_unanchored]; + for (pid, id) in self.start_pattern.iter_mut().with_pattern_ids() { + *id = old_to_new[*id]; + } + } + + /// Clear this NFA such that it has zero states and is otherwise "empty." + /// + /// An empty NFA is useful as a starting point for building one. It is + /// itself not intended to be used for matching. For example, its starting + /// state identifiers are configured to be `0`, but since it has no states, + /// the identifiers are invalid. 
+ pub fn clear(&mut self) { + self.states.clear(); + self.start_anchored = StateID::ZERO; + self.start_unanchored = StateID::ZERO; + self.start_pattern.clear(); + self.patterns_to_slots.clear(); + self.capture_name_to_index.clear(); + self.capture_index_to_name.clear(); + self.byte_class_set = ByteClassSet::empty(); + self.facts = Facts::default(); + self.memory_states = 0; + } +} + +impl fmt::Debug for NFA { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!(f, "thompson::NFA(")?; + for (sid, state) in self.states.iter().with_state_ids() { + let status = if sid == self.start_anchored { + '^' + } else if sid == self.start_unanchored { + '>' + } else { + ' ' + }; + writeln!(f, "{}{:06?}: {:?}", status, sid.as_usize(), state)?; + } + if self.pattern_len() > 1 { + writeln!(f, "")?; + for pid in self.patterns() { + let sid = self.start_pattern(pid); + writeln!( + f, + "START({:06?}): {:?}", + pid.as_usize(), + sid.as_usize() + )?; + } + } + writeln!(f, "")?; + writeln!( + f, + "transition equivalence classes: {:?}", + self.byte_class_set().byte_classes() + )?; + writeln!(f, ")")?; + Ok(()) + } +} + +/// A state in a final compiled NFA. +#[derive(Clone, Eq, PartialEq)] +pub enum State { + /// A state that transitions to `next` if and only if the current input + /// byte is in the range `[start, end]` (inclusive). + /// + /// This is a special case of Sparse in that it encodes only one transition + /// (and therefore avoids the allocation). + Range { range: Transition }, + /// A state with possibly many transitions, represented in a sparse + /// fashion. Transitions are ordered lexicographically by input range. As + /// such, this may only be used when every transition has equal priority. + /// (In practice, this is only used for encoding UTF-8 automata.) + Sparse(SparseTransitions), + /// A conditional epsilon transition satisfied via some sort of + /// look-around. 
+ Look { look: Look, next: StateID }, + /// An alternation such that there exists an epsilon transition to all + /// states in `alternates`, where matches found via earlier transitions + /// are preferred over later transitions. + Union { alternates: Box<[StateID]> }, + /// An empty state that records a capture location. + /// + /// From the perspective of finite automata, this is precisely equivalent + /// to an epsilon transition, but serves the purpose of instructing NFA + /// simulations to record additional state when the finite state machine + /// passes through this epsilon transition. + /// + /// These transitions are treated as epsilon transitions with no additional + /// effects in DFAs. + /// + /// 'slot' in this context refers to the specific capture group offset that + /// is being recorded. Each capturing group has two slots corresponding to + /// the start and end of the matching portion of that group. + /// A fail state. When encountered, the automaton is guaranteed to never + /// reach a match state. + Capture { next: StateID, slot: usize }, + /// A state that cannot be transitioned out of. If a search reaches this + /// state, then no match is possible and the search should terminate. + Fail, + /// A match state. There is exactly one such occurrence of this state for + /// each regex compiled into the NFA. + Match { id: PatternID }, +} + +impl State { + /// Returns true if and only if this state contains one or more epsilon + /// transitions. + #[inline] + pub fn is_epsilon(&self) -> bool { + match *self { + State::Range { .. } + | State::Sparse { .. } + | State::Fail + | State::Match { .. } => false, + State::Look { .. } + | State::Union { .. } + | State::Capture { .. } => true, + } + } + + /// Returns the heap memory usage of this NFA state in bytes. + fn memory_usage(&self) -> usize { + match *self { + State::Range { .. } + | State::Look { .. } + | State::Capture { .. } + | State::Match { .. 
} + | State::Fail => 0, + State::Sparse(SparseTransitions { ref ranges }) => { + ranges.len() * mem::size_of::<Transition>() + } + State::Union { ref alternates } => { + alternates.len() * mem::size_of::<StateID>() + } + } + } + + /// Remap the transitions in this state using the given map. Namely, the + /// given map should be indexed according to the transitions currently + /// in this state. + /// + /// This is used during the final phase of the NFA compiler, which turns + /// its intermediate NFA into the final NFA. + fn remap(&mut self, remap: &[StateID]) { + match *self { + State::Range { ref mut range } => range.next = remap[range.next], + State::Sparse(SparseTransitions { ref mut ranges }) => { + for r in ranges.iter_mut() { + r.next = remap[r.next]; + } + } + State::Look { ref mut next, .. } => *next = remap[*next], + State::Union { ref mut alternates } => { + for alt in alternates.iter_mut() { + *alt = remap[*alt]; + } + } + State::Capture { ref mut next, .. } => *next = remap[*next], + State::Fail => {} + State::Match { .. } => {} + } + } +} + +impl fmt::Debug for State { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match *self { + State::Range { ref range } => range.fmt(f), + State::Sparse(SparseTransitions { ref ranges }) => { + let rs = ranges + .iter() + .map(|t| format!("{:?}", t)) + .collect::<Vec<String>>() + .join(", "); + write!(f, "sparse({})", rs) + } + State::Look { ref look, next } => { + write!(f, "{:?} => {:?}", look, next.as_usize()) + } + State::Union { ref alternates } => { + let alts = alternates + .iter() + .map(|id| format!("{:?}", id.as_usize())) + .collect::<Vec<String>>() + .join(", "); + write!(f, "alt({})", alts) + } + State::Capture { next, slot } => { + write!(f, "capture({:?}) => {:?}", slot, next.as_usize()) + } + State::Fail => write!(f, "FAIL"), + State::Match { id } => write!(f, "MATCH({:?})", id.as_usize()), + } + } +} + +/// A collection of facts about an NFA. 
+/// +/// There are no real cohesive principles behind what gets put in here. For +/// the most part, it is implementation driven. +#[derive(Clone, Copy, Debug, Default)] +struct Facts { + /// Various yes/no facts about this NFA. + bools: u16, +} + +impl Facts { + define_bool!(0, has_any_look, set_has_any_look); + define_bool!(1, has_any_anchor, set_has_any_anchor); + define_bool!(2, has_word_boundary_unicode, set_has_word_boundary_unicode); + define_bool!(3, has_word_boundary_ascii, set_has_word_boundary_ascii); +} + +/// A sequence of transitions used to represent a sparse state. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct SparseTransitions { + pub ranges: Box<[Transition]>, +} + +impl SparseTransitions { + pub fn matches(&self, haystack: &[u8], at: usize) -> Option<StateID> { + haystack.get(at).and_then(|&b| self.matches_byte(b)) + } + + pub fn matches_unit(&self, unit: alphabet::Unit) -> Option<StateID> { + unit.as_u8().map_or(None, |byte| self.matches_byte(byte)) + } + + pub fn matches_byte(&self, byte: u8) -> Option<StateID> { + for t in self.ranges.iter() { + if t.start > byte { + break; + } else if t.matches_byte(byte) { + return Some(t.next); + } + } + None + + /* + // This is an alternative implementation that uses binary search. In + // some ad hoc experiments, like + // + // smallishru=OpenSubtitles2018.raw.sample.smallish.ru + // regex-cli find nfa thompson pikevm -b "@$smallishru" '\b\w+\b' + // + // I could not observe any improvement, and in fact, things seemed to + // be a bit slower. + self.ranges + .binary_search_by(|t| { + if t.end < byte { + core::cmp::Ordering::Less + } else if t.start > byte { + core::cmp::Ordering::Greater + } else { + core::cmp::Ordering::Equal + } + }) + .ok() + .map(|i| self.ranges[i].next) + */ + } +} + +/// A transition to another state, only if the given byte falls in the +/// inclusive range specified. 
+#[derive(Clone, Copy, Eq, Hash, PartialEq)] +pub struct Transition { + pub start: u8, + pub end: u8, + pub next: StateID, +} + +impl Transition { + pub fn matches(&self, haystack: &[u8], at: usize) -> bool { + haystack.get(at).map_or(false, |&b| self.matches_byte(b)) + } + + pub fn matches_unit(&self, unit: alphabet::Unit) -> bool { + unit.as_u8().map_or(false, |byte| self.matches_byte(byte)) + } + + pub fn matches_byte(&self, byte: u8) -> bool { + self.start <= byte && byte <= self.end + } +} + +impl fmt::Debug for Transition { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use crate::util::DebugByte; + + let Transition { start, end, next } = *self; + if self.start == self.end { + write!(f, "{:?} => {:?}", DebugByte(start), next.as_usize()) + } else { + write!( + f, + "{:?}-{:?} => {:?}", + DebugByte(start), + DebugByte(end), + next.as_usize(), + ) + } + } +} + +/// A conditional NFA epsilon transition. +/// +/// A simulation of the NFA can only move through this epsilon transition if +/// the current position satisfies some look-around property. Some assertions +/// are look-behind (StartLine, StartText), some assertions are look-ahead +/// (EndLine, EndText) while other assertions are both look-behind and +/// look-ahead (WordBoundary*). +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum Look { + /// The previous position is either `\n` or the current position is the + /// beginning of the haystack (i.e., at position `0`). + StartLine = 1 << 0, + /// The next position is either `\n` or the current position is the end of + /// the haystack (i.e., at position `haystack.len()`). + EndLine = 1 << 1, + /// The current position is the beginning of the haystack (i.e., at + /// position `0`). + StartText = 1 << 2, + /// The current position is the end of the haystack (i.e., at position + /// `haystack.len()`). 
+ EndText = 1 << 3, + /// When tested at position `i`, where `p=decode_utf8_rev(&haystack[..i])` + /// and `n=decode_utf8(&haystack[i..])`, this assertion passes if and only + /// if `is_word(p) != is_word(n)`. If `i=0`, then `is_word(p)=false` and if + /// `i=haystack.len()`, then `is_word(n)=false`. + WordBoundaryUnicode = 1 << 4, + /// Same as for `WordBoundaryUnicode`, but requires that + /// `is_word(p) == is_word(n)`. + WordBoundaryUnicodeNegate = 1 << 5, + /// When tested at position `i`, where `p=haystack[i-1]` and + /// `n=haystack[i]`, this assertion passes if and only if `is_word(p) + /// != is_word(n)`. If `i=0`, then `is_word(p)=false` and if + /// `i=haystack.len()`, then `is_word(n)=false`. + WordBoundaryAscii = 1 << 6, + /// Same as for `WordBoundaryAscii`, but requires that + /// `is_word(p) == is_word(n)`. + /// + /// Note that it is possible for this assertion to match at positions that + /// split the UTF-8 encoding of a codepoint. For this reason, this may only + /// be used when UTF-8 mode is disable in the regex syntax. + WordBoundaryAsciiNegate = 1 << 7, +} + +impl Look { + #[inline(always)] + pub fn matches(&self, bytes: &[u8], at: usize) -> bool { + match *self { + Look::StartLine => at == 0 || bytes[at - 1] == b'\n', + Look::EndLine => at == bytes.len() || bytes[at] == b'\n', + Look::StartText => at == 0, + Look::EndText => at == bytes.len(), + Look::WordBoundaryUnicode => { + let word_before = is_word_char_rev(bytes, at); + let word_after = is_word_char_fwd(bytes, at); + word_before != word_after + } + Look::WordBoundaryUnicodeNegate => { + // This is pretty subtle. Why do we need to do UTF-8 decoding + // here? Well... at time of writing, the is_word_char_{fwd,rev} + // routines will only return true if there is a valid UTF-8 + // encoding of a "word" codepoint, and false in every other + // case (including invalid UTF-8). 
This means that in regions + // of invalid UTF-8 (which might be a subset of valid UTF-8!), + // it would result in \B matching. While this would be + // questionable in the context of truly invalid UTF-8, it is + // *certainly* wrong to report match boundaries that split the + // encoding of a codepoint. So to work around this, we ensure + // that we can decode a codepoint on either side of `at`. If + // either direction fails, then we don't permit \B to match at + // all. + // + // Now, this isn't exactly optimal from a perf perspective. We + // could try and detect this in is_word_char_{fwd,rev}, but + // it's not clear if it's worth it. \B is, after all, rarely + // used. + // + // And in particular, we do *not* have to do this with \b, + // because \b *requires* that at least one side of `at` be a + // "word" codepoint, which in turn implies one side of `at` + // must be valid UTF-8. This in turn implies that \b can never + // split a valid UTF-8 encoding of a codepoint. In the case + // where one side of `at` is truly invalid UTF-8 and the other + // side IS a word codepoint, then we want \b to match since it + // represents a valid UTF-8 boundary. It also makes sense. For + // example, you'd want \b\w+\b to match 'abc' in '\xFFabc\xFF'. 
+ let word_before = at > 0 + && match decode_last_utf8(&bytes[..at]) { + None | Some(Err(_)) => return false, + Some(Ok(_)) => is_word_char_rev(bytes, at), + }; + let word_after = at < bytes.len() + && match decode_utf8(&bytes[at..]) { + None | Some(Err(_)) => return false, + Some(Ok(_)) => is_word_char_fwd(bytes, at), + }; + word_before == word_after + } + Look::WordBoundaryAscii => { + let word_before = at > 0 && is_word_byte(bytes[at - 1]); + let word_after = at < bytes.len() && is_word_byte(bytes[at]); + word_before != word_after + } + Look::WordBoundaryAsciiNegate => { + let word_before = at > 0 && is_word_byte(bytes[at - 1]); + let word_after = at < bytes.len() && is_word_byte(bytes[at]); + word_before == word_after + } + } + } + + /// Create a look-around assertion from its corresponding integer (as + /// defined in `Look`). If the given integer does not correspond to any + /// assertion, then None is returned. + fn from_int(n: u8) -> Option<Look> { + match n { + 0b0000_0001 => Some(Look::StartLine), + 0b0000_0010 => Some(Look::EndLine), + 0b0000_0100 => Some(Look::StartText), + 0b0000_1000 => Some(Look::EndText), + 0b0001_0000 => Some(Look::WordBoundaryUnicode), + 0b0010_0000 => Some(Look::WordBoundaryUnicodeNegate), + 0b0100_0000 => Some(Look::WordBoundaryAscii), + 0b1000_0000 => Some(Look::WordBoundaryAsciiNegate), + _ => None, + } + } + + /// Flip the look-around assertion to its equivalent for reverse searches. 
+ fn reversed(&self) -> Look { + match *self { + Look::StartLine => Look::EndLine, + Look::EndLine => Look::StartLine, + Look::StartText => Look::EndText, + Look::EndText => Look::StartText, + Look::WordBoundaryUnicode => Look::WordBoundaryUnicode, + Look::WordBoundaryUnicodeNegate => Look::WordBoundaryUnicodeNegate, + Look::WordBoundaryAscii => Look::WordBoundaryAscii, + Look::WordBoundaryAsciiNegate => Look::WordBoundaryAsciiNegate, + } + } + + /// Split up the given byte classes into equivalence classes in a way that + /// is consistent with this look-around assertion. + fn add_to_byteset(&self, set: &mut ByteClassSet) { + match *self { + Look::StartText | Look::EndText => {} + Look::StartLine | Look::EndLine => { + set.set_range(b'\n', b'\n'); + } + Look::WordBoundaryUnicode + | Look::WordBoundaryUnicodeNegate + | Look::WordBoundaryAscii + | Look::WordBoundaryAsciiNegate => { + // We need to mark all ranges of bytes whose pairs result in + // evaluating \b differently. This isn't technically correct + // for Unicode word boundaries, but DFAs can't handle those + // anyway, and thus, the byte classes don't need to either + // since they are themselves only used in DFAs. + let iswb = regex_syntax::is_word_byte; + let mut b1: u16 = 0; + let mut b2: u16; + while b1 <= 255 { + b2 = b1 + 1; + while b2 <= 255 && iswb(b1 as u8) == iswb(b2 as u8) { + b2 += 1; + } + set.set_range(b1 as u8, (b2 - 1) as u8); + b1 = b2; + } + } + } + } +} + +/// LookSet is a memory-efficient set of look-around assertions. Callers may +/// idempotently insert or remove any look-around assertion from a set. +#[repr(transparent)] +#[derive(Clone, Copy, Default, Eq, Hash, PartialEq, PartialOrd, Ord)] +pub(crate) struct LookSet { + set: u8, +} + +impl LookSet { + /// Return a LookSet from its representation. + pub(crate) fn from_repr(repr: u8) -> LookSet { + LookSet { set: repr } + } + + /// Return a mutable LookSet from a mutable pointer to its representation. 
+ pub(crate) fn from_repr_mut(repr: &mut u8) -> &mut LookSet { + // SAFETY: This is safe since a LookSet is repr(transparent) where its + // repr is a u8. + unsafe { core::mem::transmute::<&mut u8, &mut LookSet>(repr) } + } + + /// Return true if and only if this set is empty. + pub(crate) fn is_empty(&self) -> bool { + self.set == 0 + } + + /// Clears this set such that it has no assertions in it. + pub(crate) fn clear(&mut self) { + self.set = 0; + } + + /// Insert the given look-around assertion into this set. If the assertion + /// already exists, then this is a no-op. + pub(crate) fn insert(&mut self, look: Look) { + self.set |= look as u8; + } + + /// Remove the given look-around assertion from this set. If the assertion + /// is not in this set, then this is a no-op. + #[cfg(test)] + pub(crate) fn remove(&mut self, look: Look) { + self.set &= !(look as u8); + } + + /// Return true if and only if the given assertion is in this set. + pub(crate) fn contains(&self, look: Look) -> bool { + (look as u8) & self.set != 0 + } + + /// Subtract the given `other` set from the `self` set and return a new + /// set. + pub(crate) fn subtract(&self, other: LookSet) -> LookSet { + LookSet { set: self.set & !other.set } + } + + /// Return the intersection of the given `other` set with the `self` set + /// and return the resulting set. + pub(crate) fn intersect(&self, other: LookSet) -> LookSet { + LookSet { set: self.set & other.set } + } +} + +impl core::fmt::Debug for LookSet { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + let mut members = vec![]; + for i in 0..8 { + let look = match Look::from_int(1 << i) { + None => continue, + Some(look) => look, + }; + if self.contains(look) { + members.push(look); + } + } + f.debug_tuple("LookSet").field(&members).finish() + } +} + +/// An iterator over all pattern IDs in an NFA. 
+pub struct PatternIter<'a> { + it: PatternIDIter, + /// We explicitly associate a lifetime with this iterator even though we + /// don't actually borrow anything from the NFA. We do this for backward + /// compatibility purposes. If we ever do need to borrow something from + /// the NFA, then we can and just get rid of this marker without breaking + /// the public API. + _marker: core::marker::PhantomData<&'a ()>, +} + +impl<'a> Iterator for PatternIter<'a> { + type Item = PatternID; + + fn next(&mut self) -> Option<PatternID> { + self.it.next() + } +} + +#[cfg(test)] +mod tests { + use super::*; + // TODO: Replace tests using DFA with NFA matching engine once implemented. + use crate::dfa::{dense, Automaton}; + + #[test] + fn always_match() { + let nfa = NFA::always_match(); + let dfa = dense::Builder::new().build_from_nfa(&nfa).unwrap(); + let find = |input, start, end| { + dfa.find_leftmost_fwd_at(None, None, input, start, end) + .unwrap() + .map(|m| m.offset()) + }; + + assert_eq!(Some(0), find(b"", 0, 0)); + assert_eq!(Some(0), find(b"a", 0, 1)); + assert_eq!(Some(1), find(b"a", 1, 1)); + assert_eq!(Some(0), find(b"ab", 0, 2)); + assert_eq!(Some(1), find(b"ab", 1, 2)); + assert_eq!(Some(2), find(b"ab", 2, 2)); + } + + #[test] + fn never_match() { + let nfa = NFA::never_match(); + let dfa = dense::Builder::new().build_from_nfa(&nfa).unwrap(); + let find = |input, start, end| { + dfa.find_leftmost_fwd_at(None, None, input, start, end) + .unwrap() + .map(|m| m.offset()) + }; + + assert_eq!(None, find(b"", 0, 0)); + assert_eq!(None, find(b"a", 0, 1)); + assert_eq!(None, find(b"a", 1, 1)); + assert_eq!(None, find(b"ab", 0, 2)); + assert_eq!(None, find(b"ab", 1, 2)); + assert_eq!(None, find(b"ab", 2, 2)); + } + + #[test] + fn look_set() { + let mut f = LookSet::default(); + assert!(!f.contains(Look::StartText)); + assert!(!f.contains(Look::EndText)); + assert!(!f.contains(Look::StartLine)); + assert!(!f.contains(Look::EndLine)); + 
assert!(!f.contains(Look::WordBoundaryUnicode)); + assert!(!f.contains(Look::WordBoundaryUnicodeNegate)); + assert!(!f.contains(Look::WordBoundaryAscii)); + assert!(!f.contains(Look::WordBoundaryAsciiNegate)); + + f.insert(Look::StartText); + assert!(f.contains(Look::StartText)); + f.remove(Look::StartText); + assert!(!f.contains(Look::StartText)); + + f.insert(Look::EndText); + assert!(f.contains(Look::EndText)); + f.remove(Look::EndText); + assert!(!f.contains(Look::EndText)); + + f.insert(Look::StartLine); + assert!(f.contains(Look::StartLine)); + f.remove(Look::StartLine); + assert!(!f.contains(Look::StartLine)); + + f.insert(Look::EndLine); + assert!(f.contains(Look::EndLine)); + f.remove(Look::EndLine); + assert!(!f.contains(Look::EndLine)); + + f.insert(Look::WordBoundaryUnicode); + assert!(f.contains(Look::WordBoundaryUnicode)); + f.remove(Look::WordBoundaryUnicode); + assert!(!f.contains(Look::WordBoundaryUnicode)); + + f.insert(Look::WordBoundaryUnicodeNegate); + assert!(f.contains(Look::WordBoundaryUnicodeNegate)); + f.remove(Look::WordBoundaryUnicodeNegate); + assert!(!f.contains(Look::WordBoundaryUnicodeNegate)); + + f.insert(Look::WordBoundaryAscii); + assert!(f.contains(Look::WordBoundaryAscii)); + f.remove(Look::WordBoundaryAscii); + assert!(!f.contains(Look::WordBoundaryAscii)); + + f.insert(Look::WordBoundaryAsciiNegate); + assert!(f.contains(Look::WordBoundaryAsciiNegate)); + f.remove(Look::WordBoundaryAsciiNegate); + assert!(!f.contains(Look::WordBoundaryAsciiNegate)); + } + + #[test] + fn look_matches_start_line() { + let look = Look::StartLine; + + assert!(look.matches(B(""), 0)); + assert!(look.matches(B("\n"), 0)); + assert!(look.matches(B("\n"), 1)); + assert!(look.matches(B("a"), 0)); + assert!(look.matches(B("\na"), 1)); + + assert!(!look.matches(B("a"), 1)); + assert!(!look.matches(B("a\na"), 1)); + } + + #[test] + fn look_matches_end_line() { + let look = Look::EndLine; + + assert!(look.matches(B(""), 0)); + 
assert!(look.matches(B("\n"), 1)); + assert!(look.matches(B("\na"), 0)); + assert!(look.matches(B("\na"), 2)); + assert!(look.matches(B("a\na"), 1)); + + assert!(!look.matches(B("a"), 0)); + assert!(!look.matches(B("\na"), 1)); + assert!(!look.matches(B("a\na"), 0)); + assert!(!look.matches(B("a\na"), 2)); + } + + #[test] + fn look_matches_start_text() { + let look = Look::StartText; + + assert!(look.matches(B(""), 0)); + assert!(look.matches(B("\n"), 0)); + assert!(look.matches(B("a"), 0)); + + assert!(!look.matches(B("\n"), 1)); + assert!(!look.matches(B("\na"), 1)); + assert!(!look.matches(B("a"), 1)); + assert!(!look.matches(B("a\na"), 1)); + } + + #[test] + fn look_matches_end_text() { + let look = Look::EndText; + + assert!(look.matches(B(""), 0)); + assert!(look.matches(B("\n"), 1)); + assert!(look.matches(B("\na"), 2)); + + assert!(!look.matches(B("\na"), 0)); + assert!(!look.matches(B("a\na"), 1)); + assert!(!look.matches(B("a"), 0)); + assert!(!look.matches(B("\na"), 1)); + assert!(!look.matches(B("a\na"), 0)); + assert!(!look.matches(B("a\na"), 2)); + } + + #[test] + fn look_matches_word_unicode() { + let look = Look::WordBoundaryUnicode; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(look.matches(B("a"), 0)); + assert!(look.matches(B("a"), 1)); + assert!(look.matches(B("a "), 1)); + assert!(look.matches(B(" a "), 1)); + assert!(look.matches(B(" a "), 2)); + + // Unicode word boundaries with a non-ASCII codepoint. + assert!(look.matches(B("𝛃"), 0)); + assert!(look.matches(B("𝛃"), 4)); + assert!(look.matches(B("𝛃 "), 4)); + assert!(look.matches(B(" 𝛃 "), 1)); + assert!(look.matches(B(" 𝛃 "), 5)); + + // Unicode word boundaries between non-ASCII codepoints. + assert!(look.matches(B("𝛃𐆀"), 0)); + assert!(look.matches(B("𝛃𐆀"), 4)); + + // Non word boundaries for ASCII. 
+ assert!(!look.matches(B(""), 0)); + assert!(!look.matches(B("ab"), 1)); + assert!(!look.matches(B("a "), 2)); + assert!(!look.matches(B(" a "), 0)); + assert!(!look.matches(B(" a "), 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(!look.matches(B("𝛃b"), 4)); + assert!(!look.matches(B("𝛃 "), 5)); + assert!(!look.matches(B(" 𝛃 "), 0)); + assert!(!look.matches(B(" 𝛃 "), 6)); + assert!(!look.matches(B("𝛃"), 1)); + assert!(!look.matches(B("𝛃"), 2)); + assert!(!look.matches(B("𝛃"), 3)); + + // Non word boundaries with non-ASCII codepoints. + assert!(!look.matches(B("𝛃𐆀"), 1)); + assert!(!look.matches(B("𝛃𐆀"), 2)); + assert!(!look.matches(B("𝛃𐆀"), 3)); + assert!(!look.matches(B("𝛃𐆀"), 5)); + assert!(!look.matches(B("𝛃𐆀"), 6)); + assert!(!look.matches(B("𝛃𐆀"), 7)); + assert!(!look.matches(B("𝛃𐆀"), 8)); + } + + #[test] + fn look_matches_word_ascii() { + let look = Look::WordBoundaryAscii; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(look.matches(B("a"), 0)); + assert!(look.matches(B("a"), 1)); + assert!(look.matches(B("a "), 1)); + assert!(look.matches(B(" a "), 1)); + assert!(look.matches(B(" a "), 2)); + + // Unicode word boundaries with a non-ASCII codepoint. Since this is + // an ASCII word boundary, none of these match. + assert!(!look.matches(B("𝛃"), 0)); + assert!(!look.matches(B("𝛃"), 4)); + assert!(!look.matches(B("𝛃 "), 4)); + assert!(!look.matches(B(" 𝛃 "), 1)); + assert!(!look.matches(B(" 𝛃 "), 5)); + + // Unicode word boundaries between non-ASCII codepoints. Again, since + // this is an ASCII word boundary, none of these match. + assert!(!look.matches(B("𝛃𐆀"), 0)); + assert!(!look.matches(B("𝛃𐆀"), 4)); + + // Non word boundaries for ASCII. 
+ assert!(!look.matches(B(""), 0)); + assert!(!look.matches(B("ab"), 1)); + assert!(!look.matches(B("a "), 2)); + assert!(!look.matches(B(" a "), 0)); + assert!(!look.matches(B(" a "), 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(look.matches(B("𝛃b"), 4)); + assert!(!look.matches(B("𝛃 "), 5)); + assert!(!look.matches(B(" 𝛃 "), 0)); + assert!(!look.matches(B(" 𝛃 "), 6)); + assert!(!look.matches(B("𝛃"), 1)); + assert!(!look.matches(B("𝛃"), 2)); + assert!(!look.matches(B("𝛃"), 3)); + + // Non word boundaries with non-ASCII codepoints. + assert!(!look.matches(B("𝛃𐆀"), 1)); + assert!(!look.matches(B("𝛃𐆀"), 2)); + assert!(!look.matches(B("𝛃𐆀"), 3)); + assert!(!look.matches(B("𝛃𐆀"), 5)); + assert!(!look.matches(B("𝛃𐆀"), 6)); + assert!(!look.matches(B("𝛃𐆀"), 7)); + assert!(!look.matches(B("𝛃𐆀"), 8)); + } + + #[test] + fn look_matches_word_unicode_negate() { + let look = Look::WordBoundaryUnicodeNegate; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(!look.matches(B("a"), 0)); + assert!(!look.matches(B("a"), 1)); + assert!(!look.matches(B("a "), 1)); + assert!(!look.matches(B(" a "), 1)); + assert!(!look.matches(B(" a "), 2)); + + // Unicode word boundaries with a non-ASCII codepoint. + assert!(!look.matches(B("𝛃"), 0)); + assert!(!look.matches(B("𝛃"), 4)); + assert!(!look.matches(B("𝛃 "), 4)); + assert!(!look.matches(B(" 𝛃 "), 1)); + assert!(!look.matches(B(" 𝛃 "), 5)); + + // Unicode word boundaries between non-ASCII codepoints. + assert!(!look.matches(B("𝛃𐆀"), 0)); + assert!(!look.matches(B("𝛃𐆀"), 4)); + + // Non word boundaries for ASCII. + assert!(look.matches(B(""), 0)); + assert!(look.matches(B("ab"), 1)); + assert!(look.matches(B("a "), 2)); + assert!(look.matches(B(" a "), 0)); + assert!(look.matches(B(" a "), 3)); + + // Non word boundaries with a non-ASCII codepoint. 
+ assert!(look.matches(B("𝛃b"), 4)); + assert!(look.matches(B("𝛃 "), 5)); + assert!(look.matches(B(" 𝛃 "), 0)); + assert!(look.matches(B(" 𝛃 "), 6)); + // These don't match because they could otherwise return an offset that + // splits the UTF-8 encoding of a codepoint. + assert!(!look.matches(B("𝛃"), 1)); + assert!(!look.matches(B("𝛃"), 2)); + assert!(!look.matches(B("𝛃"), 3)); + + // Non word boundaries with non-ASCII codepoints. These also don't + // match because they could otherwise return an offset that splits the + // UTF-8 encoding of a codepoint. + assert!(!look.matches(B("𝛃𐆀"), 1)); + assert!(!look.matches(B("𝛃𐆀"), 2)); + assert!(!look.matches(B("𝛃𐆀"), 3)); + assert!(!look.matches(B("𝛃𐆀"), 5)); + assert!(!look.matches(B("𝛃𐆀"), 6)); + assert!(!look.matches(B("𝛃𐆀"), 7)); + // But this one does, since 𐆀 isn't a word codepoint, and 8 is the end + // of the haystack. So the "end" of the haystack isn't a word and 𐆀 + // isn't a word, thus, \B matches. + assert!(look.matches(B("𝛃𐆀"), 8)); + } + + #[test] + fn look_matches_word_ascii_negate() { + let look = Look::WordBoundaryAsciiNegate; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(!look.matches(B("a"), 0)); + assert!(!look.matches(B("a"), 1)); + assert!(!look.matches(B("a "), 1)); + assert!(!look.matches(B(" a "), 1)); + assert!(!look.matches(B(" a "), 2)); + + // Unicode word boundaries with a non-ASCII codepoint. Since this is + // an ASCII word boundary, none of these match. + assert!(look.matches(B("𝛃"), 0)); + assert!(look.matches(B("𝛃"), 4)); + assert!(look.matches(B("𝛃 "), 4)); + assert!(look.matches(B(" 𝛃 "), 1)); + assert!(look.matches(B(" 𝛃 "), 5)); + + // Unicode word boundaries between non-ASCII codepoints. Again, since + // this is an ASCII word boundary, none of these match. + assert!(look.matches(B("𝛃𐆀"), 0)); + assert!(look.matches(B("𝛃𐆀"), 4)); + + // Non word boundaries for ASCII. 
+ assert!(look.matches(B(""), 0)); + assert!(look.matches(B("ab"), 1)); + assert!(look.matches(B("a "), 2)); + assert!(look.matches(B(" a "), 0)); + assert!(look.matches(B(" a "), 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(!look.matches(B("𝛃b"), 4)); + assert!(look.matches(B("𝛃 "), 5)); + assert!(look.matches(B(" 𝛃 "), 0)); + assert!(look.matches(B(" 𝛃 "), 6)); + assert!(look.matches(B("𝛃"), 1)); + assert!(look.matches(B("𝛃"), 2)); + assert!(look.matches(B("𝛃"), 3)); + + // Non word boundaries with non-ASCII codepoints. + assert!(look.matches(B("𝛃𐆀"), 1)); + assert!(look.matches(B("𝛃𐆀"), 2)); + assert!(look.matches(B("𝛃𐆀"), 3)); + assert!(look.matches(B("𝛃𐆀"), 5)); + assert!(look.matches(B("𝛃𐆀"), 6)); + assert!(look.matches(B("𝛃𐆀"), 7)); + assert!(look.matches(B("𝛃𐆀"), 8)); + } + + fn B<'a, T: 'a + ?Sized + AsRef<[u8]>>(string: &'a T) -> &'a [u8] { + string.as_ref() + } +} diff --git a/vendor/regex-automata/src/nfa/thompson/pikevm.rs b/vendor/regex-automata/src/nfa/thompson/pikevm.rs new file mode 100644 index 000000000..7572f9f10 --- /dev/null +++ b/vendor/regex-automata/src/nfa/thompson/pikevm.rs @@ -0,0 +1,554 @@ +use alloc::{sync::Arc, vec, vec::Vec}; + +use crate::{ + nfa::thompson::{self, Error, State, NFA}, + util::{ + id::{PatternID, StateID}, + matchtypes::MultiMatch, + sparse_set::SparseSet, + }, +}; + +#[derive(Clone, Copy, Debug, Default)] +pub struct Config { + anchored: Option<bool>, + utf8: Option<bool>, +} + +impl Config { + /// Return a new default PikeVM configuration. 
+ pub fn new() -> Config { + Config::default() + } + + pub fn anchored(mut self, yes: bool) -> Config { + self.anchored = Some(yes); + self + } + + pub fn utf8(mut self, yes: bool) -> Config { + self.utf8 = Some(yes); + self + } + + pub fn get_anchored(&self) -> bool { + self.anchored.unwrap_or(false) + } + + pub fn get_utf8(&self) -> bool { + self.utf8.unwrap_or(true) + } + + pub(crate) fn overwrite(self, o: Config) -> Config { + Config { + anchored: o.anchored.or(self.anchored), + utf8: o.utf8.or(self.utf8), + } + } +} + +/// A builder for a PikeVM. +#[derive(Clone, Debug)] +pub struct Builder { + config: Config, + thompson: thompson::Builder, +} + +impl Builder { + /// Create a new PikeVM builder with its default configuration. + pub fn new() -> Builder { + Builder { + config: Config::default(), + thompson: thompson::Builder::new(), + } + } + + pub fn build(&self, pattern: &str) -> Result<PikeVM, Error> { + self.build_many(&[pattern]) + } + + pub fn build_many<P: AsRef<str>>( + &self, + patterns: &[P], + ) -> Result<PikeVM, Error> { + let nfa = self.thompson.build_many(patterns)?; + self.build_from_nfa(Arc::new(nfa)) + } + + pub fn build_from_nfa(&self, nfa: Arc<NFA>) -> Result<PikeVM, Error> { + // TODO: Check that this is correct. + // if !cfg!(all( + // feature = "dfa", + // feature = "syntax", + // feature = "unicode-perl" + // )) { + if !cfg!(feature = "syntax") { + if nfa.has_word_boundary_unicode() { + return Err(Error::unicode_word_unavailable()); + } + } + Ok(PikeVM { config: self.config, nfa }) + } + + pub fn configure(&mut self, config: Config) -> &mut Builder { + self.config = self.config.overwrite(config); + self + } + + /// Set the syntax configuration for this builder using + /// [`SyntaxConfig`](crate::SyntaxConfig). + /// + /// This permits setting things like case insensitivity, Unicode and multi + /// line mode. + /// + /// These settings only apply when constructing a PikeVM directly from a + /// pattern. 
+ pub fn syntax( + &mut self, + config: crate::util::syntax::SyntaxConfig, + ) -> &mut Builder { + self.thompson.syntax(config); + self + } + + /// Set the Thompson NFA configuration for this builder using + /// [`nfa::thompson::Config`](crate::nfa::thompson::Config). + /// + /// This permits setting things like if additional time should be spent + /// shrinking the size of the NFA. + /// + /// These settings only apply when constructing a PikeVM directly from a + /// pattern. + pub fn thompson(&mut self, config: thompson::Config) -> &mut Builder { + self.thompson.configure(config); + self + } +} + +#[derive(Clone, Debug)] +pub struct PikeVM { + config: Config, + nfa: Arc<NFA>, +} + +impl PikeVM { + pub fn new(pattern: &str) -> Result<PikeVM, Error> { + PikeVM::builder().build(pattern) + } + + pub fn new_many<P: AsRef<str>>(patterns: &[P]) -> Result<PikeVM, Error> { + PikeVM::builder().build_many(patterns) + } + + pub fn config() -> Config { + Config::new() + } + + pub fn builder() -> Builder { + Builder::new() + } + + pub fn create_cache(&self) -> Cache { + Cache::new(self.nfa()) + } + + pub fn create_captures(&self) -> Captures { + Captures::new(self.nfa()) + } + + pub fn nfa(&self) -> &Arc<NFA> { + &self.nfa + } + + pub fn find_leftmost_iter<'r, 'c, 't>( + &'r self, + cache: &'c mut Cache, + haystack: &'t [u8], + ) -> FindLeftmostMatches<'r, 'c, 't> { + FindLeftmostMatches::new(self, cache, haystack) + } + + // BREADCRUMBS: + // + // 1) Don't forget about prefilters. + // + // 2) Consider the case of using a PikeVM with an NFA that has Capture + // states, but where we don't want to track capturing groups (other than + // group 0). This potentially saves a lot of copying around and what not. I + // believe the current regex crate does this, for example. The interesting + // bit here is how to handle the case of multiple patterns... + // + // 3) Permit the caller to specify a pattern ID to run an anchored-only + // search on. + // + // 4) How to do overlapping? 
The way multi-regex support works in the regex + // crate currently is to run the PikeVM until either we reach the end of + // the haystack or when we know all regexes have matched. The latter case + // is probably quite rare, so the common case is likely that we're always + // searching the entire input. The question is: can we emulate that with + // our typical 'overlapping' APIs on DFAs? I believe we can. If so, then + // all we need to do is provide an overlapping API on the PikeVM that + // roughly matches the ones we provide on DFAs. For those APIs, the only + // thing they need over non-overlapping APIs is "caller state." For DFAs, + // the caller state is simple: it contains the last state visited and the + // last match reported. For the PikeVM (and NFAs in general), the "last + // state" is actually a *set* of NFA states. So I think what happens here + // is that we can just force the `Cache` to subsume this role. We'll still + // need some additional state to track the last match reported though. + // Because when two or more patterns match at the same location, we need a + // way to know to iterate over them. Although maybe it's not match index we + // need, but the state index of the last NFA state processed in the cache. + // Then we just pick up where we left off. There might be another match + // state, in which case, we report it. 
+ + pub fn find_leftmost_at( + &self, + cache: &mut Cache, + haystack: &[u8], + start: usize, + end: usize, + caps: &mut Captures, + ) -> Option<MultiMatch> { + let anchored = + self.config.get_anchored() || self.nfa.is_always_start_anchored(); + let mut at = start; + let mut matched_pid = None; + cache.clear(); + 'LOOP: loop { + if cache.clist.set.is_empty() { + if matched_pid.is_some() || (anchored && at > start) { + break 'LOOP; + } + // TODO: prefilter + } + if (!anchored && matched_pid.is_none()) + || cache.clist.set.is_empty() + { + self.epsilon_closure( + &mut cache.clist, + &mut caps.slots, + &mut cache.stack, + self.nfa.start_anchored(), + haystack, + at, + ); + } + for i in 0..cache.clist.set.len() { + let sid = cache.clist.set.get(i); + let pid = match self.step( + &mut cache.nlist, + &mut caps.slots, + cache.clist.caps(sid), + &mut cache.stack, + sid, + haystack, + at, + ) { + None => continue, + Some(pid) => pid, + }; + matched_pid = Some(pid); + break; + } + if at >= end { + break; + } + at += 1; + cache.swap(); + cache.nlist.set.clear(); + } + matched_pid.map(|pid| { + let slots = self.nfa.pattern_slots(pid); + let (start, end) = (slots.start, slots.start + 1); + MultiMatch::new( + pid, + caps.slots[start].unwrap(), + caps.slots[end].unwrap(), + ) + }) + } + + #[inline(always)] + fn step( + &self, + nlist: &mut Threads, + slots: &mut [Slot], + thread_caps: &mut [Slot], + stack: &mut Vec<FollowEpsilon>, + sid: StateID, + haystack: &[u8], + at: usize, + ) -> Option<PatternID> { + match *self.nfa.state(sid) { + State::Fail + | State::Look { .. } + | State::Union { .. } + | State::Capture { .. 
} => None, + State::Range { ref range } => { + if range.matches(haystack, at) { + self.epsilon_closure( + nlist, + thread_caps, + stack, + range.next, + haystack, + at + 1, + ); + } + None + } + State::Sparse(ref sparse) => { + if let Some(next) = sparse.matches(haystack, at) { + self.epsilon_closure( + nlist, + thread_caps, + stack, + next, + haystack, + at + 1, + ); + } + None + } + State::Match { id } => { + slots.copy_from_slice(thread_caps); + Some(id) + } + } + } + + #[inline(always)] + fn epsilon_closure( + &self, + nlist: &mut Threads, + thread_caps: &mut [Slot], + stack: &mut Vec<FollowEpsilon>, + sid: StateID, + haystack: &[u8], + at: usize, + ) { + stack.push(FollowEpsilon::StateID(sid)); + while let Some(frame) = stack.pop() { + match frame { + FollowEpsilon::StateID(sid) => { + self.epsilon_closure_step( + nlist, + thread_caps, + stack, + sid, + haystack, + at, + ); + } + FollowEpsilon::Capture { slot, pos } => { + thread_caps[slot] = pos; + } + } + } + } + + #[inline(always)] + fn epsilon_closure_step( + &self, + nlist: &mut Threads, + thread_caps: &mut [Slot], + stack: &mut Vec<FollowEpsilon>, + mut sid: StateID, + haystack: &[u8], + at: usize, + ) { + loop { + if !nlist.set.insert(sid) { + return; + } + match *self.nfa.state(sid) { + State::Fail + | State::Range { .. } + | State::Sparse { .. } + | State::Match { .. } => { + let t = &mut nlist.caps(sid); + t.copy_from_slice(thread_caps); + return; + } + State::Look { look, next } => { + if !look.matches(haystack, at) { + return; + } + sid = next; + } + State::Union { ref alternates } => { + sid = match alternates.get(0) { + None => return, + Some(&sid) => sid, + }; + stack.extend( + alternates[1..] 
+ .iter() + .copied() + .rev() + .map(FollowEpsilon::StateID), + ); + } + State::Capture { next, slot } => { + if slot < thread_caps.len() { + stack.push(FollowEpsilon::Capture { + slot, + pos: thread_caps[slot], + }); + thread_caps[slot] = Some(at); + } + sid = next; + } + } + } + } +} + +/// An iterator over all non-overlapping leftmost matches for a particular +/// infallible search. +/// +/// The iterator yields a [`MultiMatch`] value until no more matches could be +/// found. If the underlying search returns an error, then this panics. +/// +/// The lifetime variables are as follows: +/// +/// * `'r` is the lifetime of the regular expression itself. +/// * `'c` is the lifetime of the mutable cache used during search. +/// * `'t` is the lifetime of the text being searched. +#[derive(Debug)] +pub struct FindLeftmostMatches<'r, 'c, 't> { + vm: &'r PikeVM, + cache: &'c mut Cache, + // scanner: Option<prefilter::Scanner<'r>>, + text: &'t [u8], + last_end: usize, + last_match: Option<usize>, +} + +impl<'r, 'c, 't> FindLeftmostMatches<'r, 'c, 't> { + fn new( + vm: &'r PikeVM, + cache: &'c mut Cache, + text: &'t [u8], + ) -> FindLeftmostMatches<'r, 'c, 't> { + FindLeftmostMatches { vm, cache, text, last_end: 0, last_match: None } + } +} + +impl<'r, 'c, 't> Iterator for FindLeftmostMatches<'r, 'c, 't> { + // type Item = Captures; + type Item = MultiMatch; + + // fn next(&mut self) -> Option<Captures> { + fn next(&mut self) -> Option<MultiMatch> { + if self.last_end > self.text.len() { + return None; + } + let mut caps = self.vm.create_captures(); + let m = self.vm.find_leftmost_at( + self.cache, + self.text, + self.last_end, + self.text.len(), + &mut caps, + )?; + if m.is_empty() { + // This is an empty match. To ensure we make progress, start + // the next search at the smallest possible starting position + // of the next match following this one. 
+ self.last_end = if self.vm.config.get_utf8() { + crate::util::next_utf8(self.text, m.end()) + } else { + m.end() + 1 + }; + // Don't accept empty matches immediately following a match. + // Just move on to the next match. + if Some(m.end()) == self.last_match { + return self.next(); + } + } else { + self.last_end = m.end(); + } + self.last_match = Some(m.end()); + Some(m) + } +} + +#[derive(Clone, Debug)] +pub struct Captures { + slots: Vec<Slot>, +} + +impl Captures { + pub fn new(nfa: &NFA) -> Captures { + Captures { slots: vec![None; nfa.capture_slot_len()] } + } +} + +#[derive(Clone, Debug)] +pub struct Cache { + stack: Vec<FollowEpsilon>, + clist: Threads, + nlist: Threads, +} + +type Slot = Option<usize>; + +#[derive(Clone, Debug)] +struct Threads { + set: SparseSet, + caps: Vec<Slot>, + slots_per_thread: usize, +} + +#[derive(Clone, Debug)] +enum FollowEpsilon { + StateID(StateID), + Capture { slot: usize, pos: Slot }, +} + +impl Cache { + pub fn new(nfa: &NFA) -> Cache { + Cache { + stack: vec![], + clist: Threads::new(nfa), + nlist: Threads::new(nfa), + } + } + + fn clear(&mut self) { + self.stack.clear(); + self.clist.set.clear(); + self.nlist.set.clear(); + } + + fn swap(&mut self) { + core::mem::swap(&mut self.clist, &mut self.nlist); + } +} + +impl Threads { + fn new(nfa: &NFA) -> Threads { + let mut threads = Threads { + set: SparseSet::new(0), + caps: vec![], + slots_per_thread: 0, + }; + threads.resize(nfa); + threads + } + + fn resize(&mut self, nfa: &NFA) { + if nfa.states().len() == self.set.capacity() { + return; + } + self.slots_per_thread = nfa.capture_slot_len(); + self.set.resize(nfa.states().len()); + self.caps.resize(self.slots_per_thread * nfa.states().len(), None); + } + + fn caps(&mut self, sid: StateID) -> &mut [Slot] { + let i = sid.as_usize() * self.slots_per_thread; + &mut self.caps[i..i + self.slots_per_thread] + } +} diff --git a/vendor/regex-automata/src/nfa/range_trie.rs b/vendor/regex-automata/src/nfa/thompson/range_trie.rs 
index 50767c7c6..92f36ce3a 100644 --- a/vendor/regex-automata/src/nfa/range_trie.rs +++ b/vendor/regex-automata/src/nfa/thompson/range_trie.rs @@ -60,7 +60,7 @@ // Another approach, however, is to reuse an algorithm for constructing a // *minimal* DFA from a sorted sequence of inputs. I don't want to go into // the full details here, but I explain it in more depth in my blog post on -// FSTs[1]. Note that the algorithm not invented by me, but was published +// FSTs[1]. Note that the algorithm was not invented by me, but was published // in paper by Daciuk et al. in 2000 called "Incremental Construction of // MinimalAcyclic Finite-State Automata." Like the suffix cache approach above, // it is also possible to control the amount of extra memory one uses, although @@ -76,10 +76,11 @@ // [BC-BF][80-BF] // [BC-BF][90-BF] // -// Then Daciuk's algorithm also would not work, since there is nothing to -// handle the fact that the ranges overlap. They would need to be split apart. -// Thankfully, Thompson's algorithm for producing byte ranges for Unicode -// codepoint ranges meets both of our requirements. +// Then Daciuk's algorithm would not work, since there is nothing to handle the +// fact that the ranges overlap. They would need to be split apart. Thankfully, +// Thompson's algorithm for producing byte ranges for Unicode codepoint ranges +// meets both of our requirements. (A proof for this eludes me, but it appears +// true.) // // ... however, we would also like to be able to compile UTF-8 automata in // reverse. 
We want this because in order to find the starting location of a @@ -139,11 +140,9 @@ // [1] - https://blog.burntsushi.net/transducers/ // [2] - https://www.mitpressjournals.org/doi/pdfplus/10.1162/089120100561601 -use std::cell::RefCell; -use std::fmt; -use std::mem; -use std::ops::RangeInclusive; -use std::u32; +use core::{cell::RefCell, fmt, mem, ops::RangeInclusive, u32}; + +use alloc::{format, string::String, vec, vec::Vec}; use regex_syntax::utf8::Utf8Range; @@ -188,8 +187,8 @@ pub struct RangeTrie { /// particular order. states: Vec<State>, /// A free-list of states. When a range trie is cleared, all of its states - /// are added to list. Creating a new state reuses states from this list - /// before allocating a new one. + /// are added to this list. Creating a new state reuses states from this + /// list before allocating a new one. free: Vec<State>, /// A stack for traversing this trie to yield sequences of byte ranges in /// lexicographic order. @@ -197,7 +196,7 @@ pub struct RangeTrie { /// A bufer that stores the current sequence during iteration. iter_ranges: RefCell<Vec<Utf8Range>>, /// A stack used for traversing the trie in order to (deeply) duplicate - /// a state. + /// a state. States are recursively duplicated when ranges are split. dupe_stack: Vec<NextDupe>, /// A stack used for traversing the trie during insertion of a new /// sequence of byte ranges. @@ -249,7 +248,10 @@ impl RangeTrie { /// Iterate over all of the sequences of byte ranges in this trie, and /// call the provided function for each sequence. Iteration occurs in /// lexicographic order. - pub fn iter<F: FnMut(&[Utf8Range])>(&self, mut f: F) { + pub fn iter<E, F: FnMut(&[Utf8Range]) -> Result<(), E>>( + &self, + mut f: F, + ) -> Result<(), E> { let mut stack = self.iter_stack.borrow_mut(); stack.clear(); let mut ranges = self.iter_ranges.borrow_mut(); @@ -264,7 +266,7 @@ impl RangeTrie { // here, but at the cost of more stack pushes. 
loop { let state = self.state(state_id); - // If we're visited all transitions in this state, then pop + // If we've visited all transitions in this state, then pop // back to the parent state. if tidx >= state.transitions.len() { ranges.pop(); @@ -274,7 +276,7 @@ impl RangeTrie { let t = &state.transitions[tidx]; ranges.push(t.range); if t.next_id == FINAL { - f(&ranges); + f(&ranges)?; ranges.pop(); tidx += 1; } else { @@ -288,6 +290,7 @@ impl RangeTrie { } } } + Ok(()) } /// Inserts a new sequence of ranges into this trie. @@ -455,8 +458,8 @@ impl RangeTrie { /// the given state ID and the returned state ID share nothing. /// /// This is useful during range trie insertion when a new range overlaps - /// with an existing range that is bigger than the new one. The part of - /// the existing range that does *not* overlap with the new one is that + /// with an existing range that is bigger than the new one. The part + /// of the existing range that does *not* overlap with the new one is /// duplicated so that adding the new range to the overlap doesn't disturb /// the non-overlapping portion. /// @@ -594,7 +597,7 @@ impl State { // Benchmarks suggest that binary search is just a bit faster than // straight linear search. 
Specifically when using the debug tool: // - // hyperfine "regex-automata-debug debug -acqr '\w{40} ecurB'" + // hyperfine "regex-cli debug nfa thompson --quiet --reverse '\w{90} ecurB'" binary_search(&self.transitions, |t| range.start <= t.range.end) } @@ -865,7 +868,7 @@ impl Split { } impl fmt::Debug for RangeTrie { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { writeln!(f, "")?; for (i, state) in self.states.iter().enumerate() { let status = if i == FINAL as usize { '*' } else { ' ' }; @@ -876,7 +879,7 @@ impl fmt::Debug for RangeTrie { } impl fmt::Debug for State { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let rs = self .transitions .iter() @@ -888,7 +891,7 @@ impl fmt::Debug for State { } impl fmt::Debug for Transition { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { if self.range.start == self.range.end { write!(f, "{:02X} => {:02X}", self.range.start, self.next_id) } else { @@ -908,7 +911,7 @@ fn intersects(r1: Utf8Range, r2: Utf8Range) -> bool { #[cfg(test)] mod tests { - use std::ops::RangeInclusive; + use core::ops::RangeInclusive; use regex_syntax::utf8::Utf8Range; |