From 9918693037dce8aa4bb6f08741b6812923486c18 Mon Sep 17 00:00:00 2001
From: Daniel Baumann <daniel.baumann@progress-linux.org>
Date: Wed, 19 Jun 2024 11:26:03 +0200
Subject: Merging upstream version 1.76.0+dfsg1.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
---
 vendor/regex-automata/src/dfa/accel.rs     |  13 +-
 vendor/regex-automata/src/dfa/automaton.rs | 190 ++++++++++++++++---
 vendor/regex-automata/src/dfa/dense.rs     | 138 +++++++-------
 vendor/regex-automata/src/dfa/mod.rs       |   2 +-
 vendor/regex-automata/src/dfa/onepass.rs   |  18 +-
 vendor/regex-automata/src/dfa/regex.rs     |   2 +-
 vendor/regex-automata/src/dfa/search.rs    |  10 -
 vendor/regex-automata/src/dfa/sparse.rs    | 293 ++++++++++++++---------------
 8 files changed, 396 insertions(+), 270 deletions(-)

(limited to 'vendor/regex-automata/src/dfa')

diff --git a/vendor/regex-automata/src/dfa/accel.rs b/vendor/regex-automata/src/dfa/accel.rs
index 5ea2423dd..c0ba18ea8 100644
--- a/vendor/regex-automata/src/dfa/accel.rs
+++ b/vendor/regex-automata/src/dfa/accel.rs
@@ -6,15 +6,16 @@
 // non-Unicode regexes. For example, consider '(?-u)[^a]+a'. We can look at its
 // DFA with regex-cli:
 //
-//     $ regex-cli debug dfa dense '(?-u)[^a]+a' -BbC
-//     dense::DFA(
+//     $ regex-cli debug dense dfa -p '(?-u)[^a]+a' -BbC --no-table
 //     D 000000:
 //     Q 000001:
 //      *000002:
-//     A 000003: \x00-` => 3, a => 5, b-\xFF => 3
-//      >000004: \x00-` => 3, a => 4, b-\xFF => 3
-//       000005: \x00-\xFF => 2, EOI => 2
-//     )
+//     A 000003: \x00-` => 3, a => 8, b-\xFF => 3
+//     A 000004: \x00-` => 4, a => 7, b-\xFF => 4
+//       000005: \x00-` => 4, b-\xFF => 4
+//       000006: \x00-` => 3, a => 6, b-\xFF => 3
+//       000007: \x00-\xFF => 2, EOI => 2
+//       000008: \x00-\xFF => 2, EOI => 2
 //
 // In particular, state 3 is accelerated (shown via the 'A' indicator) since
 // the only way to leave that state once entered is to see an 'a' byte. If
diff --git a/vendor/regex-automata/src/dfa/automaton.rs b/vendor/regex-automata/src/dfa/automaton.rs
index 7e2be9a15..fcfcf2997 100644
--- a/vendor/regex-automata/src/dfa/automaton.rs
+++ b/vendor/regex-automata/src/dfa/automaton.rs
@@ -7,6 +7,7 @@ use crate::{
         prefilter::Prefilter,
         primitives::{PatternID, StateID},
         search::{Anchored, HalfMatch, Input, MatchError},
+        start,
     },
 };
 
@@ -226,8 +227,8 @@ pub unsafe trait Automaton {
     /// ```
     fn next_eoi_state(&self, current: StateID) -> StateID;
 
-    /// Return the ID of the start state for this lazy DFA when executing a
-    /// forward search.
+    /// Return the ID of the start state for this DFA for the given starting
+    /// configuration.
     ///
     /// Unlike typical DFA implementations, the start state for DFAs in this
     /// crate is dependent on a few different factors:
@@ -235,12 +236,41 @@ pub unsafe trait Automaton {
     /// * The [`Anchored`] mode of the search. Unanchored, anchored and
     /// anchored searches for a specific [`PatternID`] all use different start
     /// states.
-    /// * The position at which the search begins, via [`Input::start`]. This
-    /// and the byte immediately preceding the start of the search (if one
-    /// exists) influence which look-behind assertions are true at the start
-    /// of the search. This in turn influences which start state is selected.
-    /// * Whether the search is a forward or reverse search. This routine can
-    /// only be used for forward searches.
+    /// * Whether a "look-behind" byte exists. For example, the `^` anchor
+    /// matches if and only if there is no look-behind byte.
+    /// * The specific value of that look-behind byte. For example, a `(?m:^)`
+    /// assertion only matches when there is either no look-behind byte, or
+    /// when the look-behind byte is a line terminator.
+    ///
+    /// The [starting configuration](start::Config) provides the above
+    /// information.
+    ///
+    /// This routine can be used for either forward or reverse searches.
+    /// Although, as a convenience, if you have an [`Input`], then it may
+    /// be more succinct to use [`Automaton::start_state_forward`] or
+    /// [`Automaton::start_state_reverse`]. Note, for example, that the
+    /// convenience routines return a [`MatchError`] on failure where as this
+    /// routine returns a [`StartError`].
+    ///
+    /// # Errors
+    ///
+    /// This may return a [`StartError`] if the search needs to give up when
+    /// determining the start state (for example, if it sees a "quit" byte).
+    /// This can also return an error if the given configuration contains an
+    /// unsupported [`Anchored`] configuration.
+    fn start_state(
+        &self,
+        config: &start::Config,
+    ) -> Result<StateID, StartError>;
+
+    /// Return the ID of the start state for this DFA when executing a forward
+    /// search.
+    ///
+    /// This is a convenience routine for calling [`Automaton::start_state`]
+    /// that converts the given [`Input`] to a [start
+    /// configuration](start::Config). Additionally, if an error occurs, it is
+    /// converted from a [`StartError`] to a [`MatchError`] using the offset
+    /// information in the given [`Input`].
     ///
     /// # Errors
     ///
@@ -251,23 +281,30 @@ pub unsafe trait Automaton {
     fn start_state_forward(
         &self,
         input: &Input<'_>,
-    ) -> Result<StateID, MatchError>;
+    ) -> Result<StateID, MatchError> {
+        let config = start::Config::from_input_forward(input);
+        self.start_state(&config).map_err(|err| match err {
+            StartError::Quit { byte } => {
+                let offset = input
+                    .start()
+                    .checked_sub(1)
+                    .expect("no quit in start without look-behind");
+                MatchError::quit(byte, offset)
+            }
+            StartError::UnsupportedAnchored { mode } => {
+                MatchError::unsupported_anchored(mode)
+            }
+        })
+    }
 
-    /// Return the ID of the start state for this lazy DFA when executing a
-    /// reverse search.
+    /// Return the ID of the start state for this DFA when executing a reverse
+    /// search.
     ///
-    /// Unlike typical DFA implementations, the start state for DFAs in this
-    /// crate is dependent on a few different factors:
-    ///
-    /// * The [`Anchored`] mode of the search. Unanchored, anchored and
-    /// anchored searches for a specific [`PatternID`] all use different start
-    /// states.
-    /// * The position at which the search begins, via [`Input::start`]. This
-    /// and the byte immediately preceding the start of the search (if one
-    /// exists) influence which look-behind assertions are true at the start
-    /// of the search. This in turn influences which start state is selected.
-    /// * Whether the search is a forward or reverse search. This routine can
-    /// only be used for reverse searches.
+    /// This is a convenience routine for calling [`Automaton::start_state`]
+    /// that converts the given [`Input`] to a [start
+    /// configuration](start::Config). Additionally, if an error occurs, it is
+    /// converted from a [`StartError`] to a [`MatchError`] using the offset
+    /// information in the given [`Input`].
     ///
     /// # Errors
     ///
@@ -278,7 +315,18 @@ pub unsafe trait Automaton {
     fn start_state_reverse(
         &self,
         input: &Input<'_>,
-    ) -> Result<StateID, MatchError>;
+    ) -> Result<StateID, MatchError> {
+        let config = start::Config::from_input_reverse(input);
+        self.start_state(&config).map_err(|err| match err {
+            StartError::Quit { byte } => {
+                let offset = input.end();
+                MatchError::quit(byte, offset)
+            }
+            StartError::UnsupportedAnchored { mode } => {
+                MatchError::unsupported_anchored(mode)
+            }
+        })
+    }
 
     /// If this DFA has a universal starting state for the given anchor mode
     /// and the DFA supports universal starting states, then this returns that
@@ -1084,7 +1132,7 @@ pub unsafe trait Automaton {
     /// // implementation defined.
     /// //
     /// // N.B. We get '3' by inspecting the state machine using 'regex-cli'.
-    /// // e.g., try `regex-cli debug dfa dense '[^abc]+a' -BbUC`.
+    /// // e.g., try `regex-cli debug dense dfa -p '[^abc]+a' -BbUC`.
     /// let id = StateID::new(3 * dfa.stride()).unwrap();
     /// let accelerator = dfa.accelerator(id);
     /// // The `[^abc]+` sub-expression permits [a, b, c] to be accelerated.
@@ -1798,6 +1846,14 @@ unsafe impl<'a, A: Automaton + ?Sized> Automaton for &'a A {
         (**self).next_eoi_state(current)
     }
 
+    #[inline]
+    fn start_state(
+        &self,
+        config: &start::Config,
+    ) -> Result<StateID, StartError> {
+        (**self).start_state(config)
+    }
+
     #[inline]
     fn start_state_forward(
         &self,
@@ -2015,6 +2071,90 @@ impl OverlappingState {
     }
 }
 
+/// An error that can occur when computing the start state for a search.
+///
+/// Computing a start state can fail for a few reasons, either based on
+/// incorrect configuration or even based on whether the look-behind byte
+/// triggers a quit state. Typically one does not need to handle this error
+/// if you're using [`Automaton::start_state_forward`] (or its reverse
+/// counterpart), as that routine automatically converts `StartError` to a
+/// [`MatchError`] for you.
+///
+/// This error may be returned by the [`Automaton::start_state`] routine.
+///
+/// This error implements the `std::error::Error` trait when the `std` feature
+/// is enabled.
+///
+/// This error is marked as non-exhaustive. New variants may be added in a
+/// semver compatible release.
+#[non_exhaustive]
+#[derive(Clone, Debug)]
+pub enum StartError {
+    /// An error that occurs when a starting configuration's look-behind byte
+    /// is in this DFA's quit set.
+    Quit {
+        /// The quit byte that was found.
+        byte: u8,
+    },
+    /// An error that occurs when the caller requests an anchored mode that
+    /// isn't supported by the DFA.
+    UnsupportedAnchored {
+        /// The anchored mode given that is unsupported.
+        mode: Anchored,
+    },
+}
+
+impl StartError {
+    pub(crate) fn quit(byte: u8) -> StartError {
+        StartError::Quit { byte }
+    }
+
+    pub(crate) fn unsupported_anchored(mode: Anchored) -> StartError {
+        StartError::UnsupportedAnchored { mode }
+    }
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for StartError {}
+
+impl core::fmt::Display for StartError {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        match *self {
+            StartError::Quit { byte } => write!(
+                f,
+                "error computing start state because the look-behind byte \
+                 {:?} triggered a quit state",
+                crate::util::escape::DebugByte(byte),
+            ),
+            StartError::UnsupportedAnchored { mode: Anchored::Yes } => {
+                write!(
+                    f,
+                    "error computing start state because \
+                     anchored searches are not supported or enabled"
+                )
+            }
+            StartError::UnsupportedAnchored { mode: Anchored::No } => {
+                write!(
+                    f,
+                    "error computing start state because \
+                     unanchored searches are not supported or enabled"
+                )
+            }
+            StartError::UnsupportedAnchored {
+                mode: Anchored::Pattern(pid),
+            } => {
+                write!(
+                    f,
+                    "error computing start state because \
+                     anchored searches for a specific pattern ({}) \
+                     are not supported or enabled",
+                    pid.as_usize(),
+                )
+            }
+        }
+    }
+}
+
 /// Runs the given overlapping `search` function (forwards or backwards) until
 /// a match is found whose offset does not split a codepoint.
 ///
diff --git a/vendor/regex-automata/src/dfa/dense.rs b/vendor/regex-automata/src/dfa/dense.rs
index 6da865f97..fd96bc878 100644
--- a/vendor/regex-automata/src/dfa/dense.rs
+++ b/vendor/regex-automata/src/dfa/dense.rs
@@ -30,7 +30,7 @@ use crate::{
 use crate::{
     dfa::{
         accel::Accels,
-        automaton::{fmt_state_indicator, Automaton},
+        automaton::{fmt_state_indicator, Automaton, StartError},
         special::Special,
         start::StartKind,
         DEAD,
@@ -40,8 +40,8 @@ use crate::{
         int::{Pointer, Usize},
         prefilter::Prefilter,
         primitives::{PatternID, StateID},
-        search::{Anchored, Input, MatchError},
-        start::{Start, StartByteMap},
+        search::Anchored,
+        start::{self, Start, StartByteMap},
         wire::{self, DeserializeError, Endian, SerializeError},
     },
 };
@@ -66,8 +66,9 @@ const VERSION: u32 = 2;
 ///
 /// The default configuration guarantees that a search will never return
 /// a "quit" error, although it is possible for a search to fail if
-/// [`Config::starts_for_each_pattern`] wasn't enabled (which it is not by
-/// default) and an [`Anchored::Pattern`] mode is requested via [`Input`].
+/// [`Config::starts_for_each_pattern`] wasn't enabled (which it is
+/// not by default) and an [`Anchored::Pattern`] mode is requested via
+/// [`Input`](crate::Input).
 #[cfg(feature = "dfa-build")]
 #[derive(Clone, Debug, Default)]
 pub struct Config {
@@ -113,8 +114,7 @@ impl Config {
     /// make searching slower than it otherwise would be if the transitions
     /// that leave accelerated states are traversed frequently.
     ///
-    /// See [`Automaton::accelerator`](crate::dfa::Automaton::accelerator) for
-    /// an example.
+    /// See [`Automaton::accelerator`] for an example.
     ///
     /// This is enabled by default.
     pub fn accelerate(mut self, yes: bool) -> Config {
@@ -882,20 +882,20 @@ impl Config {
     /// # if !cfg!(target_pointer_width = "64") { return Ok(()); } // see #1039
     /// use regex_automata::{dfa::{dense, Automaton}, Input};
     ///
-    /// // 600KB isn't enough!
+    /// // 700KB isn't enough!
     /// dense::Builder::new()
     ///     .configure(dense::Config::new()
-    ///         .determinize_size_limit(Some(600_000))
+    ///         .determinize_size_limit(Some(700_000))
     ///     )
     ///     .build(r"\w{20}")
     ///     .unwrap_err();
     ///
-    /// // ... but 700KB probably is!
+    /// // ... but 800KB probably is!
     /// // (Note that auxiliary storage sizes aren't necessarily stable between
     /// // releases.)
     /// let dfa = dense::Builder::new()
     ///     .configure(dense::Config::new()
-    ///         .determinize_size_limit(Some(700_000))
+    ///         .determinize_size_limit(Some(800_000))
     ///     )
     ///     .build(r"\w{20}")?;
     /// let haystack = "A".repeat(20).into_bytes();
@@ -1228,13 +1228,14 @@ impl Builder {
         } else {
             let mut set = nfa.byte_class_set().clone();
             // It is important to distinguish any "quit" bytes from all other
-            // bytes. Otherwise, a non-quit byte may end up in the same class
-            // as a quit byte, and thus cause the DFA stop when it shouldn't.
+            // bytes. Otherwise, a non-quit byte may end up in the same
+            // class as a quit byte, and thus cause the DFA to stop when it
+            // shouldn't.
             //
             // Test case:
             //
-            //   regex-cli find hybrid regex -w @conn.json.1000x.log \
-            //     '^#' '\b10\.55\.182\.100\b'
+            //   regex-cli find match dense --unicode-word-boundary \
+            //     -p '^#' -p '\b10\.55\.182\.100\b' -y @conn.json.1000x.log
             if !quitset.is_empty() {
                 set.add_set(&quitset);
             }
@@ -2345,6 +2346,24 @@ impl<'a> DFA<&'a [u32]> {
         dfa.accels.validate()?;
         // N.B. dfa.special doesn't have a way to do unchecked deserialization,
         // so it has already been validated.
+        for state in dfa.states() {
+            // If the state is an accel state, then it must have a non-empty
+            // accelerator.
+            if dfa.is_accel_state(state.id()) {
+                let index = dfa.accelerator_index(state.id());
+                if index >= dfa.accels.len() {
+                    return Err(DeserializeError::generic(
+                        "found DFA state with invalid accelerator index",
+                    ));
+                }
+                let needles = dfa.accels.needles(index);
+                if !(1 <= needles.len() && needles.len() <= 3) {
+                    return Err(DeserializeError::generic(
+                        "accelerator needles has invalid length",
+                    ));
+                }
+            }
+        }
         Ok((dfa, nread))
     }
 
@@ -2885,31 +2904,33 @@ impl OwnedDFA {
     fn set_universal_starts(&mut self) {
         assert_eq!(6, Start::len(), "expected 6 start configurations");
 
-        let start_id = |dfa: &mut OwnedDFA, inp: &Input<'_>, start: Start| {
+        let start_id = |dfa: &mut OwnedDFA,
+                        anchored: Anchored,
+                        start: Start| {
             // This OK because we only call 'start' under conditions
             // in which we know it will succeed.
-            dfa.st.start(inp, start).expect("valid Input configuration")
+            dfa.st.start(anchored, start).expect("valid Input configuration")
         };
         if self.start_kind().has_unanchored() {
-            let inp = Input::new("").anchored(Anchored::No);
-            let sid = start_id(self, &inp, Start::NonWordByte);
-            if sid == start_id(self, &inp, Start::WordByte)
-                && sid == start_id(self, &inp, Start::Text)
-                && sid == start_id(self, &inp, Start::LineLF)
-                && sid == start_id(self, &inp, Start::LineCR)
-                && sid == start_id(self, &inp, Start::CustomLineTerminator)
+            let anchor = Anchored::No;
+            let sid = start_id(self, anchor, Start::NonWordByte);
+            if sid == start_id(self, anchor, Start::WordByte)
+                && sid == start_id(self, anchor, Start::Text)
+                && sid == start_id(self, anchor, Start::LineLF)
+                && sid == start_id(self, anchor, Start::LineCR)
+                && sid == start_id(self, anchor, Start::CustomLineTerminator)
             {
                 self.st.universal_start_unanchored = Some(sid);
             }
         }
         if self.start_kind().has_anchored() {
-            let inp = Input::new("").anchored(Anchored::Yes);
-            let sid = start_id(self, &inp, Start::NonWordByte);
-            if sid == start_id(self, &inp, Start::WordByte)
-                && sid == start_id(self, &inp, Start::Text)
-                && sid == start_id(self, &inp, Start::LineLF)
-                && sid == start_id(self, &inp, Start::LineCR)
-                && sid == start_id(self, &inp, Start::CustomLineTerminator)
+            let anchor = Anchored::Yes;
+            let sid = start_id(self, anchor, Start::NonWordByte);
+            if sid == start_id(self, anchor, Start::WordByte)
+                && sid == start_id(self, anchor, Start::Text)
+                && sid == start_id(self, anchor, Start::LineLF)
+                && sid == start_id(self, anchor, Start::LineCR)
+                && sid == start_id(self, anchor, Start::CustomLineTerminator)
             {
                 self.st.universal_start_anchored = Some(sid);
             }
@@ -3216,35 +3237,21 @@ unsafe impl<T: AsRef<[u32]>> Automaton for DFA<T> {
     }
 
     #[cfg_attr(feature = "perf-inline", inline(always))]
-    fn start_state_forward(
-        &self,
-        input: &Input<'_>,
-    ) -> Result<StateID, MatchError> {
-        if !self.quitset.is_empty() && input.start() > 0 {
-            let offset = input.start() - 1;
-            let byte = input.haystack()[offset];
-            if self.quitset.contains(byte) {
-                return Err(MatchError::quit(byte, offset));
-            }
-        }
-        let start = self.st.start_map.fwd(&input);
-        self.st.start(input, start)
-    }
-
-    #[cfg_attr(feature = "perf-inline", inline(always))]
-    fn start_state_reverse(
+    fn start_state(
         &self,
-        input: &Input<'_>,
-    ) -> Result<StateID, MatchError> {
-        if !self.quitset.is_empty() && input.end() < input.haystack().len() {
-            let offset = input.end();
-            let byte = input.haystack()[offset];
-            if self.quitset.contains(byte) {
-                return Err(MatchError::quit(byte, offset));
+        config: &start::Config,
+    ) -> Result<StateID, StartError> {
+        let anchored = config.get_anchored();
+        let start = match config.get_look_behind() {
+            None => Start::Text,
+            Some(byte) => {
+                if !self.quitset.is_empty() && self.quitset.contains(byte) {
+                    return Err(StartError::quit(byte));
+                }
+                self.st.start_map.get(byte)
             }
-        }
-        let start = self.st.start_map.rev(&input);
-        self.st.start(input, start)
+        };
+        self.st.start(anchored, start)
     }
 
     #[cfg_attr(feature = "perf-inline", inline(always))]
@@ -4180,28 +4187,27 @@ impl<T: AsRef<[u32]>> StartTable<T> {
     #[cfg_attr(feature = "perf-inline", inline(always))]
     fn start(
         &self,
-        input: &Input<'_>,
+        anchored: Anchored,
         start: Start,
-    ) -> Result<StateID, MatchError> {
+    ) -> Result<StateID, StartError> {
         let start_index = start.as_usize();
-        let mode = input.get_anchored();
-        let index = match mode {
+        let index = match anchored {
             Anchored::No => {
                 if !self.kind.has_unanchored() {
-                    return Err(MatchError::unsupported_anchored(mode));
+                    return Err(StartError::unsupported_anchored(anchored));
                 }
                 start_index
             }
             Anchored::Yes => {
                 if !self.kind.has_anchored() {
-                    return Err(MatchError::unsupported_anchored(mode));
+                    return Err(StartError::unsupported_anchored(anchored));
                 }
                 self.stride + start_index
             }
             Anchored::Pattern(pid) => {
                 let len = match self.pattern_len {
                     None => {
-                        return Err(MatchError::unsupported_anchored(mode))
+                        return Err(StartError::unsupported_anchored(anchored))
                     }
                     Some(len) => len,
                 };
@@ -5086,6 +5092,8 @@ impl core::fmt::Display for BuildError {
 
 #[cfg(all(test, feature = "syntax", feature = "dfa-build"))]
 mod tests {
+    use crate::{Input, MatchError};
+
     use super::*;
 
     #[test]
diff --git a/vendor/regex-automata/src/dfa/mod.rs b/vendor/regex-automata/src/dfa/mod.rs
index 4bb870435..fd58cac23 100644
--- a/vendor/regex-automata/src/dfa/mod.rs
+++ b/vendor/regex-automata/src/dfa/mod.rs
@@ -320,7 +320,7 @@ dramatically.
 
 #[cfg(feature = "dfa-search")]
 pub use crate::dfa::{
-    automaton::{Automaton, OverlappingState},
+    automaton::{Automaton, OverlappingState, StartError},
     start::StartKind,
 };
 
diff --git a/vendor/regex-automata/src/dfa/onepass.rs b/vendor/regex-automata/src/dfa/onepass.rs
index 44691d0c8..e62bbd383 100644
--- a/vendor/regex-automata/src/dfa/onepass.rs
+++ b/vendor/regex-automata/src/dfa/onepass.rs
@@ -2581,10 +2581,11 @@ impl Cache {
 
 /// Represents a single transition in a one-pass DFA.
 ///
-/// The high 24 bits corresponds to the state ID. The low 48 bits corresponds
-/// to the transition epsilons, which contains the slots that should be saved
-/// when this transition is followed and the conditional epsilon transitions
-/// that must be satisfied in order to follow this transition.
+/// The high 21 bits corresponds to the state ID. The bit following corresponds
+/// to the special "match wins" flag. The remaining low 42 bits corresponds to
+/// the transition epsilons, which contains the slots that should be saved when
+/// this transition is followed and the conditional epsilon transitions that
+/// must be satisfied in order to follow this transition.
 #[derive(Clone, Copy, Eq, PartialEq)]
 struct Transition(u64);
 
@@ -2741,7 +2742,7 @@ impl PatternEpsilons {
     fn set_epsilons(self, epsilons: Epsilons) -> PatternEpsilons {
         PatternEpsilons(
             (self.0 & PatternEpsilons::PATTERN_ID_MASK)
-                | u64::from(epsilons.0),
+                | (u64::from(epsilons.0) & PatternEpsilons::EPSILONS_MASK),
         )
     }
 }
@@ -2814,12 +2815,15 @@ impl Epsilons {
 
     /// Return the set of look-around assertions in these epsilon transitions.
     fn looks(self) -> LookSet {
-        LookSet { bits: (self.0 & Epsilons::LOOK_MASK).low_u16() }
+        LookSet { bits: (self.0 & Epsilons::LOOK_MASK).low_u32() }
     }
 
     /// Set the look-around assertions on these epsilon transitions.
     fn set_looks(self, look_set: LookSet) -> Epsilons {
-        Epsilons((self.0 & Epsilons::SLOT_MASK) | u64::from(look_set.bits))
+        Epsilons(
+            (self.0 & Epsilons::SLOT_MASK)
+                | (u64::from(look_set.bits) & Epsilons::LOOK_MASK),
+        )
     }
 }
 
diff --git a/vendor/regex-automata/src/dfa/regex.rs b/vendor/regex-automata/src/dfa/regex.rs
index f39c1c055..5e7e6e38a 100644
--- a/vendor/regex-automata/src/dfa/regex.rs
+++ b/vendor/regex-automata/src/dfa/regex.rs
@@ -853,7 +853,7 @@ impl Builder {
     }
 
     /// Set the dense DFA compilation configuration for this builder using
-    /// [`dense::Config`](dense::Config).
+    /// [`dense::Config`].
     ///
     /// This permits setting things like whether the underlying DFAs should
     /// be minimized.
diff --git a/vendor/regex-automata/src/dfa/search.rs b/vendor/regex-automata/src/dfa/search.rs
index 8c012a594..5a82261f9 100644
--- a/vendor/regex-automata/src/dfa/search.rs
+++ b/vendor/regex-automata/src/dfa/search.rs
@@ -176,7 +176,6 @@ fn find_fwd_imp<A: Automaton + ?Sized>(
                 // It's important that this is a debug_assert, since this can
                 // actually be tripped even if DFA::from_bytes succeeds and
                 // returns a supposedly valid DFA.
-                debug_assert!(dfa.is_quit_state(sid));
                 return Err(MatchError::quit(input.haystack()[at], at));
             }
         }
@@ -297,7 +296,6 @@ fn find_rev_imp<A: Automaton + ?Sized>(
             } else if dfa.is_dead_state(sid) {
                 return Ok(mat);
             } else {
-                debug_assert!(dfa.is_quit_state(sid));
                 return Err(MatchError::quit(input.haystack()[at], at));
             }
         }
@@ -422,7 +420,6 @@ fn find_overlapping_fwd_imp<A: Automaton + ?Sized>(
             } else if dfa.is_dead_state(sid) {
                 return Ok(());
             } else {
-                debug_assert!(dfa.is_quit_state(sid));
                 return Err(MatchError::quit(
                     input.haystack()[state.at],
                     state.at,
@@ -526,7 +523,6 @@ pub(crate) fn find_overlapping_rev<A: Automaton + ?Sized>(
             } else if dfa.is_dead_state(sid) {
                 return Ok(());
             } else {
-                debug_assert!(dfa.is_quit_state(sid));
                 return Err(MatchError::quit(
                     input.haystack()[state.at],
                     state.at,
@@ -600,9 +596,6 @@ fn eoi_fwd<A: Automaton + ?Sized>(
                 let pattern = dfa.match_pattern(*sid, 0);
                 *mat = Some(HalfMatch::new(pattern, input.haystack().len()));
             }
-            // N.B. We don't have to check 'is_quit' here because the EOI
-            // transition can never lead to a quit state.
-            debug_assert!(!dfa.is_quit_state(*sid));
         }
     }
     Ok(())
@@ -631,9 +624,6 @@ fn eoi_rev<A: Automaton + ?Sized>(
             let pattern = dfa.match_pattern(*sid, 0);
             *mat = Some(HalfMatch::new(pattern, 0));
         }
-        // N.B. We don't have to check 'is_quit' here because the EOI
-        // transition can never lead to a quit state.
-        debug_assert!(!dfa.is_quit_state(*sid));
     }
     Ok(())
 }
diff --git a/vendor/regex-automata/src/dfa/sparse.rs b/vendor/regex-automata/src/dfa/sparse.rs
index 5d8ec2340..d461e0a0f 100644
--- a/vendor/regex-automata/src/dfa/sparse.rs
+++ b/vendor/regex-automata/src/dfa/sparse.rs
@@ -3,13 +3,12 @@ Types and routines specific to sparse DFAs.
 
 This module is the home of [`sparse::DFA`](DFA).
 
-Unlike the [`dense`](super::dense) module, this module does not contain a
-builder or configuration specific for sparse DFAs. Instead, the intended
-way to build a sparse DFA is either by using a default configuration with
-its constructor [`sparse::DFA::new`](DFA::new), or by first configuring the
-construction of a dense DFA with [`dense::Builder`](super::dense::Builder)
-and then calling [`dense::DFA::to_sparse`](super::dense::DFA::to_sparse). For
-example, this configures a sparse DFA to do an overlapping search:
+Unlike the [`dense`] module, this module does not contain a builder or
+configuration specific for sparse DFAs. Instead, the intended way to build a
+sparse DFA is either by using a default configuration with its constructor
+[`sparse::DFA::new`](DFA::new), or by first configuring the construction of a
+dense DFA with [`dense::Builder`] and then calling [`dense::DFA::to_sparse`].
+For example, this configures a sparse DFA to do an overlapping search:
 
 ```
 use regex_automata::{
@@ -52,7 +51,7 @@ use alloc::{vec, vec::Vec};
 use crate::dfa::dense::{self, BuildError};
 use crate::{
     dfa::{
-        automaton::{fmt_state_indicator, Automaton},
+        automaton::{fmt_state_indicator, Automaton, StartError},
         dense::Flags,
         special::Special,
         StartKind, DEAD,
@@ -63,8 +62,8 @@ use crate::{
         int::{Pointer, Usize, U16, U32},
         prefilter::Prefilter,
         primitives::{PatternID, StateID},
-        search::{Anchored, Input, MatchError},
-        start::{Start, StartByteMap},
+        search::Anchored,
+        start::{self, Start, StartByteMap},
         wire::{self, DeserializeError, Endian, SerializeError},
     },
 };
@@ -74,18 +73,17 @@ const VERSION: u32 = 2;
 
 /// A sparse deterministic finite automaton (DFA) with variable sized states.
 ///
-/// In contrast to a [dense::DFA](crate::dfa::dense::DFA), a sparse DFA uses
-/// a more space efficient representation for its transitions. Consequently,
-/// sparse DFAs may use much less memory than dense DFAs, but this comes at a
-/// price. In particular, reading the more space efficient transitions takes
-/// more work, and consequently, searching using a sparse DFA is typically
-/// slower than a dense DFA.
+/// In contrast to a [dense::DFA], a sparse DFA uses a more space efficient
+/// representation for its transitions. Consequently, sparse DFAs may use much
+/// less memory than dense DFAs, but this comes at a price. In particular,
+/// reading the more space efficient transitions takes more work, and
+/// consequently, searching using a sparse DFA is typically slower than a dense
+/// DFA.
 ///
 /// A sparse DFA can be built using the default configuration via the
-/// [`DFA::new`] constructor. Otherwise, one can configure various aspects
-/// of a dense DFA via [`dense::Builder`](crate::dfa::dense::Builder),
-/// and then convert a dense DFA to a sparse DFA using
-/// [`dense::DFA::to_sparse`](crate::dfa::dense::DFA::to_sparse).
+/// [`DFA::new`] constructor. Otherwise, one can configure various aspects of a
+/// dense DFA via [`dense::Builder`], and then convert a dense DFA to a sparse
+/// DFA using [`dense::DFA::to_sparse`].
 ///
 /// In general, a sparse DFA supports all the same search operations as a dense
 /// DFA.
@@ -140,11 +138,9 @@ impl DFA<Vec<u8>> {
     /// Parse the given regular expression using a default configuration and
     /// return the corresponding sparse DFA.
     ///
-    /// If you want a non-default configuration, then use
-    /// the [`dense::Builder`](crate::dfa::dense::Builder)
-    /// to set your own configuration, and then call
-    /// [`dense::DFA::to_sparse`](crate::dfa::dense::DFA::to_sparse) to create
-    /// a sparse DFA.
+    /// If you want a non-default configuration, then use the
+    /// [`dense::Builder`] to set your own configuration, and then call
+    /// [`dense::DFA::to_sparse`] to create a sparse DFA.
     ///
     /// # Example
     ///
@@ -167,11 +163,9 @@ impl DFA<Vec<u8>> {
     /// Parse the given regular expressions using a default configuration and
     /// return the corresponding multi-DFA.
     ///
-    /// If you want a non-default configuration, then use
-    /// the [`dense::Builder`](crate::dfa::dense::Builder)
-    /// to set your own configuration, and then call
-    /// [`dense::DFA::to_sparse`](crate::dfa::dense::DFA::to_sparse) to create
-    /// a sparse DFA.
+    /// If you want a non-default configuration, then use the
+    /// [`dense::Builder`] to set your own configuration, and then call
+    /// [`dense::DFA::to_sparse`] to create a sparse DFA.
     ///
     /// # Example
     ///
@@ -511,10 +505,9 @@ impl<T: AsRef<[u8]>> DFA<T> {
     /// * [`DFA::from_bytes`]
     /// * [`DFA::from_bytes_unchecked`]
     ///
-    /// Note that unlike a [`dense::DFA`](crate::dfa::dense::DFA)'s
-    /// serialization methods, this does not add any initial padding to the
-    /// returned bytes. Padding isn't required for sparse DFAs since they have
-    /// no alignment requirements.
+    /// Note that unlike a [`dense::DFA`]'s serialization methods, this does
+    /// not add any initial padding to the returned bytes. Padding isn't
+    /// required for sparse DFAs since they have no alignment requirements.
     ///
     /// # Example
     ///
@@ -553,10 +546,9 @@ impl<T: AsRef<[u8]>> DFA<T> {
     /// * [`DFA::from_bytes`]
     /// * [`DFA::from_bytes_unchecked`]
     ///
-    /// Note that unlike a [`dense::DFA`](crate::dfa::dense::DFA)'s
-    /// serialization methods, this does not add any initial padding to the
-    /// returned bytes. Padding isn't required for sparse DFAs since they have
-    /// no alignment requirements.
+    /// Note that unlike a [`dense::DFA`]'s serialization methods, this does
+    /// not add any initial padding to the returned bytes. Padding isn't
+    /// required for sparse DFAs since they have no alignment requirements.
     ///
     /// # Example
     ///
@@ -595,10 +587,9 @@ impl<T: AsRef<[u8]>> DFA<T> {
     /// * [`DFA::from_bytes`]
     /// * [`DFA::from_bytes_unchecked`]
     ///
-    /// Note that unlike a [`dense::DFA`](crate::dfa::dense::DFA)'s
-    /// serialization methods, this does not add any initial padding to the
-    /// returned bytes. Padding isn't required for sparse DFAs since they have
-    /// no alignment requirements.
+    /// Note that unlike a [`dense::DFA`]'s serialization methods, this does
+    /// not add any initial padding to the returned bytes. Padding isn't
+    /// required for sparse DFAs since they have no alignment requirements.
     ///
     /// Generally speaking, native endian format should only be used when
     /// you know that the target you're compiling the DFA for matches the
@@ -903,9 +894,9 @@ impl<'a> DFA<&'a [u8]> {
     ///
     /// If any of the above are not true, then an error will be returned.
     ///
-    /// Note that unlike deserializing a
-    /// [`dense::DFA`](crate::dfa::dense::DFA), deserializing a sparse DFA has
-    /// no alignment requirements. That is, an alignment of `1` is valid.
+    /// Note that unlike deserializing a [`dense::DFA`], deserializing a sparse
+    /// DFA has no alignment requirements. That is, an alignment of `1` is
+    /// valid.
     ///
     /// # Panics
     ///
@@ -1001,8 +992,8 @@ impl<'a> DFA<&'a [u8]> {
         // (by trying to decode every state) and start state ID list below. If
         // either validation fails, then we return an error.
         let (dfa, nread) = unsafe { DFA::from_bytes_unchecked(slice)? };
-        dfa.tt.validate(&dfa.special)?;
-        dfa.st.validate(&dfa.special, &dfa.tt)?;
+        let seen = dfa.tt.validate(&dfa.special)?;
+        dfa.st.validate(&dfa.special, &seen)?;
         // N.B. dfa.special doesn't have a way to do unchecked deserialization,
         // so it has already been validated.
         Ok((dfa, nread))
@@ -1207,35 +1198,21 @@ unsafe impl<T: AsRef<[u8]>> Automaton for DFA<T> {
     }
 
     #[inline]
-    fn start_state_forward(
+    fn start_state(
         &self,
-        input: &Input<'_>,
-    ) -> Result<StateID, MatchError> {
-        if !self.quitset.is_empty() && input.start() > 0 {
-            let offset = input.start() - 1;
-            let byte = input.haystack()[offset];
-            if self.quitset.contains(byte) {
-                return Err(MatchError::quit(byte, offset));
-            }
-        }
-        let start = self.st.start_map.fwd(&input);
-        self.st.start(input, start)
-    }
-
-    #[inline]
-    fn start_state_reverse(
-        &self,
-        input: &Input<'_>,
-    ) -> Result<StateID, MatchError> {
-        if !self.quitset.is_empty() && input.end() < input.haystack().len() {
-            let offset = input.end();
-            let byte = input.haystack()[offset];
-            if self.quitset.contains(byte) {
-                return Err(MatchError::quit(byte, offset));
+        config: &start::Config,
+    ) -> Result<StateID, StartError> {
+        let anchored = config.get_anchored();
+        let start = match config.get_look_behind() {
+            None => Start::Text,
+            Some(byte) => {
+                if !self.quitset.is_empty() && self.quitset.contains(byte) {
+                    return Err(StartError::quit(byte));
+                }
+                self.st.start_map.get(byte)
             }
-        }
-        let start = self.st.start_map.rev(&input);
-        self.st.start(input, start)
+        };
+        self.st.start(anchored, start)
     }
 
     #[inline]
@@ -1411,63 +1388,8 @@ impl<T: AsRef<[u8]>> Transitions<T> {
     ///
     /// That is, every state ID can be used to correctly index a state in this
     /// table.
-    fn validate(&self, sp: &Special) -> Result<(), DeserializeError> {
-        // In order to validate everything, we not only need to make sure we
-        // can decode every state, but that every transition in every state
-        // points to a valid state. There are many duplicative transitions, so
-        // we record state IDs that we've verified so that we don't redo the
-        // decoding work.
-        //
-        // Except, when in no_std mode, we don't have dynamic memory allocation
-        // available to us, so we skip this optimization. It's not clear
-        // whether doing something more clever is worth it just yet. If you're
-        // profiling this code and need it to run faster, please file an issue.
-        //
-        // OK, so we also use this to record the set of valid state IDs. Since
-        // it is possible for a transition to point to an invalid state ID that
-        // still (somehow) deserializes to a valid state. So we need to make
-        // sure our transitions are limited to actually correct state IDs.
-        // The problem is, I'm not sure how to do this verification step in
-        // no-std no-alloc mode. I think we'd *have* to store the set of valid
-        // state IDs in the DFA itself. For now, we don't do this verification
-        // in no-std no-alloc mode. The worst thing that can happen is an
-        // incorrect result. But no panics or memory safety problems should
-        // result. Because we still do validate that the state itself is
-        // "valid" in the sense that everything it points to actually exists.
-        //
-        // ---AG
-        struct Seen {
-            #[cfg(feature = "alloc")]
-            set: alloc::collections::BTreeSet<StateID>,
-            #[cfg(not(feature = "alloc"))]
-            set: core::marker::PhantomData<StateID>,
-        }
-
-        #[cfg(feature = "alloc")]
-        impl Seen {
-            fn new() -> Seen {
-                Seen { set: alloc::collections::BTreeSet::new() }
-            }
-            fn insert(&mut self, id: StateID) {
-                self.set.insert(id);
-            }
-            fn contains(&self, id: &StateID) -> bool {
-                self.set.contains(id)
-            }
-        }
-
-        #[cfg(not(feature = "alloc"))]
-        impl Seen {
-            fn new() -> Seen {
-                Seen { set: core::marker::PhantomData }
-            }
-            fn insert(&mut self, _id: StateID) {}
-            fn contains(&self, _id: &StateID) -> bool {
-                false
-            }
-        }
-
-        let mut verified: Seen = Seen::new();
+    fn validate(&self, sp: &Special) -> Result<Seen, DeserializeError> {
+        let mut verified = Seen::new();
         // We need to make sure that we decode the correct number of states.
         // Otherwise, an empty set of transitions would validate even if the
         // recorded state length is non-empty.
@@ -1544,7 +1466,7 @@ impl<T: AsRef<[u8]>> Transitions<T> {
                 "mismatching sparse state length",
             ));
         }
-        Ok(())
+        Ok(verified)
     }
 
     /// Converts these transitions to a borrowed value.
@@ -1682,7 +1604,7 @@ impl<T: AsRef<[u8]>> Transitions<T> {
             let state = &state[nr..];
             if npats == 0 {
                 return Err(DeserializeError::generic(
-                    "state marked as a match, but has no pattern IDs",
+                    "state marked as a match, but pattern length is zero",
                 ));
             }
 
@@ -1704,6 +1626,21 @@ impl<T: AsRef<[u8]>> Transitions<T> {
         } else {
             (&[][..], state)
         };
+        if is_match && pattern_ids.is_empty() {
+            return Err(DeserializeError::generic(
+                "state marked as a match, but has no pattern IDs",
+            ));
+        }
+        if sp.is_match_state(id) && pattern_ids.is_empty() {
+            return Err(DeserializeError::generic(
+                "state marked special as a match, but has no pattern IDs",
+            ));
+        }
+        if sp.is_match_state(id) != is_match {
+            return Err(DeserializeError::generic(
+                "whether state is a match or not is inconsistent",
+            ));
+        }
 
         // Now read this state's accelerator info. The first byte is the length
         // of the accelerator, which is typically 0 (for no acceleration) but
@@ -2084,28 +2021,19 @@ impl<T: AsRef<[u8]>> StartTable<T> {
     fn validate(
         &self,
         sp: &Special,
-        trans: &Transitions<T>,
+        seen: &Seen,
     ) -> Result<(), DeserializeError> {
         for (id, _, _) in self.iter() {
+            if !seen.contains(&id) {
+                return Err(DeserializeError::generic(
+                    "found invalid start state ID",
+                ));
+            }
             if sp.is_match_state(id) {
                 return Err(DeserializeError::generic(
                     "start states cannot be match states",
                 ));
             }
-            // Confirm that the start state points to a valid state.
-            let state = trans.try_state(sp, id)?;
-            // And like for the transition table, confirm that the transitions
-            // on all start states themselves point to a valid state.
-            //
-            // It'd probably be better to integrate this validation with the
-            // transition table, or otherwise store a sorted sequence of all
-            // valid state IDs in the sparse DFA itself. That way, we could
-            // check that every pointer to a state corresponds precisely to a
-            // correct and valid state.
-            for i in 0..state.ntrans {
-                let to = state.next_at(i);
-                let _ = trans.try_state(sp, to)?;
-            }
         }
         Ok(())
     }
@@ -2145,28 +2073,27 @@ impl<T: AsRef<[u8]>> StartTable<T> {
     /// panics.
     fn start(
         &self,
-        input: &Input<'_>,
+        anchored: Anchored,
         start: Start,
-    ) -> Result<StateID, MatchError> {
+    ) -> Result<StateID, StartError> {
         let start_index = start.as_usize();
-        let mode = input.get_anchored();
-        let index = match mode {
+        let index = match anchored {
             Anchored::No => {
                 if !self.kind.has_unanchored() {
-                    return Err(MatchError::unsupported_anchored(mode));
+                    return Err(StartError::unsupported_anchored(anchored));
                 }
                 start_index
             }
             Anchored::Yes => {
                 if !self.kind.has_anchored() {
-                    return Err(MatchError::unsupported_anchored(mode));
+                    return Err(StartError::unsupported_anchored(anchored));
                 }
                 self.stride + start_index
             }
             Anchored::Pattern(pid) => {
                 let len = match self.pattern_len {
                     None => {
-                        return Err(MatchError::unsupported_anchored(mode))
+                        return Err(StartError::unsupported_anchored(anchored))
                     }
                     Some(len) => len,
                 };
@@ -2561,6 +2488,62 @@ impl<'a> fmt::Debug for StateMut<'a> {
     }
 }
 
+// In order to validate everything, we not only need to make sure we
+// can decode every state, but that every transition in every state
+// points to a valid state. There are many duplicative transitions, so
+// we record state IDs that we've verified so that we don't redo the
+// decoding work.
+//
+// Except, when in no_std mode, we don't have dynamic memory allocation
+// available to us, so we skip this optimization. It's not clear
+// whether doing something more clever is worth it just yet. If you're
+// profiling this code and need it to run faster, please file an issue.
+//
+// OK, so we also use this to record the set of valid state IDs. Since
+// it is possible for a transition to point to an invalid state ID that
+// still (somehow) deserializes to a valid state. So we need to make
+// sure our transitions are limited to actually correct state IDs.
+// The problem is, I'm not sure how to do this verification step in
+// no-std no-alloc mode. I think we'd *have* to store the set of valid
+// state IDs in the DFA itself. For now, we don't do this verification
+// in no-std no-alloc mode. The worst thing that can happen is an
+// incorrect result. But no panics or memory safety problems should
+// result. Because we still do validate that the state itself is
+// "valid" in the sense that everything it points to actually exists.
+//
+// ---AG
+#[derive(Debug)]
+struct Seen {
+    #[cfg(feature = "alloc")]
+    set: alloc::collections::BTreeSet<StateID>,
+    #[cfg(not(feature = "alloc"))]
+    set: core::marker::PhantomData<StateID>,
+}
+
+#[cfg(feature = "alloc")]
+impl Seen {
+    fn new() -> Seen {
+        Seen { set: alloc::collections::BTreeSet::new() }
+    }
+    fn insert(&mut self, id: StateID) {
+        self.set.insert(id);
+    }
+    fn contains(&self, id: &StateID) -> bool {
+        self.set.contains(id)
+    }
+}
+
+#[cfg(not(feature = "alloc"))]
+impl Seen {
+    fn new() -> Seen {
+        Seen { set: core::marker::PhantomData }
+    }
+    fn insert(&mut self, _id: StateID) {}
+    fn contains(&self, _id: &StateID) -> bool {
+        true
+    }
+}
+
 /*
 /// A binary search routine specialized specifically to a sparse DFA state's
 /// transitions. Specifically, the transitions are defined as a set of pairs
-- 
cgit v1.2.3