summaryrefslogtreecommitdiffstats
path: root/vendor/regex-automata/src/dfa/dense.rs
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-06-19 09:26:03 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-06-19 09:26:03 +0000
commit9918693037dce8aa4bb6f08741b6812923486c18 (patch)
tree21d2b40bec7e6a7ea664acee056eb3d08e15a1cf /vendor/regex-automata/src/dfa/dense.rs
parentReleasing progress-linux version 1.75.0+dfsg1-5~progress7.99u1. (diff)
downloadrustc-9918693037dce8aa4bb6f08741b6812923486c18.tar.xz
rustc-9918693037dce8aa4bb6f08741b6812923486c18.zip
Merging upstream version 1.76.0+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/regex-automata/src/dfa/dense.rs')
-rw-r--r--vendor/regex-automata/src/dfa/dense.rs138
1 files changed, 73 insertions, 65 deletions
diff --git a/vendor/regex-automata/src/dfa/dense.rs b/vendor/regex-automata/src/dfa/dense.rs
index 6da865f97..fd96bc878 100644
--- a/vendor/regex-automata/src/dfa/dense.rs
+++ b/vendor/regex-automata/src/dfa/dense.rs
@@ -30,7 +30,7 @@ use crate::{
use crate::{
dfa::{
accel::Accels,
- automaton::{fmt_state_indicator, Automaton},
+ automaton::{fmt_state_indicator, Automaton, StartError},
special::Special,
start::StartKind,
DEAD,
@@ -40,8 +40,8 @@ use crate::{
int::{Pointer, Usize},
prefilter::Prefilter,
primitives::{PatternID, StateID},
- search::{Anchored, Input, MatchError},
- start::{Start, StartByteMap},
+ search::Anchored,
+ start::{self, Start, StartByteMap},
wire::{self, DeserializeError, Endian, SerializeError},
},
};
@@ -66,8 +66,9 @@ const VERSION: u32 = 2;
///
/// The default configuration guarantees that a search will never return
/// a "quit" error, although it is possible for a search to fail if
-/// [`Config::starts_for_each_pattern`] wasn't enabled (which it is not by
-/// default) and an [`Anchored::Pattern`] mode is requested via [`Input`].
+/// [`Config::starts_for_each_pattern`] wasn't enabled (which it is
+/// not by default) and an [`Anchored::Pattern`] mode is requested via
+/// [`Input`](crate::Input).
#[cfg(feature = "dfa-build")]
#[derive(Clone, Debug, Default)]
pub struct Config {
@@ -113,8 +114,7 @@ impl Config {
/// make searching slower than it otherwise would be if the transitions
/// that leave accelerated states are traversed frequently.
///
- /// See [`Automaton::accelerator`](crate::dfa::Automaton::accelerator) for
- /// an example.
+ /// See [`Automaton::accelerator`] for an example.
///
/// This is enabled by default.
pub fn accelerate(mut self, yes: bool) -> Config {
@@ -882,20 +882,20 @@ impl Config {
/// # if !cfg!(target_pointer_width = "64") { return Ok(()); } // see #1039
/// use regex_automata::{dfa::{dense, Automaton}, Input};
///
- /// // 600KB isn't enough!
+ /// // 700KB isn't enough!
/// dense::Builder::new()
/// .configure(dense::Config::new()
- /// .determinize_size_limit(Some(600_000))
+ /// .determinize_size_limit(Some(700_000))
/// )
/// .build(r"\w{20}")
/// .unwrap_err();
///
- /// // ... but 700KB probably is!
+ /// // ... but 800KB probably is!
/// // (Note that auxiliary storage sizes aren't necessarily stable between
/// // releases.)
/// let dfa = dense::Builder::new()
/// .configure(dense::Config::new()
- /// .determinize_size_limit(Some(700_000))
+ /// .determinize_size_limit(Some(800_000))
/// )
/// .build(r"\w{20}")?;
/// let haystack = "A".repeat(20).into_bytes();
@@ -1228,13 +1228,14 @@ impl Builder {
} else {
let mut set = nfa.byte_class_set().clone();
// It is important to distinguish any "quit" bytes from all other
- // bytes. Otherwise, a non-quit byte may end up in the same class
- // as a quit byte, and thus cause the DFA stop when it shouldn't.
+ // bytes. Otherwise, a non-quit byte may end up in the same
+ // class as a quit byte, and thus cause the DFA to stop when it
+ // shouldn't.
//
// Test case:
//
- // regex-cli find hybrid regex -w @conn.json.1000x.log \
- // '^#' '\b10\.55\.182\.100\b'
+ // regex-cli find match dense --unicode-word-boundary \
+ // -p '^#' -p '\b10\.55\.182\.100\b' -y @conn.json.1000x.log
if !quitset.is_empty() {
set.add_set(&quitset);
}
@@ -2345,6 +2346,24 @@ impl<'a> DFA<&'a [u32]> {
dfa.accels.validate()?;
// N.B. dfa.special doesn't have a way to do unchecked deserialization,
// so it has already been validated.
+ for state in dfa.states() {
+ // If the state is an accel state, then it must have a non-empty
+ // accelerator.
+ if dfa.is_accel_state(state.id()) {
+ let index = dfa.accelerator_index(state.id());
+ if index >= dfa.accels.len() {
+ return Err(DeserializeError::generic(
+ "found DFA state with invalid accelerator index",
+ ));
+ }
+ let needles = dfa.accels.needles(index);
+ if !(1 <= needles.len() && needles.len() <= 3) {
+ return Err(DeserializeError::generic(
+ "accelerator needles has invalid length",
+ ));
+ }
+ }
+ }
Ok((dfa, nread))
}
@@ -2885,31 +2904,33 @@ impl OwnedDFA {
fn set_universal_starts(&mut self) {
assert_eq!(6, Start::len(), "expected 6 start configurations");
- let start_id = |dfa: &mut OwnedDFA, inp: &Input<'_>, start: Start| {
+ let start_id = |dfa: &mut OwnedDFA,
+ anchored: Anchored,
+ start: Start| {
// This OK because we only call 'start' under conditions
// in which we know it will succeed.
- dfa.st.start(inp, start).expect("valid Input configuration")
+ dfa.st.start(anchored, start).expect("valid Input configuration")
};
if self.start_kind().has_unanchored() {
- let inp = Input::new("").anchored(Anchored::No);
- let sid = start_id(self, &inp, Start::NonWordByte);
- if sid == start_id(self, &inp, Start::WordByte)
- && sid == start_id(self, &inp, Start::Text)
- && sid == start_id(self, &inp, Start::LineLF)
- && sid == start_id(self, &inp, Start::LineCR)
- && sid == start_id(self, &inp, Start::CustomLineTerminator)
+ let anchor = Anchored::No;
+ let sid = start_id(self, anchor, Start::NonWordByte);
+ if sid == start_id(self, anchor, Start::WordByte)
+ && sid == start_id(self, anchor, Start::Text)
+ && sid == start_id(self, anchor, Start::LineLF)
+ && sid == start_id(self, anchor, Start::LineCR)
+ && sid == start_id(self, anchor, Start::CustomLineTerminator)
{
self.st.universal_start_unanchored = Some(sid);
}
}
if self.start_kind().has_anchored() {
- let inp = Input::new("").anchored(Anchored::Yes);
- let sid = start_id(self, &inp, Start::NonWordByte);
- if sid == start_id(self, &inp, Start::WordByte)
- && sid == start_id(self, &inp, Start::Text)
- && sid == start_id(self, &inp, Start::LineLF)
- && sid == start_id(self, &inp, Start::LineCR)
- && sid == start_id(self, &inp, Start::CustomLineTerminator)
+ let anchor = Anchored::Yes;
+ let sid = start_id(self, anchor, Start::NonWordByte);
+ if sid == start_id(self, anchor, Start::WordByte)
+ && sid == start_id(self, anchor, Start::Text)
+ && sid == start_id(self, anchor, Start::LineLF)
+ && sid == start_id(self, anchor, Start::LineCR)
+ && sid == start_id(self, anchor, Start::CustomLineTerminator)
{
self.st.universal_start_anchored = Some(sid);
}
@@ -3216,35 +3237,21 @@ unsafe impl<T: AsRef<[u32]>> Automaton for DFA<T> {
}
#[cfg_attr(feature = "perf-inline", inline(always))]
- fn start_state_forward(
- &self,
- input: &Input<'_>,
- ) -> Result<StateID, MatchError> {
- if !self.quitset.is_empty() && input.start() > 0 {
- let offset = input.start() - 1;
- let byte = input.haystack()[offset];
- if self.quitset.contains(byte) {
- return Err(MatchError::quit(byte, offset));
- }
- }
- let start = self.st.start_map.fwd(&input);
- self.st.start(input, start)
- }
-
- #[cfg_attr(feature = "perf-inline", inline(always))]
- fn start_state_reverse(
+ fn start_state(
&self,
- input: &Input<'_>,
- ) -> Result<StateID, MatchError> {
- if !self.quitset.is_empty() && input.end() < input.haystack().len() {
- let offset = input.end();
- let byte = input.haystack()[offset];
- if self.quitset.contains(byte) {
- return Err(MatchError::quit(byte, offset));
+ config: &start::Config,
+ ) -> Result<StateID, StartError> {
+ let anchored = config.get_anchored();
+ let start = match config.get_look_behind() {
+ None => Start::Text,
+ Some(byte) => {
+ if !self.quitset.is_empty() && self.quitset.contains(byte) {
+ return Err(StartError::quit(byte));
+ }
+ self.st.start_map.get(byte)
}
- }
- let start = self.st.start_map.rev(&input);
- self.st.start(input, start)
+ };
+ self.st.start(anchored, start)
}
#[cfg_attr(feature = "perf-inline", inline(always))]
@@ -4180,28 +4187,27 @@ impl<T: AsRef<[u32]>> StartTable<T> {
#[cfg_attr(feature = "perf-inline", inline(always))]
fn start(
&self,
- input: &Input<'_>,
+ anchored: Anchored,
start: Start,
- ) -> Result<StateID, MatchError> {
+ ) -> Result<StateID, StartError> {
let start_index = start.as_usize();
- let mode = input.get_anchored();
- let index = match mode {
+ let index = match anchored {
Anchored::No => {
if !self.kind.has_unanchored() {
- return Err(MatchError::unsupported_anchored(mode));
+ return Err(StartError::unsupported_anchored(anchored));
}
start_index
}
Anchored::Yes => {
if !self.kind.has_anchored() {
- return Err(MatchError::unsupported_anchored(mode));
+ return Err(StartError::unsupported_anchored(anchored));
}
self.stride + start_index
}
Anchored::Pattern(pid) => {
let len = match self.pattern_len {
None => {
- return Err(MatchError::unsupported_anchored(mode))
+ return Err(StartError::unsupported_anchored(anchored))
}
Some(len) => len,
};
@@ -5086,6 +5092,8 @@ impl core::fmt::Display for BuildError {
#[cfg(all(test, feature = "syntax", feature = "dfa-build"))]
mod tests {
+ use crate::{Input, MatchError};
+
use super::*;
#[test]