diff options
Diffstat (limited to 'vendor/regex-automata/tests/hybrid/api.rs')
-rw-r--r-- | vendor/regex-automata/tests/hybrid/api.rs | 140 |
1 files changed, 58 insertions, 82 deletions
diff --git a/vendor/regex-automata/tests/hybrid/api.rs b/vendor/regex-automata/tests/hybrid/api.rs index 9a834dbb8..e82d808e3 100644 --- a/vendor/regex-automata/tests/hybrid/api.rs +++ b/vendor/regex-automata/tests/hybrid/api.rs @@ -1,25 +1,29 @@ use std::error::Error; use regex_automata::{ - hybrid::{ - dfa::{self, DFA}, - regex::Regex, - OverlappingState, - }, + hybrid::dfa::{OverlappingState, DFA}, nfa::thompson, - HalfMatch, MatchError, MatchKind, MultiMatch, + HalfMatch, Input, MatchError, }; -use crate::util::{BunkPrefilter, SubstringPrefilter}; - // Tests that too many cache resets cause the lazy DFA to quit. // // We only test this on 64-bit because the test is gingerly crafted based on // implementation details of cache sizes. It's not a great test because of // that, but it does check some interesting properties around how positions are // reported when a search "gives up." +// +// NOTE: If you change something in lazy DFA implementation that causes this +// test to fail by reporting different "gave up" positions, then it's generally +// okay to update the positions in the test below as long as you're sure your +// changes are correct. Namely, it is expected that if there are changes in the +// cache size (or changes in how big things are inside the cache), then its +// utilization may change slightly and thus impact where a search gives up. +// Precisely where a search gives up is not an API guarantee, so changing the +// offsets here is OK. #[test] #[cfg(target_pointer_width = "64")] +#[cfg(not(miri))] fn too_many_cache_resets_cause_quit() -> Result<(), Box<dyn Error>> { // This is a carefully chosen regex. The idea is to pick one that requires // some decent number of states (hence the bounded repetition). But we @@ -27,9 +31,16 @@ fn too_many_cache_resets_cause_quit() -> Result<(), Box<dyn Error>> { // non-ASCII letter so that we can check that no new states are created // once the cache is full. Namely, if we fill up the cache on a haystack // of 'a's, then in order to match one 'β', a new state will need to be - // created since a 'β' is encoded with multiple bytes. Since there's no - // room for this state, the search should quit at the very first position. - let pattern = r"[aβ]{100}"; + // created since a 'β' is encoded with multiple bytes. + // + // So we proceed by "filling" up the cache by searching a haystack of just + // 'a's. The cache won't have enough room to add enough states to find the + // match (because of the bounded repetition), which should result in it + // giving up before it finds a match. + // + // Since there's now no more room to create states, we search a haystack + // of 'β' and confirm that it gives up immediately. + let pattern = r"[aβ]{99}"; let dfa = DFA::builder() .configure( // Configure it so that we have the minimum cache capacity @@ -39,38 +50,53 @@ fn too_many_cache_resets_cause_quit() -> Result<(), Box<dyn Error>> { .cache_capacity(0) .minimum_cache_clear_count(Some(0)), ) + .thompson(thompson::NFA::config()) .build(pattern)?; let mut cache = dfa.create_cache(); let haystack = "a".repeat(101).into_bytes(); - let err = MatchError::GaveUp { offset: 25 }; - assert_eq!(dfa.find_earliest_fwd(&mut cache, &haystack), Err(err.clone())); - assert_eq!(dfa.find_leftmost_fwd(&mut cache, &haystack), Err(err.clone())); + let err = MatchError::gave_up(25); + // Notice that we make the same amount of progress in each search! That's + // because the cache is reused and already has states to handle the first + // N bytes. + assert_eq!( + Err(err.clone()), + dfa.try_search_fwd(&mut cache, &Input::new(&haystack)) + ); assert_eq!( - dfa.find_overlapping_fwd( + Err(err.clone()), + dfa.try_search_overlapping_fwd( &mut cache, - &haystack, + &Input::new(&haystack), &mut OverlappingState::start() ), - Err(err.clone()) ); let haystack = "β".repeat(101).into_bytes(); - let err = MatchError::GaveUp { offset: 0 }; - assert_eq!(dfa.find_earliest_fwd(&mut cache, &haystack), Err(err)); + let err = MatchError::gave_up(2); + assert_eq!( + Err(err), + dfa.try_search_fwd(&mut cache, &Input::new(&haystack)) + ); // no need to test that other find routines quit, since we did that above // OK, if we reset the cache, then we should be able to create more states // and make more progress with searching for betas. cache.reset(&dfa); - let err = MatchError::GaveUp { offset: 26 }; - assert_eq!(dfa.find_earliest_fwd(&mut cache, &haystack), Err(err)); + let err = MatchError::gave_up(27); + assert_eq!( + Err(err), + dfa.try_search_fwd(&mut cache, &Input::new(&haystack)) + ); // ... switching back to ASCII still makes progress since it just needs to // set transitions on existing states! let haystack = "a".repeat(101).into_bytes(); - let err = MatchError::GaveUp { offset: 13 }; - assert_eq!(dfa.find_earliest_fwd(&mut cache, &haystack), Err(err)); + let err = MatchError::gave_up(13); + assert_eq!( + Err(err), + dfa.try_search_fwd(&mut cache, &Input::new(&haystack)) + ); Ok(()) } @@ -84,20 +110,16 @@ fn quit_fwd() -> Result<(), Box<dyn Error>> { let mut cache = dfa.create_cache(); assert_eq!( - dfa.find_earliest_fwd(&mut cache, b"abcxyz"), - Err(MatchError::Quit { byte: b'x', offset: 3 }) + dfa.try_search_fwd(&mut cache, &Input::new("abcxyz")), + Err(MatchError::quit(b'x', 3)), ); assert_eq!( - dfa.find_leftmost_fwd(&mut cache, b"abcxyz"), - Err(MatchError::Quit { byte: b'x', offset: 3 }) - ); - assert_eq!( - dfa.find_overlapping_fwd( + dfa.try_search_overlapping_fwd( &mut cache, - b"abcxyz", + &Input::new(b"abcxyz"), &mut OverlappingState::start() ), - Err(MatchError::Quit { byte: b'x', offset: 3 }) + Err(MatchError::quit(b'x', 3)), ); Ok(()) @@ -113,12 +135,8 @@ fn quit_rev() -> Result<(), Box<dyn Error>> { let mut cache = dfa.create_cache(); assert_eq!( - dfa.find_earliest_rev(&mut cache, b"abcxyz"), - Err(MatchError::Quit { byte: b'x', offset: 3 }) - ); - assert_eq!( - dfa.find_leftmost_rev(&mut cache, b"abcxyz"), - Err(MatchError::Quit { byte: b'x', offset: 3 }) + dfa.try_search_rev(&mut cache, &Input::new("abcxyz")), + Err(MatchError::quit(b'x', 3)), ); Ok(()) @@ -145,51 +163,9 @@ fn unicode_word_implicitly_works() -> Result<(), Box<dyn Error>> { let dfa = DFA::builder().configure(config).build(r"\b")?; let mut cache = dfa.create_cache(); let expected = HalfMatch::must(0, 1); - assert_eq!(dfa.find_leftmost_fwd(&mut cache, b" a"), Ok(Some(expected))); - Ok(()) -} - -// Tests that we can provide a prefilter to a Regex, and the search reports -// correct results. -#[test] -fn prefilter_works() -> Result<(), Box<dyn Error>> { - let mut re = Regex::new(r"a[0-9]+").unwrap(); - re.set_prefilter(Some(Box::new(SubstringPrefilter::new("a")))); - let mut cache = re.create_cache(); - - let text = b"foo abc foo a1a2a3 foo a123 bar aa456"; - let matches: Vec<(usize, usize)> = re - .find_leftmost_iter(&mut cache, text) - .map(|m| (m.start(), m.end())) - .collect(); - assert_eq!( - matches, - vec![(12, 14), (14, 16), (16, 18), (23, 27), (33, 37),] - ); - Ok(()) -} - -// This test confirms that a prefilter is active by using a prefilter that -// reports false negatives. -#[test] -fn prefilter_is_active() -> Result<(), Box<dyn Error>> { - let text = b"za123"; - let mut re = Regex::new(r"a[0-9]+").unwrap(); - let mut cache = re.create_cache(); - - re.set_prefilter(Some(Box::new(SubstringPrefilter::new("a")))); - assert_eq!( - re.find_leftmost(&mut cache, b"za123"), - Some(MultiMatch::must(0, 1, 5)) - ); assert_eq!( - re.find_leftmost(&mut cache, b"a123"), - Some(MultiMatch::must(0, 0, 4)) + Ok(Some(expected)), + dfa.try_search_fwd(&mut cache, &Input::new(" a")), ); - re.set_prefilter(Some(Box::new(BunkPrefilter::new()))); - assert_eq!(re.find_leftmost(&mut cache, b"za123"), None); - // This checks that the prefilter is used when first starting the search, - // instead of waiting until at least one transition has occurred. - assert_eq!(re.find_leftmost(&mut cache, b"a123"), None); Ok(()) } |