diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /third_party/rust/regex-automata/tests/hybrid/api.rs | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/regex-automata/tests/hybrid/api.rs')
-rw-r--r-- | third_party/rust/regex-automata/tests/hybrid/api.rs | 171 |
1 files changed, 171 insertions, 0 deletions
diff --git a/third_party/rust/regex-automata/tests/hybrid/api.rs b/third_party/rust/regex-automata/tests/hybrid/api.rs new file mode 100644 index 0000000000..e82d808e34 --- /dev/null +++ b/third_party/rust/regex-automata/tests/hybrid/api.rs @@ -0,0 +1,171 @@ +use std::error::Error; + +use regex_automata::{ + hybrid::dfa::{OverlappingState, DFA}, + nfa::thompson, + HalfMatch, Input, MatchError, +}; + +// Tests that too many cache resets cause the lazy DFA to quit. +// +// We only test this on 64-bit because the test is gingerly crafted based on +// implementation details of cache sizes. It's not a great test because of +// that, but it does check some interesting properties around how positions are +// reported when a search "gives up." +// +// NOTE: If you change something in lazy DFA implementation that causes this +// test to fail by reporting different "gave up" positions, then it's generally +// okay to update the positions in the test below as long as you're sure your +// changes are correct. Namely, it is expected that if there are changes in the +// cache size (or changes in how big things are inside the cache), then its +// utilization may change slightly and thus impact where a search gives up. +// Precisely where a search gives up is not an API guarantee, so changing the +// offsets here is OK. +#[test] +#[cfg(target_pointer_width = "64")] +#[cfg(not(miri))] +fn too_many_cache_resets_cause_quit() -> Result<(), Box<dyn Error>> { + // This is a carefully chosen regex. The idea is to pick one that requires + // some decent number of states (hence the bounded repetition). But we + // specifically choose to create a class with an ASCII letter and a + // non-ASCII letter so that we can check that no new states are created + // once the cache is full. Namely, if we fill up the cache on a haystack + // of 'a's, then in order to match one 'β', a new state will need to be + // created since a 'β' is encoded with multiple bytes. + // + // So we proceed by "filling" up the cache by searching a haystack of just + // 'a's. The cache won't have enough room to add enough states to find the + // match (because of the bounded repetition), which should result in it + // giving up before it finds a match. + // + // Since there's now no more room to create states, we search a haystack + // of 'β' and confirm that it gives up immediately. + let pattern = r"[aβ]{99}"; + let dfa = DFA::builder() + .configure( + // Configure it so that we have the minimum cache capacity + // possible. And that if any resets occur, the search quits. + DFA::config() + .skip_cache_capacity_check(true) + .cache_capacity(0) + .minimum_cache_clear_count(Some(0)), + ) + .thompson(thompson::NFA::config()) + .build(pattern)?; + let mut cache = dfa.create_cache(); + + let haystack = "a".repeat(101).into_bytes(); + let err = MatchError::gave_up(25); + // Notice that we make the same amount of progress in each search! That's + // because the cache is reused and already has states to handle the first + // N bytes. + assert_eq!( + Err(err.clone()), + dfa.try_search_fwd(&mut cache, &Input::new(&haystack)) + ); + assert_eq!( + Err(err.clone()), + dfa.try_search_overlapping_fwd( + &mut cache, + &Input::new(&haystack), + &mut OverlappingState::start() + ), + ); + + let haystack = "β".repeat(101).into_bytes(); + let err = MatchError::gave_up(2); + assert_eq!( + Err(err), + dfa.try_search_fwd(&mut cache, &Input::new(&haystack)) + ); + // no need to test that other find routines quit, since we did that above + + // OK, if we reset the cache, then we should be able to create more states + // and make more progress with searching for betas. + cache.reset(&dfa); + let err = MatchError::gave_up(27); + assert_eq!( + Err(err), + dfa.try_search_fwd(&mut cache, &Input::new(&haystack)) + ); + + // ... switching back to ASCII still makes progress since it just needs to + // set transitions on existing states! + let haystack = "a".repeat(101).into_bytes(); + let err = MatchError::gave_up(13); + assert_eq!( + Err(err), + dfa.try_search_fwd(&mut cache, &Input::new(&haystack)) + ); + + Ok(()) +} + +// Tests that quit bytes in the forward direction work correctly. +#[test] +fn quit_fwd() -> Result<(), Box<dyn Error>> { + let dfa = DFA::builder() + .configure(DFA::config().quit(b'x', true)) + .build("[[:word:]]+$")?; + let mut cache = dfa.create_cache(); + + assert_eq!( + dfa.try_search_fwd(&mut cache, &Input::new("abcxyz")), + Err(MatchError::quit(b'x', 3)), + ); + assert_eq!( + dfa.try_search_overlapping_fwd( + &mut cache, + &Input::new(b"abcxyz"), + &mut OverlappingState::start() + ), + Err(MatchError::quit(b'x', 3)), + ); + + Ok(()) +} + +// Tests that quit bytes in the reverse direction work correctly. +#[test] +fn quit_rev() -> Result<(), Box<dyn Error>> { + let dfa = DFA::builder() + .configure(DFA::config().quit(b'x', true)) + .thompson(thompson::Config::new().reverse(true)) + .build("^[[:word:]]+")?; + let mut cache = dfa.create_cache(); + + assert_eq!( + dfa.try_search_rev(&mut cache, &Input::new("abcxyz")), + Err(MatchError::quit(b'x', 3)), + ); + + Ok(()) +} + +// Tests that if we heuristically enable Unicode word boundaries but then +// instruct that a non-ASCII byte should NOT be a quit byte, then the builder +// will panic. +#[test] +#[should_panic] +fn quit_panics() { + DFA::config().unicode_word_boundary(true).quit(b'\xFF', false); +} + +// This tests an intesting case where even if the Unicode word boundary option +// is disabled, setting all non-ASCII bytes to be quit bytes will cause Unicode +// word boundaries to be enabled. +#[test] +fn unicode_word_implicitly_works() -> Result<(), Box<dyn Error>> { + let mut config = DFA::config(); + for b in 0x80..=0xFF { + config = config.quit(b, true); + } + let dfa = DFA::builder().configure(config).build(r"\b")?; + let mut cache = dfa.create_cache(); + let expected = HalfMatch::must(0, 1); + assert_eq!( + Ok(Some(expected)), + dfa.try_search_fwd(&mut cache, &Input::new(" a")), + ); + Ok(()) +} |