diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-30 18:31:44 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-30 18:31:44 +0000 |
commit | c23a457e72abe608715ac76f076f47dc42af07a5 (patch) | |
tree | 2772049aaf84b5c9d0ed12ec8d86812f7a7904b6 /vendor/regex-automata-0.2.0/tests/dfa/api.rs | |
parent | Releasing progress-linux version 1.73.0+dfsg1-1~progress7.99u1. (diff) | |
download | rustc-c23a457e72abe608715ac76f076f47dc42af07a5.tar.xz rustc-c23a457e72abe608715ac76f076f47dc42af07a5.zip |
Merging upstream version 1.74.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/regex-automata-0.2.0/tests/dfa/api.rs')
-rw-r--r-- | vendor/regex-automata-0.2.0/tests/dfa/api.rs | 133 |
1 files changed, 133 insertions, 0 deletions
diff --git a/vendor/regex-automata-0.2.0/tests/dfa/api.rs b/vendor/regex-automata-0.2.0/tests/dfa/api.rs new file mode 100644 index 000000000..80d7d704c --- /dev/null +++ b/vendor/regex-automata-0.2.0/tests/dfa/api.rs @@ -0,0 +1,133 @@ +use std::error::Error; + +use regex_automata::{ + dfa::{dense, regex::Regex, Automaton, OverlappingState}, + nfa::thompson, + HalfMatch, MatchError, MatchKind, MultiMatch, +}; + +use crate::util::{BunkPrefilter, SubstringPrefilter}; + +// Tests that quit bytes in the forward direction work correctly. +#[test] +fn quit_fwd() -> Result<(), Box<dyn Error>> { + let dfa = dense::Builder::new() + .configure(dense::Config::new().quit(b'x', true)) + .build("[[:word:]]+$")?; + + assert_eq!( + dfa.find_earliest_fwd(b"abcxyz"), + Err(MatchError::Quit { byte: b'x', offset: 3 }) + ); + assert_eq!( + dfa.find_leftmost_fwd(b"abcxyz"), + Err(MatchError::Quit { byte: b'x', offset: 3 }) + ); + assert_eq!( + dfa.find_overlapping_fwd(b"abcxyz", &mut OverlappingState::start()), + Err(MatchError::Quit { byte: b'x', offset: 3 }) + ); + + Ok(()) +} + +// Tests that quit bytes in the reverse direction work correctly. +#[test] +fn quit_rev() -> Result<(), Box<dyn Error>> { + let dfa = dense::Builder::new() + .configure(dense::Config::new().quit(b'x', true)) + .thompson(thompson::Config::new().reverse(true)) + .build("^[[:word:]]+")?; + + assert_eq!( + dfa.find_earliest_rev(b"abcxyz"), + Err(MatchError::Quit { byte: b'x', offset: 3 }) + ); + assert_eq!( + dfa.find_leftmost_rev(b"abcxyz"), + Err(MatchError::Quit { byte: b'x', offset: 3 }) + ); + + Ok(()) +} + +// Tests that if we heuristically enable Unicode word boundaries but then +// instruct that a non-ASCII byte should NOT be a quit byte, then the builder +// will panic. 
#[test]
#[should_panic]
fn quit_panics() {
    // With the Unicode word boundary heuristic switched on, un-marking a
    // non-ASCII byte as a quit byte is expected to panic.
    let heuristic = dense::Config::new().unicode_word_boundary(true);
    heuristic.quit(b'\xFF', false);
}

// Checks that an overlapping search panics when the regex's reverse DFA was
// built with 'starts_for_each_pattern' disabled.
#[test]
#[should_panic]
fn incorrect_config_overlapping_search_panics() {
    let pattern = r"abca";
    let forward = dense::DFA::new(pattern).unwrap();

    // The reverse DFA deliberately turns 'starts_for_each_pattern' off; this
    // is the misconfiguration under test.
    let reverse_config = dense::Config::new()
        .anchored(true)
        .match_kind(MatchKind::All)
        .starts_for_each_pattern(false);
    let reverse = dense::Builder::new()
        .configure(reverse_config)
        .thompson(thompson::Config::new().reverse(true))
        .build(pattern)
        .unwrap();

    let re = Regex::builder().build_from_dfas(forward, reverse);
    let mut state = OverlappingState::start();
    // Only the panic matters here, so the search result is discarded.
    re.find_overlapping("bar abcabcabca abca foo".as_bytes(), &mut state);
}

// Covers an interesting case: the Unicode word boundary option is never
// explicitly enabled here, yet marking every non-ASCII byte as a quit byte
// causes Unicode word boundaries to be enabled anyway, so a pattern using
// `\b` builds and matches.
#[test]
fn unicode_word_implicitly_works() -> Result<(), Box<dyn Error>> {
    // Mark every byte in 0x80..=0xFF (all non-ASCII bytes) as a quit byte.
    let config = (0x80..=0xFFu8)
        .fold(dense::Config::new(), |config, byte| config.quit(byte, true));
    let dfa = dense::Builder::new().configure(config).build(r"\b")?;

    let found = dfa.find_leftmost_fwd(b" a");
    assert_eq!(found, Ok(Some(HalfMatch::must(0, 1))));
    Ok(())
}

// Tests that we can provide a prefilter to a Regex, and the search reports
// correct results.
#[test]
fn prefilter_works() -> Result<(), Box<dyn Error>> {
    // Build errors are propagated through the declared `Result`, matching
    // the other tests in this file, instead of being unwrapped.
    let re = Regex::new(r"a[0-9]+")?
        .with_prefilter(SubstringPrefilter::new("a"));
    let text = b"foo abc foo a1a2a3 foo a123 bar aa456";
    // Collect the (start, end) offsets of every leftmost match in order.
    let matches: Vec<(usize, usize)> =
        re.find_leftmost_iter(text).map(|m| (m.start(), m.end())).collect();
    assert_eq!(
        matches,
        vec![(12, 14), (14, 16), (16, 18), (23, 27), (33, 37)]
    );
    Ok(())
}

// This test confirms that a prefilter is active by using a prefilter that
// reports false negatives.
#[test]
fn prefilter_is_active() -> Result<(), Box<dyn Error>> {
    let re = Regex::new(r"a[0-9]+")?
        .with_prefilter(SubstringPrefilter::new("a"));
    // With the substring prefilter for "a", both haystacks produce a match.
    assert_eq!(re.find_leftmost(b"za123"), Some(MultiMatch::must(0, 1, 5)));
    assert_eq!(re.find_leftmost(b"a123"), Some(MultiMatch::must(0, 0, 4)));

    // Swap in `BunkPrefilter` (defined in `crate::util`, not shown here; it
    // is meant to report false negatives). If the prefilter is really being
    // consulted, the same searches now find nothing.
    let re = re.with_prefilter(BunkPrefilter::new());
    assert_eq!(re.find_leftmost(b"za123"), None);
    // This checks that the prefilter is used when first starting the search,
    // instead of waiting until at least one transition has occurred.
    assert_eq!(re.find_leftmost(b"a123"), None);
    Ok(())
}