From 5363f350887b1e5b5dd21a86f88c8af9d7fea6da Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:18:25 +0200 Subject: Merging upstream version 1.67.1+dfsg1. Signed-off-by: Daniel Baumann --- vendor/regex-automata/tests/hybrid/suite.rs | 212 ++++++++++++++++++++++++++++ 1 file changed, 212 insertions(+) create mode 100644 vendor/regex-automata/tests/hybrid/suite.rs (limited to 'vendor/regex-automata/tests/hybrid/suite.rs') diff --git a/vendor/regex-automata/tests/hybrid/suite.rs b/vendor/regex-automata/tests/hybrid/suite.rs new file mode 100644 index 000000000..d60570d84 --- /dev/null +++ b/vendor/regex-automata/tests/hybrid/suite.rs @@ -0,0 +1,212 @@ +use regex_automata::{ + hybrid::{ + dfa::DFA, + regex::{self, Regex}, + }, + nfa::thompson, + MatchKind, SyntaxConfig, +}; +use regex_syntax as syntax; + +use regex_test::{ + bstr::{BString, ByteSlice}, + CompiledRegex, Match, MatchKind as TestMatchKind, RegexTest, RegexTests, + SearchKind as TestSearchKind, TestResult, TestRunner, +}; + +use crate::{suite, Result}; + +/// Tests the default configuration of the hybrid NFA/DFA. +#[test] +fn default() -> Result<()> { + let builder = Regex::builder(); + TestRunner::new()?.test_iter(suite()?.iter(), compiler(builder)).assert(); + Ok(()) +} + +/// Tests the hybrid NFA/DFA with NFA shrinking disabled. +/// +/// This is actually the typical configuration one wants for a lazy DFA. NFA +/// shrinking is mostly only advantageous when building a full DFA since it +/// can sharply decrease the amount of time determinization takes. But NFA +/// shrinking is itself otherwise fairly expensive. Since a lazy DFA has +/// no compilation time (other than for building the NFA of course) before +/// executing a search, it's usually worth it to forgo NFA shrinking. +#[test] +fn no_nfa_shrink() -> Result<()> { + let mut builder = Regex::builder(); + builder.thompson(thompson::Config::new().shrink(false)); + TestRunner::new()? + // Without NFA shrinking, this test blows the default cache capacity. + .blacklist("expensive/regression-many-repeat-no-stack-overflow") + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); + Ok(()) +} + +/// Tests the hybrid NFA/DFA when 'starts_for_each_pattern' is enabled. +#[test] +fn starts_for_each_pattern() -> Result<()> { + let mut builder = Regex::builder(); + builder.dfa(DFA::config().starts_for_each_pattern(true)); + TestRunner::new()?.test_iter(suite()?.iter(), compiler(builder)).assert(); + Ok(()) +} + +/// Tests the hybrid NFA/DFA when byte classes are disabled. +/// +/// N.B. Disabling byte classes doesn't avoid any indirection at search time. +/// All it does is cause every byte value to be its own distinct equivalence +/// class. +#[test] +fn no_byte_classes() -> Result<()> { + let mut builder = Regex::builder(); + builder.dfa(DFA::config().byte_classes(false)); + TestRunner::new()?.test_iter(suite()?.iter(), compiler(builder)).assert(); + Ok(()) +} + +/// Tests that hybrid NFA/DFA never clears its cache for any test with the +/// default capacity. +/// +/// N.B. If a regex suite test is added that causes the cache to be cleared, +/// then this should just skip that test. (Which can be done by calling the +/// 'blacklist' method on 'TestRunner'.) +#[test] +fn no_cache_clearing() -> Result<()> { + let mut builder = Regex::builder(); + builder.dfa(DFA::config().minimum_cache_clear_count(Some(0))); + TestRunner::new()?.test_iter(suite()?.iter(), compiler(builder)).assert(); + Ok(()) +} + +/// Tests the hybrid NFA/DFA when the minimum cache capacity is set. +#[test] +fn min_cache_capacity() -> Result<()> { + let mut builder = Regex::builder(); + builder + .dfa(DFA::config().cache_capacity(0).skip_cache_capacity_check(true)); + TestRunner::new()?.test_iter(suite()?.iter(), compiler(builder)).assert(); + Ok(()) +} + +fn compiler( + mut builder: regex::Builder, +) -> impl FnMut(&RegexTest, &[BString]) -> Result { + move |test, regexes| { + let regexes = regexes + .iter() + .map(|r| r.to_str().map(|s| s.to_string())) + .collect::, _>>()?; + + // Check if our regex contains things that aren't supported by DFAs. + // That is, Unicode word boundaries when searching non-ASCII text. + let mut thompson = thompson::Builder::new(); + thompson.syntax(config_syntax(test)).configure(config_thompson(test)); + if let Ok(nfa) = thompson.build_many(®exes) { + let non_ascii = test.input().iter().any(|&b| !b.is_ascii()); + if nfa.has_word_boundary_unicode() && non_ascii { + return Ok(CompiledRegex::skip()); + } + } + if !configure_regex_builder(test, &mut builder) { + return Ok(CompiledRegex::skip()); + } + let re = builder.build_many(®exes)?; + let mut cache = re.create_cache(); + Ok(CompiledRegex::compiled(move |test| -> Vec { + run_test(&re, &mut cache, test) + })) + } +} + +fn run_test( + re: &Regex, + cache: &mut regex::Cache, + test: &RegexTest, +) -> Vec { + let is_match = if re.is_match(cache, test.input()) { + TestResult::matched() + } else { + TestResult::no_match() + }; + let is_match = is_match.name("is_match"); + + let find_matches = match test.search_kind() { + TestSearchKind::Earliest => { + let it = re + .find_earliest_iter(cache, test.input()) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|m| Match { + id: m.pattern().as_usize(), + start: m.start(), + end: m.end(), + }); + TestResult::matches(it).name("find_earliest_iter") + } + TestSearchKind::Leftmost => { + let it = re + .find_leftmost_iter(cache, test.input()) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|m| Match { + id: m.pattern().as_usize(), + start: m.start(), + end: m.end(), + }); + TestResult::matches(it).name("find_leftmost_iter") + } + TestSearchKind::Overlapping => { + let it = re + .find_overlapping_iter(cache, test.input()) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|m| Match { + id: m.pattern().as_usize(), + start: m.start(), + end: m.end(), + }); + TestResult::matches(it).name("find_overlapping_iter") + } + }; + vec![is_match, find_matches] +} + +/// Configures the given regex builder with all relevant settings on the given +/// regex test. +/// +/// If the regex test has a setting that is unsupported, then this returns +/// false (implying the test should be skipped). +fn configure_regex_builder( + test: &RegexTest, + builder: &mut regex::Builder, +) -> bool { + let match_kind = match test.match_kind() { + TestMatchKind::All => MatchKind::All, + TestMatchKind::LeftmostFirst => MatchKind::LeftmostFirst, + TestMatchKind::LeftmostLongest => return false, + }; + + let dense_config = DFA::config() + .anchored(test.anchored()) + .match_kind(match_kind) + .unicode_word_boundary(true); + let regex_config = Regex::config().utf8(test.utf8()); + builder + .configure(regex_config) + .syntax(config_syntax(test)) + .thompson(config_thompson(test)) + .dfa(dense_config); + true +} + +/// Configuration of a Thompson NFA compiler from a regex test. +fn config_thompson(test: &RegexTest) -> thompson::Config { + thompson::Config::new().utf8(test.utf8()) +} + +/// Configuration of the regex parser from a regex test. +fn config_syntax(test: &RegexTest) -> SyntaxConfig { + SyntaxConfig::new() + .case_insensitive(test.case_insensitive()) + .unicode(test.unicode()) + .utf8(test.utf8()) +} -- cgit v1.2.3