diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /third_party/rust/regex-automata/tests/nfa | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/regex-automata/tests/nfa')
6 files changed, 384 insertions, 0 deletions
diff --git a/third_party/rust/regex-automata/tests/nfa/mod.rs b/third_party/rust/regex-automata/tests/nfa/mod.rs new file mode 100644 index 0000000000..3268621473 --- /dev/null +++ b/third_party/rust/regex-automata/tests/nfa/mod.rs @@ -0,0 +1 @@ +mod thompson; diff --git a/third_party/rust/regex-automata/tests/nfa/thompson/backtrack/mod.rs b/third_party/rust/regex-automata/tests/nfa/thompson/backtrack/mod.rs new file mode 100644 index 0000000000..9d6ab475ef --- /dev/null +++ b/third_party/rust/regex-automata/tests/nfa/thompson/backtrack/mod.rs @@ -0,0 +1,2 @@ +#[cfg(not(miri))] +mod suite; diff --git a/third_party/rust/regex-automata/tests/nfa/thompson/backtrack/suite.rs b/third_party/rust/regex-automata/tests/nfa/thompson/backtrack/suite.rs new file mode 100644 index 0000000000..bce0eef408 --- /dev/null +++ b/third_party/rust/regex-automata/tests/nfa/thompson/backtrack/suite.rs @@ -0,0 +1,213 @@ +use { + anyhow::Result, + regex_automata::{ + nfa::thompson::{ + self, + backtrack::{self, BoundedBacktracker}, + NFA, + }, + util::{prefilter::Prefilter, syntax}, + Input, + }, + regex_test::{ + CompiledRegex, Match, MatchKind, RegexTest, SearchKind, Span, + TestResult, TestRunner, + }, +}; + +use crate::{create_input, suite, testify_captures}; + +/// Tests the default configuration of the bounded backtracker. +#[test] +fn default() -> Result<()> { + let builder = BoundedBacktracker::builder(); + let mut runner = TestRunner::new()?; + runner.expand(&["is_match", "find", "captures"], |test| test.compiles()); + // At the time of writing, every regex search in the test suite fits + // into the backtracker's default visited capacity (except for the + // blacklisted tests below). If regexes are added that blow that capacity, + // then they should be blacklisted here. A tempting alternative is to + // automatically skip them by checking the haystack length against + // BoundedBacktracker::max_haystack_len, but that could wind up hiding + // interesting failure modes. e.g., If the visited capacity is somehow + // wrong or smaller than it should be. + runner.blacklist("expensive/backtrack-blow-visited-capacity"); + runner.test_iter(suite()?.iter(), compiler(builder)).assert(); + Ok(()) +} + +/// Tests the backtracker with prefilters enabled. +#[test] +fn prefilter() -> Result<()> { + let my_compiler = |test: &RegexTest, regexes: &[String]| { + // Parse regexes as HIRs so we can get literals to build a prefilter. + let mut hirs = vec![]; + for pattern in regexes.iter() { + hirs.push(syntax::parse_with(pattern, &config_syntax(test))?); + } + // We can always select leftmost-first here because the backtracker + // only supports leftmost-first matching. + let pre = Prefilter::from_hirs_prefix( + regex_automata::MatchKind::LeftmostFirst, + &hirs, + ); + let mut builder = BoundedBacktracker::builder(); + builder.configure(BoundedBacktracker::config().prefilter(pre)); + compiler(builder)(test, regexes) + }; + let mut runner = TestRunner::new()?; + runner.expand(&["is_match", "find", "captures"], |test| test.compiles()); + runner.blacklist("expensive/backtrack-blow-visited-capacity"); + runner.test_iter(suite()?.iter(), my_compiler).assert(); + Ok(()) +} + +/// Tests the bounded backtracker when its visited capacity is set to its +/// minimum amount. +#[test] +fn min_visited_capacity() -> Result<()> { + let mut runner = TestRunner::new()?; + runner.expand(&["is_match", "find", "captures"], |test| test.compiles()); + runner + .test_iter(suite()?.iter(), move |test, regexes| { + let nfa = NFA::compiler() + .configure(config_thompson(test)) + .syntax(config_syntax(test)) + .build_many(®exes)?; + let mut builder = BoundedBacktracker::builder(); + if !configure_backtrack_builder(test, &mut builder) { + return Ok(CompiledRegex::skip()); + } + // Setup the bounded backtracker so that its visited capacity is + // the absolute minimum required for the test's haystack. + builder.configure(BoundedBacktracker::config().visited_capacity( + backtrack::min_visited_capacity( + &nfa, + &Input::new(test.haystack()), + ), + )); + + let re = builder.build_from_nfa(nfa)?; + let mut cache = re.create_cache(); + Ok(CompiledRegex::compiled(move |test| -> TestResult { + run_test(&re, &mut cache, test) + })) + }) + .assert(); + Ok(()) +} + +fn compiler( + mut builder: backtrack::Builder, +) -> impl FnMut(&RegexTest, &[String]) -> Result<CompiledRegex> { + move |test, regexes| { + if !configure_backtrack_builder(test, &mut builder) { + return Ok(CompiledRegex::skip()); + } + let re = builder.build_many(®exes)?; + let mut cache = re.create_cache(); + Ok(CompiledRegex::compiled(move |test| -> TestResult { + run_test(&re, &mut cache, test) + })) + } +} + +fn run_test( + re: &BoundedBacktracker, + cache: &mut backtrack::Cache, + test: &RegexTest, +) -> TestResult { + let input = create_input(test); + match test.additional_name() { + "is_match" => match test.search_kind() { + SearchKind::Earliest | SearchKind::Overlapping => { + TestResult::skip() + } + SearchKind::Leftmost => { + let input = input.earliest(true); + TestResult::matched(re.try_is_match(cache, input).unwrap()) + } + }, + "find" => match test.search_kind() { + SearchKind::Earliest | SearchKind::Overlapping => { + TestResult::skip() + } + SearchKind::Leftmost => TestResult::matches( + re.try_find_iter(cache, input) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|result| result.unwrap()) + .map(|m| Match { + id: m.pattern().as_usize(), + span: Span { start: m.start(), end: m.end() }, + }), + ), + }, + "captures" => match test.search_kind() { + SearchKind::Earliest | SearchKind::Overlapping => { + TestResult::skip() + } + SearchKind::Leftmost => TestResult::captures( + re.try_captures_iter(cache, input) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|result| result.unwrap()) + .map(|caps| testify_captures(&caps)), + ), + }, + name => TestResult::fail(&format!("unrecognized test name: {}", name)), + } +} + +/// Configures the given regex builder with all relevant settings on the given +/// regex test. +/// +/// If the regex test has a setting that is unsupported, then this returns +/// false (implying the test should be skipped). +fn configure_backtrack_builder( + test: &RegexTest, + builder: &mut backtrack::Builder, +) -> bool { + match (test.search_kind(), test.match_kind()) { + // For testing the standard search APIs. This is the only supported + // configuration for the backtracker. + (SearchKind::Leftmost, MatchKind::LeftmostFirst) => {} + // Overlapping APIs not supported at all for backtracker. + (SearchKind::Overlapping, _) => return false, + // Backtracking doesn't really support the notion of 'earliest'. + // Namely, backtracking already works by returning as soon as it knows + // it has found a match. It just so happens that this corresponds to + // the standard 'leftmost' formulation. + // + // The 'earliest' definition in this crate does indeed permit this + // behavior, so this is "fine," but our test suite specifically looks + // for the earliest position at which a match is known, which our + // finite automata based regex engines have no problem providing. So + // for backtracking, we just skip these tests. + (SearchKind::Earliest, _) => return false, + // For backtracking, 'all' semantics don't really make sense. + (_, MatchKind::All) => return false, + // Not supported at all in regex-automata. + (_, MatchKind::LeftmostLongest) => return false, + }; + let backtrack_config = BoundedBacktracker::config(); + builder + .configure(backtrack_config) + .syntax(config_syntax(test)) + .thompson(config_thompson(test)); + true +} + +/// Configuration of a Thompson NFA compiler from a regex test. +fn config_thompson(test: &RegexTest) -> thompson::Config { + let mut lookm = regex_automata::util::look::LookMatcher::new(); + lookm.set_line_terminator(test.line_terminator()); + thompson::Config::new().utf8(test.utf8()).look_matcher(lookm) +} + +/// Configuration of the regex parser from a regex test. +fn config_syntax(test: &RegexTest) -> syntax::Config { + syntax::Config::new() + .case_insensitive(test.case_insensitive()) + .unicode(test.unicode()) + .utf8(test.utf8()) + .line_terminator(test.line_terminator()) +} diff --git a/third_party/rust/regex-automata/tests/nfa/thompson/mod.rs b/third_party/rust/regex-automata/tests/nfa/thompson/mod.rs new file mode 100644 index 0000000000..b2558f7049 --- /dev/null +++ b/third_party/rust/regex-automata/tests/nfa/thompson/mod.rs @@ -0,0 +1,4 @@ +#[cfg(feature = "nfa-backtrack")] +mod backtrack; +#[cfg(feature = "nfa-pikevm")] +mod pikevm; diff --git a/third_party/rust/regex-automata/tests/nfa/thompson/pikevm/mod.rs b/third_party/rust/regex-automata/tests/nfa/thompson/pikevm/mod.rs new file mode 100644 index 0000000000..9d6ab475ef --- /dev/null +++ b/third_party/rust/regex-automata/tests/nfa/thompson/pikevm/mod.rs @@ -0,0 +1,2 @@ +#[cfg(not(miri))] +mod suite; diff --git a/third_party/rust/regex-automata/tests/nfa/thompson/pikevm/suite.rs b/third_party/rust/regex-automata/tests/nfa/thompson/pikevm/suite.rs new file mode 100644 index 0000000000..d32842a156 --- /dev/null +++ b/third_party/rust/regex-automata/tests/nfa/thompson/pikevm/suite.rs @@ -0,0 +1,162 @@ +use { + anyhow::Result, + regex_automata::{ + nfa::thompson::{ + self, + pikevm::{self, PikeVM}, + }, + util::{prefilter::Prefilter, syntax}, + PatternSet, + }, + regex_test::{ + CompiledRegex, Match, RegexTest, SearchKind, Span, TestResult, + TestRunner, + }, +}; + +use crate::{create_input, suite, testify_captures, untestify_kind}; + +/// Tests the default configuration of the hybrid NFA/DFA. +#[test] +fn default() -> Result<()> { + let builder = PikeVM::builder(); + let mut runner = TestRunner::new()?; + runner.expand(&["is_match", "find", "captures"], |test| test.compiles()); + runner.test_iter(suite()?.iter(), compiler(builder)).assert(); + Ok(()) +} + +/// Tests the PikeVM with prefilters enabled. +#[test] +fn prefilter() -> Result<()> { + let my_compiler = |test: &RegexTest, regexes: &[String]| { + // Parse regexes as HIRs so we can get literals to build a prefilter. + let mut hirs = vec![]; + for pattern in regexes.iter() { + hirs.push(syntax::parse_with(pattern, &config_syntax(test))?); + } + let kind = match untestify_kind(test.match_kind()) { + None => return Ok(CompiledRegex::skip()), + Some(kind) => kind, + }; + let pre = Prefilter::from_hirs_prefix(kind, &hirs); + let mut builder = PikeVM::builder(); + builder.configure(PikeVM::config().prefilter(pre)); + compiler(builder)(test, regexes) + }; + let mut runner = TestRunner::new()?; + runner.expand(&["is_match", "find", "captures"], |test| test.compiles()); + runner.test_iter(suite()?.iter(), my_compiler).assert(); + Ok(()) +} + +fn compiler( + mut builder: pikevm::Builder, +) -> impl FnMut(&RegexTest, &[String]) -> Result<CompiledRegex> { + move |test, regexes| { + if !configure_pikevm_builder(test, &mut builder) { + return Ok(CompiledRegex::skip()); + } + let re = builder.build_many(®exes)?; + let mut cache = re.create_cache(); + Ok(CompiledRegex::compiled(move |test| -> TestResult { + run_test(&re, &mut cache, test) + })) + } +} + +fn run_test( + re: &PikeVM, + cache: &mut pikevm::Cache, + test: &RegexTest, +) -> TestResult { + let input = create_input(test); + match test.additional_name() { + "is_match" => TestResult::matched(re.is_match(cache, input)), + "find" => match test.search_kind() { + SearchKind::Earliest => { + let it = re + .find_iter(cache, input.earliest(true)) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|m| Match { + id: m.pattern().as_usize(), + span: Span { start: m.start(), end: m.end() }, + }); + TestResult::matches(it) + } + SearchKind::Leftmost => { + let it = re + .find_iter(cache, input) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|m| Match { + id: m.pattern().as_usize(), + span: Span { start: m.start(), end: m.end() }, + }); + TestResult::matches(it) + } + SearchKind::Overlapping => { + let mut patset = PatternSet::new(re.get_nfa().pattern_len()); + re.which_overlapping_matches(cache, &input, &mut patset); + TestResult::which(patset.iter().map(|p| p.as_usize())) + } + }, + "captures" => match test.search_kind() { + SearchKind::Earliest => { + let it = re + .captures_iter(cache, input.earliest(true)) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|caps| testify_captures(&caps)); + TestResult::captures(it) + } + SearchKind::Leftmost => { + let it = re + .captures_iter(cache, input) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|caps| testify_captures(&caps)); + TestResult::captures(it) + } + SearchKind::Overlapping => { + // There is no overlapping PikeVM API that supports captures. + TestResult::skip() + } + }, + name => TestResult::fail(&format!("unrecognized test name: {}", name)), + } +} + +/// Configures the given regex builder with all relevant settings on the given +/// regex test. +/// +/// If the regex test has a setting that is unsupported, then this returns +/// false (implying the test should be skipped). +fn configure_pikevm_builder( + test: &RegexTest, + builder: &mut pikevm::Builder, +) -> bool { + let match_kind = match untestify_kind(test.match_kind()) { + None => return false, + Some(k) => k, + }; + let pikevm_config = PikeVM::config().match_kind(match_kind); + builder + .configure(pikevm_config) + .syntax(config_syntax(test)) + .thompson(config_thompson(test)); + true +} + +/// Configuration of a Thompson NFA compiler from a regex test. +fn config_thompson(test: &RegexTest) -> thompson::Config { + let mut lookm = regex_automata::util::look::LookMatcher::new(); + lookm.set_line_terminator(test.line_terminator()); + thompson::Config::new().utf8(test.utf8()).look_matcher(lookm) +} + +/// Configuration of the regex parser from a regex test. +fn config_syntax(test: &RegexTest) -> syntax::Config { + syntax::Config::new() + .case_insensitive(test.case_insensitive()) + .unicode(test.unicode()) + .utf8(test.utf8()) + .line_terminator(test.line_terminator()) +} |