From 5363f350887b1e5b5dd21a86f88c8af9d7fea6da Mon Sep 17 00:00:00 2001
From: Daniel Baumann <daniel.baumann@progress-linux.org>
Date: Wed, 17 Apr 2024 14:18:25 +0200
Subject: Merging upstream version 1.67.1+dfsg1.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
---
 vendor/regex-automata/tests/hybrid/suite.rs | 212 ++++++++++++++++++++++++++++
 1 file changed, 212 insertions(+)
 create mode 100644 vendor/regex-automata/tests/hybrid/suite.rs

(limited to 'vendor/regex-automata/tests/hybrid/suite.rs')

diff --git a/vendor/regex-automata/tests/hybrid/suite.rs b/vendor/regex-automata/tests/hybrid/suite.rs
new file mode 100644
index 000000000..d60570d84
--- /dev/null
+++ b/vendor/regex-automata/tests/hybrid/suite.rs
@@ -0,0 +1,212 @@
+use regex_automata::{
+    hybrid::{
+        dfa::DFA,
+        regex::{self, Regex},
+    },
+    nfa::thompson,
+    MatchKind, SyntaxConfig,
+};
+use regex_syntax as syntax;
+
+use regex_test::{
+    bstr::{BString, ByteSlice},
+    CompiledRegex, Match, MatchKind as TestMatchKind, RegexTest, RegexTests,
+    SearchKind as TestSearchKind, TestResult, TestRunner,
+};
+
+use crate::{suite, Result};
+
+/// Tests the default configuration of the hybrid NFA/DFA.
+#[test]
+fn default() -> Result<()> {
+    let builder = Regex::builder();
+    TestRunner::new()?.test_iter(suite()?.iter(), compiler(builder)).assert();
+    Ok(())
+}
+
+/// Tests the hybrid NFA/DFA with NFA shrinking disabled.
+///
+/// This is actually the typical configuration one wants for a lazy DFA. NFA
+/// shrinking is mostly only advantageous when building a full DFA since it
+/// can sharply decrease the amount of time determinization takes. But NFA
+/// shrinking is itself otherwise fairly expensive. Since a lazy DFA has
+/// no compilation time (other than for building the NFA of course) before
+/// executing a search, it's usually worth it to forgo NFA shrinking.
+#[test]
+fn no_nfa_shrink() -> Result<()> {
+    let mut builder = Regex::builder();
+    builder.thompson(thompson::Config::new().shrink(false));
+    TestRunner::new()?
+        // Without NFA shrinking, this test blows the default cache capacity.
+        .blacklist("expensive/regression-many-repeat-no-stack-overflow")
+        .test_iter(suite()?.iter(), compiler(builder))
+        .assert();
+    Ok(())
+}
+
+/// Tests the hybrid NFA/DFA when 'starts_for_each_pattern' is enabled.
+#[test]
+fn starts_for_each_pattern() -> Result<()> {
+    let mut builder = Regex::builder();
+    builder.dfa(DFA::config().starts_for_each_pattern(true));
+    TestRunner::new()?.test_iter(suite()?.iter(), compiler(builder)).assert();
+    Ok(())
+}
+
+/// Tests the hybrid NFA/DFA when byte classes are disabled.
+///
+/// N.B. Disabling byte classes doesn't avoid any indirection at search time.
+/// All it does is cause every byte value to be its own distinct equivalence
+/// class.
+#[test]
+fn no_byte_classes() -> Result<()> {
+    let mut builder = Regex::builder();
+    builder.dfa(DFA::config().byte_classes(false));
+    TestRunner::new()?.test_iter(suite()?.iter(), compiler(builder)).assert();
+    Ok(())
+}
+
+/// Tests that hybrid NFA/DFA never clears its cache for any test with the
+/// default capacity.
+///
+/// N.B. If a regex suite test is added that causes the cache to be cleared,
+/// then this should just skip that test. (Which can be done by calling the
+/// 'blacklist' method on 'TestRunner'.)
+#[test]
+fn no_cache_clearing() -> Result<()> {
+    let mut builder = Regex::builder();
+    builder.dfa(DFA::config().minimum_cache_clear_count(Some(0)));
+    TestRunner::new()?.test_iter(suite()?.iter(), compiler(builder)).assert();
+    Ok(())
+}
+
+/// Tests the hybrid NFA/DFA when the minimum cache capacity is set.
+#[test]
+fn min_cache_capacity() -> Result<()> {
+    let mut builder = Regex::builder();
+    builder
+        .dfa(DFA::config().cache_capacity(0).skip_cache_capacity_check(true));
+    TestRunner::new()?.test_iter(suite()?.iter(), compiler(builder)).assert();
+    Ok(())
+}
+
+fn compiler(
+    mut builder: regex::Builder,
+) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> {
+    move |test, regexes| {
+        let regexes = regexes
+            .iter()
+            .map(|r| r.to_str().map(|s| s.to_string()))
+            .collect::<std::result::Result<Vec<String>, _>>()?;
+
+        // Check if our regex contains things that aren't supported by DFAs.
+        // That is, Unicode word boundaries when searching non-ASCII text.
+        let mut thompson = thompson::Builder::new();
+        thompson.syntax(config_syntax(test)).configure(config_thompson(test));
+        if let Ok(nfa) = thompson.build_many(&regexes) {
+            let non_ascii = test.input().iter().any(|&b| !b.is_ascii());
+            if nfa.has_word_boundary_unicode() && non_ascii {
+                return Ok(CompiledRegex::skip());
+            }
+        }
+        if !configure_regex_builder(test, &mut builder) {
+            return Ok(CompiledRegex::skip());
+        }
+        let re = builder.build_many(&regexes)?;
+        let mut cache = re.create_cache();
+        Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> {
+            run_test(&re, &mut cache, test)
+        }))
+    }
+}
+
+fn run_test(
+    re: &Regex,
+    cache: &mut regex::Cache,
+    test: &RegexTest,
+) -> Vec<TestResult> {
+    let is_match = if re.is_match(cache, test.input()) {
+        TestResult::matched()
+    } else {
+        TestResult::no_match()
+    };
+    let is_match = is_match.name("is_match");
+
+    let find_matches = match test.search_kind() {
+        TestSearchKind::Earliest => {
+            let it = re
+                .find_earliest_iter(cache, test.input())
+                .take(test.match_limit().unwrap_or(std::usize::MAX))
+                .map(|m| Match {
+                    id: m.pattern().as_usize(),
+                    start: m.start(),
+                    end: m.end(),
+                });
+            TestResult::matches(it).name("find_earliest_iter")
+        }
+        TestSearchKind::Leftmost => {
+            let it = re
+                .find_leftmost_iter(cache, test.input())
+                .take(test.match_limit().unwrap_or(std::usize::MAX))
+                .map(|m| Match {
+                    id: m.pattern().as_usize(),
+                    start: m.start(),
+                    end: m.end(),
+                });
+            TestResult::matches(it).name("find_leftmost_iter")
+        }
+        TestSearchKind::Overlapping => {
+            let it = re
+                .find_overlapping_iter(cache, test.input())
+                .take(test.match_limit().unwrap_or(std::usize::MAX))
+                .map(|m| Match {
+                    id: m.pattern().as_usize(),
+                    start: m.start(),
+                    end: m.end(),
+                });
+            TestResult::matches(it).name("find_overlapping_iter")
+        }
+    };
+    vec![is_match, find_matches]
+}
+
+/// Configures the given regex builder with all relevant settings on the given
+/// regex test.
+///
+/// If the regex test has a setting that is unsupported, then this returns
+/// false (implying the test should be skipped).
+fn configure_regex_builder(
+    test: &RegexTest,
+    builder: &mut regex::Builder,
+) -> bool {
+    let match_kind = match test.match_kind() {
+        TestMatchKind::All => MatchKind::All,
+        TestMatchKind::LeftmostFirst => MatchKind::LeftmostFirst,
+        TestMatchKind::LeftmostLongest => return false,
+    };
+
+    let dense_config = DFA::config()
+        .anchored(test.anchored())
+        .match_kind(match_kind)
+        .unicode_word_boundary(true);
+    let regex_config = Regex::config().utf8(test.utf8());
+    builder
+        .configure(regex_config)
+        .syntax(config_syntax(test))
+        .thompson(config_thompson(test))
+        .dfa(dense_config);
+    true
+}
+
+/// Configuration of a Thompson NFA compiler from a regex test.
+fn config_thompson(test: &RegexTest) -> thompson::Config {
+    thompson::Config::new().utf8(test.utf8())
+}
+
+/// Configuration of the regex parser from a regex test.
+fn config_syntax(test: &RegexTest) -> SyntaxConfig {
+    SyntaxConfig::new()
+        .case_insensitive(test.case_insensitive())
+        .unicode(test.unicode())
+        .utf8(test.utf8())
+}
-- 
cgit v1.2.3