Merging upstream version 1.67.1+dfsg1.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-17 12:18:25 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-17 12:18:25 +0000
commit: 5363f350887b1e5b5dd21a86f88c8af9d7fea6da (patch)
tree: 35ca005eb6e0e9a1ba3bb5dbc033209ad445dc17 /vendor/regex-automata/tests/nfa
parent: Adding debian version 1.66.0+dfsg1-1. (diff)
download: rustc-5363f350887b1e5b5dd21a86f88c8af9d7fea6da.tar.xz
rustc-5363f350887b1e5b5dd21a86f88c8af9d7fea6da.zip
5 files changed, 304 insertions, 0 deletions
diff --git a/vendor/regex-automata/tests/nfa/mod.rs b/vendor/regex-automata/tests/nfa/mod.rs
new file mode 100644
index 000000000..326862147
--- /dev/null
+++ b/vendor/regex-automata/tests/nfa/mod.rs
@@ -0,0 +1 @@
+mod thompson;
diff --git a/vendor/regex-automata/tests/nfa/thompson/mod.rs b/vendor/regex-automata/tests/nfa/thompson/mod.rs
new file mode 100644
index 000000000..3a03f52ce
--- /dev/null
+++ b/vendor/regex-automata/tests/nfa/thompson/mod.rs
@@ -0,0 +1 @@
+mod pikevm;
diff --git a/vendor/regex-automata/tests/nfa/thompson/pikevm/api.rs b/vendor/regex-automata/tests/nfa/thompson/pikevm/api.rs
new file mode 100644
index 000000000..c8199f709
--- /dev/null
+++ b/vendor/regex-automata/tests/nfa/thompson/pikevm/api.rs
@@ -0,0 +1,191 @@
+/*
+use std::error::Error;
+
+use regex_automata::{
+    hybrid::{
+        dfa::{self, DFA},
+        regex::Regex,
+        OverlappingState,
+    },
+    nfa::thompson,
+    HalfMatch, MatchError, MatchKind, MultiMatch,
+};
+
+use crate::util::{BunkPrefilter, SubstringPrefilter};
+
+// Tests that too many cache resets cause the lazy DFA to quit.
+#[test]
+fn too_many_cache_resets_cause_quit() -> Result<(), Box<dyn Error>> {
+    // This is a carefully chosen regex. The idea is to pick one that requires
+    // some decent number of states (hence the bounded repetition). But we
+    // specifically choose to create a class with an ASCII letter and a
+    // non-ASCII letter so that we can check that no new states are created
+    // once the cache is full. Namely, if we fill up the cache on a haystack
+    // of 'a's, then in order to match one 'β', a new state will need to be
+    // created since a 'β' is encoded with multiple bytes. Since there's no
+    // room for this state, the search should quit at the very first position.
+    let pattern = r"[aβ]{100}";
+    let dfa = DFA::builder()
+        .configure(
+            // Configure it so that we have the minimum cache capacity
+            // possible. And that if any resets occur, the search quits.
+            DFA::config()
+                .skip_cache_capacity_check(true)
+                .cache_capacity(0)
+                .minimum_cache_clear_count(Some(0)),
+        )
+        .build(pattern)?;
+    let mut cache = dfa.create_cache();
+
+    let haystack = "a".repeat(101).into_bytes();
+    let err = MatchError::GaveUp { offset: 25 };
+    assert_eq!(dfa.find_earliest_fwd(&mut cache, &haystack), Err(err.clone()));
+    assert_eq!(dfa.find_leftmost_fwd(&mut cache, &haystack), Err(err.clone()));
+    assert_eq!(
+        dfa.find_overlapping_fwd(
+            &mut cache,
+            &haystack,
+            &mut OverlappingState::start()
+        ),
+        Err(err.clone())
+    );
+
+    let haystack = "β".repeat(101).into_bytes();
+    let err = MatchError::GaveUp { offset: 0 };
+    assert_eq!(dfa.find_earliest_fwd(&mut cache, &haystack), Err(err));
+    // no need to test that other find routines quit, since we did that above
+
+    // OK, if we reset the cache, then we should be able to create more states
+    // and make more progress with searching for betas.
+    cache.reset(&dfa);
+    let err = MatchError::GaveUp { offset: 26 };
+    assert_eq!(dfa.find_earliest_fwd(&mut cache, &haystack), Err(err));
+
+    // ... switching back to ASCII still makes progress since it just needs to
+    // set transitions on existing states!
+    let haystack = "a".repeat(101).into_bytes();
+    let err = MatchError::GaveUp { offset: 13 };
+    assert_eq!(dfa.find_earliest_fwd(&mut cache, &haystack), Err(err));
+
+    Ok(())
+}
+
+// Tests that quit bytes in the forward direction work correctly.
+#[test]
+fn quit_fwd() -> Result<(), Box<dyn Error>> {
+    let dfa = DFA::builder()
+        .configure(DFA::config().quit(b'x', true))
+        .build("[[:word:]]+$")?;
+    let mut cache = dfa.create_cache();
+
+    assert_eq!(
+        dfa.find_earliest_fwd(&mut cache, b"abcxyz"),
+        Err(MatchError::Quit { byte: b'x', offset: 3 })
+    );
+    assert_eq!(
+        dfa.find_leftmost_fwd(&mut cache, b"abcxyz"),
+        Err(MatchError::Quit { byte: b'x', offset: 3 })
+    );
+    assert_eq!(
+        dfa.find_overlapping_fwd(
+            &mut cache,
+            b"abcxyz",
+            &mut OverlappingState::start()
+        ),
+        Err(MatchError::Quit { byte: b'x', offset: 3 })
+    );
+
+    Ok(())
+}
+
+// Tests that quit bytes in the reverse direction work correctly.
+#[test]
+fn quit_rev() -> Result<(), Box<dyn Error>> {
+    let dfa = DFA::builder()
+        .configure(DFA::config().quit(b'x', true))
+        .thompson(thompson::Config::new().reverse(true))
+        .build("^[[:word:]]+")?;
+    let mut cache = dfa.create_cache();
+
+    assert_eq!(
+        dfa.find_earliest_rev(&mut cache, b"abcxyz"),
+        Err(MatchError::Quit { byte: b'x', offset: 3 })
+    );
+    assert_eq!(
+        dfa.find_leftmost_rev(&mut cache, b"abcxyz"),
+        Err(MatchError::Quit { byte: b'x', offset: 3 })
+    );
+
+    Ok(())
+}
+
+// Tests that if we heuristically enable Unicode word boundaries but then
+// instruct that a non-ASCII byte should NOT be a quit byte, then the builder
+// will panic.
+#[test]
+#[should_panic]
+fn quit_panics() {
+    DFA::config().unicode_word_boundary(true).quit(b'\xFF', false);
+}
+
+// This tests an interesting case where even if the Unicode word boundary option
+// is disabled, setting all non-ASCII bytes to be quit bytes will cause Unicode
+// word boundaries to be enabled.
+#[test]
+fn unicode_word_implicitly_works() -> Result<(), Box<dyn Error>> {
+    let mut config = DFA::config();
+    for b in 0x80..=0xFF {
+        config = config.quit(b, true);
+    }
+    let dfa = DFA::builder().configure(config).build(r"\b")?;
+    let mut cache = dfa.create_cache();
+    let expected = HalfMatch::must(0, 1);
+    assert_eq!(dfa.find_leftmost_fwd(&mut cache, b" a"), Ok(Some(expected)));
+    Ok(())
+}
+
+// Tests that we can provide a prefilter to a Regex, and the search reports
+// correct results.
+#[test]
+fn prefilter_works() -> Result<(), Box<dyn Error>> {
+    let mut re = Regex::new(r"a[0-9]+").unwrap();
+    re.set_prefilter(Some(Box::new(SubstringPrefilter::new("a"))));
+    let mut cache = re.create_cache();
+
+    let text = b"foo abc foo a1a2a3 foo a123 bar aa456";
+    let matches: Vec<(usize, usize)> = re
+        .find_leftmost_iter(&mut cache, text)
+        .map(|m| (m.start(), m.end()))
+        .collect();
+    assert_eq!(
+        matches,
+        vec![(12, 14), (14, 16), (16, 18), (23, 27), (33, 37),]
+    );
+    Ok(())
+}
+
+// This test confirms that a prefilter is active by using a prefilter that
+// reports false negatives.
+#[test]
+fn prefilter_is_active() -> Result<(), Box<dyn Error>> {
+    let text = b"za123";
+    let mut re = Regex::new(r"a[0-9]+").unwrap();
+    let mut cache = re.create_cache();
+
+    re.set_prefilter(Some(Box::new(SubstringPrefilter::new("a"))));
+    assert_eq!(
+        re.find_leftmost(&mut cache, b"za123"),
+        Some(MultiMatch::must(0, 1, 5))
+    );
+    assert_eq!(
+        re.find_leftmost(&mut cache, b"a123"),
+        Some(MultiMatch::must(0, 0, 4))
+    );
+    re.set_prefilter(Some(Box::new(BunkPrefilter::new())));
+    assert_eq!(re.find_leftmost(&mut cache, b"za123"), None);
+    // This checks that the prefilter is used when first starting the search,
+    // instead of waiting until at least one transition has occurred.
+    assert_eq!(re.find_leftmost(&mut cache, b"a123"), None);
+    Ok(())
+}
+*/
diff --git a/vendor/regex-automata/tests/nfa/thompson/pikevm/mod.rs b/vendor/regex-automata/tests/nfa/thompson/pikevm/mod.rs
new file mode 100644
index 000000000..f4299510c
--- /dev/null
+++ b/vendor/regex-automata/tests/nfa/thompson/pikevm/mod.rs
@@ -0,0 +1,2 @@
+mod api;
+mod suite;
diff --git a/vendor/regex-automata/tests/nfa/thompson/pikevm/suite.rs b/vendor/regex-automata/tests/nfa/thompson/pikevm/suite.rs
new file mode 100644
index 000000000..e5505d59a
--- /dev/null
+++ b/vendor/regex-automata/tests/nfa/thompson/pikevm/suite.rs
@@ -0,0 +1,109 @@
+use regex_automata::{
+    nfa::thompson::{
+        self,
+        pikevm::{self, PikeVM},
+    },
+    MatchKind, SyntaxConfig,
+};
+use regex_syntax as syntax;
+
+use regex_test::{
+    bstr::{BString, ByteSlice},
+    CompiledRegex, Match, MatchKind as TestMatchKind, RegexTest, RegexTests,
+    SearchKind as TestSearchKind, TestResult, TestRunner,
+};
+
+use crate::{suite, Result};
+
+/// Tests the default configuration of the PikeVM.
+#[test]
+fn default() -> Result<()> {
+    let builder = PikeVM::builder();
+    TestRunner::new()?.test_iter(suite()?.iter(), compiler(builder)).assert();
+    Ok(())
+}
+
+fn compiler(
+    mut builder: pikevm::Builder,
+) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> {
+    move |test, regexes| {
+        let regexes = regexes
+            .iter()
+            .map(|r| r.to_str().map(|s| s.to_string()))
+            .collect::<std::result::Result<Vec<String>, _>>()?; // fails on non-UTF-8 patterns
+        if !configure_pikevm_builder(test, &mut builder) {
+            return Ok(CompiledRegex::skip());
+        }
+        let re = builder.build_many(&regexes)?; // one PikeVM for all patterns
+        let mut cache = re.create_cache(); // moved into the closure below
+        Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> {
+            run_test(&re, &mut cache, test)
+        }))
+    }
+}
+
+fn run_test(
+    re: &PikeVM,
+    cache: &mut pikevm::Cache,
+    test: &RegexTest,
+) -> Vec<TestResult> {
+    // let is_match = if re.is_match(cache, test.input()) {
+    // TestResult::matched()
+    // } else {
+    // TestResult::no_match()
+    // };
+    // let is_match = is_match.name("is_match");
+
+    let find_matches = match test.search_kind() {
+        TestSearchKind::Earliest => { // reported as skipped
+            TestResult::skip().name("find_earliest_iter")
+        }
+        TestSearchKind::Leftmost => {
+            let it = re
+                .find_leftmost_iter(cache, test.input())
+                .take(test.match_limit().unwrap_or(std::usize::MAX))
+                .map(|m| Match {
+                    id: m.pattern().as_usize(),
+                    start: m.start(),
+                    end: m.end(),
+                });
+            TestResult::matches(it).name("find_leftmost_iter")
+        }
+        TestSearchKind::Overlapping => { // reported as skipped
+            TestResult::skip().name("find_overlapping_iter")
+        }
+    };
+    // vec![is_match, find_matches]
+    vec![find_matches]
+}
+
+/// Configures the given regex builder with all relevant settings on the given
+/// regex test.
+///
+/// A return value of false implies the test should be skipped (unsupported
+/// setting). No setting is rejected at present, so this always returns true.
+fn configure_pikevm_builder(
+    test: &RegexTest,
+    builder: &mut pikevm::Builder,
+) -> bool {
+    let pikevm_config =
+        PikeVM::config().anchored(test.anchored()).utf8(test.utf8());
+    builder
+        .configure(pikevm_config)
+        .syntax(config_syntax(test))
+        .thompson(config_thompson(test));
+    true
+}
+
+/// Builds a Thompson NFA compiler config carrying the test's UTF-8 setting.
+fn config_thompson(test: &RegexTest) -> thompson::Config {
+    thompson::Config::new().utf8(test.utf8())
+}
+
+/// Builds a regex parser config from the test's case, Unicode and UTF-8 flags.
+fn config_syntax(test: &RegexTest) -> SyntaxConfig {
+    SyntaxConfig::new()
+        .case_insensitive(test.case_insensitive())
+        .unicode(test.unicode())
+        .utf8(test.utf8())
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-17 12:18:25 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-17 12:18:25 +0000
commit	5363f350887b1e5b5dd21a86f88c8af9d7fea6da (patch)
tree	35ca005eb6e0e9a1ba3bb5dbc033209ad445dc17 /vendor/regex-automata/tests/nfa
parent	Adding debian version 1.66.0+dfsg1-1. (diff)
download	rustc-5363f350887b1e5b5dd21a86f88c8af9d7fea6da.tar.xz rustc-5363f350887b1e5b5dd21a86f88c8af9d7fea6da.zip