Merging upstream version 1.74.1+dfsg1.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-30 18:31:44 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-30 18:31:44 +0000
commit: c23a457e72abe608715ac76f076f47dc42af07a5 (patch)
tree: 2772049aaf84b5c9d0ed12ec8d86812f7a7904b6 /vendor/regex-automata-0.2.0/tests/dfa
parent: Releasing progress-linux version 1.73.0+dfsg1-1~progress7.99u1. (diff)
download: rustc-c23a457e72abe608715ac76f076f47dc42af07a5.tar.xz
rustc-c23a457e72abe608715ac76f076f47dc42af07a5.zip
3 files changed, 415 insertions, 0 deletions
diff --git a/vendor/regex-automata-0.2.0/tests/dfa/api.rs b/vendor/regex-automata-0.2.0/tests/dfa/api.rs
new file mode 100644
index 000000000..80d7d704c
--- /dev/null
+++ b/vendor/regex-automata-0.2.0/tests/dfa/api.rs
@@ -0,0 +1,133 @@
+use std::error::Error;
+
+use regex_automata::{
+    dfa::{dense, regex::Regex, Automaton, OverlappingState},
+    nfa::thompson,
+    HalfMatch, MatchError, MatchKind, MultiMatch,
+};
+
+use crate::util::{BunkPrefilter, SubstringPrefilter};
+
+// Checks that every forward search reports a quit error at the quit byte.
+#[test]
+fn quit_fwd() -> Result<(), Box<dyn Error>> {
+    let config = dense::Config::new().quit(b'x', true);
+    let dfa = dense::Builder::new().configure(config).build("[[:word:]]+$")?;
+
+    // The haystack holds the quit byte 'x' at offset 3, so all three forward
+    // search routines are expected to give up right there.
+    let haystack = b"abcxyz";
+    let quit = || MatchError::Quit { byte: b'x', offset: 3 };
+
+    // Earliest search.
+    assert_eq!(dfa.find_earliest_fwd(haystack), Err(quit()));
+    // Leftmost search.
+    assert_eq!(dfa.find_leftmost_fwd(haystack), Err(quit()));
+    // Overlapping search, started from a fresh overlapping state.
+    let mut state = OverlappingState::start();
+    assert_eq!(dfa.find_overlapping_fwd(haystack, &mut state), Err(quit()));
+
+    // Building the DFA was the only fallible step.
+    Ok(())
+}
+
+// Checks that every reverse search reports a quit error at the quit byte.
+#[test]
+fn quit_rev() -> Result<(), Box<dyn Error>> {
+    let dense_config = dense::Config::new().quit(b'x', true);
+    // The underlying NFA is compiled with its reverse option enabled.
+    let nfa_config = thompson::Config::new().reverse(true);
+    let dfa = dense::Builder::new()
+        .configure(dense_config)
+        .thompson(nfa_config)
+        .build("^[[:word:]]+")?;
+
+    // The quit byte 'x' sits at offset 3 of the haystack.
+    let haystack = b"abcxyz";
+    let quit = || MatchError::Quit { byte: b'x', offset: 3 };
+    assert_eq!(dfa.find_earliest_rev(haystack), Err(quit()));
+    assert_eq!(dfa.find_leftmost_rev(haystack), Err(quit()));
+
+    Ok(())
+}
+
+// Enabling heuristic Unicode word boundary support and afterwards asking for
+// the non-ASCII byte 0xFF to NOT be a quit byte is expected to panic.
+#[test]
+#[should_panic]
+fn quit_panics() {
+    let config = dense::Config::new().unicode_word_boundary(true);
+    config.quit(b'\xFF', false);
+}
+
+// An overlapping search on a regex whose reverse DFA was not compiled with
+// 'starts_for_each_pattern' is expected to panic.
+#[test]
+#[should_panic]
+fn incorrect_config_overlapping_search_panics() {
+    let forward = dense::DFA::new(r"abca").unwrap();
+    // The reverse DFA deliberately disables 'starts_for_each_pattern'.
+    let reverse_config = dense::Config::new()
+        .anchored(true)
+        .match_kind(MatchKind::All)
+        .starts_for_each_pattern(false);
+    let reverse = dense::Builder::new()
+        .configure(reverse_config)
+        .thompson(thompson::Config::new().reverse(true))
+        .build(r"abca")
+        .unwrap();
+
+    let re = Regex::builder().build_from_dfas(forward, reverse);
+    let mut state = OverlappingState::start();
+    re.find_overlapping(b"bar abcabcabca abca foo", &mut state);
+}
+
+// This tests an interesting case: even with the Unicode word boundary option
+// left disabled, marking every non-ASCII byte as a quit byte causes Unicode
+// word boundaries to be enabled.
+#[test]
+fn unicode_word_implicitly_works() -> Result<(), Box<dyn Error>> {
+    // Mark each byte in 0x80..=0xFF as a quit byte, one at a time.
+    let config = (0x80..=0xFF)
+        .fold(dense::Config::new(), |acc, byte| acc.quit(byte, true));
+    let dfa = dense::Builder::new().configure(config).build(r"\b")?;
+
+    let found = dfa.find_leftmost_fwd(b" a");
+    assert_eq!(found, Ok(Some(HalfMatch::must(0, 1))));
+    Ok(())
+}
+
+// Checks that a Regex equipped with a prefilter still reports the correct
+// matches.
+#[test]
+fn prefilter_works() -> Result<(), Box<dyn Error>> {
+    let prefilter = SubstringPrefilter::new("a");
+    let re = Regex::new(r"a[0-9]+").unwrap().with_prefilter(prefilter);
+
+    let haystack = b"foo abc foo a1a2a3 foo a123 bar aa456";
+    let spans = re
+        .find_leftmost_iter(haystack)
+        .map(|m| (m.start(), m.end()))
+        .collect::<Vec<(usize, usize)>>();
+    let expected = vec![(12, 14), (14, 16), (16, 18), (23, 27), (33, 37)];
+    assert_eq!(spans, expected);
+    Ok(())
+}
+
+// This test confirms that a prefilter is active by swapping in a prefilter
+// that reports false negatives and checking that the matches disappear.
+#[test]
+fn prefilter_is_active() -> Result<(), Box<dyn Error>> {
+    let text = b"za123";
+    let re = Regex::new(r"a[0-9]+")
+        .unwrap()
+        .with_prefilter(SubstringPrefilter::new("a"));
+    assert_eq!(re.find_leftmost(text), Some(MultiMatch::must(0, 1, 5)));
+    assert_eq!(re.find_leftmost(b"a123"), Some(MultiMatch::must(0, 0, 4)));
+    let re = re.with_prefilter(BunkPrefilter::new());
+    assert_eq!(re.find_leftmost(text), None);
+    // This checks that the prefilter is used when first starting the search,
+    // instead of waiting until at least one transition has occurred.
+    assert_eq!(re.find_leftmost(b"a123"), None);
+    Ok(())
+}
diff --git a/vendor/regex-automata-0.2.0/tests/dfa/mod.rs b/vendor/regex-automata-0.2.0/tests/dfa/mod.rs
new file mode 100644
index 000000000..f4299510c
--- /dev/null
+++ b/vendor/regex-automata-0.2.0/tests/dfa/mod.rs
@@ -0,0 +1,2 @@
+mod api;
+mod suite;
diff --git a/vendor/regex-automata-0.2.0/tests/dfa/suite.rs b/vendor/regex-automata-0.2.0/tests/dfa/suite.rs
new file mode 100644
index 000000000..426ae346d
--- /dev/null
+++ b/vendor/regex-automata-0.2.0/tests/dfa/suite.rs
@@ -0,0 +1,280 @@
+use regex_automata::{
+    dfa::{self, dense, regex::Regex, sparse, Automaton},
+    nfa::thompson,
+    MatchKind, SyntaxConfig,
+};
+use regex_syntax as syntax;
+
+use regex_test::{
+    bstr::{BString, ByteSlice},
+    CompiledRegex, Match, MatchKind as TestMatchKind, RegexTest, RegexTests,
+    SearchKind as TestSearchKind, TestResult, TestRunner,
+};
+
+use crate::{suite, Result};
+
+/// Exercises the full test suite on dense DFAs built from a regex builder
+/// left at its default configuration.
+#[test]
+fn unminimized_default() -> Result<()> {
+    // The stock regex builder is handed to the dense compiler untouched.
+    let compile = dense_compiler(Regex::builder());
+    TestRunner::new()?.test_iter(suite()?.iter(), compile).assert();
+    Ok(())
+}
+
+/// Exercises the full test suite on dense DFAs built with byte classes
+/// turned off.
+#[test]
+fn unminimized_no_byte_class() -> Result<()> {
+    let dense_config = dense::Config::new().byte_classes(false);
+    let mut builder = Regex::builder();
+    builder.dense(dense_config);
+    let compile = dense_compiler(builder);
+    TestRunner::new()?.test_iter(suite()?.iter(), compile).assert();
+    Ok(())
+}
+
+/// Exercises the full test suite on dense DFAs whose NFA compiler has
+/// shrinking turned off.
+#[test]
+fn unminimized_no_nfa_shrink() -> Result<()> {
+    let nfa_config = thompson::Config::new().shrink(false);
+    let mut builder = Regex::builder();
+    builder.thompson(nfa_config);
+    let compile = dense_compiler(builder);
+    TestRunner::new()?.test_iter(suite()?.iter(), compile).assert();
+    Ok(())
+}
+
+/// Exercises the test suite on minimized dense DFAs, otherwise default.
+#[test]
+fn minimized_default() -> Result<()> {
+    let mut builder = Regex::builder();
+    builder.dense(dense::Config::new().minimize(true));
+    let compile = dense_compiler(builder);
+    // "expensive" regexes tend to be too big; minimizing them takes forever.
+    TestRunner::new()?
+        .blacklist("expensive")
+        .test_iter(suite()?.iter(), compile)
+        .assert();
+    Ok(())
+}
+
+/// Exercises the test suite on minimized dense DFAs without byte classes.
+#[test]
+fn minimized_no_byte_class() -> Result<()> {
+    let dense_config = dense::Config::new().minimize(true).byte_classes(false);
+    let mut builder = Regex::builder();
+    builder.dense(dense_config);
+    // "expensive" regexes tend to be too big; minimizing them takes forever.
+    TestRunner::new()?
+        .blacklist("expensive")
+        .test_iter(suite()?.iter(), dense_compiler(builder))
+        .assert();
+    Ok(())
+}
+
+/// Exercises the full test suite on sparse DFAs converted from dense DFAs
+/// that were built with the default configuration.
+#[test]
+fn sparse_unminimized_default() -> Result<()> {
+    // No builder customization: the stock regex builder is used directly.
+    let compile = sparse_compiler(Regex::builder());
+    TestRunner::new()?.test_iter(suite()?.iter(), compile).assert();
+    Ok(())
+}
+
+/// A basic sanity test: serialize the forward and reverse dense DFAs of a
+/// regex, deserialize them again and search with the rebuilt regex.
+#[test]
+fn serialization_unminimized_default() -> Result<()> {
+    let compile = compiler(Regex::builder(), |builder, re| {
+        // The `move` closure below needs a builder of its own.
+        let owned_builder = builder.clone();
+        // Keep only the serialized bytes; the second tuple field is unused.
+        let (fwd_buf, _) = re.forward().to_bytes_native_endian();
+        let (rev_buf, _) = re.reverse().to_bytes_native_endian();
+        Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> {
+            // The deserialized DFAs borrow from the captured buffers, so
+            // deserialization happens inside the matcher closure.
+            let fwd: dense::DFA<&[u32]> =
+                dense::DFA::from_bytes(&fwd_buf).unwrap().0;
+            let rev: dense::DFA<&[u32]> =
+                dense::DFA::from_bytes(&rev_buf).unwrap().0;
+            let rebuilt = owned_builder.build_from_dfas(fwd, rev);
+            run_test(&rebuilt, test)
+        }))
+    });
+    TestRunner::new()?
+        .test_iter(suite()?.iter(), compile)
+        .assert();
+    Ok(())
+}
+
+/// A basic sanity test for sparse DFAs: serialize the sparse forward and
+/// reverse DFAs of a regex, deserialize them again and search with the
+/// rebuilt regex.
+#[test]
+fn sparse_serialization_unminimized_default() -> Result<()> {
+    let compile = compiler(Regex::builder(), |builder, re| {
+        // The `move` closure below needs a builder of its own.
+        let owned_builder = builder.clone();
+        // A failed sparse conversion is propagated to the caller.
+        let fwd_buf = re.forward().to_sparse()?.to_bytes_native_endian();
+        let rev_buf = re.reverse().to_sparse()?.to_bytes_native_endian();
+        Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> {
+            // The deserialized DFAs borrow from the captured buffers.
+            let fwd: sparse::DFA<&[u8]> =
+                sparse::DFA::from_bytes(&fwd_buf).unwrap().0;
+            let rev: sparse::DFA<&[u8]> =
+                sparse::DFA::from_bytes(&rev_buf).unwrap().0;
+            let rebuilt = owned_builder.build_from_dfas(fwd, rev);
+            run_test(&rebuilt, test)
+        }))
+    });
+    TestRunner::new()?
+        .test_iter(suite()?.iter(), compile)
+        .assert();
+    Ok(())
+}
+
+/// Builds a test compiler whose matchers search with the regex as compiled.
+fn dense_compiler(
+    builder: dfa::regex::Builder,
+) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> {
+    compiler(builder, |_, dense_re| {
+        let search = move |test: &RegexTest| run_test(&dense_re, test);
+        Ok(CompiledRegex::compiled(search))
+    })
+}
+
+/// Builds a test compiler that searches with sparse versions of the DFAs.
+fn sparse_compiler(
+    builder: dfa::regex::Builder,
+) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> {
+    compiler(builder, |regex_builder, dense_re| {
+        let fwd = dense_re.forward().to_sparse()?;
+        let rev = dense_re.reverse().to_sparse()?;
+        let sparse_re = regex_builder.build_from_dfas(fwd, rev);
+        let search = move |test: &RegexTest| run_test(&sparse_re, test);
+        Ok(CompiledRegex::compiled(search))
+    })
+}
+
+/// Turns `create_matcher` into a test compiler that builds a regex for each
+/// test, or skips the test when its configuration is unsupported.
+fn compiler(
+    mut builder: dfa::regex::Builder,
+    mut create_matcher: impl FnMut(
+        &dfa::regex::Builder,
+        Regex,
+    ) -> Result<CompiledRegex>,
+) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> {
+    move |test, regexes| {
+        let mut patterns = Vec::with_capacity(regexes.len());
+        for regex in regexes {
+            patterns.push(regex.to_str()?.to_string());
+        }
+        // Skip tests that need Unicode word boundaries on non-ASCII input.
+        // TODO: Modify Hir to report facts like this, instead of needing to
+        // build an NFA to do it.
+        let mut nfa_builder = thompson::Builder::new();
+        nfa_builder.configure(config_thompson(test));
+        if let Ok(nfa) = nfa_builder.build_many(&patterns) {
+            let all_ascii = test.input().iter().all(|b| b.is_ascii());
+            if nfa.has_word_boundary_unicode() && !all_ascii {
+                return Ok(CompiledRegex::skip());
+            }
+        }
+        if !configure_regex_builder(test, &mut builder) {
+            return Ok(CompiledRegex::skip());
+        }
+        create_matcher(&builder, builder.build_many(&patterns)?)
+    }
+}
+
+/// Runs a single regex test against `re` and returns two results: the
+/// `is_match` verdict, and the matches reported by the iterator that
+/// corresponds to the test's search kind.
+fn run_test<A: Automaton>(re: &Regex<A>, test: &RegexTest) -> Vec<TestResult> {
+    let input = test.input();
+    // Without an explicit match limit, the cap is the largest usize value.
+    let limit = test.match_limit().unwrap_or(std::usize::MAX);
+
+    let is_match = match re.is_match(input) {
+        true => TestResult::matched(),
+        false => TestResult::no_match(),
+    }
+    .name("is_match");
+
+    // Each arm converts the reported matches into the test harness' `Match`
+    // type and labels the result with the name of the iterator used.
+    let find_matches = match test.search_kind() {
+        TestSearchKind::Earliest => TestResult::matches(
+            re.find_earliest_iter(input).take(limit).map(|m| Match {
+                id: m.pattern().as_usize(),
+                start: m.start(),
+                end: m.end(),
+            }),
+        )
+        .name("find_earliest_iter"),
+        TestSearchKind::Leftmost => TestResult::matches(
+            re.find_leftmost_iter(input).take(limit).map(|m| Match {
+                id: m.pattern().as_usize(),
+                start: m.start(),
+                end: m.end(),
+            }),
+        )
+        .name("find_leftmost_iter"),
+        TestSearchKind::Overlapping => TestResult::matches(
+            re.find_overlapping_iter(input).take(limit).map(|m| Match {
+                id: m.pattern().as_usize(),
+                start: m.start(),
+                end: m.end(),
+            }),
+        )
+        .name("find_overlapping_iter"),
+    };
+
+    // The `is_match` result always comes first.
+    vec![is_match, find_matches]
+}
+
+/// Applies all settings of the given regex test that matter for DFA regexes
+/// to the given regex builder.
+///
+/// Returns false if the test requires leftmost-longest match semantics, which
+/// has no equivalent here; the caller is expected to skip such a test.
+fn configure_regex_builder(
+    test: &RegexTest,
+    builder: &mut dfa::regex::Builder,
+) -> bool {
+    let kind = match test.match_kind() {
+        TestMatchKind::LeftmostLongest => return false,
+        TestMatchKind::All => MatchKind::All,
+        TestMatchKind::LeftmostFirst => MatchKind::LeftmostFirst,
+    };
+    let utf8 = test.utf8();
+
+    builder.configure(Regex::config().utf8(utf8));
+    builder.syntax(
+        SyntaxConfig::new()
+            .case_insensitive(test.case_insensitive())
+            .unicode(test.unicode())
+            .utf8(utf8),
+    );
+    builder.thompson(config_thompson(test));
+    builder.dense(
+        dense::Config::new()
+            .anchored(test.anchored())
+            .match_kind(kind)
+            .unicode_word_boundary(true),
+    );
+    true
+}
+
+/// Builds the Thompson NFA config for a test; only its UTF-8 setting is used.
+fn config_thompson(test: &RegexTest) -> thompson::Config {
+    thompson::Config::new().utf8(test.utf8())
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-30 18:31:44 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-30 18:31:44 +0000
commit	c23a457e72abe608715ac76f076f47dc42af07a5 (patch)
tree	2772049aaf84b5c9d0ed12ec8d86812f7a7904b6 /vendor/regex-automata-0.2.0/tests/dfa
parent	Releasing progress-linux version 1.73.0+dfsg1-1~progress7.99u1. (diff)
download	rustc-c23a457e72abe608715ac76f076f47dc42af07a5.tar.xz rustc-c23a457e72abe608715ac76f076f47dc42af07a5.zip