summaryrefslogtreecommitdiffstats
path: root/vendor/regex-automata/tests/dfa/suite.rs
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--vendor/regex-automata/tests/dfa/suite.rs280
1 files changed, 280 insertions, 0 deletions
diff --git a/vendor/regex-automata/tests/dfa/suite.rs b/vendor/regex-automata/tests/dfa/suite.rs
new file mode 100644
index 000000000..426ae346d
--- /dev/null
+++ b/vendor/regex-automata/tests/dfa/suite.rs
@@ -0,0 +1,280 @@
+use regex_automata::{
+ dfa::{self, dense, regex::Regex, sparse, Automaton},
+ nfa::thompson,
+ MatchKind, SyntaxConfig,
+};
+use regex_syntax as syntax;
+
+use regex_test::{
+ bstr::{BString, ByteSlice},
+ CompiledRegex, Match, MatchKind as TestMatchKind, RegexTest, RegexTests,
+ SearchKind as TestSearchKind, TestResult, TestRunner,
+};
+
+use crate::{suite, Result};
+
+/// Runs the test suite with the default configuration.
+#[test]
+fn unminimized_default() -> Result<()> {
+ let builder = Regex::builder();
+ TestRunner::new()?
+ .test_iter(suite()?.iter(), dense_compiler(builder))
+ .assert();
+ Ok(())
+}
+
+/// Runs the test suite with byte classes disabled.
+#[test]
+fn unminimized_no_byte_class() -> Result<()> {
+ let mut builder = Regex::builder();
+ builder.dense(dense::Config::new().byte_classes(false));
+
+ TestRunner::new()?
+ .test_iter(suite()?.iter(), dense_compiler(builder))
+ .assert();
+ Ok(())
+}
+
+/// Runs the test suite with NFA shrinking disabled.
+#[test]
+fn unminimized_no_nfa_shrink() -> Result<()> {
+ let mut builder = Regex::builder();
+ builder.thompson(thompson::Config::new().shrink(false));
+
+ TestRunner::new()?
+ .test_iter(suite()?.iter(), dense_compiler(builder))
+ .assert();
+ Ok(())
+}
+
+/// Runs the test suite on a minimized DFA with an otherwise default
+/// configuration.
+#[test]
+fn minimized_default() -> Result<()> {
+ let mut builder = Regex::builder();
+ builder.dense(dense::Config::new().minimize(true));
+ TestRunner::new()?
+ // These regexes tend to be too big. Minimization takes... forever.
+ .blacklist("expensive")
+ .test_iter(suite()?.iter(), dense_compiler(builder))
+ .assert();
+ Ok(())
+}
+
+/// Runs the test suite on a minimized DFA with byte classes disabled.
+#[test]
+fn minimized_no_byte_class() -> Result<()> {
+ let mut builder = Regex::builder();
+ builder.dense(dense::Config::new().minimize(true).byte_classes(false));
+
+ TestRunner::new()?
+ // These regexes tend to be too big. Minimization takes... forever.
+ .blacklist("expensive")
+ .test_iter(suite()?.iter(), dense_compiler(builder))
+ .assert();
+ Ok(())
+}
+
+/// Runs the test suite on a sparse unminimized DFA.
+#[test]
+fn sparse_unminimized_default() -> Result<()> {
+ let builder = Regex::builder();
+ TestRunner::new()?
+ .test_iter(suite()?.iter(), sparse_compiler(builder))
+ .assert();
+ Ok(())
+}
+
+/// Another basic sanity test that checks we can serialize and then deserialize
+/// a regex, and that the resulting regex can be used for searching correctly.
+#[test]
+fn serialization_unminimized_default() -> Result<()> {
+ let builder = Regex::builder();
+ let my_compiler = |builder| {
+ compiler(builder, |builder, re| {
+ let builder = builder.clone();
+ let (fwd_bytes, _) = re.forward().to_bytes_native_endian();
+ let (rev_bytes, _) = re.reverse().to_bytes_native_endian();
+ Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> {
+ let fwd: dense::DFA<&[u32]> =
+ dense::DFA::from_bytes(&fwd_bytes).unwrap().0;
+ let rev: dense::DFA<&[u32]> =
+ dense::DFA::from_bytes(&rev_bytes).unwrap().0;
+ let re = builder.build_from_dfas(fwd, rev);
+
+ run_test(&re, test)
+ }))
+ })
+ };
+ TestRunner::new()?
+ .test_iter(suite()?.iter(), my_compiler(builder))
+ .assert();
+ Ok(())
+}
+
+/// A basic sanity test that checks we can serialize and then deserialize a
+/// regex using sparse DFAs, and that the resulting regex can be used for
+/// searching correctly.
+#[test]
+fn sparse_serialization_unminimized_default() -> Result<()> {
+ let builder = Regex::builder();
+ let my_compiler = |builder| {
+ compiler(builder, |builder, re| {
+ let builder = builder.clone();
+ let fwd_bytes = re.forward().to_sparse()?.to_bytes_native_endian();
+ let rev_bytes = re.reverse().to_sparse()?.to_bytes_native_endian();
+ Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> {
+ let fwd: sparse::DFA<&[u8]> =
+ sparse::DFA::from_bytes(&fwd_bytes).unwrap().0;
+ let rev: sparse::DFA<&[u8]> =
+ sparse::DFA::from_bytes(&rev_bytes).unwrap().0;
+ let re = builder.build_from_dfas(fwd, rev);
+ run_test(&re, test)
+ }))
+ })
+ };
+ TestRunner::new()?
+ .test_iter(suite()?.iter(), my_compiler(builder))
+ .assert();
+ Ok(())
+}
+
+fn dense_compiler(
+ builder: dfa::regex::Builder,
+) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> {
+ compiler(builder, |_, re| {
+ Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> {
+ run_test(&re, test)
+ }))
+ })
+}
+
+fn sparse_compiler(
+ builder: dfa::regex::Builder,
+) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> {
+ compiler(builder, |builder, re| {
+ let fwd = re.forward().to_sparse()?;
+ let rev = re.reverse().to_sparse()?;
+ let re = builder.build_from_dfas(fwd, rev);
+ Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> {
+ run_test(&re, test)
+ }))
+ })
+}
+
+fn compiler(
+ mut builder: dfa::regex::Builder,
+ mut create_matcher: impl FnMut(
+ &dfa::regex::Builder,
+ Regex,
+ ) -> Result<CompiledRegex>,
+) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> {
+ move |test, regexes| {
+ let regexes = regexes
+ .iter()
+ .map(|r| r.to_str().map(|s| s.to_string()))
+ .collect::<std::result::Result<Vec<String>, _>>()?;
+
+ // Check if our regex contains things that aren't supported by DFAs.
+ // That is, Unicode word boundaries when searching non-ASCII text.
+ let mut thompson = thompson::Builder::new();
+ thompson.configure(config_thompson(test));
+ // TODO: Modify Hir to report facts like this, instead of needing to
+ // build an NFA to do it.
+ if let Ok(nfa) = thompson.build_many(&regexes) {
+ let non_ascii = test.input().iter().any(|&b| !b.is_ascii());
+ if nfa.has_word_boundary_unicode() && non_ascii {
+ return Ok(CompiledRegex::skip());
+ }
+ }
+ if !configure_regex_builder(test, &mut builder) {
+ return Ok(CompiledRegex::skip());
+ }
+ create_matcher(&builder, builder.build_many(&regexes)?)
+ }
+}
+
+fn run_test<A: Automaton>(re: &Regex<A>, test: &RegexTest) -> Vec<TestResult> {
+ let is_match = if re.is_match(test.input()) {
+ TestResult::matched()
+ } else {
+ TestResult::no_match()
+ };
+ let is_match = is_match.name("is_match");
+
+ let find_matches = match test.search_kind() {
+ TestSearchKind::Earliest => {
+ let it = re
+ .find_earliest_iter(test.input())
+ .take(test.match_limit().unwrap_or(std::usize::MAX))
+ .map(|m| Match {
+ id: m.pattern().as_usize(),
+ start: m.start(),
+ end: m.end(),
+ });
+ TestResult::matches(it).name("find_earliest_iter")
+ }
+ TestSearchKind::Leftmost => {
+ let it = re
+ .find_leftmost_iter(test.input())
+ .take(test.match_limit().unwrap_or(std::usize::MAX))
+ .map(|m| Match {
+ id: m.pattern().as_usize(),
+ start: m.start(),
+ end: m.end(),
+ });
+ TestResult::matches(it).name("find_leftmost_iter")
+ }
+ TestSearchKind::Overlapping => {
+ let it = re
+ .find_overlapping_iter(test.input())
+ .take(test.match_limit().unwrap_or(std::usize::MAX))
+ .map(|m| Match {
+ id: m.pattern().as_usize(),
+ start: m.start(),
+ end: m.end(),
+ });
+ TestResult::matches(it).name("find_overlapping_iter")
+ }
+ };
+
+ vec![is_match, find_matches]
+}
+
+/// Configures the given regex builder with all relevant settings on the given
+/// regex test.
+///
+/// If the regex test has a setting that is unsupported, then this returns
+/// false (implying the test should be skipped).
+fn configure_regex_builder(
+ test: &RegexTest,
+ builder: &mut dfa::regex::Builder,
+) -> bool {
+ let match_kind = match test.match_kind() {
+ TestMatchKind::All => MatchKind::All,
+ TestMatchKind::LeftmostFirst => MatchKind::LeftmostFirst,
+ TestMatchKind::LeftmostLongest => return false,
+ };
+
+ let syntax_config = SyntaxConfig::new()
+ .case_insensitive(test.case_insensitive())
+ .unicode(test.unicode())
+ .utf8(test.utf8());
+ let dense_config = dense::Config::new()
+ .anchored(test.anchored())
+ .match_kind(match_kind)
+ .unicode_word_boundary(true);
+ let regex_config = Regex::config().utf8(test.utf8());
+
+ builder
+ .configure(regex_config)
+ .syntax(syntax_config)
+ .thompson(config_thompson(test))
+ .dense(dense_config);
+ true
+}
+
+/// Configuration of a Thompson NFA compiler from a regex test.
+fn config_thompson(test: &RegexTest) -> thompson::Config {
+ thompson::Config::new().utf8(test.utf8())
+}