summary refs log tree commit diff stats
path: root/third_party/rust/regex/tests/consistent.rs
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/regex/tests/consistent.rs')
-rw-r--r-- third_party/rust/regex/tests/consistent.rs 238
1 files changed, 238 insertions, 0 deletions
diff --git a/third_party/rust/regex/tests/consistent.rs b/third_party/rust/regex/tests/consistent.rs
new file mode 100644
index 0000000000..722f2a51a0
--- /dev/null
+++ b/third_party/rust/regex/tests/consistent.rs
@@ -0,0 +1,238 @@
+use regex::internal::ExecBuilder;
+
+/// Builds every matching backend for the pattern `re` and checks that they
+/// all agree with one another on a number of different inputs.
+///
+/// Three groups are built, each containing a bounded-backtracking, a PikeVM
+/// (`nfa()`) and a default engine: plain string regexes, string regexes
+/// built with `bytes(true)`, and byte regexes built with `only_utf8(false)`.
+/// Within each group the first backend is compared against the others.
+///
+/// On success the counts returned by the three group checks are summed and
+/// returned. If any backend fails to build, the build error is returned as
+/// a string; if two backends disagree, the checker's message is returned.
+///
+/// For now this just throws quickcheck at the problem, which is not very
+/// good because it only really exercises half of the problem space. A
+/// random string is unlikely to match any given regex, so in practice this
+/// mostly verifies that the backends fail in the same way. That is still
+/// worth testing, but it is definitely not the whole story.
+///
+/// TODO(ethan): To cover the other half of the problem space, we should
+/// generate a random *matching* string by inspecting the AST of the input
+/// regex. The right way to do this probably involves a custom Arbitrary
+/// instance around a couple of newtypes, so that quickcheck's size hinting
+/// and shrinking are respected.
+pub fn backends_are_consistent(re: &str) -> Result<u64, String> {
+    // Finish a configured builder as a string regex, stringifying any error.
+    let str_regex = |builder: ExecBuilder| {
+        builder
+            .build()
+            .map(|exec| exec.into_regex())
+            .map_err(|err| err.to_string())
+    };
+    // Finish a configured builder as a byte regex, stringifying any error.
+    let byte_regex = |builder: ExecBuilder| {
+        builder
+            .build()
+            .map(|exec| exec.into_byte_regex())
+            .map_err(|err| err.to_string())
+    };
+
+    let standard_backends = [
+        (
+            "bounded_backtracking_re",
+            str_regex(ExecBuilder::new(re).bounded_backtracking())?,
+        ),
+        ("pikevm_re", str_regex(ExecBuilder::new(re).nfa())?),
+        ("default_re", str_regex(ExecBuilder::new(re))?),
+    ];
+
+    let utf8bytes_backends = [
+        (
+            "bounded_backtracking_utf8bytes_re",
+            str_regex(ExecBuilder::new(re).bounded_backtracking().bytes(true))?,
+        ),
+        (
+            "pikevm_utf8bytes_re",
+            str_regex(ExecBuilder::new(re).nfa().bytes(true))?,
+        ),
+        (
+            "default_utf8bytes_re",
+            str_regex(ExecBuilder::new(re).bytes(true))?,
+        ),
+    ];
+
+    let bytes_backends = [
+        (
+            "bounded_backtracking_bytes_re",
+            byte_regex(
+                ExecBuilder::new(re).bounded_backtracking().only_utf8(false),
+            )?,
+        ),
+        (
+            "pikevm_bytes_re",
+            byte_regex(ExecBuilder::new(re).nfa().only_utf8(false))?,
+        ),
+        (
+            "default_bytes_re",
+            byte_regex(ExecBuilder::new(re).only_utf8(false))?,
+        ),
+    ];
+
+    // All backends are built before any check runs; the checks then run in
+    // the order standard, utf8bytes, bytes.
+    let standard_passed = string_checker::check_backends(&standard_backends)?;
+    let utf8bytes_passed =
+        string_checker::check_backends(&utf8bytes_backends)?;
+    let bytes_passed = bytes_checker::check_backends(&bytes_backends)?;
+
+    Ok(standard_passed + utf8bytes_passed + bytes_passed)
+}
+
+//
+// A consistency checker parameterized by the input type (&str or &[u8]).
+//
+
+macro_rules! checker {
+    // `$module_name` is the module to generate, `$regex_type` is the regex
+    // type being compared, and `$mk_input` is an expression that is called
+    // with the quickcheck generator to produce one input value.
+    ($module_name:ident, $regex_type:path, $mk_input:expr) => {
+        mod $module_name {
+            use quickcheck;
+            // `Arbitrary` is kept in scope because `$mk_input` is expanded
+            // inside this module and the invocations in this file call
+            // `Arbitrary::arbitrary` through it.
+            use quickcheck::{Arbitrary, TestResult};
+
+            /// Compares the first backend in `backends` against each of
+            /// the remaining ones.
+            ///
+            /// Returns the sum of the counts reported by each pairwise
+            /// comparison, or the message of the first comparison that
+            /// fails. An empty slice has nothing to compare and yields
+            /// `Ok(0)` rather than panicking.
+            pub fn check_backends(
+                backends: &[(&str, $regex_type)],
+            ) -> Result<u64, String> {
+                let (reference, others) = match backends.split_first() {
+                    Some(parts) => parts,
+                    None => return Ok(0),
+                };
+
+                let mut total_passed = 0;
+                for candidate in others {
+                    total_passed += quickcheck_regex_eq(reference, candidate)?;
+                }
+
+                Ok(total_passed)
+            }
+
+            /// Runs a `RegexEqualityTest` for the two named regexes through
+            /// `QuickCheck::quicktest`.
+            ///
+            /// On failure the error names both backends, shows both
+            /// patterns and includes the debug form of the quickcheck
+            /// error.
+            fn quickcheck_regex_eq(
+                &(name1, ref re1): &(&str, $regex_type),
+                &(name2, ref re2): &(&str, $regex_type),
+            ) -> Result<u64, String> {
+                let test = RegexEqualityTest::new(re1.clone(), re2.clone());
+                quickcheck::QuickCheck::new().quicktest(test).map_err(|err| {
+                    // The trailing `\` swallows the newline and the next
+                    // line's leading whitespace, so the separating space
+                    // must come before it.
+                    format!(
+                        "{}(/{}/) and {}(/{}/) are inconsistent. \
+                         QuickCheck Err: {:?}",
+                        name1, re1, name2, re2, err
+                    )
+                })
+            }
+
+            /// A quickcheck property holding the two regexes to compare.
+            struct RegexEqualityTest {
+                re1: $regex_type,
+                re2: $regex_type,
+            }
+
+            impl RegexEqualityTest {
+                fn new(re1: $regex_type, re2: $regex_type) -> Self {
+                    RegexEqualityTest { re1, re2 }
+                }
+            }
+
+            impl quickcheck::Testable for RegexEqualityTest {
+                /// Generates one input with `$mk_input` and checks that
+                /// both regexes give identical results for `find`,
+                /// `captures`, `find_iter`, `captures_iter` and `split`.
+                ///
+                /// Sequences are compared in full: a difference in the
+                /// number of items counts as a mismatch, not only a
+                /// difference between paired items.
+                fn result(&self, g: &mut quickcheck::Gen) -> TestResult {
+                    let input = $mk_input(g);
+                    let input = &input;
+
+                    // Builds the error result for the named operation.
+                    let mismatch = |what: &str| {
+                        TestResult::error(format!(
+                            "{} mismatch input={:?}",
+                            what, input
+                        ))
+                    };
+
+                    if self.re1.find(input) != self.re2.find(input) {
+                        return mismatch("find");
+                    }
+
+                    match (self.re1.captures(input), self.re2.captures(input))
+                    {
+                        (None, None) => {}
+                        (Some(caps1), Some(caps2)) => {
+                            // `Iterator::eq` also fails on differing
+                            // lengths, which `zip` would silently ignore.
+                            if !caps1.iter().eq(caps2.iter()) {
+                                return mismatch("captures");
+                            }
+                        }
+                        _ => return mismatch("captures"),
+                    }
+
+                    if !self
+                        .re1
+                        .find_iter(input)
+                        .eq(self.re2.find_iter(input))
+                    {
+                        return mismatch("find_iter");
+                    }
+
+                    // Step both capture iterators together so that one
+                    // ending before the other is reported as a mismatch.
+                    let mut ci1 = self.re1.captures_iter(input);
+                    let mut ci2 = self.re2.captures_iter(input);
+                    loop {
+                        match (ci1.next(), ci2.next()) {
+                            (None, None) => break,
+                            (Some(caps1), Some(caps2)) => {
+                                if !caps1.iter().eq(caps2.iter()) {
+                                    return mismatch("captures_iter");
+                                }
+                            }
+                            _ => return mismatch("captures_iter"),
+                        }
+                    }
+
+                    if !self.re1.split(input).eq(self.re2.split(input)) {
+                        return mismatch("split");
+                    }
+
+                    TestResult::from_bool(true)
+                }
+            }
+        } // mod
+    }; // rule case
+} // macro_rules!
+
+// Instantiate the checker twice: once for `::regex::Regex` fed with
+// arbitrary `String` inputs, and once for `::regex::bytes::Regex` fed with
+// arbitrary `Vec<u8>` inputs.
+checker!(string_checker, ::regex::Regex, |g| String::arbitrary(g));
+checker!(bytes_checker, ::regex::bytes::Regex, |g| Vec::<u8>::arbitrary(g));