use regex::internal::ExecBuilder; /// Given a regex, check if all of the backends produce the same /// results on a number of different inputs. /// /// For now this just throws quickcheck at the problem, which /// is not very good because it only really tests half of the /// problem space. It is pretty unlikely that a random string /// will match any given regex, so this will probably just /// be checking that the different backends fail in the same /// way. This is still worthwhile to test, but is definitely not /// the whole story. /// /// TODO(ethan): In order to cover the other half of the problem /// space, we should generate a random matching string by inspecting /// the AST of the input regex. The right way to do this probably /// involves adding a custom Arbitrary instance around a couple /// of newtypes. That way we can respect the quickcheck size hinting /// and shrinking and whatnot. pub fn backends_are_consistent(re: &str) -> Result { let standard_backends = vec![ ( "bounded_backtracking_re", ExecBuilder::new(re) .bounded_backtracking() .build() .map(|exec| exec.into_regex()) .map_err(|err| format!("{}", err))?, ), ( "pikevm_re", ExecBuilder::new(re) .nfa() .build() .map(|exec| exec.into_regex()) .map_err(|err| format!("{}", err))?, ), ( "default_re", ExecBuilder::new(re) .build() .map(|exec| exec.into_regex()) .map_err(|err| format!("{}", err))?, ), ]; let utf8bytes_backends = vec![ ( "bounded_backtracking_utf8bytes_re", ExecBuilder::new(re) .bounded_backtracking() .bytes(true) .build() .map(|exec| exec.into_regex()) .map_err(|err| format!("{}", err))?, ), ( "pikevm_utf8bytes_re", ExecBuilder::new(re) .nfa() .bytes(true) .build() .map(|exec| exec.into_regex()) .map_err(|err| format!("{}", err))?, ), ( "default_utf8bytes_re", ExecBuilder::new(re) .bytes(true) .build() .map(|exec| exec.into_regex()) .map_err(|err| format!("{}", err))?, ), ]; let bytes_backends = vec![ ( "bounded_backtracking_bytes_re", ExecBuilder::new(re) .bounded_backtracking() .only_utf8(false) .build() .map(|exec| exec.into_byte_regex()) .map_err(|err| format!("{}", err))?, ), ( "pikevm_bytes_re", ExecBuilder::new(re) .nfa() .only_utf8(false) .build() .map(|exec| exec.into_byte_regex()) .map_err(|err| format!("{}", err))?, ), ( "default_bytes_re", ExecBuilder::new(re) .only_utf8(false) .build() .map(|exec| exec.into_byte_regex()) .map_err(|err| format!("{}", err))?, ), ]; Ok(string_checker::check_backends(&standard_backends)? + string_checker::check_backends(&utf8bytes_backends)? + bytes_checker::check_backends(&bytes_backends)?) } // // A consistency checker parameterized by the input type (&str or &[u8]). // macro_rules! checker { ($module_name:ident, $regex_type:path, $mk_input:expr) => { mod $module_name { use quickcheck; use quickcheck::{Arbitrary, TestResult}; pub fn check_backends( backends: &[(&str, $regex_type)], ) -> Result { let mut total_passed = 0; for regex in backends[1..].iter() { total_passed += quickcheck_regex_eq(&backends[0], regex)?; } Ok(total_passed) } fn quickcheck_regex_eq( &(name1, ref re1): &(&str, $regex_type), &(name2, ref re2): &(&str, $regex_type), ) -> Result { quickcheck::QuickCheck::new() .quicktest(RegexEqualityTest::new( re1.clone(), re2.clone(), )) .map_err(|err| { format!( "{}(/{}/) and {}(/{}/) are inconsistent.\ QuickCheck Err: {:?}", name1, re1, name2, re2, err ) }) } struct RegexEqualityTest { re1: $regex_type, re2: $regex_type, } impl RegexEqualityTest { fn new(re1: $regex_type, re2: $regex_type) -> Self { RegexEqualityTest { re1: re1, re2: re2 } } } impl quickcheck::Testable for RegexEqualityTest { fn result( &self, gen: &mut G, ) -> TestResult { let input = $mk_input(gen); let input = &input; if self.re1.find(&input) != self.re2.find(input) { return TestResult::error(format!( "find mismatch input={:?}", input )); } let cap1 = self.re1.captures(input); let cap2 = self.re2.captures(input); match (cap1, cap2) { (None, None) => {} (Some(cap1), Some(cap2)) => { for (c1, c2) in cap1.iter().zip(cap2.iter()) { if c1 != c2 { return TestResult::error(format!( "captures mismatch input={:?}", input )); } } } _ => { return TestResult::error(format!( "captures mismatch input={:?}", input )) } } let fi1 = self.re1.find_iter(input); let fi2 = self.re2.find_iter(input); for (m1, m2) in fi1.zip(fi2) { if m1 != m2 { return TestResult::error(format!( "find_iter mismatch input={:?}", input )); } } let ci1 = self.re1.captures_iter(input); let ci2 = self.re2.captures_iter(input); for (cap1, cap2) in ci1.zip(ci2) { for (c1, c2) in cap1.iter().zip(cap2.iter()) { if c1 != c2 { return TestResult::error(format!( "captures_iter mismatch input={:?}", input )); } } } let s1 = self.re1.split(input); let s2 = self.re2.split(input); for (chunk1, chunk2) in s1.zip(s2) { if chunk1 != chunk2 { return TestResult::error(format!( "split mismatch input={:?}", input )); } } TestResult::from_bool(true) } } } // mod }; // rule case } // macro_rules! checker!(string_checker, ::regex::Regex, |gen| String::arbitrary(gen)); checker!(bytes_checker, ::regex::bytes::Regex, |gen| Vec::::arbitrary( gen ));