diff options
Diffstat (limited to 'vendor/memchr/src/tests')
-rw-r--r-- | vendor/memchr/src/tests/memchr/iter.rs | 230 | ||||
-rw-r--r-- | vendor/memchr/src/tests/memchr/memchr.rs | 134 | ||||
-rw-r--r-- | vendor/memchr/src/tests/memchr/mod.rs | 314 | ||||
-rw-r--r-- | vendor/memchr/src/tests/memchr/naive.rs | 33 | ||||
-rw-r--r-- | vendor/memchr/src/tests/memchr/prop.rs | 321 | ||||
-rw-r--r-- | vendor/memchr/src/tests/memchr/simple.rs | 23 | ||||
-rw-r--r-- | vendor/memchr/src/tests/memchr/testdata.rs | 351 | ||||
-rw-r--r-- | vendor/memchr/src/tests/mod.rs | 18 | ||||
-rw-r--r-- | vendor/memchr/src/tests/packedpair.rs | 216 | ||||
-rw-r--r-- | vendor/memchr/src/tests/substring/mod.rs | 232 | ||||
-rw-r--r-- | vendor/memchr/src/tests/substring/naive.rs | 45 | ||||
-rw-r--r-- | vendor/memchr/src/tests/substring/prop.rs | 126 |
12 files changed, 1289 insertions, 754 deletions
diff --git a/vendor/memchr/src/tests/memchr/iter.rs b/vendor/memchr/src/tests/memchr/iter.rs deleted file mode 100644 index 80ea5c279..000000000 --- a/vendor/memchr/src/tests/memchr/iter.rs +++ /dev/null @@ -1,230 +0,0 @@ -use quickcheck::quickcheck; - -use crate::{tests::memchr::testdata::memchr_tests, Memchr, Memchr2, Memchr3}; - -#[test] -fn memchr1_iter() { - for test in memchr_tests() { - test.iter_one(false, Memchr::new); - } -} - -#[test] -fn memchr2_iter() { - for test in memchr_tests() { - test.iter_two(false, Memchr2::new); - } -} - -#[test] -fn memchr3_iter() { - for test in memchr_tests() { - test.iter_three(false, Memchr3::new); - } -} - -#[test] -fn memrchr1_iter() { - for test in memchr_tests() { - test.iter_one(true, |n1, corpus| Memchr::new(n1, corpus).rev()); - } -} - -#[test] -fn memrchr2_iter() { - for test in memchr_tests() { - test.iter_two(true, |n1, n2, corpus| { - Memchr2::new(n1, n2, corpus).rev() - }) - } -} - -#[test] -fn memrchr3_iter() { - for test in memchr_tests() { - test.iter_three(true, |n1, n2, n3, corpus| { - Memchr3::new(n1, n2, n3, corpus).rev() - }) - } -} - -quickcheck! { - fn qc_memchr_double_ended_iter( - needle: u8, data: Vec<u8>, take_side: Vec<bool> - ) -> bool { - // make nonempty - let mut take_side = take_side; - if take_side.is_empty() { take_side.push(true) }; - - let iter = Memchr::new(needle, &data); - let all_found = double_ended_take( - iter, take_side.iter().cycle().cloned()); - - all_found.iter().cloned().eq(positions1(needle, &data)) - } - - fn qc_memchr2_double_ended_iter( - needle1: u8, needle2: u8, data: Vec<u8>, take_side: Vec<bool> - ) -> bool { - // make nonempty - let mut take_side = take_side; - if take_side.is_empty() { take_side.push(true) }; - - let iter = Memchr2::new(needle1, needle2, &data); - let all_found = double_ended_take( - iter, take_side.iter().cycle().cloned()); - - all_found.iter().cloned().eq(positions2(needle1, needle2, &data)) - } - - fn qc_memchr3_double_ended_iter( - needle1: u8, needle2: u8, needle3: u8, - data: Vec<u8>, take_side: Vec<bool> - ) -> bool { - // make nonempty - let mut take_side = take_side; - if take_side.is_empty() { take_side.push(true) }; - - let iter = Memchr3::new(needle1, needle2, needle3, &data); - let all_found = double_ended_take( - iter, take_side.iter().cycle().cloned()); - - all_found - .iter() - .cloned() - .eq(positions3(needle1, needle2, needle3, &data)) - } - - fn qc_memchr1_iter(data: Vec<u8>) -> bool { - let needle = 0; - let answer = positions1(needle, &data); - answer.eq(Memchr::new(needle, &data)) - } - - fn qc_memchr1_rev_iter(data: Vec<u8>) -> bool { - let needle = 0; - let answer = positions1(needle, &data); - answer.rev().eq(Memchr::new(needle, &data).rev()) - } - - fn qc_memchr2_iter(data: Vec<u8>) -> bool { - let needle1 = 0; - let needle2 = 1; - let answer = positions2(needle1, needle2, &data); - answer.eq(Memchr2::new(needle1, needle2, &data)) - } - - fn qc_memchr2_rev_iter(data: Vec<u8>) -> bool { - let needle1 = 0; - let needle2 = 1; - let answer = positions2(needle1, needle2, &data); - answer.rev().eq(Memchr2::new(needle1, needle2, &data).rev()) - } - - fn qc_memchr3_iter(data: Vec<u8>) -> bool { - let needle1 = 0; - let needle2 = 1; - let needle3 = 2; - let answer = positions3(needle1, needle2, needle3, &data); - answer.eq(Memchr3::new(needle1, needle2, needle3, &data)) - } - - fn qc_memchr3_rev_iter(data: Vec<u8>) -> bool { - let needle1 = 0; - let needle2 = 1; - let needle3 = 2; - let answer = positions3(needle1, needle2, needle3, &data); - answer.rev().eq(Memchr3::new(needle1, needle2, needle3, &data).rev()) - } - - fn qc_memchr1_iter_size_hint(data: Vec<u8>) -> bool { - // test that the size hint is within reasonable bounds - let needle = 0; - let mut iter = Memchr::new(needle, &data); - let mut real_count = data - .iter() - .filter(|&&elt| elt == needle) - .count(); - - while let Some(index) = iter.next() { - real_count -= 1; - let (lower, upper) = iter.size_hint(); - assert!(lower <= real_count); - assert!(upper.unwrap() >= real_count); - assert!(upper.unwrap() <= data.len() - index); - } - true - } -} - -// take items from a DEI, taking front for each true and back for each false. -// Return a vector with the concatenation of the fronts and the reverse of the -// backs. -fn double_ended_take<I, J>(mut iter: I, take_side: J) -> Vec<I::Item> -where - I: DoubleEndedIterator, - J: Iterator<Item = bool>, -{ - let mut found_front = Vec::new(); - let mut found_back = Vec::new(); - - for take_front in take_side { - if take_front { - if let Some(pos) = iter.next() { - found_front.push(pos); - } else { - break; - } - } else { - if let Some(pos) = iter.next_back() { - found_back.push(pos); - } else { - break; - } - }; - } - - let mut all_found = found_front; - all_found.extend(found_back.into_iter().rev()); - all_found -} - -// return an iterator of the 0-based indices of haystack that match the needle -fn positions1<'a>( - n1: u8, - haystack: &'a [u8], -) -> Box<dyn DoubleEndedIterator<Item = usize> + 'a> { - let it = haystack - .iter() - .enumerate() - .filter(move |&(_, &b)| b == n1) - .map(|t| t.0); - Box::new(it) -} - -fn positions2<'a>( - n1: u8, - n2: u8, - haystack: &'a [u8], -) -> Box<dyn DoubleEndedIterator<Item = usize> + 'a> { - let it = haystack - .iter() - .enumerate() - .filter(move |&(_, &b)| b == n1 || b == n2) - .map(|t| t.0); - Box::new(it) -} - -fn positions3<'a>( - n1: u8, - n2: u8, - n3: u8, - haystack: &'a [u8], -) -> Box<dyn DoubleEndedIterator<Item = usize> + 'a> { - let it = haystack - .iter() - .enumerate() - .filter(move |&(_, &b)| b == n1 || b == n2 || b == n3) - .map(|t| t.0); - Box::new(it) -} diff --git a/vendor/memchr/src/tests/memchr/memchr.rs b/vendor/memchr/src/tests/memchr/memchr.rs deleted file mode 100644 index ac955ed68..000000000 --- a/vendor/memchr/src/tests/memchr/memchr.rs +++ /dev/null @@ -1,134 +0,0 @@ -use quickcheck::quickcheck; - -use crate::{ - memchr, - memchr::{fallback, naive}, - memchr2, memchr3, memrchr, memrchr2, memrchr3, - tests::memchr::testdata::memchr_tests, -}; - -#[test] -fn memchr1_find() { - for test in memchr_tests() { - test.one(false, memchr); - } -} - -#[test] -fn memchr1_fallback_find() { - for test in memchr_tests() { - test.one(false, fallback::memchr); - } -} - -#[test] -fn memchr2_find() { - for test in memchr_tests() { - test.two(false, memchr2); - } -} - -#[test] -fn memchr2_fallback_find() { - for test in memchr_tests() { - test.two(false, fallback::memchr2); - } -} - -#[test] -fn memchr3_find() { - for test in memchr_tests() { - test.three(false, memchr3); - } -} - -#[test] -fn memchr3_fallback_find() { - for test in memchr_tests() { - test.three(false, fallback::memchr3); - } -} - -#[test] -fn memrchr1_find() { - for test in memchr_tests() { - test.one(true, memrchr); - } -} - -#[test] -fn memrchr1_fallback_find() { - for test in memchr_tests() { - test.one(true, fallback::memrchr); - } -} - -#[test] -fn memrchr2_find() { - for test in memchr_tests() { - test.two(true, memrchr2); - } -} - -#[test] -fn memrchr2_fallback_find() { - for test in memchr_tests() { - test.two(true, fallback::memrchr2); - } -} - -#[test] -fn memrchr3_find() { - for test in memchr_tests() { - test.three(true, memrchr3); - } -} - -#[test] -fn memrchr3_fallback_find() { - for test in memchr_tests() { - test.three(true, fallback::memrchr3); - } -} - -quickcheck! { - fn qc_memchr1_matches_naive(n1: u8, corpus: Vec<u8>) -> bool { - memchr(n1, &corpus) == naive::memchr(n1, &corpus) - } -} - -quickcheck! { - fn qc_memchr2_matches_naive(n1: u8, n2: u8, corpus: Vec<u8>) -> bool { - memchr2(n1, n2, &corpus) == naive::memchr2(n1, n2, &corpus) - } -} - -quickcheck! { - fn qc_memchr3_matches_naive( - n1: u8, n2: u8, n3: u8, - corpus: Vec<u8> - ) -> bool { - memchr3(n1, n2, n3, &corpus) == naive::memchr3(n1, n2, n3, &corpus) - } -} - -quickcheck! { - fn qc_memrchr1_matches_naive(n1: u8, corpus: Vec<u8>) -> bool { - memrchr(n1, &corpus) == naive::memrchr(n1, &corpus) - } -} - -quickcheck! { - fn qc_memrchr2_matches_naive(n1: u8, n2: u8, corpus: Vec<u8>) -> bool { - memrchr2(n1, n2, &corpus) == naive::memrchr2(n1, n2, &corpus) - } -} - -quickcheck! { - fn qc_memrchr3_matches_naive( - n1: u8, n2: u8, n3: u8, - corpus: Vec<u8> - ) -> bool { - memrchr3(n1, n2, n3, &corpus) == naive::memrchr3(n1, n2, n3, &corpus) - } -} diff --git a/vendor/memchr/src/tests/memchr/mod.rs b/vendor/memchr/src/tests/memchr/mod.rs index 79f94ab56..0564ad4fb 100644 --- a/vendor/memchr/src/tests/memchr/mod.rs +++ b/vendor/memchr/src/tests/memchr/mod.rs @@ -1,7 +1,307 @@ -#[cfg(all(feature = "std", not(miri)))] -mod iter; -#[cfg(all(feature = "std", not(miri)))] -mod memchr; -mod simple; -#[cfg(all(feature = "std", not(miri)))] -mod testdata; +use alloc::{ + string::{String, ToString}, + vec, + vec::Vec, +}; + +use crate::ext::Byte; + +pub(crate) mod naive; +#[macro_use] +pub(crate) mod prop; + +const SEEDS: &'static [Seed] = &[ + Seed { haystack: "a", needles: &[b'a'], positions: &[0] }, + Seed { haystack: "aa", needles: &[b'a'], positions: &[0, 1] }, + Seed { haystack: "aaa", needles: &[b'a'], positions: &[0, 1, 2] }, + Seed { haystack: "", needles: &[b'a'], positions: &[] }, + Seed { haystack: "z", needles: &[b'a'], positions: &[] }, + Seed { haystack: "zz", needles: &[b'a'], positions: &[] }, + Seed { haystack: "zza", needles: &[b'a'], positions: &[2] }, + Seed { haystack: "zaza", needles: &[b'a'], positions: &[1, 3] }, + Seed { haystack: "zzza", needles: &[b'a'], positions: &[3] }, + Seed { haystack: "\x00a", needles: &[b'a'], positions: &[1] }, + Seed { haystack: "\x00", needles: &[b'\x00'], positions: &[0] }, + Seed { haystack: "\x00\x00", needles: &[b'\x00'], positions: &[0, 1] }, + Seed { haystack: "\x00a\x00", needles: &[b'\x00'], positions: &[0, 2] }, + Seed { haystack: "zzzzzzzzzzzzzzzza", needles: &[b'a'], positions: &[16] }, + Seed { + haystack: "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzza", + needles: &[b'a'], + positions: &[32], + }, + // two needles (applied to memchr2 + memchr3) + Seed { haystack: "az", needles: &[b'a', b'z'], positions: &[0, 1] }, + Seed { haystack: "az", needles: &[b'a', b'z'], positions: &[0, 1] }, + Seed { haystack: "az", needles: &[b'x', b'y'], positions: &[] }, + Seed { haystack: "az", needles: &[b'a', b'y'], positions: &[0] }, + Seed { haystack: "az", needles: &[b'x', b'z'], positions: &[1] }, + Seed { haystack: "yyyyaz", needles: &[b'a', b'z'], positions: &[4, 5] }, + Seed { haystack: "yyyyaz", needles: &[b'z', b'a'], positions: &[4, 5] }, + // three needles (applied to memchr3) + Seed { + haystack: "xyz", + needles: &[b'x', b'y', b'z'], + positions: &[0, 1, 2], + }, + Seed { + haystack: "zxy", + needles: &[b'x', b'y', b'z'], + positions: &[0, 1, 2], + }, + Seed { haystack: "zxy", needles: &[b'x', b'a', b'z'], positions: &[0, 1] }, + Seed { haystack: "zxy", needles: &[b't', b'a', b'z'], positions: &[0] }, + Seed { haystack: "yxz", needles: &[b't', b'a', b'z'], positions: &[2] }, +]; + +/// Runs a host of substring search tests. +/// +/// This has support for "partial" substring search implementations only work +/// for a subset of needles/haystacks. For example, the "packed pair" substring +/// search implementation only works for haystacks of some minimum length based +/// of the pair of bytes selected and the size of the vector used. +pub(crate) struct Runner { + needle_len: usize, +} + +impl Runner { + /// Create a new test runner for forward and reverse byte search + /// implementations. + /// + /// The `needle_len` given must be at most `3` and at least `1`. It + /// corresponds to the number of needle bytes to search for. + pub(crate) fn new(needle_len: usize) -> Runner { + assert!(needle_len >= 1, "needle_len must be at least 1"); + assert!(needle_len <= 3, "needle_len must be at most 3"); + Runner { needle_len } + } + + /// Run all tests. This panics on the first failure. + /// + /// If the implementation being tested returns `None` for a particular + /// haystack/needle combination, then that test is skipped. + pub(crate) fn forward_iter<F>(self, mut test: F) + where + F: FnMut(&[u8], &[u8]) -> Option<Vec<usize>> + 'static, + { + for seed in SEEDS.iter() { + if seed.needles.len() > self.needle_len { + continue; + } + for t in seed.generate() { + let results = match test(t.haystack.as_bytes(), &t.needles) { + None => continue, + Some(results) => results, + }; + assert_eq!( + t.expected, + results, + "needles: {:?}, haystack: {:?}", + t.needles + .iter() + .map(|&b| b.to_char()) + .collect::<Vec<char>>(), + t.haystack, + ); + } + } + } + + /// Run all tests in the reverse direction. This panics on the first + /// failure. + /// + /// If the implementation being tested returns `None` for a particular + /// haystack/needle combination, then that test is skipped. + pub(crate) fn reverse_iter<F>(self, mut test: F) + where + F: FnMut(&[u8], &[u8]) -> Option<Vec<usize>> + 'static, + { + for seed in SEEDS.iter() { + if seed.needles.len() > self.needle_len { + continue; + } + for t in seed.generate() { + let mut results = match test(t.haystack.as_bytes(), &t.needles) + { + None => continue, + Some(results) => results, + }; + results.reverse(); + assert_eq!( + t.expected, + results, + "needles: {:?}, haystack: {:?}", + t.needles + .iter() + .map(|&b| b.to_char()) + .collect::<Vec<char>>(), + t.haystack, + ); + } + } + } + + /// Run all tests as counting tests. This panics on the first failure. + /// + /// That is, this only checks that the number of matches is correct and + /// not whether the offsets of each match are. + pub(crate) fn count_iter<F>(self, mut test: F) + where + F: FnMut(&[u8], &[u8]) -> Option<usize> + 'static, + { + for seed in SEEDS.iter() { + if seed.needles.len() > self.needle_len { + continue; + } + for t in seed.generate() { + let got = match test(t.haystack.as_bytes(), &t.needles) { + None => continue, + Some(got) => got, + }; + assert_eq!( + t.expected.len(), + got, + "needles: {:?}, haystack: {:?}", + t.needles + .iter() + .map(|&b| b.to_char()) + .collect::<Vec<char>>(), + t.haystack, + ); + } + } + } + + /// Like `Runner::forward`, but for a function that returns only the next + /// match and not all matches. + /// + /// If the function returns `None`, then it is skipped. + pub(crate) fn forward_oneshot<F>(self, mut test: F) + where + F: FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static, + { + self.forward_iter(move |haystack, needles| { + let mut start = 0; + let mut results = vec![]; + while let Some(i) = test(&haystack[start..], needles)? { + results.push(start + i); + start += i + 1; + } + Some(results) + }) + } + + /// Like `Runner::reverse`, but for a function that returns only the last + /// match and not all matches. + /// + /// If the function returns `None`, then it is skipped. + pub(crate) fn reverse_oneshot<F>(self, mut test: F) + where + F: FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static, + { + self.reverse_iter(move |haystack, needles| { + let mut end = haystack.len(); + let mut results = vec![]; + while let Some(i) = test(&haystack[..end], needles)? { + results.push(i); + end = i; + } + Some(results) + }) + } +} + +/// A single test for memr?chr{,2,3}. +#[derive(Clone, Debug)] +struct Test { + /// The string to search in. + haystack: String, + /// The needles to look for. + needles: Vec<u8>, + /// The offsets that are expected to be found for all needles in the + /// forward direction. + expected: Vec<usize>, +} + +impl Test { + fn new(seed: &Seed) -> Test { + Test { + haystack: seed.haystack.to_string(), + needles: seed.needles.to_vec(), + expected: seed.positions.to_vec(), + } + } +} + +/// Data that can be expanded into many memchr tests by padding out the corpus. +#[derive(Clone, Debug)] +struct Seed { + /// The thing to search. We use `&str` instead of `&[u8]` because they + /// are nicer to write in tests, and we don't miss much since memchr + /// doesn't care about UTF-8. + /// + /// Corpora cannot contain either '%' or '#'. We use these bytes when + /// expanding test cases into many test cases, and we assume they are not + /// used. If they are used, `memchr_tests` will panic. + haystack: &'static str, + /// The needles to search for. This is intended to be an alternation of + /// needles. The number of needles may cause this test to be skipped for + /// some memchr variants. For example, a test with 2 needles cannot be used + /// to test `memchr`, but can be used to test `memchr2` and `memchr3`. + /// However, a test with only 1 needle can be used to test all of `memchr`, + /// `memchr2` and `memchr3`. We achieve this by filling in the needles with + /// bytes that we never used in the corpus (such as '#'). + needles: &'static [u8], + /// The positions expected to match for all of the needles. + positions: &'static [usize], +} + +impl Seed { + /// Controls how much we expand the haystack on either side for each test. + /// We lower this on Miri because otherwise running the tests would take + /// forever. + const EXPAND_LEN: usize = { + #[cfg(not(miri))] + { + 515 + } + #[cfg(miri)] + { + 6 + } + }; + + /// Expand this test into many variations of the same test. + /// + /// In particular, this will generate more tests with larger corpus sizes. + /// The expected positions are updated to maintain the integrity of the + /// test. + /// + /// This is important in testing a memchr implementation, because there are + /// often different cases depending on the length of the corpus. + /// + /// Note that we extend the corpus by adding `%` bytes, which we + /// don't otherwise use as a needle. + fn generate(&self) -> impl Iterator<Item = Test> { + let mut more = vec![]; + + // Add bytes to the start of the corpus. + for i in 0..Seed::EXPAND_LEN { + let mut t = Test::new(self); + let mut new: String = core::iter::repeat('%').take(i).collect(); + new.push_str(&t.haystack); + t.haystack = new; + t.expected = t.expected.into_iter().map(|p| p + i).collect(); + more.push(t); + } + // Add bytes to the end of the corpus. + for i in 1..Seed::EXPAND_LEN { + let mut t = Test::new(self); + let padding: String = core::iter::repeat('%').take(i).collect(); + t.haystack.push_str(&padding); + more.push(t); + } + + more.into_iter() + } +} diff --git a/vendor/memchr/src/tests/memchr/naive.rs b/vendor/memchr/src/tests/memchr/naive.rs new file mode 100644 index 000000000..6ebcdaea7 --- /dev/null +++ b/vendor/memchr/src/tests/memchr/naive.rs @@ -0,0 +1,33 @@ +pub(crate) fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> { + haystack.iter().position(|&b| b == n1) +} + +pub(crate) fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> { + haystack.iter().position(|&b| b == n1 || b == n2) +} + +pub(crate) fn memchr3( + n1: u8, + n2: u8, + n3: u8, + haystack: &[u8], +) -> Option<usize> { + haystack.iter().position(|&b| b == n1 || b == n2 || b == n3) +} + +pub(crate) fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> { + haystack.iter().rposition(|&b| b == n1) +} + +pub(crate) fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> { + haystack.iter().rposition(|&b| b == n1 || b == n2) +} + +pub(crate) fn memrchr3( + n1: u8, + n2: u8, + n3: u8, + haystack: &[u8], +) -> Option<usize> { + haystack.iter().rposition(|&b| b == n1 || b == n2 || b == n3) +} diff --git a/vendor/memchr/src/tests/memchr/prop.rs b/vendor/memchr/src/tests/memchr/prop.rs new file mode 100644 index 000000000..b9882602b --- /dev/null +++ b/vendor/memchr/src/tests/memchr/prop.rs @@ -0,0 +1,321 @@ +#[cfg(miri)] +#[macro_export] +macro_rules! define_memchr_quickcheck { + ($($tt:tt)*) => {}; +} + +#[cfg(not(miri))] +#[macro_export] +macro_rules! define_memchr_quickcheck { + ($mod:ident) => { + define_memchr_quickcheck!($mod, new); + }; + ($mod:ident, $cons:ident) => { + use alloc::vec::Vec; + + use quickcheck::TestResult; + + use crate::tests::memchr::{ + naive, + prop::{double_ended_take, naive1_iter, naive2_iter, naive3_iter}, + }; + + quickcheck::quickcheck! { + fn qc_memchr_matches_naive(n1: u8, corpus: Vec<u8>) -> TestResult { + let expected = naive::memchr(n1, &corpus); + let got = match $mod::One::$cons(n1) { + None => return TestResult::discard(), + Some(f) => f.find(&corpus), + }; + TestResult::from_bool(expected == got) + } + + fn qc_memrchr_matches_naive(n1: u8, corpus: Vec<u8>) -> TestResult { + let expected = naive::memrchr(n1, &corpus); + let got = match $mod::One::$cons(n1) { + None => return TestResult::discard(), + Some(f) => f.rfind(&corpus), + }; + TestResult::from_bool(expected == got) + } + + fn qc_memchr2_matches_naive(n1: u8, n2: u8, corpus: Vec<u8>) -> TestResult { + let expected = naive::memchr2(n1, n2, &corpus); + let got = match $mod::Two::$cons(n1, n2) { + None => return TestResult::discard(), + Some(f) => f.find(&corpus), + }; + TestResult::from_bool(expected == got) + } + + fn qc_memrchr2_matches_naive(n1: u8, n2: u8, corpus: Vec<u8>) -> TestResult { + let expected = naive::memrchr2(n1, n2, &corpus); + let got = match $mod::Two::$cons(n1, n2) { + None => return TestResult::discard(), + Some(f) => f.rfind(&corpus), + }; + TestResult::from_bool(expected == got) + } + + fn qc_memchr3_matches_naive( + n1: u8, n2: u8, n3: u8, + corpus: Vec<u8> + ) -> TestResult { + let expected = naive::memchr3(n1, n2, n3, &corpus); + let got = match $mod::Three::$cons(n1, n2, n3) { + None => return TestResult::discard(), + Some(f) => f.find(&corpus), + }; + TestResult::from_bool(expected == got) + } + + fn qc_memrchr3_matches_naive( + n1: u8, n2: u8, n3: u8, + corpus: Vec<u8> + ) -> TestResult { + let expected = naive::memrchr3(n1, n2, n3, &corpus); + let got = match $mod::Three::$cons(n1, n2, n3) { + None => return TestResult::discard(), + Some(f) => f.rfind(&corpus), + }; + TestResult::from_bool(expected == got) + } + + fn qc_memchr_double_ended_iter( + needle: u8, data: Vec<u8>, take_side: Vec<bool> + ) -> TestResult { + // make nonempty + let mut take_side = take_side; + if take_side.is_empty() { take_side.push(true) }; + + let finder = match $mod::One::$cons(needle) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let iter = finder.iter(&data); + let got = double_ended_take( + iter, + take_side.iter().cycle().cloned(), + ); + let expected = naive1_iter(needle, &data); + + TestResult::from_bool(got.iter().cloned().eq(expected)) + } + + fn qc_memchr2_double_ended_iter( + needle1: u8, needle2: u8, data: Vec<u8>, take_side: Vec<bool> + ) -> TestResult { + // make nonempty + let mut take_side = take_side; + if take_side.is_empty() { take_side.push(true) }; + + let finder = match $mod::Two::$cons(needle1, needle2) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let iter = finder.iter(&data); + let got = double_ended_take( + iter, + take_side.iter().cycle().cloned(), + ); + let expected = naive2_iter(needle1, needle2, &data); + + TestResult::from_bool(got.iter().cloned().eq(expected)) + } + + fn qc_memchr3_double_ended_iter( + needle1: u8, needle2: u8, needle3: u8, + data: Vec<u8>, take_side: Vec<bool> + ) -> TestResult { + // make nonempty + let mut take_side = take_side; + if take_side.is_empty() { take_side.push(true) }; + + let finder = match $mod::Three::$cons(needle1, needle2, needle3) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let iter = finder.iter(&data); + let got = double_ended_take( + iter, + take_side.iter().cycle().cloned(), + ); + let expected = naive3_iter(needle1, needle2, needle3, &data); + + TestResult::from_bool(got.iter().cloned().eq(expected)) + } + + fn qc_memchr1_iter(data: Vec<u8>) -> TestResult { + let needle = 0; + let finder = match $mod::One::$cons(needle) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let got = finder.iter(&data); + let expected = naive1_iter(needle, &data); + TestResult::from_bool(got.eq(expected)) + } + + fn qc_memchr1_rev_iter(data: Vec<u8>) -> TestResult { + let needle = 0; + + let finder = match $mod::One::$cons(needle) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let got = finder.iter(&data).rev(); + let expected = naive1_iter(needle, &data).rev(); + TestResult::from_bool(got.eq(expected)) + } + + fn qc_memchr2_iter(data: Vec<u8>) -> TestResult { + let needle1 = 0; + let needle2 = 1; + + let finder = match $mod::Two::$cons(needle1, needle2) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let got = finder.iter(&data); + let expected = naive2_iter(needle1, needle2, &data); + TestResult::from_bool(got.eq(expected)) + } + + fn qc_memchr2_rev_iter(data: Vec<u8>) -> TestResult { + let needle1 = 0; + let needle2 = 1; + + let finder = match $mod::Two::$cons(needle1, needle2) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let got = finder.iter(&data).rev(); + let expected = naive2_iter(needle1, needle2, &data).rev(); + TestResult::from_bool(got.eq(expected)) + } + + fn qc_memchr3_iter(data: Vec<u8>) -> TestResult { + let needle1 = 0; + let needle2 = 1; + let needle3 = 2; + + let finder = match $mod::Three::$cons(needle1, needle2, needle3) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let got = finder.iter(&data); + let expected = naive3_iter(needle1, needle2, needle3, &data); + TestResult::from_bool(got.eq(expected)) + } + + fn qc_memchr3_rev_iter(data: Vec<u8>) -> TestResult { + let needle1 = 0; + let needle2 = 1; + let needle3 = 2; + + let finder = match $mod::Three::$cons(needle1, needle2, needle3) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let got = finder.iter(&data).rev(); + let expected = naive3_iter(needle1, needle2, needle3, &data).rev(); + TestResult::from_bool(got.eq(expected)) + } + + fn qc_memchr1_iter_size_hint(data: Vec<u8>) -> TestResult { + // test that the size hint is within reasonable bounds + let needle = 0; + let finder = match $mod::One::$cons(needle) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let mut iter = finder.iter(&data); + let mut real_count = data + .iter() + .filter(|&&elt| elt == needle) + .count(); + + while let Some(index) = iter.next() { + real_count -= 1; + let (lower, upper) = iter.size_hint(); + assert!(lower <= real_count); + assert!(upper.unwrap() >= real_count); + assert!(upper.unwrap() <= data.len() - index); + } + TestResult::passed() + } + } + }; +} + +// take items from a DEI, taking front for each true and back for each false. +// Return a vector with the concatenation of the fronts and the reverse of the +// backs. +#[cfg(not(miri))] +pub(crate) fn double_ended_take<I, J>( + mut iter: I, + take_side: J, +) -> alloc::vec::Vec<I::Item> +where + I: DoubleEndedIterator, + J: Iterator<Item = bool>, +{ + let mut found_front = alloc::vec![]; + let mut found_back = alloc::vec![]; + + for take_front in take_side { + if take_front { + if let Some(pos) = iter.next() { + found_front.push(pos); + } else { + break; + } + } else { + if let Some(pos) = iter.next_back() { + found_back.push(pos); + } else { + break; + } + }; + } + + let mut all_found = found_front; + all_found.extend(found_back.into_iter().rev()); + all_found +} + +// return an iterator of the 0-based indices of haystack that match the needle +#[cfg(not(miri))] +pub(crate) fn naive1_iter<'a>( + n1: u8, + haystack: &'a [u8], +) -> impl DoubleEndedIterator<Item = usize> + 'a { + haystack.iter().enumerate().filter(move |&(_, &b)| b == n1).map(|t| t.0) +} + +#[cfg(not(miri))] +pub(crate) fn naive2_iter<'a>( + n1: u8, + n2: u8, + haystack: &'a [u8], +) -> impl DoubleEndedIterator<Item = usize> + 'a { + haystack + .iter() + .enumerate() + .filter(move |&(_, &b)| b == n1 || b == n2) + .map(|t| t.0) +} + +#[cfg(not(miri))] +pub(crate) fn naive3_iter<'a>( + n1: u8, + n2: u8, + n3: u8, + haystack: &'a [u8], +) -> impl DoubleEndedIterator<Item = usize> + 'a { + haystack + .iter() + .enumerate() + .filter(move |&(_, &b)| b == n1 || b == n2 || b == n3) + .map(|t| t.0) +} diff --git a/vendor/memchr/src/tests/memchr/simple.rs b/vendor/memchr/src/tests/memchr/simple.rs deleted file mode 100644 index bed5b4863..000000000 --- a/vendor/memchr/src/tests/memchr/simple.rs +++ /dev/null @@ -1,23 +0,0 @@ -// Simple tests using MIRI. These are intended only to be a simple exercise of -// memchr when tests are run under miri. These are mostly necessary because the -// other tests are far more extensive and take too long to run under miri. -// -// These tests are also run when the 'std' feature is not enabled. - -use crate::{memchr, memchr2, memchr3, memrchr, memrchr2, memrchr3}; - -#[test] -fn simple() { - assert_eq!(memchr(b'a', b"abcda"), Some(0)); - assert_eq!(memchr(b'z', b"abcda"), None); - assert_eq!(memchr2(b'a', b'z', b"abcda"), Some(0)); - assert_eq!(memchr2(b'z', b'y', b"abcda"), None); - assert_eq!(memchr3(b'a', b'z', b'b', b"abcda"), Some(0)); - assert_eq!(memchr3(b'z', b'y', b'x', b"abcda"), None); - assert_eq!(memrchr(b'a', b"abcda"), Some(4)); - assert_eq!(memrchr(b'z', b"abcda"), None); - assert_eq!(memrchr2(b'a', b'z', b"abcda"), Some(4)); - assert_eq!(memrchr2(b'z', b'y', b"abcda"), None); - assert_eq!(memrchr3(b'a', b'z', b'b', b"abcda"), Some(4)); - assert_eq!(memrchr3(b'z', b'y', b'x', b"abcda"), None); -} diff --git a/vendor/memchr/src/tests/memchr/testdata.rs b/vendor/memchr/src/tests/memchr/testdata.rs deleted file mode 100644 index 6dda5246f..000000000 --- a/vendor/memchr/src/tests/memchr/testdata.rs +++ /dev/null @@ -1,351 +0,0 @@ -use std::iter::repeat; - -/// Create a sequence of tests that should be run by memchr implementations. -pub fn memchr_tests() -> Vec<MemchrTest> { - let mut tests = Vec::new(); - for statict in MEMCHR_TESTS { - assert!(!statict.corpus.contains("%"), "% is not allowed in corpora"); - assert!(!statict.corpus.contains("#"), "# is not allowed in corpora"); - assert!(!statict.needles.contains(&b'%'), "% is an invalid needle"); - assert!(!statict.needles.contains(&b'#'), "# is an invalid needle"); - - let t = MemchrTest { - corpus: statict.corpus.to_string(), - needles: statict.needles.to_vec(), - positions: statict.positions.to_vec(), - }; - tests.push(t.clone()); - tests.extend(t.expand()); - } - tests -} - -/// A set of tests for memchr-like functions. -/// -/// These tests mostly try to cover the short string cases. We cover the longer -/// string cases via the benchmarks (which are tests themselves), via -/// quickcheck tests and via automatic expansion of each test case (by -/// increasing the corpus size). Finally, we cover different alignment cases -/// in the tests by varying the starting point of the slice. -const MEMCHR_TESTS: &[MemchrTestStatic] = &[ - // one needle (applied to memchr + memchr2 + memchr3) - MemchrTestStatic { corpus: "a", needles: &[b'a'], positions: &[0] }, - MemchrTestStatic { corpus: "aa", needles: &[b'a'], positions: &[0, 1] }, - MemchrTestStatic { - corpus: "aaa", - needles: &[b'a'], - positions: &[0, 1, 2], - }, - MemchrTestStatic { corpus: "", needles: &[b'a'], positions: &[] }, - MemchrTestStatic { corpus: "z", needles: &[b'a'], positions: &[] }, - MemchrTestStatic { corpus: "zz", needles: &[b'a'], positions: &[] }, - MemchrTestStatic { corpus: "zza", needles: &[b'a'], positions: &[2] }, - MemchrTestStatic { corpus: "zaza", needles: &[b'a'], positions: &[1, 3] }, - MemchrTestStatic { corpus: "zzza", needles: &[b'a'], positions: &[3] }, - MemchrTestStatic { corpus: "\x00a", needles: &[b'a'], positions: &[1] }, - MemchrTestStatic { corpus: "\x00", needles: &[b'\x00'], positions: &[0] }, - MemchrTestStatic { - corpus: "\x00\x00", - needles: &[b'\x00'], - positions: &[0, 1], - }, - MemchrTestStatic { - corpus: "\x00a\x00", - needles: &[b'\x00'], - positions: &[0, 2], - }, - MemchrTestStatic { - corpus: "zzzzzzzzzzzzzzzza", - needles: &[b'a'], - positions: &[16], - }, - MemchrTestStatic { - corpus: "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzza", - needles: &[b'a'], - positions: &[32], - }, - // two needles (applied to memchr2 + memchr3) - MemchrTestStatic { - corpus: "az", - needles: &[b'a', b'z'], - positions: &[0, 1], - }, - MemchrTestStatic { - corpus: "az", - needles: &[b'a', b'z'], - positions: &[0, 1], - }, - MemchrTestStatic { corpus: "az", needles: &[b'x', b'y'], positions: &[] }, - MemchrTestStatic { corpus: "az", needles: &[b'a', b'y'], positions: &[0] }, - MemchrTestStatic { corpus: "az", needles: &[b'x', b'z'], positions: &[1] }, - MemchrTestStatic { - corpus: "yyyyaz", - needles: &[b'a', b'z'], - positions: &[4, 5], - }, - MemchrTestStatic { - corpus: "yyyyaz", - needles: &[b'z', b'a'], - positions: &[4, 5], - }, - // three needles (applied to memchr3) - MemchrTestStatic { - corpus: "xyz", - needles: &[b'x', b'y', b'z'], - positions: &[0, 1, 2], - }, - MemchrTestStatic { - corpus: "zxy", - needles: &[b'x', b'y', b'z'], - positions: &[0, 1, 2], - }, - MemchrTestStatic { - corpus: "zxy", - needles: &[b'x', b'a', b'z'], - positions: &[0, 1], - }, - MemchrTestStatic { - corpus: "zxy", - needles: &[b't', b'a', b'z'], - positions: &[0], - }, - MemchrTestStatic { - corpus: "yxz", - needles: &[b't', b'a', b'z'], - positions: &[2], - }, -]; - -/// A description of a test on a memchr like function. -#[derive(Clone, Debug)] -pub struct MemchrTest { - /// The thing to search. We use `&str` instead of `&[u8]` because they - /// are nicer to write in tests, and we don't miss much since memchr - /// doesn't care about UTF-8. - /// - /// Corpora cannot contain either '%' or '#'. We use these bytes when - /// expanding test cases into many test cases, and we assume they are not - /// used. If they are used, `memchr_tests` will panic. - corpus: String, - /// The needles to search for. This is intended to be an "alternation" of - /// needles. The number of needles may cause this test to be skipped for - /// some memchr variants. For example, a test with 2 needles cannot be used - /// to test `memchr`, but can be used to test `memchr2` and `memchr3`. - /// However, a test with only 1 needle can be used to test all of `memchr`, - /// `memchr2` and `memchr3`. We achieve this by filling in the needles with - /// bytes that we never used in the corpus (such as '#'). - needles: Vec<u8>, - /// The positions expected to match for all of the needles. - positions: Vec<usize>, -} - -/// Like MemchrTest, but easier to define as a constant. -#[derive(Clone, Debug)] -pub struct MemchrTestStatic { - corpus: &'static str, - needles: &'static [u8], - positions: &'static [usize], -} - -impl MemchrTest { - pub fn one<F: Fn(u8, &[u8]) -> Option<usize>>(&self, reverse: bool, f: F) { - let needles = match self.needles(1) { - None => return, - Some(needles) => needles, - }; - // We test different alignments here. Since some implementations use - // AVX2, which can read 32 bytes at a time, we test at least that. - // Moreover, with loop unrolling, we sometimes process 64 (sse2) or 128 - // (avx) bytes at a time, so we include that in our offsets as well. - // - // You might think this would cause most needles to not be found, but - // we actually expand our tests to include corpus sizes all the way up - // to >500 bytes, so we should exercise most branches. - for align in 0..130 { - let corpus = self.corpus(align); - assert_eq!( - self.positions(align, reverse).get(0).cloned(), - f(needles[0], corpus.as_bytes()), - "search for {:?} failed in: {:?} (len: {}, alignment: {})", - needles[0] as char, - corpus, - corpus.len(), - align - ); - } - } - - pub fn two<F: Fn(u8, u8, &[u8]) -> Option<usize>>( - &self, - reverse: bool, - f: F, - ) { - let needles = match self.needles(2) { - None => return, - Some(needles) => needles, - }; - for align in 0..130 { - let corpus = self.corpus(align); - assert_eq!( - self.positions(align, reverse).get(0).cloned(), - f(needles[0], needles[1], corpus.as_bytes()), - "search for {:?}|{:?} failed in: {:?} \ - (len: {}, alignment: {})", - needles[0] as char, - needles[1] as char, - corpus, - corpus.len(), - align - ); - } - } - - pub fn three<F: Fn(u8, u8, u8, &[u8]) -> Option<usize>>( - &self, - reverse: bool, - f: F, - ) { - let needles = match self.needles(3) { - None => return, - Some(needles) => needles, - }; - for align in 0..130 { - let corpus = self.corpus(align); - assert_eq!( - self.positions(align, reverse).get(0).cloned(), - f(needles[0], needles[1], needles[2], corpus.as_bytes()), - "search for {:?}|{:?}|{:?} failed in: {:?} \ - (len: {}, alignment: {})", - needles[0] as char, - needles[1] as char, - needles[2] as char, - corpus, - corpus.len(), - align - ); - } - } - - pub fn iter_one<'a, I, F>(&'a self, reverse: bool, f: F) - where - F: FnOnce(u8, &'a [u8]) -> I, - I: Iterator<Item = usize>, - { - if let Some(ns) = self.needles(1) { - self.iter(reverse, f(ns[0], self.corpus.as_bytes())); - } - } - - pub fn iter_two<'a, I, F>(&'a self, reverse: bool, f: F) - where - F: FnOnce(u8, u8, &'a [u8]) -> I, - I: Iterator<Item = usize>, - { - if let Some(ns) = self.needles(2) { - self.iter(reverse, f(ns[0], ns[1], self.corpus.as_bytes())); - } - } - - pub fn iter_three<'a, I, F>(&'a self, reverse: bool, f: F) - where - F: FnOnce(u8, u8, u8, &'a [u8]) -> I, - I: Iterator<Item = usize>, - { - if let Some(ns) = self.needles(3) { - self.iter(reverse, f(ns[0], ns[1], ns[2], self.corpus.as_bytes())); - } - } - - /// Test that the positions yielded by the given iterator match the - /// positions in this test. If reverse is true, then reverse the positions - /// before comparing them. - fn iter<I: Iterator<Item = usize>>(&self, reverse: bool, it: I) { - assert_eq!( - self.positions(0, reverse), - it.collect::<Vec<usize>>(), - r"search for {:?} failed in: {:?}", - self.needles.iter().map(|&b| b as char).collect::<Vec<char>>(), - self.corpus - ); - } - - /// Expand this test into many variations of the same test. - /// - /// In particular, this will generate more tests with larger corpus sizes. - /// The expected positions are updated to maintain the integrity of the - /// test. - /// - /// This is important in testing a memchr implementation, because there are - /// often different cases depending on the length of the corpus. - /// - /// Note that we extend the corpus by adding `%` bytes, which we - /// don't otherwise use as a needle. - fn expand(&self) -> Vec<MemchrTest> { - let mut more = Vec::new(); - - // Add bytes to the start of the corpus. - for i in 1..515 { - let mut t = self.clone(); - let mut new_corpus: String = repeat('%').take(i).collect(); - new_corpus.push_str(&t.corpus); - t.corpus = new_corpus; - t.positions = t.positions.into_iter().map(|p| p + i).collect(); - more.push(t); - } - // Add bytes to the end of the corpus. - for i in 1..515 { - let mut t = self.clone(); - let padding: String = repeat('%').take(i).collect(); - t.corpus.push_str(&padding); - more.push(t); - } - - more - } - - /// Return the corpus at the given alignment. - /// - /// If the alignment exceeds the length of the corpus, then this returns - /// an empty slice. - fn corpus(&self, align: usize) -> &str { - self.corpus.get(align..).unwrap_or("") - } - - /// Return exactly `count` needles from this test. If this test has less - /// than `count` needles, then add `#` until the number of needles - /// matches `count`. If this test has more than `count` needles, then - /// return `None` (because there is no way to use this test data for a - /// search using fewer needles). - fn needles(&self, count: usize) -> Option<Vec<u8>> { - if self.needles.len() > count { - return None; - } - - let mut needles = self.needles.to_vec(); - for _ in needles.len()..count { - // we assume # is never used in tests. - needles.push(b'#'); - } - Some(needles) - } - - /// Return the positions in this test, reversed if `reverse` is true. - /// - /// If alignment is given, then all positions greater than or equal to that - /// alignment are offset by the alignment. Positions less than the - /// alignment are dropped. - fn positions(&self, align: usize, reverse: bool) -> Vec<usize> { - let positions = if reverse { - let mut positions = self.positions.to_vec(); - positions.reverse(); - positions - } else { - self.positions.to_vec() - }; - positions - .into_iter() - .filter(|&p| p >= align) - .map(|p| p - align) - .collect() - } -} diff --git a/vendor/memchr/src/tests/mod.rs b/vendor/memchr/src/tests/mod.rs index f4d406cd1..259b67827 100644 --- a/vendor/memchr/src/tests/mod.rs +++ b/vendor/memchr/src/tests/mod.rs @@ -1,15 +1,15 @@ -mod memchr; +#[macro_use] +pub(crate) mod memchr; +pub(crate) mod packedpair; +#[macro_use] +pub(crate) mod substring; // For debugging, particularly in CI, print out the byte order of the current // target. -#[cfg(all(feature = "std", target_endian = "little"))] #[test] fn byte_order() { - eprintln!("LITTLE ENDIAN"); -} - -#[cfg(all(feature = "std", target_endian = "big"))] -#[test] -fn byte_order() { - eprintln!("BIG ENDIAN"); + #[cfg(target_endian = "little")] + std::eprintln!("LITTLE ENDIAN"); + #[cfg(target_endian = "big")] + std::eprintln!("BIG ENDIAN"); } diff --git a/vendor/memchr/src/tests/packedpair.rs b/vendor/memchr/src/tests/packedpair.rs new file mode 100644 index 000000000..204635b83 --- /dev/null +++ b/vendor/memchr/src/tests/packedpair.rs @@ -0,0 +1,216 @@ +use alloc::{boxed::Box, vec, vec::Vec}; + +/// A set of "packed pair" test seeds. Each seed serves as the base for the +/// generation of many other tests. In essence, the seed captures the pair of +/// bytes we used for a predicate and first byte among our needle. The tests +/// generated from each seed essentially vary the length of the needle and +/// haystack, while using the rare/first byte configuration from the seed. +/// +/// The purpose of this is to test many different needle/haystack lengths. +/// In particular, some of the vector optimizations might only have bugs +/// in haystacks of a certain size. +const SEEDS: &[Seed] = &[ + // Why not use different 'first' bytes? It seemed like a good idea to be + // able to configure it, but when I wrote the test generator below, it + // didn't seem necessary to use for reasons that I forget. + Seed { first: b'x', index1: b'y', index2: b'z' }, + Seed { first: b'x', index1: b'x', index2: b'z' }, + Seed { first: b'x', index1: b'y', index2: b'x' }, + Seed { first: b'x', index1: b'x', index2: b'x' }, + Seed { first: b'x', index1: b'y', index2: b'y' }, +]; + +/// Runs a host of "packed pair" search tests. +/// +/// These tests specifically look for the occurrence of a possible substring +/// match based on a pair of bytes matching at the right offsets. +pub(crate) struct Runner { + fwd: Option< + Box< + dyn FnMut(&[u8], &[u8], u8, u8) -> Option<Option<usize>> + 'static, + >, + >, +} + +impl Runner { + /// Create a new test runner for "packed pair" substring search. + pub(crate) fn new() -> Runner { + Runner { fwd: None } + } + + /// Run all tests. This panics on the first failure. + /// + /// If the implementation being tested returns `None` for a particular + /// haystack/needle combination, then that test is skipped. + /// + /// This runs tests on both the forward and reverse implementations given. + /// If either (or both) are missing, then tests for that implementation are + /// skipped. + pub(crate) fn run(self) { + if let Some(mut fwd) = self.fwd { + for seed in SEEDS.iter() { + for t in seed.generate() { + match fwd(&t.haystack, &t.needle, t.index1, t.index2) { + None => continue, + Some(result) => { + assert_eq!( + t.fwd, result, + "FORWARD, needle: {:?}, haystack: {:?}, \ + index1: {:?}, index2: {:?}", + t.needle, t.haystack, t.index1, t.index2, + ) + } + } + } + } + } + } + + /// Set the implementation for forward "packed pair" substring search. + /// + /// If the closure returns `None`, then it is assumed that the given + /// test cannot be applied to the particular implementation and it is + /// skipped. For example, if a particular implementation only supports + /// needles or haystacks for some minimum length. + /// + /// If this is not set, then forward "packed pair" search is not tested. + pub(crate) fn fwd( + mut self, + search: impl FnMut(&[u8], &[u8], u8, u8) -> Option<Option<usize>> + 'static, + ) -> Runner { + self.fwd = Some(Box::new(search)); + self + } +} + +/// A test that represents the input and expected output to a "packed pair" +/// search function. The test should be able to run with any "packed pair" +/// implementation and get the expected output. +struct Test { + haystack: Vec<u8>, + needle: Vec<u8>, + index1: u8, + index2: u8, + fwd: Option<usize>, +} + +impl Test { + /// Create a new "packed pair" test from a seed and some given offsets to + /// the pair of bytes to use as a predicate in the seed's needle. + /// + /// If a valid test could not be constructed, then None is returned. + /// (Currently, we take the approach of massaging tests to be valid + /// instead of rejecting them outright.) + fn new( + seed: Seed, + index1: usize, + index2: usize, + haystack_len: usize, + needle_len: usize, + fwd: Option<usize>, + ) -> Option<Test> { + let mut index1: u8 = index1.try_into().unwrap(); + let mut index2: u8 = index2.try_into().unwrap(); + // The '#' byte is never used in a haystack (unless we're expecting + // a match), while the '@' byte is never used in a needle. + let mut haystack = vec![b'@'; haystack_len]; + let mut needle = vec![b'#'; needle_len]; + needle[0] = seed.first; + needle[index1 as usize] = seed.index1; + needle[index2 as usize] = seed.index2; + // If we're expecting a match, then make sure the needle occurs + // in the haystack at the expected position. + if let Some(i) = fwd { + haystack[i..i + needle.len()].copy_from_slice(&needle); + } + // If the operations above lead to rare offsets pointing to the + // non-first occurrence of a byte, then adjust it. This might lead + // to redundant tests, but it's simpler than trying to change the + // generation process I think. + if let Some(i) = crate::memchr(seed.index1, &needle) { + index1 = u8::try_from(i).unwrap(); + } + if let Some(i) = crate::memchr(seed.index2, &needle) { + index2 = u8::try_from(i).unwrap(); + } + Some(Test { haystack, needle, index1, index2, fwd }) + } +} + +/// Data that describes a single prefilter test seed. +#[derive(Clone, Copy)] +struct Seed { + first: u8, + index1: u8, + index2: u8, +} + +impl Seed { + const NEEDLE_LENGTH_LIMIT: usize = { + #[cfg(not(miri))] + { + 33 + } + #[cfg(miri)] + { + 5 + } + }; + + const HAYSTACK_LENGTH_LIMIT: usize = { + #[cfg(not(miri))] + { + 65 + } + #[cfg(miri)] + { + 8 + } + }; + + /// Generate a series of prefilter tests from this seed. + fn generate(self) -> impl Iterator<Item = Test> { + let len_start = 2; + // The iterator below generates *a lot* of tests. The number of + // tests was chosen somewhat empirically to be "bearable" when + // running the test suite. + // + // We use an iterator here because the collective haystacks of all + // these test cases add up to enough memory to OOM a conservative + // sandbox or a small laptop. + (len_start..=Seed::NEEDLE_LENGTH_LIMIT).flat_map(move |needle_len| { + let index_start = len_start - 1; + (index_start..needle_len).flat_map(move |index1| { + (index1..needle_len).flat_map(move |index2| { + (needle_len..=Seed::HAYSTACK_LENGTH_LIMIT).flat_map( + move |haystack_len| { + Test::new( + self, + index1, + index2, + haystack_len, + needle_len, + None, + ) + .into_iter() + .chain( + (0..=(haystack_len - needle_len)).flat_map( + move |output| { + Test::new( + self, + index1, + index2, + haystack_len, + needle_len, + Some(output), + ) + }, + ), + ) + }, + ) + }) + }) + }) + } +} diff --git a/vendor/memchr/src/tests/substring/mod.rs b/vendor/memchr/src/tests/substring/mod.rs new file mode 100644 index 000000000..dd10cbdd4 --- /dev/null +++ b/vendor/memchr/src/tests/substring/mod.rs @@ -0,0 +1,232 @@ +/*! +This module defines tests and test helpers for substring implementations. +*/ + +use alloc::{ + boxed::Box, + format, + string::{String, ToString}, +}; + +pub(crate) mod naive; +#[macro_use] +pub(crate) mod prop; + +const SEEDS: &'static [Seed] = &[ + Seed::new("", "", Some(0), Some(0)), + Seed::new("", "a", Some(0), Some(1)), + Seed::new("", "ab", Some(0), Some(2)), + Seed::new("", "abc", Some(0), Some(3)), + Seed::new("a", "", None, None), + Seed::new("a", "a", Some(0), Some(0)), + Seed::new("a", "aa", Some(0), Some(1)), + Seed::new("a", "ba", Some(1), Some(1)), + Seed::new("a", "bba", Some(2), Some(2)), + Seed::new("a", "bbba", Some(3), Some(3)), + Seed::new("a", "bbbab", Some(3), Some(3)), + Seed::new("a", "bbbabb", Some(3), Some(3)), + Seed::new("a", "bbbabbb", Some(3), Some(3)), + Seed::new("a", "bbbbbb", None, None), + Seed::new("ab", "", None, None), + Seed::new("ab", "a", None, None), + Seed::new("ab", "b", None, None), + Seed::new("ab", "ab", Some(0), Some(0)), + Seed::new("ab", "aab", Some(1), Some(1)), + Seed::new("ab", "aaab", Some(2), Some(2)), + Seed::new("ab", "abaab", Some(0), Some(3)), + Seed::new("ab", "baaab", Some(3), Some(3)), + Seed::new("ab", "acb", None, None), + Seed::new("ab", "abba", Some(0), Some(0)), + Seed::new("abc", "ab", None, None), + Seed::new("abc", "abc", Some(0), Some(0)), + Seed::new("abc", "abcz", Some(0), Some(0)), + Seed::new("abc", "abczz", Some(0), Some(0)), + Seed::new("abc", "zabc", Some(1), Some(1)), + Seed::new("abc", "zzabc", Some(2), Some(2)), + Seed::new("abc", "azbc", None, None), + Seed::new("abc", "abzc", None, None), + Seed::new("abczdef", "abczdefzzzzzzzzzzzzzzzzzzzz", Some(0), Some(0)), + Seed::new("abczdef", "zzzzzzzzzzzzzzzzzzzzabczdef", Some(20), Some(20)), + Seed::new( + "xyz", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaxyz", + Some(32), + Some(32), + ), + Seed::new("\u{0}\u{15}", "\u{0}\u{15}\u{15}\u{0}", Some(0), Some(0)), + Seed::new("\u{0}\u{1e}", "\u{1e}\u{0}", None, None), +]; + +/// Runs a host of substring search tests. +/// +/// This has support for "partial" substring search implementations only work +/// for a subset of needles/haystacks. For example, the "packed pair" substring +/// search implementation only works for haystacks of some minimum length based +/// of the pair of bytes selected and the size of the vector used. +pub(crate) struct Runner { + fwd: Option< + Box<dyn FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static>, + >, + rev: Option< + Box<dyn FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static>, + >, +} + +impl Runner { + /// Create a new test runner for forward and reverse substring search + /// implementations. + pub(crate) fn new() -> Runner { + Runner { fwd: None, rev: None } + } + + /// Run all tests. This panics on the first failure. + /// + /// If the implementation being tested returns `None` for a particular + /// haystack/needle combination, then that test is skipped. + /// + /// This runs tests on both the forward and reverse implementations given. + /// If either (or both) are missing, then tests for that implementation are + /// skipped. + pub(crate) fn run(self) { + if let Some(mut fwd) = self.fwd { + for seed in SEEDS.iter() { + for t in seed.generate() { + match fwd(t.haystack.as_bytes(), t.needle.as_bytes()) { + None => continue, + Some(result) => { + assert_eq!( + t.fwd, result, + "FORWARD, needle: {:?}, haystack: {:?}", + t.needle, t.haystack, + ); + } + } + } + } + } + if let Some(mut rev) = self.rev { + for seed in SEEDS.iter() { + for t in seed.generate() { + match rev(t.haystack.as_bytes(), t.needle.as_bytes()) { + None => continue, + Some(result) => { + assert_eq!( + t.rev, result, + "REVERSE, needle: {:?}, haystack: {:?}", + t.needle, t.haystack, + ); + } + } + } + } + } + } + + /// Set the implementation for forward substring search. + /// + /// If the closure returns `None`, then it is assumed that the given + /// test cannot be applied to the particular implementation and it is + /// skipped. For example, if a particular implementation only supports + /// needles or haystacks for some minimum length. + /// + /// If this is not set, then forward substring search is not tested. + pub(crate) fn fwd( + mut self, + search: impl FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static, + ) -> Runner { + self.fwd = Some(Box::new(search)); + self + } + + /// Set the implementation for reverse substring search. + /// + /// If the closure returns `None`, then it is assumed that the given + /// test cannot be applied to the particular implementation and it is + /// skipped. For example, if a particular implementation only supports + /// needles or haystacks for some minimum length. + /// + /// If this is not set, then reverse substring search is not tested. + pub(crate) fn rev( + mut self, + search: impl FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static, + ) -> Runner { + self.rev = Some(Box::new(search)); + self + } +} + +/// A single substring test for forward and reverse searches. +#[derive(Clone, Debug)] +struct Test { + needle: String, + haystack: String, + fwd: Option<usize>, + rev: Option<usize>, +} + +/// A single substring test for forward and reverse searches. +/// +/// Each seed is valid on its own, but it also serves as a starting point +/// to generate more tests. Namely, we pad out the haystacks with other +/// characters so that we get more complete coverage. This is especially useful +/// for testing vector algorithms that tend to have weird special cases for +/// alignment and loop unrolling. +/// +/// Padding works by assuming certain characters never otherwise appear in a +/// needle or a haystack. Neither should contain a `#` character. +#[derive(Clone, Copy, Debug)] +struct Seed { + needle: &'static str, + haystack: &'static str, + fwd: Option<usize>, + rev: Option<usize>, +} + +impl Seed { + const MAX_PAD: usize = 34; + + const fn new( + needle: &'static str, + haystack: &'static str, + fwd: Option<usize>, + rev: Option<usize>, + ) -> Seed { + Seed { needle, haystack, fwd, rev } + } + + fn generate(self) -> impl Iterator<Item = Test> { + assert!(!self.needle.contains('#'), "needle must not contain '#'"); + assert!(!self.haystack.contains('#'), "haystack must not contain '#'"); + (0..=Seed::MAX_PAD) + // Generate tests for padding at the beginning of haystack. + .map(move |pad| { + let needle = self.needle.to_string(); + let prefix = "#".repeat(pad); + let haystack = format!("{}{}", prefix, self.haystack); + let fwd = if needle.is_empty() { + Some(0) + } else { + self.fwd.map(|i| pad + i) + }; + let rev = if needle.is_empty() { + Some(haystack.len()) + } else { + self.rev.map(|i| pad + i) + }; + Test { needle, haystack, fwd, rev } + }) + // Generate tests for padding at the end of haystack. + .chain((1..=Seed::MAX_PAD).map(move |pad| { + let needle = self.needle.to_string(); + let suffix = "#".repeat(pad); + let haystack = format!("{}{}", self.haystack, suffix); + let fwd = if needle.is_empty() { Some(0) } else { self.fwd }; + let rev = if needle.is_empty() { + Some(haystack.len()) + } else { + self.rev + }; + Test { needle, haystack, fwd, rev } + })) + } +} diff --git a/vendor/memchr/src/tests/substring/naive.rs b/vendor/memchr/src/tests/substring/naive.rs new file mode 100644 index 000000000..1bc600984 --- /dev/null +++ b/vendor/memchr/src/tests/substring/naive.rs @@ -0,0 +1,45 @@ +/*! +This module defines "naive" implementations of substring search. + +These are sometimes useful to compare with "real" substring implementations. +The idea is that they are so simple that they are unlikely to be incorrect. +*/ + +/// Naively search forwards for the given needle in the given haystack. +pub(crate) fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> { + let end = haystack.len().checked_sub(needle.len()).map_or(0, |i| i + 1); + for i in 0..end { + if needle == &haystack[i..i + needle.len()] { + return Some(i); + } + } + None +} + +/// Naively search in reverse for the given needle in the given haystack. +pub(crate) fn rfind(haystack: &[u8], needle: &[u8]) -> Option<usize> { + let end = haystack.len().checked_sub(needle.len()).map_or(0, |i| i + 1); + for i in (0..end).rev() { + if needle == &haystack[i..i + needle.len()] { + return Some(i); + } + } + None +} + +#[cfg(test)] +mod tests { + use crate::tests::substring; + + use super::*; + + #[test] + fn forward() { + substring::Runner::new().fwd(|h, n| Some(find(h, n))).run() + } + + #[test] + fn reverse() { + substring::Runner::new().rev(|h, n| Some(rfind(h, n))).run() + } +} diff --git a/vendor/memchr/src/tests/substring/prop.rs b/vendor/memchr/src/tests/substring/prop.rs new file mode 100644 index 000000000..a8352ec74 --- /dev/null +++ b/vendor/memchr/src/tests/substring/prop.rs @@ -0,0 +1,126 @@ +/*! +This module defines a few quickcheck properties for substring search. + +It also provides a forward and reverse macro for conveniently defining +quickcheck tests that run these properties over any substring search +implementation. +*/ + +use crate::tests::substring::naive; + +/// $fwd is a `impl FnMut(haystack, needle) -> Option<Option<usize>>`. When the +/// routine returns `None`, then it's skipped, which is useful for substring +/// implementations that don't work for all inputs. +#[macro_export] +macro_rules! define_substring_forward_quickcheck { + ($fwd:expr) => { + #[cfg(not(miri))] + quickcheck::quickcheck! { + fn qc_fwd_prefix_is_substring(bs: alloc::vec::Vec<u8>) -> bool { + crate::tests::substring::prop::prefix_is_substring(&bs, $fwd) + } + + fn qc_fwd_suffix_is_substring(bs: alloc::vec::Vec<u8>) -> bool { + crate::tests::substring::prop::suffix_is_substring(&bs, $fwd) + } + + fn qc_fwd_matches_naive( + haystack: alloc::vec::Vec<u8>, + needle: alloc::vec::Vec<u8> + ) -> bool { + crate::tests::substring::prop::same_as_naive( + false, + &haystack, + &needle, + $fwd, + ) + } + } + }; +} + +/// $rev is a `impl FnMut(haystack, needle) -> Option<Option<usize>>`. When the +/// routine returns `None`, then it's skipped, which is useful for substring +/// implementations that don't work for all inputs. +#[macro_export] +macro_rules! define_substring_reverse_quickcheck { + ($rev:expr) => { + #[cfg(not(miri))] + quickcheck::quickcheck! { + fn qc_rev_prefix_is_substring(bs: alloc::vec::Vec<u8>) -> bool { + crate::tests::substring::prop::prefix_is_substring(&bs, $rev) + } + + fn qc_rev_suffix_is_substring(bs: alloc::vec::Vec<u8>) -> bool { + crate::tests::substring::prop::suffix_is_substring(&bs, $rev) + } + + fn qc_rev_matches_naive( + haystack: alloc::vec::Vec<u8>, + needle: alloc::vec::Vec<u8> + ) -> bool { + crate::tests::substring::prop::same_as_naive( + true, + &haystack, + &needle, + $rev, + ) + } + } + }; +} + +/// Check that every prefix of the given byte string is a substring. +pub(crate) fn prefix_is_substring( + bs: &[u8], + mut search: impl FnMut(&[u8], &[u8]) -> Option<Option<usize>>, +) -> bool { + for i in 0..bs.len().saturating_sub(1) { + let prefix = &bs[..i]; + let result = match search(bs, prefix) { + None => continue, + Some(result) => result, + }; + if !result.is_some() { + return false; + } + } + true +} + +/// Check that every suffix of the given byte string is a substring. +pub(crate) fn suffix_is_substring( + bs: &[u8], + mut search: impl FnMut(&[u8], &[u8]) -> Option<Option<usize>>, +) -> bool { + for i in 0..bs.len().saturating_sub(1) { + let suffix = &bs[i..]; + let result = match search(bs, suffix) { + None => continue, + Some(result) => result, + }; + if !result.is_some() { + return false; + } + } + true +} + +/// Check that naive substring search matches the result of the given search +/// algorithm. +pub(crate) fn same_as_naive( + reverse: bool, + haystack: &[u8], + needle: &[u8], + mut search: impl FnMut(&[u8], &[u8]) -> Option<Option<usize>>, +) -> bool { + let result = match search(haystack, needle) { + None => return true, + Some(result) => result, + }; + if reverse { + result == naive::rfind(haystack, needle) + } else { + result == naive::find(haystack, needle) + } +} |