diff options
Diffstat (limited to 'vendor/memchr/src/memmem/mod.rs')
-rw-r--r-- | vendor/memchr/src/memmem/mod.rs | 830 |
1 files changed, 123 insertions, 707 deletions
diff --git a/vendor/memchr/src/memmem/mod.rs b/vendor/memchr/src/memmem/mod.rs index e1cd1aec7..1a2a7e10c 100644 --- a/vendor/memchr/src/memmem/mod.rs +++ b/vendor/memchr/src/memmem/mod.rs @@ -66,99 +66,25 @@ assert_eq!(None, finder.find(b"quux baz bar")); ``` */ -pub use self::prefilter::Prefilter; +pub use crate::memmem::searcher::PrefilterConfig as Prefilter; + +// This is exported here for use in the crate::arch::all::twoway +// implementation. This is essentially an abstraction breaker. Namely, the +// public API of twoway doesn't support providing a prefilter, but its crate +// internal API does. The main reason for this is that I didn't want to do the +// API design required to support it without a concrete use case. +pub(crate) use crate::memmem::searcher::Pre; use crate::{ - cow::CowBytes, - memmem::{ - prefilter::{Pre, PrefilterFn, PrefilterState}, - rabinkarp::NeedleHash, - rarebytes::RareNeedleBytes, + arch::all::{ + packedpair::{DefaultFrequencyRank, HeuristicFrequencyRank}, + rabinkarp, }, + cow::CowBytes, + memmem::searcher::{PrefilterState, Searcher, SearcherRev}, }; -/// Defines a suite of quickcheck properties for forward and reverse -/// substring searching. -/// -/// This is defined in this specific spot so that it can be used freely among -/// the different substring search implementations. I couldn't be bothered to -/// fight with the macro-visibility rules enough to figure out how to stuff it -/// somewhere more convenient. -#[cfg(all(test, feature = "std"))] -macro_rules! define_memmem_quickcheck_tests { - ($fwd:expr, $rev:expr) => { - use crate::memmem::proptests; - - quickcheck::quickcheck! { - fn qc_fwd_prefix_is_substring(bs: Vec<u8>) -> bool { - proptests::prefix_is_substring(false, &bs, $fwd) - } - - fn qc_fwd_suffix_is_substring(bs: Vec<u8>) -> bool { - proptests::suffix_is_substring(false, &bs, $fwd) - } - - fn qc_fwd_matches_naive( - haystack: Vec<u8>, - needle: Vec<u8> - ) -> bool { - proptests::matches_naive(false, &haystack, &needle, $fwd) - } - - fn qc_rev_prefix_is_substring(bs: Vec<u8>) -> bool { - proptests::prefix_is_substring(true, &bs, $rev) - } - - fn qc_rev_suffix_is_substring(bs: Vec<u8>) -> bool { - proptests::suffix_is_substring(true, &bs, $rev) - } - - fn qc_rev_matches_naive( - haystack: Vec<u8>, - needle: Vec<u8> - ) -> bool { - proptests::matches_naive(true, &haystack, &needle, $rev) - } - } - }; -} - -/// Defines a suite of "simple" hand-written tests for a substring -/// implementation. -/// -/// This is defined here for the same reason that -/// define_memmem_quickcheck_tests is defined here. -#[cfg(test)] -macro_rules! define_memmem_simple_tests { - ($fwd:expr, $rev:expr) => { - use crate::memmem::testsimples; - - #[test] - fn simple_forward() { - testsimples::run_search_tests_fwd($fwd); - } - - #[test] - fn simple_reverse() { - testsimples::run_search_tests_rev($rev); - } - }; -} - -mod byte_frequencies; -#[cfg(memchr_runtime_simd)] -mod genericsimd; -mod prefilter; -mod rabinkarp; -mod rarebytes; -mod twoway; -mod util; -#[cfg(memchr_runtime_simd)] -mod vector; -#[cfg(all(memchr_runtime_wasm128))] -mod wasm; -#[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] -mod x86; +mod searcher; /// Returns an iterator over all non-overlapping occurrences of a substring in /// a haystack. @@ -258,7 +184,7 @@ pub fn rfind_iter<'h, 'n, N: 'n + ?Sized + AsRef<[u8]>>( #[inline] pub fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> { if haystack.len() < 64 { - rabinkarp::find(haystack, needle) + rabinkarp::Finder::new(needle).find(haystack, needle) } else { Finder::new(needle).find(haystack) } @@ -295,7 +221,7 @@ pub fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> { #[inline] pub fn rfind(haystack: &[u8], needle: &[u8]) -> Option<usize> { if haystack.len() < 64 { - rabinkarp::rfind(haystack, needle) + rabinkarp::FinderRev::new(needle).rfind(haystack, needle) } else { FinderRev::new(needle).rfind(haystack) } @@ -321,7 +247,7 @@ impl<'h, 'n> FindIter<'h, 'n> { haystack: &'h [u8], finder: Finder<'n>, ) -> FindIter<'h, 'n> { - let prestate = finder.searcher.prefilter_state(); + let prestate = PrefilterState::new(); FindIter { haystack, prestate, finder, pos: 0 } } @@ -331,8 +257,8 @@ impl<'h, 'n> FindIter<'h, 'n> { /// If this is already an owned iterator, then this is a no-op. Otherwise, /// this copies the needle. /// - /// This is only available when the `std` feature is enabled. - #[cfg(feature = "std")] + /// This is only available when the `alloc` feature is enabled. + #[cfg(feature = "alloc")] #[inline] pub fn into_owned(self) -> FindIter<'h, 'static> { FindIter { @@ -348,20 +274,32 @@ impl<'h, 'n> Iterator for FindIter<'h, 'n> { type Item = usize; fn next(&mut self) -> Option<usize> { - if self.pos > self.haystack.len() { - return None; - } - let result = self - .finder - .searcher - .find(&mut self.prestate, &self.haystack[self.pos..]); - match result { - None => None, - Some(i) => { - let pos = self.pos + i; - self.pos = pos + core::cmp::max(1, self.finder.needle().len()); - Some(pos) - } + let needle = self.finder.needle(); + let haystack = self.haystack.get(self.pos..)?; + let idx = + self.finder.searcher.find(&mut self.prestate, haystack, needle)?; + + let pos = self.pos + idx; + self.pos = pos + needle.len().max(1); + + Some(pos) + } + + fn size_hint(&self) -> (usize, Option<usize>) { + // The largest possible number of non-overlapping matches is the + // quotient of the haystack and the needle (or the length of the + // haystack, if the needle is empty) + match self.haystack.len().checked_sub(self.pos) { + None => (0, Some(0)), + Some(haystack_len) => match self.finder.needle().len() { + // Empty needles always succeed and match at every point + // (including the very end) + 0 => ( + haystack_len.saturating_add(1), + haystack_len.checked_add(1), + ), + needle_len => (0, Some(haystack_len / needle_len)), + }, } } } @@ -398,7 +336,7 @@ impl<'h, 'n> FindRevIter<'h, 'n> { /// this copies the needle. /// /// This is only available when the `std` feature is enabled. - #[cfg(feature = "std")] + #[cfg(feature = "alloc")] #[inline] pub fn into_owned(self) -> FindRevIter<'h, 'static> { FindRevIter { @@ -447,7 +385,8 @@ impl<'h, 'n> Iterator for FindRevIter<'h, 'n> { /// the lifetime of its needle. #[derive(Clone, Debug)] pub struct Finder<'n> { - searcher: Searcher<'n>, + needle: CowBytes<'n>, + searcher: Searcher, } impl<'n> Finder<'n> { @@ -481,8 +420,11 @@ impl<'n> Finder<'n> { /// assert_eq!(Some(4), Finder::new("bar").find(haystack)); /// assert_eq!(None, Finder::new("quux").find(haystack)); /// ``` + #[inline] pub fn find(&self, haystack: &[u8]) -> Option<usize> { - self.searcher.find(&mut self.searcher.prefilter_state(), haystack) + let mut prestate = PrefilterState::new(); + let needle = self.needle.as_slice(); + self.searcher.find(&mut prestate, haystack, needle) } /// Returns an iterator over all occurrences of a substring in a haystack. @@ -525,11 +467,14 @@ impl<'n> Finder<'n> { /// If this is already an owned finder, then this is a no-op. Otherwise, /// this copies the needle. /// - /// This is only available when the `std` feature is enabled. - #[cfg(feature = "std")] + /// This is only available when the `alloc` feature is enabled. + #[cfg(feature = "alloc")] #[inline] pub fn into_owned(self) -> Finder<'static> { - Finder { searcher: self.searcher.into_owned() } + Finder { + needle: self.needle.into_owned(), + searcher: self.searcher.clone(), + } } /// Convert this finder into its borrowed variant. @@ -544,7 +489,10 @@ impl<'n> Finder<'n> { /// shorter of the two. #[inline] pub fn as_ref(&self) -> Finder<'_> { - Finder { searcher: self.searcher.as_ref() } + Finder { + needle: CowBytes::new(self.needle()), + searcher: self.searcher.clone(), + } } /// Returns the needle that this finder searches for. @@ -555,7 +503,7 @@ impl<'n> Finder<'n> { /// needle returned must necessarily be the shorter of the two. #[inline] pub fn needle(&self) -> &[u8] { - self.searcher.needle() + self.needle.as_slice() } } @@ -574,7 +522,8 @@ impl<'n> Finder<'n> { /// the lifetime of its needle. #[derive(Clone, Debug)] pub struct FinderRev<'n> { - searcher: SearcherRev<'n>, + needle: CowBytes<'n>, + searcher: SearcherRev, } impl<'n> FinderRev<'n> { @@ -612,7 +561,7 @@ impl<'n> FinderRev<'n> { /// assert_eq!(None, FinderRev::new("quux").rfind(haystack)); /// ``` pub fn rfind<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> { - self.searcher.rfind(haystack.as_ref()) + self.searcher.rfind(haystack.as_ref(), self.needle.as_slice()) } /// Returns a reverse iterator over all occurrences of a substring in a @@ -657,10 +606,13 @@ impl<'n> FinderRev<'n> { /// this copies the needle. /// /// This is only available when the `std` feature is enabled. - #[cfg(feature = "std")] + #[cfg(feature = "alloc")] #[inline] pub fn into_owned(self) -> FinderRev<'static> { - FinderRev { searcher: self.searcher.into_owned() } + FinderRev { + needle: self.needle.into_owned(), + searcher: self.searcher.clone(), + } } /// Convert this finder into its borrowed variant. @@ -675,7 +627,10 @@ impl<'n> FinderRev<'n> { /// shorter of the two. #[inline] pub fn as_ref(&self) -> FinderRev<'_> { - FinderRev { searcher: self.searcher.as_ref() } + FinderRev { + needle: CowBytes::new(self.needle()), + searcher: self.searcher.clone(), + } } /// Returns the needle that this finder searches for. @@ -686,7 +641,7 @@ impl<'n> FinderRev<'n> { /// needle returned must necessarily be the shorter of the two. #[inline] pub fn needle(&self) -> &[u8] { - self.searcher.needle() + self.needle.as_slice() } } @@ -697,7 +652,7 @@ impl<'n> FinderRev<'n> { /// heuristic prefilters used to speed up certain searches. #[derive(Clone, Debug, Default)] pub struct FinderBuilder { - config: SearcherConfig, + prefilter: Prefilter, } impl FinderBuilder { @@ -712,7 +667,26 @@ impl FinderBuilder { &self, needle: &'n B, ) -> Finder<'n> { - Finder { searcher: Searcher::new(self.config, needle.as_ref()) } + self.build_forward_with_ranker(DefaultFrequencyRank, needle) + } + + /// Build a forward finder using the given needle and a custom heuristic for + /// determining the frequency of a given byte in the dataset. + /// See [`HeuristicFrequencyRank`] for more details. + pub fn build_forward_with_ranker< + 'n, + R: HeuristicFrequencyRank, + B: ?Sized + AsRef<[u8]>, + >( + &self, + ranker: R, + needle: &'n B, + ) -> Finder<'n> { + let needle = needle.as_ref(); + Finder { + needle: CowBytes::new(needle), + searcher: Searcher::new(self.prefilter, ranker, needle), + } } /// Build a reverse finder using the given needle from the current @@ -721,7 +695,11 @@ impl FinderBuilder { &self, needle: &'n B, ) -> FinderRev<'n> { - FinderRev { searcher: SearcherRev::new(needle.as_ref()) } + let needle = needle.as_ref(); + FinderRev { + needle: CowBytes::new(needle), + searcher: SearcherRev::new(needle), + } } /// Configure the prefilter setting for the finder. @@ -729,593 +707,31 @@ impl FinderBuilder { /// See the documentation for [`Prefilter`] for more discussion on why /// you might want to configure this. pub fn prefilter(&mut self, prefilter: Prefilter) -> &mut FinderBuilder { - self.config.prefilter = prefilter; + self.prefilter = prefilter; self } } -/// The internal implementation of a forward substring searcher. -/// -/// The reality is that this is a "meta" searcher. Namely, depending on a -/// variety of parameters (CPU support, target, needle size, haystack size and -/// even dynamic properties such as prefilter effectiveness), the actual -/// algorithm employed to do substring search may change. -#[derive(Clone, Debug)] -struct Searcher<'n> { - /// The actual needle we're searching for. - /// - /// A CowBytes is like a Cow<[u8]>, except in no_std environments, it is - /// specialized to a single variant (the borrowed form). - needle: CowBytes<'n>, - /// A collection of facts computed on the needle that are useful for more - /// than one substring search algorithm. - ninfo: NeedleInfo, - /// A prefilter function, if it was deemed appropriate. - /// - /// Some substring search implementations (like Two-Way) benefit greatly - /// if we can quickly find candidate starting positions for a match. - prefn: Option<PrefilterFn>, - /// The actual substring implementation in use. - kind: SearcherKind, -} - -/// A collection of facts computed about a search needle. -/// -/// We group these things together because it's useful to be able to hand them -/// to prefilters or substring algorithms that want them. -#[derive(Clone, Copy, Debug)] -pub(crate) struct NeedleInfo { - /// The offsets of "rare" bytes detected in the needle. - /// - /// This is meant to be a heuristic in order to maximize the effectiveness - /// of vectorized code. Namely, vectorized code tends to focus on only - /// one or two bytes. If we pick bytes from the needle that occur - /// infrequently, then more time will be spent in the vectorized code and - /// will likely make the overall search (much) faster. - /// - /// Of course, this is only a heuristic based on a background frequency - /// distribution of bytes. But it tends to work very well in practice. - pub(crate) rarebytes: RareNeedleBytes, - /// A Rabin-Karp hash of the needle. - /// - /// This is store here instead of in a more specific Rabin-Karp search - /// since Rabin-Karp may be used even if another SearchKind corresponds - /// to some other search implementation. e.g., If measurements suggest RK - /// is faster in some cases or if a search implementation can't handle - /// particularly small haystack. (Moreover, we cannot use RK *generally*, - /// since its worst case time is multiplicative. Instead, we only use it - /// some small haystacks, where "small" is a constant.) - pub(crate) nhash: NeedleHash, -} - -/// Configuration for substring search. -#[derive(Clone, Copy, Debug, Default)] -struct SearcherConfig { - /// This permits changing the behavior of the prefilter, since it can have - /// a variable impact on performance. - prefilter: Prefilter, -} - -#[derive(Clone, Debug)] -enum SearcherKind { - /// A special case for empty needles. An empty needle always matches, even - /// in an empty haystack. - Empty, - /// This is used whenever the needle is a single byte. In this case, we - /// always use memchr. - OneByte(u8), - /// Two-Way is the generic work horse and is what provides our additive - /// linear time guarantee. In general, it's used when the needle is bigger - /// than 8 bytes or so. - TwoWay(twoway::Forward), - #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] - GenericSIMD128(x86::sse::Forward), - #[cfg(memchr_runtime_wasm128)] - GenericSIMD128(wasm::Forward), - #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] - GenericSIMD256(x86::avx::Forward), -} - -impl<'n> Searcher<'n> { - fn new(config: SearcherConfig, needle: &'n [u8]) -> Searcher<'n> { - use self::SearcherKind::*; - - let ninfo = NeedleInfo::new(needle); - let mk = |kind: SearcherKind| { - let prefn = prefilter::forward( - &config.prefilter, - &ninfo.rarebytes, - needle, - ); - Searcher { needle: CowBytes::new(needle), ninfo, prefn, kind } - }; - if needle.len() == 0 { - return mk(Empty); - } - if needle.len() == 1 { - return mk(OneByte(needle[0])); - } - #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] - { - if let Some(fwd) = x86::avx::Forward::new(&ninfo, needle) { - return mk(GenericSIMD256(fwd)); - } else if let Some(fwd) = x86::sse::Forward::new(&ninfo, needle) { - return mk(GenericSIMD128(fwd)); - } - } - #[cfg(all(target_arch = "wasm32", memchr_runtime_simd))] - { - if let Some(fwd) = wasm::Forward::new(&ninfo, needle) { - return mk(GenericSIMD128(fwd)); - } - } - - mk(TwoWay(twoway::Forward::new(needle))) - } - - /// Return a fresh prefilter state that can be used with this searcher. - /// A prefilter state is used to track the effectiveness of a searcher's - /// prefilter for speeding up searches. Therefore, the prefilter state - /// should generally be reused on subsequent searches (such as in an - /// iterator). For searches on a different haystack, then a new prefilter - /// state should be used. - /// - /// This always initializes a valid (but possibly inert) prefilter state - /// even if this searcher does not have a prefilter enabled. - fn prefilter_state(&self) -> PrefilterState { - if self.prefn.is_none() { - PrefilterState::inert() - } else { - PrefilterState::new() - } - } - - fn needle(&self) -> &[u8] { - self.needle.as_slice() - } - - fn as_ref(&self) -> Searcher<'_> { - use self::SearcherKind::*; - - let kind = match self.kind { - Empty => Empty, - OneByte(b) => OneByte(b), - TwoWay(tw) => TwoWay(tw), - #[cfg(all(not(miri), memchr_runtime_simd))] - GenericSIMD128(gs) => GenericSIMD128(gs), - #[cfg(all( - not(miri), - target_arch = "x86_64", - memchr_runtime_simd - ))] - GenericSIMD256(gs) => GenericSIMD256(gs), - }; - Searcher { - needle: CowBytes::new(self.needle()), - ninfo: self.ninfo, - prefn: self.prefn, - kind, - } - } - - #[cfg(feature = "std")] - fn into_owned(self) -> Searcher<'static> { - use self::SearcherKind::*; - - let kind = match self.kind { - Empty => Empty, - OneByte(b) => OneByte(b), - TwoWay(tw) => TwoWay(tw), - #[cfg(all(not(miri), memchr_runtime_simd))] - GenericSIMD128(gs) => GenericSIMD128(gs), - #[cfg(all( - not(miri), - target_arch = "x86_64", - memchr_runtime_simd - ))] - GenericSIMD256(gs) => GenericSIMD256(gs), - }; - Searcher { - needle: self.needle.into_owned(), - ninfo: self.ninfo, - prefn: self.prefn, - kind, - } - } - - /// Implements forward substring search by selecting the implementation - /// chosen at construction and executing it on the given haystack with the - /// prefilter's current state of effectiveness. - #[inline(always)] - fn find( - &self, - state: &mut PrefilterState, - haystack: &[u8], - ) -> Option<usize> { - use self::SearcherKind::*; - - let needle = self.needle(); - if haystack.len() < needle.len() { - return None; - } - match self.kind { - Empty => Some(0), - OneByte(b) => crate::memchr(b, haystack), - TwoWay(ref tw) => { - // For very short haystacks (e.g., where the prefilter probably - // can't run), it's faster to just run RK. - if rabinkarp::is_fast(haystack, needle) { - rabinkarp::find_with(&self.ninfo.nhash, haystack, needle) - } else { - self.find_tw(tw, state, haystack, needle) - } - } - #[cfg(all(not(miri), memchr_runtime_simd))] - GenericSIMD128(ref gs) => { - // The SIMD matcher can't handle particularly short haystacks, - // so we fall back to RK in these cases. - if haystack.len() < gs.min_haystack_len() { - rabinkarp::find_with(&self.ninfo.nhash, haystack, needle) - } else { - gs.find(haystack, needle) - } - } - #[cfg(all( - not(miri), - target_arch = "x86_64", - memchr_runtime_simd - ))] - GenericSIMD256(ref gs) => { - // The SIMD matcher can't handle particularly short haystacks, - // so we fall back to RK in these cases. - if haystack.len() < gs.min_haystack_len() { - rabinkarp::find_with(&self.ninfo.nhash, haystack, needle) - } else { - gs.find(haystack, needle) - } - } - } - } - - /// Calls Two-Way on the given haystack/needle. - /// - /// This is marked as unlineable since it seems to have a better overall - /// effect on benchmarks. However, this is one of those cases where - /// inlining it results an improvement in other benchmarks too, so I - /// suspect we just don't have enough data yet to make the right call here. - /// - /// I suspect the main problem is that this function contains two different - /// inlined copies of Two-Way: one with and one without prefilters enabled. - #[inline(never)] - fn find_tw( - &self, - tw: &twoway::Forward, - state: &mut PrefilterState, - haystack: &[u8], - needle: &[u8], - ) -> Option<usize> { - if let Some(prefn) = self.prefn { - // We used to look at the length of a haystack here. That is, if - // it was too small, then don't bother with the prefilter. But two - // things changed: the prefilter falls back to memchr for small - // haystacks, and, above, Rabin-Karp is employed for tiny haystacks - // anyway. - if state.is_effective() { - let mut pre = Pre { state, prefn, ninfo: &self.ninfo }; - return tw.find(Some(&mut pre), haystack, needle); - } - } - tw.find(None, haystack, needle) - } -} - -impl NeedleInfo { - pub(crate) fn new(needle: &[u8]) -> NeedleInfo { - NeedleInfo { - rarebytes: RareNeedleBytes::forward(needle), - nhash: NeedleHash::forward(needle), - } - } -} - -/// The internal implementation of a reverse substring searcher. -/// -/// See the forward searcher docs for more details. Currently, the reverse -/// searcher is considerably simpler since it lacks prefilter support. This -/// was done because it adds a lot of code, and more surface area to test. And -/// in particular, it's not clear whether a prefilter on reverse searching is -/// worth it. (If you have a compelling use case, please file an issue!) -#[derive(Clone, Debug)] -struct SearcherRev<'n> { - /// The actual needle we're searching for. - needle: CowBytes<'n>, - /// A Rabin-Karp hash of the needle. - nhash: NeedleHash, - /// The actual substring implementation in use. - kind: SearcherRevKind, -} - -#[derive(Clone, Debug)] -enum SearcherRevKind { - /// A special case for empty needles. An empty needle always matches, even - /// in an empty haystack. - Empty, - /// This is used whenever the needle is a single byte. In this case, we - /// always use memchr. - OneByte(u8), - /// Two-Way is the generic work horse and is what provides our additive - /// linear time guarantee. In general, it's used when the needle is bigger - /// than 8 bytes or so. - TwoWay(twoway::Reverse), -} - -impl<'n> SearcherRev<'n> { - fn new(needle: &'n [u8]) -> SearcherRev<'n> { - use self::SearcherRevKind::*; - - let kind = if needle.len() == 0 { - Empty - } else if needle.len() == 1 { - OneByte(needle[0]) - } else { - TwoWay(twoway::Reverse::new(needle)) - }; - SearcherRev { - needle: CowBytes::new(needle), - nhash: NeedleHash::reverse(needle), - kind, - } - } - - fn needle(&self) -> &[u8] { - self.needle.as_slice() - } - - fn as_ref(&self) -> SearcherRev<'_> { - use self::SearcherRevKind::*; - - let kind = match self.kind { - Empty => Empty, - OneByte(b) => OneByte(b), - TwoWay(tw) => TwoWay(tw), - }; - SearcherRev { - needle: CowBytes::new(self.needle()), - nhash: self.nhash, - kind, - } - } - - #[cfg(feature = "std")] - fn into_owned(self) -> SearcherRev<'static> { - use self::SearcherRevKind::*; - - let kind = match self.kind { - Empty => Empty, - OneByte(b) => OneByte(b), - TwoWay(tw) => TwoWay(tw), - }; - SearcherRev { - needle: self.needle.into_owned(), - nhash: self.nhash, - kind, - } - } - - /// Implements reverse substring search by selecting the implementation - /// chosen at construction and executing it on the given haystack with the - /// prefilter's current state of effectiveness. - #[inline(always)] - fn rfind(&self, haystack: &[u8]) -> Option<usize> { - use self::SearcherRevKind::*; - - let needle = self.needle(); - if haystack.len() < needle.len() { - return None; - } - match self.kind { - Empty => Some(haystack.len()), - OneByte(b) => crate::memrchr(b, haystack), - TwoWay(ref tw) => { - // For very short haystacks (e.g., where the prefilter probably - // can't run), it's faster to just run RK. - if rabinkarp::is_fast(haystack, needle) { - rabinkarp::rfind_with(&self.nhash, haystack, needle) - } else { - tw.rfind(haystack, needle) - } - } - } - } -} - -/// This module defines some generic quickcheck properties useful for testing -/// any substring search algorithm. It also runs those properties for the -/// top-level public API memmem routines. (The properties are also used to -/// test various substring search implementations more granularly elsewhere as -/// well.) -#[cfg(all(test, feature = "std", not(miri)))] -mod proptests { - // N.B. This defines the quickcheck tests using the properties defined - // below. Because of macro-visibility weirdness, the actual macro is - // defined at the top of this file. - define_memmem_quickcheck_tests!(super::find, super::rfind); - - /// Check that every prefix of the given byte string is a substring. - pub(crate) fn prefix_is_substring( - reverse: bool, - bs: &[u8], - mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>, - ) -> bool { - if bs.is_empty() { - return true; - } - for i in 0..(bs.len() - 1) { - let prefix = &bs[..i]; - if reverse { - assert_eq!(naive_rfind(bs, prefix), search(bs, prefix)); - } else { - assert_eq!(naive_find(bs, prefix), search(bs, prefix)); - } - } - true - } - - /// Check that every suffix of the given byte string is a substring. - pub(crate) fn suffix_is_substring( - reverse: bool, - bs: &[u8], - mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>, - ) -> bool { - if bs.is_empty() { - return true; - } - for i in 0..(bs.len() - 1) { - let suffix = &bs[i..]; - if reverse { - assert_eq!(naive_rfind(bs, suffix), search(bs, suffix)); - } else { - assert_eq!(naive_find(bs, suffix), search(bs, suffix)); - } - } - true - } - - /// Check that naive substring search matches the result of the given search - /// algorithm. - pub(crate) fn matches_naive( - reverse: bool, - haystack: &[u8], - needle: &[u8], - mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>, - ) -> bool { - if reverse { - naive_rfind(haystack, needle) == search(haystack, needle) - } else { - naive_find(haystack, needle) == search(haystack, needle) - } - } - - /// Naively search forwards for the given needle in the given haystack. - fn naive_find(haystack: &[u8], needle: &[u8]) -> Option<usize> { - if needle.is_empty() { - return Some(0); - } else if haystack.len() < needle.len() { - return None; - } - for i in 0..(haystack.len() - needle.len() + 1) { - if needle == &haystack[i..i + needle.len()] { - return Some(i); - } - } - None - } - - /// Naively search in reverse for the given needle in the given haystack. - fn naive_rfind(haystack: &[u8], needle: &[u8]) -> Option<usize> { - if needle.is_empty() { - return Some(haystack.len()); - } else if haystack.len() < needle.len() { - return None; - } - for i in (0..(haystack.len() - needle.len() + 1)).rev() { - if needle == &haystack[i..i + needle.len()] { - return Some(i); - } - } - None - } -} - -/// This module defines some hand-written "simple" substring tests. It -/// also provides routines for easily running them on any substring search -/// implementation. #[cfg(test)] -mod testsimples { - define_memmem_simple_tests!(super::find, super::rfind); - - /// Each test is a (needle, haystack, expected_fwd, expected_rev) tuple. - type SearchTest = - (&'static str, &'static str, Option<usize>, Option<usize>); - - const SEARCH_TESTS: &'static [SearchTest] = &[ - ("", "", Some(0), Some(0)), - ("", "a", Some(0), Some(1)), - ("", "ab", Some(0), Some(2)), - ("", "abc", Some(0), Some(3)), - ("a", "", None, None), - ("a", "a", Some(0), Some(0)), - ("a", "aa", Some(0), Some(1)), - ("a", "ba", Some(1), Some(1)), - ("a", "bba", Some(2), Some(2)), - ("a", "bbba", Some(3), Some(3)), - ("a", "bbbab", Some(3), Some(3)), - ("a", "bbbabb", Some(3), Some(3)), - ("a", "bbbabbb", Some(3), Some(3)), - ("a", "bbbbbb", None, None), - ("ab", "", None, None), - ("ab", "a", None, None), - ("ab", "b", None, None), - ("ab", "ab", Some(0), Some(0)), - ("ab", "aab", Some(1), Some(1)), - ("ab", "aaab", Some(2), Some(2)), - ("ab", "abaab", Some(0), Some(3)), - ("ab", "baaab", Some(3), Some(3)), - ("ab", "acb", None, None), - ("ab", "abba", Some(0), Some(0)), - ("abc", "ab", None, None), - ("abc", "abc", Some(0), Some(0)), - ("abc", "abcz", Some(0), Some(0)), - ("abc", "abczz", Some(0), Some(0)), - ("abc", "zabc", Some(1), Some(1)), - ("abc", "zzabc", Some(2), Some(2)), - ("abc", "azbc", None, None), - ("abc", "abzc", None, None), - ("abczdef", "abczdefzzzzzzzzzzzzzzzzzzzz", Some(0), Some(0)), - ("abczdef", "zzzzzzzzzzzzzzzzzzzzabczdef", Some(20), Some(20)), - ("xyz", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaxyz", Some(32), Some(32)), - // Failures caught by quickcheck. - ("\u{0}\u{15}", "\u{0}\u{15}\u{15}\u{0}", Some(0), Some(0)), - ("\u{0}\u{1e}", "\u{1e}\u{0}", None, None), - ]; - - /// Run the substring search tests. `search` should be a closure that - /// accepts a haystack and a needle and returns the starting position - /// of the first occurrence of needle in the haystack, or `None` if one - /// doesn't exist. - pub(crate) fn run_search_tests_fwd( - mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>, - ) { - for &(needle, haystack, expected_fwd, _) in SEARCH_TESTS { - let (n, h) = (needle.as_bytes(), haystack.as_bytes()); - assert_eq!( - expected_fwd, - search(h, n), - "needle: {:?}, haystack: {:?}, expected: {:?}", - n, - h, - expected_fwd - ); - } - } - - /// Run the substring search tests. `search` should be a closure that - /// accepts a haystack and a needle and returns the starting position of - /// the last occurrence of needle in the haystack, or `None` if one doesn't - /// exist. - pub(crate) fn run_search_tests_rev( - mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>, - ) { - for &(needle, haystack, _, expected_rev) in SEARCH_TESTS { - let (n, h) = (needle.as_bytes(), haystack.as_bytes()); - assert_eq!( - expected_rev, - search(h, n), - "needle: {:?}, haystack: {:?}, expected: {:?}", - n, - h, - expected_rev - ); - } +mod tests { + use super::*; + + define_substring_forward_quickcheck!(|h, n| Some(Finder::new(n).find(h))); + define_substring_reverse_quickcheck!(|h, n| Some( + FinderRev::new(n).rfind(h) + )); + + #[test] + fn forward() { + crate::tests::substring::Runner::new() + .fwd(|h, n| Some(Finder::new(n).find(h))) + .run(); + } + + #[test] + fn reverse() { + crate::tests::substring::Runner::new() + .rev(|h, n| Some(FinderRev::new(n).rfind(h))) + .run(); } } |