use crate::arch::all::{
    packedpair::{HeuristicFrequencyRank, Pair},
    rabinkarp, twoway,
};

#[cfg(target_arch = "aarch64")]
use crate::arch::aarch64::neon::packedpair as neon;
#[cfg(target_arch = "wasm32")]
use crate::arch::wasm32::simd128::packedpair as simd128;
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
use crate::arch::x86_64::{
    avx2::packedpair as avx2,
    sse2::packedpair as sse2,
};

/// A "meta" substring searcher.
///
/// To a first approximation, this chooses what it believes to be the "best"
/// substring search implementation based on the needle at construction time.
/// Then, every call to `find` will execute that particular implementation. To
/// a second approximation, multiple substring search algorithms may be used,
/// depending on the haystack. For example, for supremely short haystacks,
/// Rabin-Karp is typically used.
///
/// See the documentation on `Prefilter` for an explanation of the dispatching
/// mechanism. The quick summary is that an enum has too much overhead and
/// we can't use dynamic dispatch via traits because we need to work in a
/// core-only environment. (Dynamic dispatch works in core-only, but you
/// need `&dyn Trait` and we really need a `Box<dyn Trait>` here. The latter
/// requires `alloc`.) So instead, we use a union and an appropriately paired
/// free function to read from the correct field on the union and execute the
/// chosen substring search implementation.
#[derive(Clone)]
pub(crate) struct Searcher {
    call: SearcherKindFn,
    kind: SearcherKind,
    rabinkarp: rabinkarp::Finder,
}
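// A minimal, self-contained sketch of the union + function pointer dispatch
// pattern described above, using a hypothetical two-field union. The idea is
// that the constructor-chosen function is the only code that reads the union,
// so it always reads the field that was actually initialized.
#[cfg(test)]
#[test]
fn example_union_fn_pointer_dispatch() {
    union Kind {
        byte: u8,
        #[allow(dead_code)]
        word: u16,
    }
    // A function of this type must be paired with the union field that was
    // actually written, mirroring `SearcherKindFn` below.
    type KindFn = unsafe fn(&Kind) -> u16;
    unsafe fn read_byte(kind: &Kind) -> u16 {
        // SAFETY: the caller set `kind.byte`.
        unsafe { u16::from(kind.byte) }
    }
    let (call, kind): (KindFn, Kind) = (read_byte, Kind { byte: 7 });
    // SAFETY: `call` was chosen to match the initialized `byte` field.
    assert_eq!(7, unsafe { call(&kind) });
}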
impl Searcher {
    /// Creates a new "meta" substring searcher that attempts to choose the
    /// best algorithm based on the needle, heuristics and what the current
    /// target supports.
    #[inline]
    pub(crate) fn new<R: HeuristicFrequencyRank>(
        prefilter: PrefilterConfig,
        ranker: R,
        needle: &[u8],
    ) -> Searcher {
        let rabinkarp = rabinkarp::Finder::new(needle);
        if needle.len() <= 1 {
            return if needle.is_empty() {
                trace!("building empty substring searcher");
                Searcher {
                    call: searcher_kind_empty,
                    kind: SearcherKind { empty: () },
                    rabinkarp,
                }
            } else {
                trace!("building one-byte substring searcher");
                debug_assert_eq!(1, needle.len());
                Searcher {
                    call: searcher_kind_one_byte,
                    kind: SearcherKind { one_byte: needle[0] },
                    rabinkarp,
                }
            };
        }
        let pair = match Pair::with_ranker(needle, &ranker) {
            Some(pair) => pair,
            None => return Searcher::twoway(needle, rabinkarp, None),
        };
        debug_assert_ne!(
            pair.index1(),
            pair.index2(),
            "pair offsets should not be equivalent"
        );
        #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
        {
            if let Some(pp) = avx2::Finder::with_pair(needle, pair) {
                if do_packed_search(needle) {
                    trace!("building x86_64 AVX2 substring searcher");
                    let kind = SearcherKind { avx2: pp };
                    Searcher { call: searcher_kind_avx2, kind, rabinkarp }
                } else if prefilter.is_none() {
                    Searcher::twoway(needle, rabinkarp, None)
                } else {
                    let prestrat = Prefilter::avx2(pp, needle);
                    Searcher::twoway(needle, rabinkarp, Some(prestrat))
                }
            } else if let Some(pp) = sse2::Finder::with_pair(needle, pair) {
                if do_packed_search(needle) {
                    trace!("building x86_64 SSE2 substring searcher");
                    let kind = SearcherKind { sse2: pp };
                    Searcher { call: searcher_kind_sse2, kind, rabinkarp }
                } else if prefilter.is_none() {
                    Searcher::twoway(needle, rabinkarp, None)
                } else {
                    let prestrat = Prefilter::sse2(pp, needle);
                    Searcher::twoway(needle, rabinkarp, Some(prestrat))
                }
            } else if prefilter.is_none() {
                Searcher::twoway(needle, rabinkarp, None)
            } else {
                // We're pretty unlikely to get to this point, but it is
                // possible to be running on x86_64 without SSE2. Namely, it's
                // really up to the OS whether it wants to support vector
                // registers or not.
                let prestrat = Prefilter::fallback(ranker, pair, needle);
                Searcher::twoway(needle, rabinkarp, prestrat)
            }
        }
        #[cfg(target_arch = "wasm32")]
        {
            if let Some(pp) = simd128::Finder::with_pair(needle, pair) {
                if do_packed_search(needle) {
                    trace!("building wasm32 simd128 substring searcher");
                    let kind = SearcherKind { simd128: pp };
                    Searcher { call: searcher_kind_simd128, kind, rabinkarp }
                } else if prefilter.is_none() {
                    Searcher::twoway(needle, rabinkarp, None)
                } else {
                    let prestrat = Prefilter::simd128(pp, needle);
                    Searcher::twoway(needle, rabinkarp, Some(prestrat))
                }
            } else if prefilter.is_none() {
                Searcher::twoway(needle, rabinkarp, None)
            } else {
                let prestrat = Prefilter::fallback(ranker, pair, needle);
                Searcher::twoway(needle, rabinkarp, prestrat)
            }
        }
        #[cfg(target_arch = "aarch64")]
        {
            if let Some(pp) = neon::Finder::with_pair(needle, pair) {
                if do_packed_search(needle) {
                    trace!("building aarch64 neon substring searcher");
                    let kind = SearcherKind { neon: pp };
                    Searcher { call: searcher_kind_neon, kind, rabinkarp }
                } else if prefilter.is_none() {
                    Searcher::twoway(needle, rabinkarp, None)
                } else {
                    let prestrat = Prefilter::neon(pp, needle);
                    Searcher::twoway(needle, rabinkarp, Some(prestrat))
                }
            } else if prefilter.is_none() {
                Searcher::twoway(needle, rabinkarp, None)
            } else {
                let prestrat = Prefilter::fallback(ranker, pair, needle);
                Searcher::twoway(needle, rabinkarp, prestrat)
            }
        }
        #[cfg(not(any(
            all(target_arch = "x86_64", target_feature = "sse2"),
            target_arch = "wasm32",
            target_arch = "aarch64"
        )))]
        {
            if prefilter.is_none() {
                Searcher::twoway(needle, rabinkarp, None)
            } else {
                let prestrat = Prefilter::fallback(ranker, pair, needle);
                Searcher::twoway(needle, rabinkarp, prestrat)
            }
        }
    }

    /// Creates a new searcher that always uses the Two-Way algorithm. This is
    /// typically used when vector algorithms are unavailable or inappropriate.
    /// (For example, when the needle is "too long.")
    ///
    /// If a prefilter is given, then the searcher returned will be accelerated
    /// by the prefilter.
    #[inline]
    fn twoway(
        needle: &[u8],
        rabinkarp: rabinkarp::Finder,
        prestrat: Option<Prefilter>,
    ) -> Searcher {
        let finder = twoway::Finder::new(needle);
        match prestrat {
            None => {
                trace!("building scalar two-way substring searcher");
                let kind = SearcherKind { two_way: finder };
                Searcher { call: searcher_kind_two_way, kind, rabinkarp }
            }
            Some(prestrat) => {
                trace!(
                    "building scalar two-way \
                     substring searcher with a prefilter"
                );
                let two_way_with_prefilter =
                    TwoWayWithPrefilter { finder, prestrat };
                let kind = SearcherKind { two_way_with_prefilter };
                Searcher {
                    call: searcher_kind_two_way_with_prefilter,
                    kind,
                    rabinkarp,
                }
            }
        }
    }

    /// Searches the given haystack for the given needle. The needle given
    /// should be the same as the needle that this finder was initialized
    /// with.
    ///
    /// Inlining this can lead to big wins for latency, and `#[inline]`
    /// doesn't seem to be enough in some cases.
    #[inline(always)]
    pub(crate) fn find(
        &self,
        prestate: &mut PrefilterState,
        haystack: &[u8],
        needle: &[u8],
    ) -> Option<usize> {
        if haystack.len() < needle.len() {
            None
        } else {
            // SAFETY: By construction, we've ensured that the function
            // in `self.call` is properly paired with the union used in
            // `self.kind`.
            unsafe { (self.call)(self, prestate, haystack, needle) }
        }
    }
}
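// A hedged usage sketch: construct a searcher and run it. This assumes the
// crate's `DefaultFrequencyRank` as the heuristic ranker; any
// `HeuristicFrequencyRank` implementation would do.
#[cfg(test)]
#[test]
fn example_searcher_find() {
    use crate::arch::all::packedpair::DefaultFrequencyRank;

    let needle = b"needle";
    let searcher =
        Searcher::new(PrefilterConfig::Auto, DefaultFrequencyRank, needle);
    let mut prestate = PrefilterState::new();
    assert_eq!(
        Some(2),
        searcher.find(&mut prestate, b"a needle in a haystack", needle)
    );
}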
impl core::fmt::Debug for Searcher {
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        f.debug_struct("Searcher")
            .field("call", &"<searcher function>")
            .field("kind", &"<searcher kind>")
            .field("rabinkarp", &self.rabinkarp)
            .finish()
    }
}

/// A union indicating one of several possible substring search implementations
/// that are in active use.
///
/// This union should only be read by one of the functions prefixed with
/// `searcher_kind_`. Namely, the correct function is meant to be paired with
/// the union by the caller, such that the function always reads from the
/// designated union field.
#[derive(Clone, Copy)]
union SearcherKind {
    empty: (),
    one_byte: u8,
    two_way: twoway::Finder,
    two_way_with_prefilter: TwoWayWithPrefilter,
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
    sse2: crate::arch::x86_64::sse2::packedpair::Finder,
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
    avx2: crate::arch::x86_64::avx2::packedpair::Finder,
    #[cfg(target_arch = "wasm32")]
    simd128: crate::arch::wasm32::simd128::packedpair::Finder,
    #[cfg(target_arch = "aarch64")]
    neon: crate::arch::aarch64::neon::packedpair::Finder,
}

/// A two-way substring searcher with a prefilter.
#[derive(Copy, Clone, Debug)]
struct TwoWayWithPrefilter {
    finder: twoway::Finder,
    prestrat: Prefilter,
}

/// The type of a substring search function.
///
/// # Safety
///
/// When using a function of this type, callers must ensure that the correct
/// function is paired with the value populated in the `SearcherKind` union.
type SearcherKindFn = unsafe fn(
    searcher: &Searcher,
    prestate: &mut PrefilterState,
    haystack: &[u8],
    needle: &[u8],
) -> Option<usize>;

/// Reads from the `empty` field of `SearcherKind` to handle the case of
/// searching for the empty needle. Works on all platforms.
///
/// # Safety
///
/// Callers must ensure that the `searcher.kind.empty` union field is set.
unsafe fn searcher_kind_empty(
    _searcher: &Searcher,
    _prestate: &mut PrefilterState,
    _haystack: &[u8],
    _needle: &[u8],
) -> Option<usize> {
    Some(0)
}

/// Reads from the `one_byte` field of `SearcherKind` to handle the case of
/// searching for a single byte needle. Works on all platforms.
///
/// # Safety
///
/// Callers must ensure that the `searcher.kind.one_byte` union field is set.
unsafe fn searcher_kind_one_byte(
    searcher: &Searcher,
    _prestate: &mut PrefilterState,
    haystack: &[u8],
    _needle: &[u8],
) -> Option<usize> {
    let needle = searcher.kind.one_byte;
    crate::memchr(needle, haystack)
}

/// Reads from the `two_way` field of `SearcherKind` to handle the case of
/// searching for an arbitrary needle without prefilter acceleration. Works on
/// all platforms.
///
/// # Safety
///
/// Callers must ensure that the `searcher.kind.two_way` union field is set.
unsafe fn searcher_kind_two_way(
    searcher: &Searcher,
    _prestate: &mut PrefilterState,
    haystack: &[u8],
    needle: &[u8],
) -> Option<usize> {
    if rabinkarp::is_fast(haystack, needle) {
        searcher.rabinkarp.find(haystack, needle)
    } else {
        searcher.kind.two_way.find(haystack, needle)
    }
}
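// A hedged sketch of the two trivial dispatch cases above: the empty needle
// matches at offset 0, and a one-byte needle degenerates to `memchr`. This
// assumes the crate's `DefaultFrequencyRank` ranker.
#[cfg(test)]
#[test]
fn example_trivial_needles() {
    use crate::arch::all::packedpair::DefaultFrequencyRank;

    let mut prestate = PrefilterState::new();
    let empty =
        Searcher::new(PrefilterConfig::Auto, DefaultFrequencyRank, b"");
    assert_eq!(Some(0), empty.find(&mut prestate, b"haystack", b""));

    let one =
        Searcher::new(PrefilterConfig::Auto, DefaultFrequencyRank, b"s");
    assert_eq!(Some(3), one.find(&mut prestate, b"haystack", b"s"));
}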
/// Reads from the `two_way_with_prefilter` field of `SearcherKind` to handle
/// the case of searching for an arbitrary needle with prefilter acceleration.
/// Works on all platforms.
///
/// # Safety
///
/// Callers must ensure that the `searcher.kind.two_way_with_prefilter` union
/// field is set.
unsafe fn searcher_kind_two_way_with_prefilter(
    searcher: &Searcher,
    prestate: &mut PrefilterState,
    haystack: &[u8],
    needle: &[u8],
) -> Option<usize> {
    if rabinkarp::is_fast(haystack, needle) {
        searcher.rabinkarp.find(haystack, needle)
    } else {
        let TwoWayWithPrefilter { ref finder, ref prestrat } =
            searcher.kind.two_way_with_prefilter;
        let pre = Pre { prestate, prestrat };
        finder.find_with_prefilter(Some(pre), haystack, needle)
    }
}

/// Reads from the `sse2` field of `SearcherKind` to execute the x86_64 SSE2
/// vectorized substring search implementation.
///
/// # Safety
///
/// Callers must ensure that the `searcher.kind.sse2` union field is set.
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
unsafe fn searcher_kind_sse2(
    searcher: &Searcher,
    _prestate: &mut PrefilterState,
    haystack: &[u8],
    needle: &[u8],
) -> Option<usize> {
    let finder = &searcher.kind.sse2;
    if haystack.len() < finder.min_haystack_len() {
        searcher.rabinkarp.find(haystack, needle)
    } else {
        finder.find(haystack, needle)
    }
}

/// Reads from the `avx2` field of `SearcherKind` to execute the x86_64 AVX2
/// vectorized substring search implementation.
///
/// # Safety
///
/// Callers must ensure that the `searcher.kind.avx2` union field is set.
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
unsafe fn searcher_kind_avx2(
    searcher: &Searcher,
    _prestate: &mut PrefilterState,
    haystack: &[u8],
    needle: &[u8],
) -> Option<usize> {
    let finder = &searcher.kind.avx2;
    if haystack.len() < finder.min_haystack_len() {
        searcher.rabinkarp.find(haystack, needle)
    } else {
        finder.find(haystack, needle)
    }
}

/// Reads from the `simd128` field of `SearcherKind` to execute the wasm32
/// simd128 vectorized substring search implementation.
///
/// # Safety
///
/// Callers must ensure that the `searcher.kind.simd128` union field is set.
#[cfg(target_arch = "wasm32")]
unsafe fn searcher_kind_simd128(
    searcher: &Searcher,
    _prestate: &mut PrefilterState,
    haystack: &[u8],
    needle: &[u8],
) -> Option<usize> {
    let finder = &searcher.kind.simd128;
    if haystack.len() < finder.min_haystack_len() {
        searcher.rabinkarp.find(haystack, needle)
    } else {
        finder.find(haystack, needle)
    }
}

/// Reads from the `neon` field of `SearcherKind` to execute the aarch64 neon
/// vectorized substring search implementation.
///
/// # Safety
///
/// Callers must ensure that the `searcher.kind.neon` union field is set.
#[cfg(target_arch = "aarch64")]
unsafe fn searcher_kind_neon(
    searcher: &Searcher,
    _prestate: &mut PrefilterState,
    haystack: &[u8],
    needle: &[u8],
) -> Option<usize> {
    let finder = &searcher.kind.neon;
    if haystack.len() < finder.min_haystack_len() {
        searcher.rabinkarp.find(haystack, needle)
    } else {
        finder.find(haystack, needle)
    }
}
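// Each vector routine above punts to Rabin-Karp when the haystack is shorter
// than the finder's `min_haystack_len`. A hedged sketch exercising that
// short-haystack path, assuming the crate's `DefaultFrequencyRank` ranker:
#[cfg(test)]
#[test]
fn example_short_haystack_fallback() {
    use crate::arch::all::packedpair::DefaultFrequencyRank;

    let needle = b"ab";
    let searcher =
        Searcher::new(PrefilterConfig::Auto, DefaultFrequencyRank, needle);
    let mut prestate = PrefilterState::new();
    // A 3-byte haystack is well below any vector minimum.
    assert_eq!(Some(1), searcher.find(&mut prestate, b"xab", needle));
}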
/// A reverse substring searcher.
#[derive(Clone, Debug)]
pub(crate) struct SearcherRev {
    kind: SearcherRevKind,
    rabinkarp: rabinkarp::FinderRev,
}

/// The kind of the reverse searcher.
///
/// For the reverse case, we don't do any SIMD acceleration or prefilters.
/// There is no specific technical reason why we don't, but rather, we don't
/// do it because it's not clear it's worth the extra code. If you have a use
/// case for it, please file an issue.
///
/// We also don't do the union trick as we do with the forward case and
/// prefilters. Basically for the same reason we don't have prefilters or
/// vector algorithms for reverse searching: it's not clear it's worth doing.
/// Please file an issue if you have a compelling use case for fast reverse
/// substring search.
#[derive(Clone, Debug)]
enum SearcherRevKind {
    Empty,
    OneByte { needle: u8 },
    TwoWay { finder: twoway::FinderRev },
}

impl SearcherRev {
    /// Creates a new searcher for finding occurrences of the given needle in
    /// reverse. That is, it reports the last (instead of the first)
    /// occurrence of a needle in a haystack.
    #[inline]
    pub(crate) fn new(needle: &[u8]) -> SearcherRev {
        let kind = if needle.len() <= 1 {
            if needle.is_empty() {
                trace!("building empty reverse substring searcher");
                SearcherRevKind::Empty
            } else {
                trace!("building one-byte reverse substring searcher");
                debug_assert_eq!(1, needle.len());
                SearcherRevKind::OneByte { needle: needle[0] }
            }
        } else {
            trace!("building scalar two-way reverse substring searcher");
            let finder = twoway::FinderRev::new(needle);
            SearcherRevKind::TwoWay { finder }
        };
        let rabinkarp = rabinkarp::FinderRev::new(needle);
        SearcherRev { kind, rabinkarp }
    }

    /// Searches the given haystack for the last occurrence of the given
    /// needle. The needle given should be the same as the needle that this
    /// finder was initialized with.
    #[inline]
    pub(crate) fn rfind(
        &self,
        haystack: &[u8],
        needle: &[u8],
    ) -> Option<usize> {
        if haystack.len() < needle.len() {
            return None;
        }
        match self.kind {
            SearcherRevKind::Empty => Some(haystack.len()),
            SearcherRevKind::OneByte { needle } => {
                crate::memrchr(needle, haystack)
            }
            SearcherRevKind::TwoWay { ref finder } => {
                if rabinkarp::is_fast(haystack, needle) {
                    self.rabinkarp.rfind(haystack, needle)
                } else {
                    finder.rfind(haystack, needle)
                }
            }
        }
    }
}
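// A hedged usage sketch of reverse search: `rfind` reports the start of the
// last occurrence of the needle.
#[cfg(test)]
#[test]
fn example_searcher_rev_rfind() {
    let needle = b"ab";
    let searcher = SearcherRev::new(needle);
    assert_eq!(Some(4), searcher.rfind(b"ab--ab", needle));
}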
/// Prefilter controls whether heuristics are used to accelerate searching.
///
/// A prefilter refers to the idea of detecting candidate matches very
/// quickly, and then confirming whether those candidates are full matches.
/// This idea can be quite effective since it's often the case that looking
/// for candidates can be a lot faster than running a complete substring
/// search over the entire input. Namely, looking for candidates can be done
/// with extremely fast vectorized code.
///
/// The downside of a prefilter is that it assumes false positives (which are
/// candidates generated by a prefilter that aren't matches) are somewhat rare
/// relative to the frequency of full matches. That is, if a lot of false
/// positives are generated, then it's possible for search time to be worse
/// than if the prefilter wasn't enabled in the first place.
///
/// Another downside of a prefilter is that it can result in highly variable
/// performance, where some cases are extraordinarily fast and others aren't.
/// Typically, variable performance isn't a problem, but it may be for your
/// use case.
///
/// The use of prefilters in this implementation does use a heuristic to
/// detect when a prefilter might not be carrying its weight, and will
/// dynamically disable its use. Nevertheless, this configuration option gives
/// callers the ability to disable prefilters if they know in advance that
/// they won't be useful.
#[derive(Clone, Copy, Debug)]
#[non_exhaustive]
pub enum PrefilterConfig {
    /// Never use a prefilter in substring search.
    None,
    /// Automatically detect whether a heuristic prefilter should be used. If
    /// it is used, then heuristics will be used to dynamically disable the
    /// prefilter if it is believed to not be carrying its weight.
    Auto,
}

impl Default for PrefilterConfig {
    fn default() -> PrefilterConfig {
        PrefilterConfig::Auto
    }
}

impl PrefilterConfig {
    /// Returns true when this prefilter is set to the `None` variant.
    fn is_none(&self) -> bool {
        matches!(*self, PrefilterConfig::None)
    }
}
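// `Auto` is the default: prefilters are enabled, but may later be disabled
// dynamically by `PrefilterState` if they fail to carry their weight.
#[cfg(test)]
#[test]
fn example_prefilter_config_default() {
    assert!(matches!(PrefilterConfig::default(), PrefilterConfig::Auto));
    assert!(!PrefilterConfig::default().is_none());
}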
/// The implementation of a prefilter.
///
/// This type encapsulates dispatch to one of several possible choices for a
/// prefilter. Generally speaking, all prefilters have the same approximate
/// algorithm: they choose a couple of bytes from the needle that are believed
/// to be rare, use a fast vector algorithm to look for those bytes and return
/// positions as candidates for some substring search algorithm (currently
/// only Two-Way) to confirm as a match or not.
///
/// The differences between the algorithms are actually at the vector
/// implementation level. Namely, we need different routines based on both
/// which target architecture we're on and what CPU features are supported.
///
/// The straight-forwardly obvious approach here is to use an enum, and make
/// `Prefilter::find` do case analysis to determine which algorithm was
/// selected and invoke it. However, I've observed that this leads to poor
/// codegen in some cases, especially in latency sensitive benchmarks. That
/// is, this approach comes with overhead that I wasn't able to eliminate.
///
/// The second obvious approach is to use dynamic dispatch with traits. Doing
/// that in this context where `Prefilter` owns the selection generally
/// requires heap allocation, and this code is designed to run in core-only
/// environments.
///
/// So we settle on using a union (that's `PrefilterKind`) and a function
/// pointer (that's `PrefilterKindFn`). We select the right function pointer
/// based on which field in the union we set, and that function in turn
/// knows which field of the union to access. The downside of this approach
/// is that it forces us to think about safety, but the upside is that
/// there are some nice latency improvements to benchmarks. (Especially the
/// `memmem/sliceslice/short` benchmark.)
///
/// In cases where we've selected a vector algorithm and the haystack given
/// is too short, we fall back to the scalar version of `memchr` on the
/// `rarest_byte`. (The scalar version of `memchr` is still better than a
/// naive byte-at-a-time loop because it will read in `usize`-sized chunks at
/// a time.)
#[derive(Clone, Copy)]
struct Prefilter {
    call: PrefilterKindFn,
    kind: PrefilterKind,
    rarest_byte: u8,
    rarest_offset: u8,
}

impl Prefilter {
    /// Return a "fallback" prefilter, but only if it is believed to be
    /// effective.
    #[inline]
    fn fallback<R: HeuristicFrequencyRank>(
        ranker: R,
        pair: Pair,
        needle: &[u8],
    ) -> Option<Prefilter> {
        /// The maximum frequency rank permitted for the fallback prefilter.
        /// If the rarest byte in the needle has a frequency rank above this
        /// value, then no prefilter is used if the fallback prefilter would
        /// otherwise be selected.
        const MAX_FALLBACK_RANK: u8 = 250;

        trace!("building fallback prefilter");
        let rarest_offset = pair.index1();
        let rarest_byte = needle[usize::from(rarest_offset)];
        let rarest_rank = ranker.rank(rarest_byte);
        if rarest_rank > MAX_FALLBACK_RANK {
            None
        } else {
            let finder = crate::arch::all::packedpair::Finder::with_pair(
                needle,
                pair.clone(),
            )?;
            let call = prefilter_kind_fallback;
            let kind = PrefilterKind { fallback: finder };
            Some(Prefilter { call, kind, rarest_byte, rarest_offset })
        }
    }

    /// Return a prefilter using a x86_64 SSE2 vector algorithm.
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
    #[inline]
    fn sse2(finder: sse2::Finder, needle: &[u8]) -> Prefilter {
        trace!("building x86_64 SSE2 prefilter");
        let rarest_offset = finder.pair().index1();
        let rarest_byte = needle[usize::from(rarest_offset)];
        Prefilter {
            call: prefilter_kind_sse2,
            kind: PrefilterKind { sse2: finder },
            rarest_byte,
            rarest_offset,
        }
    }

    /// Return a prefilter using a x86_64 AVX2 vector algorithm.
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
    #[inline]
    fn avx2(finder: avx2::Finder, needle: &[u8]) -> Prefilter {
        trace!("building x86_64 AVX2 prefilter");
        let rarest_offset = finder.pair().index1();
        let rarest_byte = needle[usize::from(rarest_offset)];
        Prefilter {
            call: prefilter_kind_avx2,
            kind: PrefilterKind { avx2: finder },
            rarest_byte,
            rarest_offset,
        }
    }

    /// Return a prefilter using a wasm32 simd128 vector algorithm.
    #[cfg(target_arch = "wasm32")]
    #[inline]
    fn simd128(finder: simd128::Finder, needle: &[u8]) -> Prefilter {
        trace!("building wasm32 simd128 prefilter");
        let rarest_offset = finder.pair().index1();
        let rarest_byte = needle[usize::from(rarest_offset)];
        Prefilter {
            call: prefilter_kind_simd128,
            kind: PrefilterKind { simd128: finder },
            rarest_byte,
            rarest_offset,
        }
    }

    /// Return a prefilter using an aarch64 neon vector algorithm.
    #[cfg(target_arch = "aarch64")]
    #[inline]
    fn neon(finder: neon::Finder, needle: &[u8]) -> Prefilter {
        trace!("building aarch64 neon prefilter");
        let rarest_offset = finder.pair().index1();
        let rarest_byte = needle[usize::from(rarest_offset)];
        Prefilter {
            call: prefilter_kind_neon,
            kind: PrefilterKind { neon: finder },
            rarest_byte,
            rarest_offset,
        }
    }

    /// Return a *candidate* position for a match.
    ///
    /// When this returns an offset, it implies that a match could begin at
    /// that offset, but it may not. That is, it is possible for a false
    /// positive to be returned.
    ///
    /// When `None` is returned, then it is guaranteed that there are no
    /// matches for the needle in the given haystack. That is, it is
    /// impossible for a false negative to be returned.
    ///
    /// The purpose of this routine is to look for candidate matching
    /// positions as quickly as possible before running a (likely) slower
    /// confirmation step.
    #[inline]
    fn find(&self, haystack: &[u8]) -> Option<usize> {
        // SAFETY: By construction, we've ensured that the function in
        // `self.call` is properly paired with the union used in `self.kind`.
        unsafe { (self.call)(self, haystack) }
    }

    /// A "simple" prefilter that just looks for the occurrence of the rarest
    /// byte from the needle. This is generally only used for very small
    /// haystacks.
    #[inline]
    fn find_simple(&self, haystack: &[u8]) -> Option<usize> {
        // We don't use crate::memchr here because the haystack should be
        // small enough that memchr won't be able to use vector routines
        // anyway. So we just skip straight to the fallback implementation,
        // which is likely faster. (A byte-at-a-time loop is only used when
        // the haystack is smaller than `size_of::<usize>()`.)
        crate::arch::all::memchr::One::new(self.rarest_byte)
            .find(haystack)
            .map(|i| i.saturating_sub(usize::from(self.rarest_offset)))
    }
}

impl core::fmt::Debug for Prefilter {
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        f.debug_struct("Prefilter")
            .field("call", &"<prefilter function>")
            .field("kind", &"<prefilter kind>")
            .field("rarest_byte", &self.rarest_byte)
            .field("rarest_offset", &self.rarest_offset)
            .finish()
    }
}
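// A hedged, self-contained sketch of the candidate/confirm strategy described
// on `Prefilter`: a fast scan proposes candidate positions and a slower
// comparison confirms them. The real code uses vectorized pair scanning and
// Two-Way for confirmation; here, `memchr` on the first needle byte stands in
// as the prefilter and `starts_with` as the confirmation step.
#[cfg(test)]
#[test]
fn example_candidate_confirm_loop() {
    fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> {
        let first = *needle.first()?;
        let mut at = 0;
        while at < haystack.len() {
            // Prefilter: cheap scan for a candidate position.
            let candidate = at + crate::memchr(first, &haystack[at..])?;
            // Confirm: check whether the candidate is a real match.
            if haystack[candidate..].starts_with(needle) {
                return Some(candidate);
            }
            // False positive: resume scanning after the candidate.
            at = candidate + 1;
        }
        None
    }
    assert_eq!(Some(6), find(b"xxaxx xyz", b"xy"));
}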
/// A union indicating one of several possible prefilters that are in active
/// use.
///
/// This union should only be read by one of the functions prefixed with
/// `prefilter_kind_`. Namely, the correct function is meant to be paired with
/// the union by the caller, such that the function always reads from the
/// designated union field.
#[derive(Clone, Copy)]
union PrefilterKind {
    fallback: crate::arch::all::packedpair::Finder,
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
    sse2: crate::arch::x86_64::sse2::packedpair::Finder,
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
    avx2: crate::arch::x86_64::avx2::packedpair::Finder,
    #[cfg(target_arch = "wasm32")]
    simd128: crate::arch::wasm32::simd128::packedpair::Finder,
    #[cfg(target_arch = "aarch64")]
    neon: crate::arch::aarch64::neon::packedpair::Finder,
}

/// The type of a prefilter function.
///
/// # Safety
///
/// When using a function of this type, callers must ensure that the correct
/// function is paired with the value populated in the `PrefilterKind` union.
type PrefilterKindFn =
    unsafe fn(strat: &Prefilter, haystack: &[u8]) -> Option<usize>;

/// Reads from the `fallback` field of `PrefilterKind` to execute the fallback
/// prefilter. Works on all platforms.
///
/// # Safety
///
/// Callers must ensure that the `strat.kind.fallback` union field is set.
unsafe fn prefilter_kind_fallback(
    strat: &Prefilter,
    haystack: &[u8],
) -> Option<usize> {
    strat.kind.fallback.find_prefilter(haystack)
}

/// Reads from the `sse2` field of `PrefilterKind` to execute the x86_64 SSE2
/// prefilter.
///
/// # Safety
///
/// Callers must ensure that the `strat.kind.sse2` union field is set.
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
unsafe fn prefilter_kind_sse2(
    strat: &Prefilter,
    haystack: &[u8],
) -> Option<usize> {
    let finder = &strat.kind.sse2;
    if haystack.len() < finder.min_haystack_len() {
        strat.find_simple(haystack)
    } else {
        finder.find_prefilter(haystack)
    }
}

/// Reads from the `avx2` field of `PrefilterKind` to execute the x86_64 AVX2
/// prefilter.
///
/// # Safety
///
/// Callers must ensure that the `strat.kind.avx2` union field is set.
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
unsafe fn prefilter_kind_avx2(
    strat: &Prefilter,
    haystack: &[u8],
) -> Option<usize> {
    let finder = &strat.kind.avx2;
    if haystack.len() < finder.min_haystack_len() {
        strat.find_simple(haystack)
    } else {
        finder.find_prefilter(haystack)
    }
}

/// Reads from the `simd128` field of `PrefilterKind` to execute the wasm32
/// simd128 prefilter.
///
/// # Safety
///
/// Callers must ensure that the `strat.kind.simd128` union field is set.
#[cfg(target_arch = "wasm32")]
unsafe fn prefilter_kind_simd128(
    strat: &Prefilter,
    haystack: &[u8],
) -> Option<usize> {
    let finder = &strat.kind.simd128;
    if haystack.len() < finder.min_haystack_len() {
        strat.find_simple(haystack)
    } else {
        finder.find_prefilter(haystack)
    }
}

/// Reads from the `neon` field of `PrefilterKind` to execute the aarch64 neon
/// prefilter.
///
/// # Safety
///
/// Callers must ensure that the `strat.kind.neon` union field is set.
#[cfg(target_arch = "aarch64")]
unsafe fn prefilter_kind_neon(
    strat: &Prefilter,
    haystack: &[u8],
) -> Option<usize> {
    let finder = &strat.kind.neon;
    if haystack.len() < finder.min_haystack_len() {
        strat.find_simple(haystack)
    } else {
        finder.find_prefilter(haystack)
    }
}
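// The prefilters report *candidate starting positions*: when the rare byte
// sits at needle offset `rarest_offset` and is found at haystack offset `i`,
// the candidate match begins at `i - rarest_offset`. A hedged sketch of that
// adjustment, including the saturating behavior `find_simple` relies on when
// the hit is too close to the start of the haystack:
#[cfg(test)]
#[test]
fn example_candidate_offset_adjustment() {
    // Rare byte at needle offset 2, found at haystack offset 5: the
    // candidate match starts at offset 3.
    assert_eq!(3, 5usize.saturating_sub(2));
    // A hit at offset 1 can't imply a match starting before the haystack,
    // so the subtraction saturates to 0.
    assert_eq!(0, 1usize.saturating_sub(2));
}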
/// PrefilterState tracks state associated with the effectiveness of a
/// prefilter. It is used to track how many bytes, on average, are skipped by
/// the prefilter. If this average dips below a certain threshold over time,
/// then the state renders the prefilter inert and stops using it.
///
/// A prefilter state should be created for each search. (Where creating an
/// iterator is treated as a single search.) A prefilter state should only be
/// created from a `Freqy`. e.g., An inert `Freqy` will produce an inert
/// `PrefilterState`.
#[derive(Clone, Copy, Debug)]
pub(crate) struct PrefilterState {
    /// The number of skips that have been executed. This is always 1 greater
    /// than the actual number of skips. The special sentinel value of 0
    /// indicates that the prefilter is inert. This is useful to avoid
    /// additional checks to determine whether the prefilter is still
    /// "effective." Once a prefilter becomes inert, it should no longer be
    /// used (according to our heuristics).
    skips: u32,
    /// The total number of bytes that have been skipped.
    skipped: u32,
}

impl PrefilterState {
    /// The minimum number of skip attempts to try before considering whether
    /// a prefilter is effective or not.
    const MIN_SKIPS: u32 = 50;

    /// The minimum amount of bytes that skipping must average.
    ///
    /// This value was chosen based on varying it and checking
    /// the microbenchmarks. In particular, this can impact the
    /// pathological/repeated-{huge,small} benchmarks quite a bit if it's set
    /// too low.
    const MIN_SKIP_BYTES: u32 = 8;

    /// Create a fresh prefilter state.
    #[inline]
    pub(crate) fn new() -> PrefilterState {
        PrefilterState { skips: 1, skipped: 0 }
    }

    /// Update this state with the number of bytes skipped on the last
    /// invocation of the prefilter.
    #[inline]
    fn update(&mut self, skipped: usize) {
        self.skips = self.skips.saturating_add(1);
        // We need to do this dance since it's technically possible for
        // `skipped` to overflow a `u32`. (And we use a `u32` to reduce the
        // size of a prefilter state.)
        self.skipped = match u32::try_from(skipped) {
            Err(_) => core::u32::MAX,
            Ok(skipped) => self.skipped.saturating_add(skipped),
        };
    }

    /// Return true if and only if this state indicates that a prefilter is
    /// still effective.
    #[inline]
    fn is_effective(&mut self) -> bool {
        if self.is_inert() {
            return false;
        }
        if self.skips() < PrefilterState::MIN_SKIPS {
            return true;
        }
        if self.skipped >= PrefilterState::MIN_SKIP_BYTES * self.skips() {
            return true;
        }
        // We're inert.
        self.skips = 0;
        false
    }

    /// Returns true if the prefilter this state represents should no longer
    /// be used.
    #[inline]
    fn is_inert(&self) -> bool {
        self.skips == 0
    }

    /// Returns the total number of times the prefilter has been used.
    #[inline]
    fn skips(&self) -> u32 {
        // Remember, `0` is a sentinel value indicating inertness, so we
        // always need to subtract `1` to get our actual number of skips.
        self.skips.saturating_sub(1)
    }
}

/// A combination of prefilter effectiveness state and the prefilter itself.
#[derive(Debug)]
pub(crate) struct Pre<'a> {
    /// State that tracks the effectiveness of a prefilter.
    prestate: &'a mut PrefilterState,
    /// The actual prefilter.
    prestrat: &'a Prefilter,
}

impl<'a> Pre<'a> {
    /// Call this prefilter on the given haystack with the given needle.
    #[inline]
    pub(crate) fn find(&mut self, haystack: &[u8]) -> Option<usize> {
        let result = self.prestrat.find(haystack);
        self.prestate.update(result.unwrap_or(haystack.len()));
        result
    }

    /// Return true if and only if this prefilter should be used.
    #[inline]
    pub(crate) fn is_effective(&mut self) -> bool {
        self.prestate.is_effective()
    }
}
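// A hedged sketch of the inertness heuristic above: after `MIN_SKIPS`
// prefilter invocations that skip too few bytes on average, the state goes
// inert and the prefilter stops being used.
#[cfg(test)]
#[test]
fn example_prefilter_state_goes_inert() {
    let mut state = PrefilterState::new();
    for _ in 0..PrefilterState::MIN_SKIPS {
        assert!(state.is_effective());
        // Pretend every invocation skipped zero bytes.
        state.update(0);
    }
    // The average skip (0 bytes) is below MIN_SKIP_BYTES, so we go inert.
    assert!(!state.is_effective());
    assert!(state.is_inert());
}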
/// Returns true if the needle has the right characteristics for a vector
/// algorithm to handle the entirety of substring search.
///
/// Vector algorithms can be used for prefilters for other substring search
/// algorithms (like Two-Way), but they can also be used for substring search
/// on their own. When used for substring search, vector algorithms will
/// quickly identify candidate match positions (just like in the prefilter
/// case), but instead of returning the candidate position they will try to
/// confirm the match themselves. Confirmation happens via `memcmp`. This
/// works well for short needles, but can break down when many false candidate
/// positions are generated for large needles. Thus, we only permit vector
/// algorithms to own substring search when the needle is of a certain length.
#[inline]
fn do_packed_search(needle: &[u8]) -> bool {
    /// The minimum length of a needle required for this algorithm. The
    /// minimum is 2 since a length of 1 should just use memchr and a length
    /// of 0 isn't a case handled by this searcher.
    const MIN_LEN: usize = 2;

    /// The maximum length of a needle permitted for this algorithm.
    ///
    /// In reality, there is no hard max here. The code below can handle any
    /// length needle. (Perhaps that suggests there are missing
    /// optimizations.) Instead, this is a heuristic and a bound guaranteeing
    /// our linear time complexity.
    ///
    /// It is a heuristic because when a candidate match is found, memcmp is
    /// run. For very large needles with lots of false positives, memcmp can
    /// make the code run quite slow.
    ///
    /// It is a bound because the worst case behavior with memcmp is
    /// multiplicative in the size of the needle and haystack, and we want
    /// to keep that additive. This bound ensures we still meet that bound
    /// theoretically, since it's just a constant. We aren't acting in bad
    /// faith here; memcmp on tiny needles is so fast that even in
    /// pathological cases (see pathological vector benchmarks), this is
    /// still just as fast or faster in practice.
    ///
    /// This specific number was chosen by tweaking a bit and running
    /// benchmarks. The rare-medium-needle, for example, gets about 5% faster
    /// by using this algorithm instead of a prefilter-accelerated Two-Way.
    /// There's also a theoretical desire to keep this number reasonably
    /// low, to mitigate the impact of pathological cases. I did try 64, and
    /// some benchmarks got a little better, and others (particularly the
    /// pathological ones) got a lot worse. So... 32 it is?
    const MAX_LEN: usize = 32;

    MIN_LEN <= needle.len() && needle.len() <= MAX_LEN
}
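// A hedged check of the length window above: vector algorithms own the
// search only for needles of length 2 through 32 inclusive.
#[cfg(test)]
#[test]
fn example_do_packed_search_bounds() {
    assert!(!do_packed_search(b""));
    assert!(!do_packed_search(b"a"));
    assert!(do_packed_search(b"ab"));
    assert!(do_packed_search(&[b'x'; 32]));
    assert!(!do_packed_search(&[b'x'; 33]));
}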