Diffstat (limited to 'vendor/memchr/src/memmem')
20 files changed, 1153 insertions, 4033 deletions
diff --git a/vendor/memchr/src/memmem/byte_frequencies.rs b/vendor/memchr/src/memmem/byte_frequencies.rs deleted file mode 100644 index c313b629d..000000000 --- a/vendor/memchr/src/memmem/byte_frequencies.rs +++ /dev/null @@ -1,258 +0,0 @@ -pub const BYTE_FREQUENCIES: [u8; 256] = [ - 55, // '\x00' - 52, // '\x01' - 51, // '\x02' - 50, // '\x03' - 49, // '\x04' - 48, // '\x05' - 47, // '\x06' - 46, // '\x07' - 45, // '\x08' - 103, // '\t' - 242, // '\n' - 66, // '\x0b' - 67, // '\x0c' - 229, // '\r' - 44, // '\x0e' - 43, // '\x0f' - 42, // '\x10' - 41, // '\x11' - 40, // '\x12' - 39, // '\x13' - 38, // '\x14' - 37, // '\x15' - 36, // '\x16' - 35, // '\x17' - 34, // '\x18' - 33, // '\x19' - 56, // '\x1a' - 32, // '\x1b' - 31, // '\x1c' - 30, // '\x1d' - 29, // '\x1e' - 28, // '\x1f' - 255, // ' ' - 148, // '!' - 164, // '"' - 149, // '#' - 136, // '$' - 160, // '%' - 155, // '&' - 173, // "'" - 221, // '(' - 222, // ')' - 134, // '*' - 122, // '+' - 232, // ',' - 202, // '-' - 215, // '.' - 224, // '/' - 208, // '0' - 220, // '1' - 204, // '2' - 187, // '3' - 183, // '4' - 179, // '5' - 177, // '6' - 168, // '7' - 178, // '8' - 200, // '9' - 226, // ':' - 195, // ';' - 154, // '<' - 184, // '=' - 174, // '>' - 126, // '?' - 120, // '@' - 191, // 'A' - 157, // 'B' - 194, // 'C' - 170, // 'D' - 189, // 'E' - 162, // 'F' - 161, // 'G' - 150, // 'H' - 193, // 'I' - 142, // 'J' - 137, // 'K' - 171, // 'L' - 176, // 'M' - 185, // 'N' - 167, // 'O' - 186, // 'P' - 112, // 'Q' - 175, // 'R' - 192, // 'S' - 188, // 'T' - 156, // 'U' - 140, // 'V' - 143, // 'W' - 123, // 'X' - 133, // 'Y' - 128, // 'Z' - 147, // '[' - 138, // '\\' - 146, // ']' - 114, // '^' - 223, // '_' - 151, // '`' - 249, // 'a' - 216, // 'b' - 238, // 'c' - 236, // 'd' - 253, // 'e' - 227, // 'f' - 218, // 'g' - 230, // 'h' - 247, // 'i' - 135, // 'j' - 180, // 'k' - 241, // 'l' - 233, // 'm' - 246, // 'n' - 244, // 'o' - 231, // 'p' - 139, // 'q' - 245, // 'r' - 243, // 's' - 251, // 't' - 235, // 'u' - 201, // 'v' - 196, // 'w' - 240, // 'x' - 214, // 'y' - 152, // 'z' - 182, // '{' - 205, // '|' - 181, // '}' - 127, // '~' - 27, // '\x7f' - 212, // '\x80' - 211, // '\x81' - 210, // '\x82' - 213, // '\x83' - 228, // '\x84' - 197, // '\x85' - 169, // '\x86' - 159, // '\x87' - 131, // '\x88' - 172, // '\x89' - 105, // '\x8a' - 80, // '\x8b' - 98, // '\x8c' - 96, // '\x8d' - 97, // '\x8e' - 81, // '\x8f' - 207, // '\x90' - 145, // '\x91' - 116, // '\x92' - 115, // '\x93' - 144, // '\x94' - 130, // '\x95' - 153, // '\x96' - 121, // '\x97' - 107, // '\x98' - 132, // '\x99' - 109, // '\x9a' - 110, // '\x9b' - 124, // '\x9c' - 111, // '\x9d' - 82, // '\x9e' - 108, // '\x9f' - 118, // '\xa0' - 141, // '¡' - 113, // '¢' - 129, // '£' - 119, // '¤' - 125, // '¥' - 165, // '¦' - 117, // '§' - 92, // '¨' - 106, // '©' - 83, // 'ª' - 72, // '«' - 99, // '¬' - 93, // '\xad' - 65, // '®' - 79, // '¯' - 166, // '°' - 237, // '±' - 163, // '²' - 199, // '³' - 190, // '´' - 225, // 'µ' - 209, // '¶' - 203, // '·' - 198, // '¸' - 217, // '¹' - 219, // 'º' - 206, // '»' - 234, // '¼' - 248, // '½' - 158, // '¾' - 239, // '¿' - 255, // 'À' - 255, // 'Á' - 255, // 'Â' - 255, // 'Ã' - 255, // 'Ä' - 255, // 'Å' - 255, // 'Æ' - 255, // 'Ç' - 255, // 'È' - 255, // 'É' - 255, // 'Ê' - 255, // 'Ë' - 255, // 'Ì' - 255, // 'Í' - 255, // 'Î' - 255, // 'Ï' - 255, // 'Ð' - 255, // 'Ñ' - 255, // 'Ò' - 255, // 'Ó' - 255, // 'Ô' - 255, // 'Õ' - 255, // 'Ö' - 255, // '×' - 255, // 'Ø' - 255, // 'Ù' - 255, // 'Ú' - 255, // 'Û' - 255, // 'Ü' - 255, // 'Ý' - 255, // 
'Þ' - 255, // 'ß' - 255, // 'à' - 255, // 'á' - 255, // 'â' - 255, // 'ã' - 255, // 'ä' - 255, // 'å' - 255, // 'æ' - 255, // 'ç' - 255, // 'è' - 255, // 'é' - 255, // 'ê' - 255, // 'ë' - 255, // 'ì' - 255, // 'í' - 255, // 'î' - 255, // 'ï' - 255, // 'ð' - 255, // 'ñ' - 255, // 'ò' - 255, // 'ó' - 255, // 'ô' - 255, // 'õ' - 255, // 'ö' - 255, // '÷' - 255, // 'ø' - 255, // 'ù' - 255, // 'ú' - 255, // 'û' - 255, // 'ü' - 255, // 'ý' - 255, // 'þ' - 255, // 'ÿ' -]; diff --git a/vendor/memchr/src/memmem/genericsimd.rs b/vendor/memchr/src/memmem/genericsimd.rs deleted file mode 100644 index 28bfdab88..000000000 --- a/vendor/memchr/src/memmem/genericsimd.rs +++ /dev/null @@ -1,266 +0,0 @@ -use core::mem::size_of; - -use crate::memmem::{util::memcmp, vector::Vector, NeedleInfo}; - -/// The minimum length of a needle required for this algorithm. The minimum -/// is 2 since a length of 1 should just use memchr and a length of 0 isn't -/// a case handled by this searcher. -pub(crate) const MIN_NEEDLE_LEN: usize = 2; - -/// The maximum length of a needle required for this algorithm. -/// -/// In reality, there is no hard max here. The code below can handle any -/// length needle. (Perhaps that suggests there are missing optimizations.) -/// Instead, this is a heuristic and a bound guaranteeing our linear time -/// complexity. -/// -/// It is a heuristic because when a candidate match is found, memcmp is run. -/// For very large needles with lots of false positives, memcmp can make the -/// code run quite slow. -/// -/// It is a bound because the worst case behavior with memcmp is multiplicative -/// in the size of the needle and haystack, and we want to keep that additive. -/// This bound ensures we still meet that bound theoretically, since it's just -/// a constant. We aren't acting in bad faith here, memcmp on tiny needles -/// is so fast that even in pathological cases (see pathological vector -/// benchmarks), this is still just as fast or faster in practice. -/// -/// This specific number was chosen by tweaking a bit and running benchmarks. -/// The rare-medium-needle, for example, gets about 5% faster by using this -/// algorithm instead of a prefilter-accelerated Two-Way. There's also a -/// theoretical desire to keep this number reasonably low, to mitigate the -/// impact of pathological cases. I did try 64, and some benchmarks got a -/// little better, and others (particularly the pathological ones), got a lot -/// worse. So... 32 it is? -pub(crate) const MAX_NEEDLE_LEN: usize = 32; - -/// The implementation of the forward vector accelerated substring search. -/// -/// This is extremely similar to the prefilter vector module by the same name. -/// The key difference is that this is not a prefilter. Instead, it handles -/// confirming its own matches. The trade off is that this only works with -/// smaller needles. The speed up here is that an inlined memcmp on a tiny -/// needle is very quick, even on pathological inputs. This is much better than -/// combining a prefilter with Two-Way, where using Two-Way to confirm the -/// match has higher latency. -/// -/// So why not use this for all needles? We could, and it would probably work -/// really well on most inputs. But its worst case is multiplicative and we -/// want to guarantee worst case additive time. Some of the benchmarks try to -/// justify this (see the pathological ones). -/// -/// The prefilter variant of this has more comments. Also note that we only -/// implement this for forward searches for now. 
If you have a compelling use -/// case for accelerated reverse search, please file an issue. -#[derive(Clone, Copy, Debug)] -pub(crate) struct Forward { - rare1i: u8, - rare2i: u8, -} - -impl Forward { - /// Create a new "generic simd" forward searcher. If one could not be - /// created from the given inputs, then None is returned. - pub(crate) fn new(ninfo: &NeedleInfo, needle: &[u8]) -> Option<Forward> { - let (rare1i, rare2i) = ninfo.rarebytes.as_rare_ordered_u8(); - // If the needle is too short or too long, give up. Also, give up - // if the rare bytes detected are at the same position. (It likely - // suggests a degenerate case, although it should technically not be - // possible.) - if needle.len() < MIN_NEEDLE_LEN - || needle.len() > MAX_NEEDLE_LEN - || rare1i == rare2i - { - return None; - } - Some(Forward { rare1i, rare2i }) - } - - /// Returns the minimum length of haystack that is needed for this searcher - /// to work for a particular vector. Passing a haystack with a length - /// smaller than this will cause `fwd_find` to panic. - #[inline(always)] - pub(crate) fn min_haystack_len<V: Vector>(&self) -> usize { - self.rare2i as usize + size_of::<V>() - } -} - -/// Searches the given haystack for the given needle. The needle given should -/// be the same as the needle that this searcher was initialized with. -/// -/// # Panics -/// -/// When the given haystack has a length smaller than `min_haystack_len`. -/// -/// # Safety -/// -/// Since this is meant to be used with vector functions, callers need to -/// specialize this inside of a function with a `target_feature` attribute. -/// Therefore, callers must ensure that whatever target feature is being used -/// supports the vector functions that this function is specialized for. (For -/// the specific vector functions used, see the Vector trait implementations.) -#[inline(always)] -pub(crate) unsafe fn fwd_find<V: Vector>( - fwd: &Forward, - haystack: &[u8], - needle: &[u8], -) -> Option<usize> { - // It would be nice if we didn't have this check here, since the meta - // searcher should handle it for us. But without this, I don't think we - // guarantee that end_ptr.sub(needle.len()) won't result in UB. We could - // put it as part of the safety contract, but it makes it more complicated - // than necessary. - if haystack.len() < needle.len() { - return None; - } - let min_haystack_len = fwd.min_haystack_len::<V>(); - assert!(haystack.len() >= min_haystack_len, "haystack too small"); - debug_assert!(needle.len() <= haystack.len()); - debug_assert!( - needle.len() >= MIN_NEEDLE_LEN, - "needle must be at least {} bytes", - MIN_NEEDLE_LEN, - ); - debug_assert!( - needle.len() <= MAX_NEEDLE_LEN, - "needle must be at most {} bytes", - MAX_NEEDLE_LEN, - ); - - let (rare1i, rare2i) = (fwd.rare1i as usize, fwd.rare2i as usize); - let rare1chunk = V::splat(needle[rare1i]); - let rare2chunk = V::splat(needle[rare2i]); - - let start_ptr = haystack.as_ptr(); - let end_ptr = start_ptr.add(haystack.len()); - let max_ptr = end_ptr.sub(min_haystack_len); - let mut ptr = start_ptr; - - // N.B. I did experiment with unrolling the loop to deal with size(V) - // bytes at a time and 2*size(V) bytes at a time. The double unroll was - // marginally faster while the quadruple unroll was unambiguously slower. - // In the end, I decided the complexity from unrolling wasn't worth it. I - // used the memmem/krate/prebuilt/huge-en/ benchmarks to compare. 
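// --- Illustrative scalar model (editor's sketch, not part of the diff) of
// what the vectorized loop below computes: for each window, check the two
// heuristically rare needle bytes first, and only run a full comparison when
// both agree. The real code does size_of::<V>() windows at a time via
// splat/cmpeq/movemask; `scalar_model_find` is a hypothetical name, and
// rare1i/rare2i are assumed to be valid indices into the needle.
fn scalar_model_find(
    haystack: &[u8],
    needle: &[u8],
    rare1i: usize,
    rare2i: usize,
) -> Option<usize> {
    if needle.is_empty() || haystack.len() < needle.len() {
        return None;
    }
    for i in 0..=haystack.len() - needle.len() {
        // cheap guard on the two rare bytes, mirroring rare1chunk/rare2chunk
        if haystack[i + rare1i] == needle[rare1i]
            && haystack[i + rare2i] == needle[rare2i]
        {
            // the vector code confirms candidates with an inlined memcmp
            if &haystack[i..i + needle.len()] == needle {
                return Some(i);
            }
        }
    }
    None
}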
- while ptr <= max_ptr { - let m = fwd_find_in_chunk( - fwd, needle, ptr, end_ptr, rare1chunk, rare2chunk, !0, - ); - if let Some(chunki) = m { - return Some(matched(start_ptr, ptr, chunki)); - } - ptr = ptr.add(size_of::<V>()); - } - if ptr < end_ptr { - let remaining = diff(end_ptr, ptr); - debug_assert!( - remaining < min_haystack_len, - "remaining bytes should be smaller than the minimum haystack \ - length of {}, but there are {} bytes remaining", - min_haystack_len, - remaining, - ); - if remaining < needle.len() { - return None; - } - debug_assert!( - max_ptr < ptr, - "after main loop, ptr should have exceeded max_ptr", - ); - let overlap = diff(ptr, max_ptr); - debug_assert!( - overlap > 0, - "overlap ({}) must always be non-zero", - overlap, - ); - debug_assert!( - overlap < size_of::<V>(), - "overlap ({}) cannot possibly be >= than a vector ({})", - overlap, - size_of::<V>(), - ); - // The mask has all of its bits set except for the first N least - // significant bits, where N=overlap. This way, any matches that - // occur in find_in_chunk within the overlap are automatically - // ignored. - let mask = !((1 << overlap) - 1); - ptr = max_ptr; - let m = fwd_find_in_chunk( - fwd, needle, ptr, end_ptr, rare1chunk, rare2chunk, mask, - ); - if let Some(chunki) = m { - return Some(matched(start_ptr, ptr, chunki)); - } - } - None -} - -/// Search for an occurrence of two rare bytes from the needle in the chunk -/// pointed to by ptr, with the end of the haystack pointed to by end_ptr. When -/// an occurrence is found, memcmp is run to check if a match occurs at the -/// corresponding position. -/// -/// rare1chunk and rare2chunk correspond to vectors with the rare1 and rare2 -/// bytes repeated in each 8-bit lane, respectively. -/// -/// mask should have bits set corresponding the positions in the chunk in which -/// matches are considered. This is only used for the last vector load where -/// the beginning of the vector might have overlapped with the last load in -/// the main loop. The mask lets us avoid visiting positions that have already -/// been discarded as matches. -/// -/// # Safety -/// -/// It must be safe to do an unaligned read of size(V) bytes starting at both -/// (ptr + rare1i) and (ptr + rare2i). It must also be safe to do unaligned -/// loads on ptr up to (end_ptr - needle.len()). -#[inline(always)] -unsafe fn fwd_find_in_chunk<V: Vector>( - fwd: &Forward, - needle: &[u8], - ptr: *const u8, - end_ptr: *const u8, - rare1chunk: V, - rare2chunk: V, - mask: u32, -) -> Option<usize> { - let chunk0 = V::load_unaligned(ptr.add(fwd.rare1i as usize)); - let chunk1 = V::load_unaligned(ptr.add(fwd.rare2i as usize)); - - let eq0 = chunk0.cmpeq(rare1chunk); - let eq1 = chunk1.cmpeq(rare2chunk); - - let mut match_offsets = eq0.and(eq1).movemask() & mask; - while match_offsets != 0 { - let offset = match_offsets.trailing_zeros() as usize; - let ptr = ptr.add(offset); - if end_ptr.sub(needle.len()) < ptr { - return None; - } - let chunk = core::slice::from_raw_parts(ptr, needle.len()); - if memcmp(needle, chunk) { - return Some(offset); - } - match_offsets &= match_offsets - 1; - } - None -} - -/// Accepts a chunk-relative offset and returns a haystack relative offset -/// after updating the prefilter state. -/// -/// See the same function with the same name in the prefilter variant of this -/// algorithm to learned why it's tagged with inline(never). Even here, where -/// the function is simpler, inlining it leads to poorer codegen. 
(Although -/// it does improve some benchmarks, like prebuiltiter/huge-en/common-you.) -#[cold] -#[inline(never)] -fn matched(start_ptr: *const u8, ptr: *const u8, chunki: usize) -> usize { - diff(ptr, start_ptr) + chunki -} - -/// Subtract `b` from `a` and return the difference. `a` must be greater than -/// or equal to `b`. -fn diff(a: *const u8, b: *const u8) -> usize { - debug_assert!(a >= b); - (a as usize) - (b as usize) -} diff --git a/vendor/memchr/src/memmem/mod.rs b/vendor/memchr/src/memmem/mod.rs index e1cd1aec7..1a2a7e10c 100644 --- a/vendor/memchr/src/memmem/mod.rs +++ b/vendor/memchr/src/memmem/mod.rs @@ -66,99 +66,25 @@ assert_eq!(None, finder.find(b"quux baz bar")); ``` */ -pub use self::prefilter::Prefilter; +pub use crate::memmem::searcher::PrefilterConfig as Prefilter; + +// This is exported here for use in the crate::arch::all::twoway +// implementation. This is essentially an abstraction breaker. Namely, the +// public API of twoway doesn't support providing a prefilter, but its crate +// internal API does. The main reason for this is that I didn't want to do the +// API design required to support it without a concrete use case. +pub(crate) use crate::memmem::searcher::Pre; use crate::{ - cow::CowBytes, - memmem::{ - prefilter::{Pre, PrefilterFn, PrefilterState}, - rabinkarp::NeedleHash, - rarebytes::RareNeedleBytes, + arch::all::{ + packedpair::{DefaultFrequencyRank, HeuristicFrequencyRank}, + rabinkarp, }, + cow::CowBytes, + memmem::searcher::{PrefilterState, Searcher, SearcherRev}, }; -/// Defines a suite of quickcheck properties for forward and reverse -/// substring searching. -/// -/// This is defined in this specific spot so that it can be used freely among -/// the different substring search implementations. I couldn't be bothered to -/// fight with the macro-visibility rules enough to figure out how to stuff it -/// somewhere more convenient. -#[cfg(all(test, feature = "std"))] -macro_rules! define_memmem_quickcheck_tests { - ($fwd:expr, $rev:expr) => { - use crate::memmem::proptests; - - quickcheck::quickcheck! { - fn qc_fwd_prefix_is_substring(bs: Vec<u8>) -> bool { - proptests::prefix_is_substring(false, &bs, $fwd) - } - - fn qc_fwd_suffix_is_substring(bs: Vec<u8>) -> bool { - proptests::suffix_is_substring(false, &bs, $fwd) - } - - fn qc_fwd_matches_naive( - haystack: Vec<u8>, - needle: Vec<u8> - ) -> bool { - proptests::matches_naive(false, &haystack, &needle, $fwd) - } - - fn qc_rev_prefix_is_substring(bs: Vec<u8>) -> bool { - proptests::prefix_is_substring(true, &bs, $rev) - } - - fn qc_rev_suffix_is_substring(bs: Vec<u8>) -> bool { - proptests::suffix_is_substring(true, &bs, $rev) - } - - fn qc_rev_matches_naive( - haystack: Vec<u8>, - needle: Vec<u8> - ) -> bool { - proptests::matches_naive(true, &haystack, &needle, $rev) - } - } - }; -} - -/// Defines a suite of "simple" hand-written tests for a substring -/// implementation. -/// -/// This is defined here for the same reason that -/// define_memmem_quickcheck_tests is defined here. -#[cfg(test)] -macro_rules! 
define_memmem_simple_tests { - ($fwd:expr, $rev:expr) => { - use crate::memmem::testsimples; - - #[test] - fn simple_forward() { - testsimples::run_search_tests_fwd($fwd); - } - - #[test] - fn simple_reverse() { - testsimples::run_search_tests_rev($rev); - } - }; -} - -mod byte_frequencies; -#[cfg(memchr_runtime_simd)] -mod genericsimd; -mod prefilter; -mod rabinkarp; -mod rarebytes; -mod twoway; -mod util; -#[cfg(memchr_runtime_simd)] -mod vector; -#[cfg(all(memchr_runtime_wasm128))] -mod wasm; -#[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] -mod x86; +mod searcher; /// Returns an iterator over all non-overlapping occurrences of a substring in /// a haystack. @@ -258,7 +184,7 @@ pub fn rfind_iter<'h, 'n, N: 'n + ?Sized + AsRef<[u8]>>( #[inline] pub fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> { if haystack.len() < 64 { - rabinkarp::find(haystack, needle) + rabinkarp::Finder::new(needle).find(haystack, needle) } else { Finder::new(needle).find(haystack) } @@ -295,7 +221,7 @@ pub fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> { #[inline] pub fn rfind(haystack: &[u8], needle: &[u8]) -> Option<usize> { if haystack.len() < 64 { - rabinkarp::rfind(haystack, needle) + rabinkarp::FinderRev::new(needle).rfind(haystack, needle) } else { FinderRev::new(needle).rfind(haystack) } @@ -321,7 +247,7 @@ impl<'h, 'n> FindIter<'h, 'n> { haystack: &'h [u8], finder: Finder<'n>, ) -> FindIter<'h, 'n> { - let prestate = finder.searcher.prefilter_state(); + let prestate = PrefilterState::new(); FindIter { haystack, prestate, finder, pos: 0 } } @@ -331,8 +257,8 @@ impl<'h, 'n> FindIter<'h, 'n> { /// If this is already an owned iterator, then this is a no-op. Otherwise, /// this copies the needle. /// - /// This is only available when the `std` feature is enabled. - #[cfg(feature = "std")] + /// This is only available when the `alloc` feature is enabled. + #[cfg(feature = "alloc")] #[inline] pub fn into_owned(self) -> FindIter<'h, 'static> { FindIter { @@ -348,20 +274,32 @@ impl<'h, 'n> Iterator for FindIter<'h, 'n> { type Item = usize; fn next(&mut self) -> Option<usize> { - if self.pos > self.haystack.len() { - return None; - } - let result = self - .finder - .searcher - .find(&mut self.prestate, &self.haystack[self.pos..]); - match result { - None => None, - Some(i) => { - let pos = self.pos + i; - self.pos = pos + core::cmp::max(1, self.finder.needle().len()); - Some(pos) - } + let needle = self.finder.needle(); + let haystack = self.haystack.get(self.pos..)?; + let idx = + self.finder.searcher.find(&mut self.prestate, haystack, needle)?; + + let pos = self.pos + idx; + self.pos = pos + needle.len().max(1); + + Some(pos) + } + + fn size_hint(&self) -> (usize, Option<usize>) { + // The largest possible number of non-overlapping matches is the + // quotient of the haystack and the needle (or the length of the + // haystack, if the needle is empty) + match self.haystack.len().checked_sub(self.pos) { + None => (0, Some(0)), + Some(haystack_len) => match self.finder.needle().len() { + // Empty needles always succeed and match at every point + // (including the very end) + 0 => ( + haystack_len.saturating_add(1), + haystack_len.checked_add(1), + ), + needle_len => (0, Some(haystack_len / needle_len)), + }, } } } @@ -398,7 +336,7 @@ impl<'h, 'n> FindRevIter<'h, 'n> { /// this copies the needle. /// /// This is only available when the `std` feature is enabled. 
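// --- Hedged usage sketch (editor's annotation) of the size_hint logic added
// above, assuming the crate's public memmem::find_iter API with this change
// applied: a 6-byte haystack fits at most 6 / 3 = 2 non-overlapping "abc"
// matches, while an empty needle matches at all 7 positions (including the
// position just past the end).
fn size_hint_examples() {
    use memchr::memmem;
    let hay = b"abcabc";
    assert_eq!((0, Some(2)), memmem::find_iter(hay, "abc").size_hint());
    assert_eq!((7, Some(7)), memmem::find_iter(hay, "").size_hint());
}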
- #[cfg(feature = "std")] + #[cfg(feature = "alloc")] #[inline] pub fn into_owned(self) -> FindRevIter<'h, 'static> { FindRevIter { @@ -447,7 +385,8 @@ impl<'h, 'n> Iterator for FindRevIter<'h, 'n> { /// the lifetime of its needle. #[derive(Clone, Debug)] pub struct Finder<'n> { - searcher: Searcher<'n>, + needle: CowBytes<'n>, + searcher: Searcher, } impl<'n> Finder<'n> { @@ -481,8 +420,11 @@ impl<'n> Finder<'n> { /// assert_eq!(Some(4), Finder::new("bar").find(haystack)); /// assert_eq!(None, Finder::new("quux").find(haystack)); /// ``` + #[inline] pub fn find(&self, haystack: &[u8]) -> Option<usize> { - self.searcher.find(&mut self.searcher.prefilter_state(), haystack) + let mut prestate = PrefilterState::new(); + let needle = self.needle.as_slice(); + self.searcher.find(&mut prestate, haystack, needle) } /// Returns an iterator over all occurrences of a substring in a haystack. @@ -525,11 +467,14 @@ impl<'n> Finder<'n> { /// If this is already an owned finder, then this is a no-op. Otherwise, /// this copies the needle. /// - /// This is only available when the `std` feature is enabled. - #[cfg(feature = "std")] + /// This is only available when the `alloc` feature is enabled. + #[cfg(feature = "alloc")] #[inline] pub fn into_owned(self) -> Finder<'static> { - Finder { searcher: self.searcher.into_owned() } + Finder { + needle: self.needle.into_owned(), + searcher: self.searcher.clone(), + } } /// Convert this finder into its borrowed variant. @@ -544,7 +489,10 @@ impl<'n> Finder<'n> { /// shorter of the two. #[inline] pub fn as_ref(&self) -> Finder<'_> { - Finder { searcher: self.searcher.as_ref() } + Finder { + needle: CowBytes::new(self.needle()), + searcher: self.searcher.clone(), + } } /// Returns the needle that this finder searches for. @@ -555,7 +503,7 @@ impl<'n> Finder<'n> { /// needle returned must necessarily be the shorter of the two. #[inline] pub fn needle(&self) -> &[u8] { - self.searcher.needle() + self.needle.as_slice() } } @@ -574,7 +522,8 @@ impl<'n> Finder<'n> { /// the lifetime of its needle. #[derive(Clone, Debug)] pub struct FinderRev<'n> { - searcher: SearcherRev<'n>, + needle: CowBytes<'n>, + searcher: SearcherRev, } impl<'n> FinderRev<'n> { @@ -612,7 +561,7 @@ impl<'n> FinderRev<'n> { /// assert_eq!(None, FinderRev::new("quux").rfind(haystack)); /// ``` pub fn rfind<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> { - self.searcher.rfind(haystack.as_ref()) + self.searcher.rfind(haystack.as_ref(), self.needle.as_slice()) } /// Returns a reverse iterator over all occurrences of a substring in a @@ -657,10 +606,13 @@ impl<'n> FinderRev<'n> { /// this copies the needle. /// /// This is only available when the `std` feature is enabled. - #[cfg(feature = "std")] + #[cfg(feature = "alloc")] #[inline] pub fn into_owned(self) -> FinderRev<'static> { - FinderRev { searcher: self.searcher.into_owned() } + FinderRev { + needle: self.needle.into_owned(), + searcher: self.searcher.clone(), + } } /// Convert this finder into its borrowed variant. @@ -675,7 +627,10 @@ impl<'n> FinderRev<'n> { /// shorter of the two. #[inline] pub fn as_ref(&self) -> FinderRev<'_> { - FinderRev { searcher: self.searcher.as_ref() } + FinderRev { + needle: CowBytes::new(self.needle()), + searcher: self.searcher.clone(), + } } /// Returns the needle that this finder searches for. @@ -686,7 +641,7 @@ impl<'n> FinderRev<'n> { /// needle returned must necessarily be the shorter of the two. 
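// --- A minimal sketch (editor's annotation, requires the alloc feature) of
// why into_owned exists: a borrowed Finder<'n> cannot outlive its needle,
// but an owned copy of the needle can be stored or returned with a 'static
// lifetime. `make_finder` is a hypothetical helper, not part of the crate.
fn make_finder(pattern: &str) -> memchr::memmem::Finder<'static> {
    memchr::memmem::Finder::new(pattern).into_owned()
}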
#[inline] pub fn needle(&self) -> &[u8] { - self.searcher.needle() + self.needle.as_slice() } } @@ -697,7 +652,7 @@ impl<'n> FinderRev<'n> { /// heuristic prefilters used to speed up certain searches. #[derive(Clone, Debug, Default)] pub struct FinderBuilder { - config: SearcherConfig, + prefilter: Prefilter, } impl FinderBuilder { @@ -712,7 +667,26 @@ impl FinderBuilder { &self, needle: &'n B, ) -> Finder<'n> { - Finder { searcher: Searcher::new(self.config, needle.as_ref()) } + self.build_forward_with_ranker(DefaultFrequencyRank, needle) + } + + /// Build a forward finder using the given needle and a custom heuristic for + /// determining the frequency of a given byte in the dataset. + /// See [`HeuristicFrequencyRank`] for more details. + pub fn build_forward_with_ranker< + 'n, + R: HeuristicFrequencyRank, + B: ?Sized + AsRef<[u8]>, + >( + &self, + ranker: R, + needle: &'n B, + ) -> Finder<'n> { + let needle = needle.as_ref(); + Finder { + needle: CowBytes::new(needle), + searcher: Searcher::new(self.prefilter, ranker, needle), + } } /// Build a reverse finder using the given needle from the current @@ -721,7 +695,11 @@ impl FinderBuilder { &self, needle: &'n B, ) -> FinderRev<'n> { - FinderRev { searcher: SearcherRev::new(needle.as_ref()) } + let needle = needle.as_ref(); + FinderRev { + needle: CowBytes::new(needle), + searcher: SearcherRev::new(needle), + } } /// Configure the prefilter setting for the finder. @@ -729,593 +707,31 @@ impl FinderBuilder { /// See the documentation for [`Prefilter`] for more discussion on why /// you might want to configure this. pub fn prefilter(&mut self, prefilter: Prefilter) -> &mut FinderBuilder { - self.config.prefilter = prefilter; + self.prefilter = prefilter; self } } -/// The internal implementation of a forward substring searcher. -/// -/// The reality is that this is a "meta" searcher. Namely, depending on a -/// variety of parameters (CPU support, target, needle size, haystack size and -/// even dynamic properties such as prefilter effectiveness), the actual -/// algorithm employed to do substring search may change. -#[derive(Clone, Debug)] -struct Searcher<'n> { - /// The actual needle we're searching for. - /// - /// A CowBytes is like a Cow<[u8]>, except in no_std environments, it is - /// specialized to a single variant (the borrowed form). - needle: CowBytes<'n>, - /// A collection of facts computed on the needle that are useful for more - /// than one substring search algorithm. - ninfo: NeedleInfo, - /// A prefilter function, if it was deemed appropriate. - /// - /// Some substring search implementations (like Two-Way) benefit greatly - /// if we can quickly find candidate starting positions for a match. - prefn: Option<PrefilterFn>, - /// The actual substring implementation in use. - kind: SearcherKind, -} - -/// A collection of facts computed about a search needle. -/// -/// We group these things together because it's useful to be able to hand them -/// to prefilters or substring algorithms that want them. -#[derive(Clone, Copy, Debug)] -pub(crate) struct NeedleInfo { - /// The offsets of "rare" bytes detected in the needle. - /// - /// This is meant to be a heuristic in order to maximize the effectiveness - /// of vectorized code. Namely, vectorized code tends to focus on only - /// one or two bytes. If we pick bytes from the needle that occur - /// infrequently, then more time will be spent in the vectorized code and - /// will likely make the overall search (much) faster. 
- /// - /// Of course, this is only a heuristic based on a background frequency - /// distribution of bytes. But it tends to work very well in practice. - pub(crate) rarebytes: RareNeedleBytes, - /// A Rabin-Karp hash of the needle. - /// - /// This is store here instead of in a more specific Rabin-Karp search - /// since Rabin-Karp may be used even if another SearchKind corresponds - /// to some other search implementation. e.g., If measurements suggest RK - /// is faster in some cases or if a search implementation can't handle - /// particularly small haystack. (Moreover, we cannot use RK *generally*, - /// since its worst case time is multiplicative. Instead, we only use it - /// some small haystacks, where "small" is a constant.) - pub(crate) nhash: NeedleHash, -} - -/// Configuration for substring search. -#[derive(Clone, Copy, Debug, Default)] -struct SearcherConfig { - /// This permits changing the behavior of the prefilter, since it can have - /// a variable impact on performance. - prefilter: Prefilter, -} - -#[derive(Clone, Debug)] -enum SearcherKind { - /// A special case for empty needles. An empty needle always matches, even - /// in an empty haystack. - Empty, - /// This is used whenever the needle is a single byte. In this case, we - /// always use memchr. - OneByte(u8), - /// Two-Way is the generic work horse and is what provides our additive - /// linear time guarantee. In general, it's used when the needle is bigger - /// than 8 bytes or so. - TwoWay(twoway::Forward), - #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] - GenericSIMD128(x86::sse::Forward), - #[cfg(memchr_runtime_wasm128)] - GenericSIMD128(wasm::Forward), - #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] - GenericSIMD256(x86::avx::Forward), -} - -impl<'n> Searcher<'n> { - fn new(config: SearcherConfig, needle: &'n [u8]) -> Searcher<'n> { - use self::SearcherKind::*; - - let ninfo = NeedleInfo::new(needle); - let mk = |kind: SearcherKind| { - let prefn = prefilter::forward( - &config.prefilter, - &ninfo.rarebytes, - needle, - ); - Searcher { needle: CowBytes::new(needle), ninfo, prefn, kind } - }; - if needle.len() == 0 { - return mk(Empty); - } - if needle.len() == 1 { - return mk(OneByte(needle[0])); - } - #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] - { - if let Some(fwd) = x86::avx::Forward::new(&ninfo, needle) { - return mk(GenericSIMD256(fwd)); - } else if let Some(fwd) = x86::sse::Forward::new(&ninfo, needle) { - return mk(GenericSIMD128(fwd)); - } - } - #[cfg(all(target_arch = "wasm32", memchr_runtime_simd))] - { - if let Some(fwd) = wasm::Forward::new(&ninfo, needle) { - return mk(GenericSIMD128(fwd)); - } - } - - mk(TwoWay(twoway::Forward::new(needle))) - } - - /// Return a fresh prefilter state that can be used with this searcher. - /// A prefilter state is used to track the effectiveness of a searcher's - /// prefilter for speeding up searches. Therefore, the prefilter state - /// should generally be reused on subsequent searches (such as in an - /// iterator). For searches on a different haystack, then a new prefilter - /// state should be used. - /// - /// This always initializes a valid (but possibly inert) prefilter state - /// even if this searcher does not have a prefilter enabled. 
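// --- Condensed model (editor's sketch, not the deleted code itself) of the
// "meta" dispatch that Searcher::new performs above: the algorithm is chosen
// from the needle's shape and the available CPU features. `Algo` and `select`
// are illustrative names; the 32-byte cutoff mirrors MAX_NEEDLE_LEN.
enum Algo {
    Empty,       // empty needle: always matches
    OneByte(u8), // defer to memchr
    Simd,        // generic SIMD path for needles of 2..=32 bytes
    TwoWay,      // additive linear-time workhorse for everything else
}

fn select(needle: &[u8], simd_available: bool) -> Algo {
    match needle.len() {
        0 => Algo::Empty,
        1 => Algo::OneByte(needle[0]),
        n if simd_available && n <= 32 => Algo::Simd,
        _ => Algo::TwoWay,
    }
}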
- fn prefilter_state(&self) -> PrefilterState { - if self.prefn.is_none() { - PrefilterState::inert() - } else { - PrefilterState::new() - } - } - - fn needle(&self) -> &[u8] { - self.needle.as_slice() - } - - fn as_ref(&self) -> Searcher<'_> { - use self::SearcherKind::*; - - let kind = match self.kind { - Empty => Empty, - OneByte(b) => OneByte(b), - TwoWay(tw) => TwoWay(tw), - #[cfg(all(not(miri), memchr_runtime_simd))] - GenericSIMD128(gs) => GenericSIMD128(gs), - #[cfg(all( - not(miri), - target_arch = "x86_64", - memchr_runtime_simd - ))] - GenericSIMD256(gs) => GenericSIMD256(gs), - }; - Searcher { - needle: CowBytes::new(self.needle()), - ninfo: self.ninfo, - prefn: self.prefn, - kind, - } - } - - #[cfg(feature = "std")] - fn into_owned(self) -> Searcher<'static> { - use self::SearcherKind::*; - - let kind = match self.kind { - Empty => Empty, - OneByte(b) => OneByte(b), - TwoWay(tw) => TwoWay(tw), - #[cfg(all(not(miri), memchr_runtime_simd))] - GenericSIMD128(gs) => GenericSIMD128(gs), - #[cfg(all( - not(miri), - target_arch = "x86_64", - memchr_runtime_simd - ))] - GenericSIMD256(gs) => GenericSIMD256(gs), - }; - Searcher { - needle: self.needle.into_owned(), - ninfo: self.ninfo, - prefn: self.prefn, - kind, - } - } - - /// Implements forward substring search by selecting the implementation - /// chosen at construction and executing it on the given haystack with the - /// prefilter's current state of effectiveness. - #[inline(always)] - fn find( - &self, - state: &mut PrefilterState, - haystack: &[u8], - ) -> Option<usize> { - use self::SearcherKind::*; - - let needle = self.needle(); - if haystack.len() < needle.len() { - return None; - } - match self.kind { - Empty => Some(0), - OneByte(b) => crate::memchr(b, haystack), - TwoWay(ref tw) => { - // For very short haystacks (e.g., where the prefilter probably - // can't run), it's faster to just run RK. - if rabinkarp::is_fast(haystack, needle) { - rabinkarp::find_with(&self.ninfo.nhash, haystack, needle) - } else { - self.find_tw(tw, state, haystack, needle) - } - } - #[cfg(all(not(miri), memchr_runtime_simd))] - GenericSIMD128(ref gs) => { - // The SIMD matcher can't handle particularly short haystacks, - // so we fall back to RK in these cases. - if haystack.len() < gs.min_haystack_len() { - rabinkarp::find_with(&self.ninfo.nhash, haystack, needle) - } else { - gs.find(haystack, needle) - } - } - #[cfg(all( - not(miri), - target_arch = "x86_64", - memchr_runtime_simd - ))] - GenericSIMD256(ref gs) => { - // The SIMD matcher can't handle particularly short haystacks, - // so we fall back to RK in these cases. - if haystack.len() < gs.min_haystack_len() { - rabinkarp::find_with(&self.ninfo.nhash, haystack, needle) - } else { - gs.find(haystack, needle) - } - } - } - } - - /// Calls Two-Way on the given haystack/needle. - /// - /// This is marked as unlineable since it seems to have a better overall - /// effect on benchmarks. However, this is one of those cases where - /// inlining it results an improvement in other benchmarks too, so I - /// suspect we just don't have enough data yet to make the right call here. - /// - /// I suspect the main problem is that this function contains two different - /// inlined copies of Two-Way: one with and one without prefilters enabled. - #[inline(never)] - fn find_tw( - &self, - tw: &twoway::Forward, - state: &mut PrefilterState, - haystack: &[u8], - needle: &[u8], - ) -> Option<usize> { - if let Some(prefn) = self.prefn { - // We used to look at the length of a haystack here. 
That is, if - // it was too small, then don't bother with the prefilter. But two - // things changed: the prefilter falls back to memchr for small - // haystacks, and, above, Rabin-Karp is employed for tiny haystacks - // anyway. - if state.is_effective() { - let mut pre = Pre { state, prefn, ninfo: &self.ninfo }; - return tw.find(Some(&mut pre), haystack, needle); - } - } - tw.find(None, haystack, needle) - } -} - -impl NeedleInfo { - pub(crate) fn new(needle: &[u8]) -> NeedleInfo { - NeedleInfo { - rarebytes: RareNeedleBytes::forward(needle), - nhash: NeedleHash::forward(needle), - } - } -} - -/// The internal implementation of a reverse substring searcher. -/// -/// See the forward searcher docs for more details. Currently, the reverse -/// searcher is considerably simpler since it lacks prefilter support. This -/// was done because it adds a lot of code, and more surface area to test. And -/// in particular, it's not clear whether a prefilter on reverse searching is -/// worth it. (If you have a compelling use case, please file an issue!) -#[derive(Clone, Debug)] -struct SearcherRev<'n> { - /// The actual needle we're searching for. - needle: CowBytes<'n>, - /// A Rabin-Karp hash of the needle. - nhash: NeedleHash, - /// The actual substring implementation in use. - kind: SearcherRevKind, -} - -#[derive(Clone, Debug)] -enum SearcherRevKind { - /// A special case for empty needles. An empty needle always matches, even - /// in an empty haystack. - Empty, - /// This is used whenever the needle is a single byte. In this case, we - /// always use memchr. - OneByte(u8), - /// Two-Way is the generic work horse and is what provides our additive - /// linear time guarantee. In general, it's used when the needle is bigger - /// than 8 bytes or so. - TwoWay(twoway::Reverse), -} - -impl<'n> SearcherRev<'n> { - fn new(needle: &'n [u8]) -> SearcherRev<'n> { - use self::SearcherRevKind::*; - - let kind = if needle.len() == 0 { - Empty - } else if needle.len() == 1 { - OneByte(needle[0]) - } else { - TwoWay(twoway::Reverse::new(needle)) - }; - SearcherRev { - needle: CowBytes::new(needle), - nhash: NeedleHash::reverse(needle), - kind, - } - } - - fn needle(&self) -> &[u8] { - self.needle.as_slice() - } - - fn as_ref(&self) -> SearcherRev<'_> { - use self::SearcherRevKind::*; - - let kind = match self.kind { - Empty => Empty, - OneByte(b) => OneByte(b), - TwoWay(tw) => TwoWay(tw), - }; - SearcherRev { - needle: CowBytes::new(self.needle()), - nhash: self.nhash, - kind, - } - } - - #[cfg(feature = "std")] - fn into_owned(self) -> SearcherRev<'static> { - use self::SearcherRevKind::*; - - let kind = match self.kind { - Empty => Empty, - OneByte(b) => OneByte(b), - TwoWay(tw) => TwoWay(tw), - }; - SearcherRev { - needle: self.needle.into_owned(), - nhash: self.nhash, - kind, - } - } - - /// Implements reverse substring search by selecting the implementation - /// chosen at construction and executing it on the given haystack with the - /// prefilter's current state of effectiveness. - #[inline(always)] - fn rfind(&self, haystack: &[u8]) -> Option<usize> { - use self::SearcherRevKind::*; - - let needle = self.needle(); - if haystack.len() < needle.len() { - return None; - } - match self.kind { - Empty => Some(haystack.len()), - OneByte(b) => crate::memrchr(b, haystack), - TwoWay(ref tw) => { - // For very short haystacks (e.g., where the prefilter probably - // can't run), it's faster to just run RK. 
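// --- The Rabin-Karp fallback used above for tiny haystacks trades a
// multiplicative worst case for very low startup cost. An illustrative
// rolling-hash version (editor's sketch, not the crate's rabinkarp module):
fn rk_find(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    const B: u32 = 101;
    let (n, m) = (haystack.len(), needle.len());
    if m == 0 {
        return Some(0);
    }
    if n < m {
        return None;
    }
    // B^(m-1), used to remove the outgoing byte from the rolling hash.
    let pow = (1..m).fold(1u32, |p, _| p.wrapping_mul(B));
    let hash = |s: &[u8]| {
        s.iter().fold(0u32, |h, &b| h.wrapping_mul(B).wrapping_add(b as u32))
    };
    let nh = hash(needle);
    let mut h = hash(&haystack[..m]);
    for i in 0..=n - m {
        // hash equality is only a candidate; confirm with a real comparison
        if h == nh && &haystack[i..i + m] == needle {
            return Some(i);
        }
        if i + m < n {
            // roll the window: drop haystack[i], append haystack[i + m]
            h = h
                .wrapping_sub((haystack[i] as u32).wrapping_mul(pow))
                .wrapping_mul(B)
                .wrapping_add(haystack[i + m] as u32);
        }
    }
    None
}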
- if rabinkarp::is_fast(haystack, needle) { - rabinkarp::rfind_with(&self.nhash, haystack, needle) - } else { - tw.rfind(haystack, needle) - } - } - } - } -} - -/// This module defines some generic quickcheck properties useful for testing -/// any substring search algorithm. It also runs those properties for the -/// top-level public API memmem routines. (The properties are also used to -/// test various substring search implementations more granularly elsewhere as -/// well.) -#[cfg(all(test, feature = "std", not(miri)))] -mod proptests { - // N.B. This defines the quickcheck tests using the properties defined - // below. Because of macro-visibility weirdness, the actual macro is - // defined at the top of this file. - define_memmem_quickcheck_tests!(super::find, super::rfind); - - /// Check that every prefix of the given byte string is a substring. - pub(crate) fn prefix_is_substring( - reverse: bool, - bs: &[u8], - mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>, - ) -> bool { - if bs.is_empty() { - return true; - } - for i in 0..(bs.len() - 1) { - let prefix = &bs[..i]; - if reverse { - assert_eq!(naive_rfind(bs, prefix), search(bs, prefix)); - } else { - assert_eq!(naive_find(bs, prefix), search(bs, prefix)); - } - } - true - } - - /// Check that every suffix of the given byte string is a substring. - pub(crate) fn suffix_is_substring( - reverse: bool, - bs: &[u8], - mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>, - ) -> bool { - if bs.is_empty() { - return true; - } - for i in 0..(bs.len() - 1) { - let suffix = &bs[i..]; - if reverse { - assert_eq!(naive_rfind(bs, suffix), search(bs, suffix)); - } else { - assert_eq!(naive_find(bs, suffix), search(bs, suffix)); - } - } - true - } - - /// Check that naive substring search matches the result of the given search - /// algorithm. - pub(crate) fn matches_naive( - reverse: bool, - haystack: &[u8], - needle: &[u8], - mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>, - ) -> bool { - if reverse { - naive_rfind(haystack, needle) == search(haystack, needle) - } else { - naive_find(haystack, needle) == search(haystack, needle) - } - } - - /// Naively search forwards for the given needle in the given haystack. - fn naive_find(haystack: &[u8], needle: &[u8]) -> Option<usize> { - if needle.is_empty() { - return Some(0); - } else if haystack.len() < needle.len() { - return None; - } - for i in 0..(haystack.len() - needle.len() + 1) { - if needle == &haystack[i..i + needle.len()] { - return Some(i); - } - } - None - } - - /// Naively search in reverse for the given needle in the given haystack. - fn naive_rfind(haystack: &[u8], needle: &[u8]) -> Option<usize> { - if needle.is_empty() { - return Some(haystack.len()); - } else if haystack.len() < needle.len() { - return None; - } - for i in (0..(haystack.len() - needle.len() + 1)).rev() { - if needle == &haystack[i..i + needle.len()] { - return Some(i); - } - } - None - } -} - -/// This module defines some hand-written "simple" substring tests. It -/// also provides routines for easily running them on any substring search -/// implementation. #[cfg(test)] -mod testsimples { - define_memmem_simple_tests!(super::find, super::rfind); - - /// Each test is a (needle, haystack, expected_fwd, expected_rev) tuple. 
- type SearchTest = - (&'static str, &'static str, Option<usize>, Option<usize>); - - const SEARCH_TESTS: &'static [SearchTest] = &[ - ("", "", Some(0), Some(0)), - ("", "a", Some(0), Some(1)), - ("", "ab", Some(0), Some(2)), - ("", "abc", Some(0), Some(3)), - ("a", "", None, None), - ("a", "a", Some(0), Some(0)), - ("a", "aa", Some(0), Some(1)), - ("a", "ba", Some(1), Some(1)), - ("a", "bba", Some(2), Some(2)), - ("a", "bbba", Some(3), Some(3)), - ("a", "bbbab", Some(3), Some(3)), - ("a", "bbbabb", Some(3), Some(3)), - ("a", "bbbabbb", Some(3), Some(3)), - ("a", "bbbbbb", None, None), - ("ab", "", None, None), - ("ab", "a", None, None), - ("ab", "b", None, None), - ("ab", "ab", Some(0), Some(0)), - ("ab", "aab", Some(1), Some(1)), - ("ab", "aaab", Some(2), Some(2)), - ("ab", "abaab", Some(0), Some(3)), - ("ab", "baaab", Some(3), Some(3)), - ("ab", "acb", None, None), - ("ab", "abba", Some(0), Some(0)), - ("abc", "ab", None, None), - ("abc", "abc", Some(0), Some(0)), - ("abc", "abcz", Some(0), Some(0)), - ("abc", "abczz", Some(0), Some(0)), - ("abc", "zabc", Some(1), Some(1)), - ("abc", "zzabc", Some(2), Some(2)), - ("abc", "azbc", None, None), - ("abc", "abzc", None, None), - ("abczdef", "abczdefzzzzzzzzzzzzzzzzzzzz", Some(0), Some(0)), - ("abczdef", "zzzzzzzzzzzzzzzzzzzzabczdef", Some(20), Some(20)), - ("xyz", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaxyz", Some(32), Some(32)), - // Failures caught by quickcheck. - ("\u{0}\u{15}", "\u{0}\u{15}\u{15}\u{0}", Some(0), Some(0)), - ("\u{0}\u{1e}", "\u{1e}\u{0}", None, None), - ]; - - /// Run the substring search tests. `search` should be a closure that - /// accepts a haystack and a needle and returns the starting position - /// of the first occurrence of needle in the haystack, or `None` if one - /// doesn't exist. - pub(crate) fn run_search_tests_fwd( - mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>, - ) { - for &(needle, haystack, expected_fwd, _) in SEARCH_TESTS { - let (n, h) = (needle.as_bytes(), haystack.as_bytes()); - assert_eq!( - expected_fwd, - search(h, n), - "needle: {:?}, haystack: {:?}, expected: {:?}", - n, - h, - expected_fwd - ); - } - } - - /// Run the substring search tests. `search` should be a closure that - /// accepts a haystack and a needle and returns the starting position of - /// the last occurrence of needle in the haystack, or `None` if one doesn't - /// exist. - pub(crate) fn run_search_tests_rev( - mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>, - ) { - for &(needle, haystack, _, expected_rev) in SEARCH_TESTS { - let (n, h) = (needle.as_bytes(), haystack.as_bytes()); - assert_eq!( - expected_rev, - search(h, n), - "needle: {:?}, haystack: {:?}, expected: {:?}", - n, - h, - expected_rev - ); - } +mod tests { + use super::*; + + define_substring_forward_quickcheck!(|h, n| Some(Finder::new(n).find(h))); + define_substring_reverse_quickcheck!(|h, n| Some( + FinderRev::new(n).rfind(h) + )); + + #[test] + fn forward() { + crate::tests::substring::Runner::new() + .fwd(|h, n| Some(Finder::new(n).find(h))) + .run(); + } + + #[test] + fn reverse() { + crate::tests::substring::Runner::new() + .rev(|h, n| Some(FinderRev::new(n).rfind(h))) + .run(); } } diff --git a/vendor/memchr/src/memmem/prefilter/fallback.rs b/vendor/memchr/src/memmem/prefilter/fallback.rs deleted file mode 100644 index ae1bbccb3..000000000 --- a/vendor/memchr/src/memmem/prefilter/fallback.rs +++ /dev/null @@ -1,122 +0,0 @@ -/* -This module implements a "fallback" prefilter that only relies on memchr to -function. 
While memchr works best when it's explicitly vectorized, its -fallback implementations are fast enough to make a prefilter like this -worthwhile. - -The essence of this implementation is to identify two rare bytes in a needle -based on a background frequency distribution of bytes. We then run memchr on the -rarer byte. For each match, we use the second rare byte as a guard to quickly -check if a match is possible. If the position passes the guard test, then we do -a naive memcmp to confirm the match. - -In practice, this formulation works amazingly well, primarily because of the -heuristic use of a background frequency distribution. However, it does have a -number of weaknesses where it can get quite slow when its background frequency -distribution doesn't line up with the haystack being searched. This is why we -have specialized vector routines that essentially take this idea and move the -guard check into vectorized code. (Those specialized vector routines do still -make use of the background frequency distribution of bytes though.) - -This fallback implementation was originally formulated in regex many moons ago: -https://github.com/rust-lang/regex/blob/3db8722d0b204a85380fe2a65e13d7065d7dd968/src/literal/imp.rs#L370-L501 -Prior to that, I'm not aware of anyone using this technique in any prominent -substring search implementation. Although, I'm sure folks have had this same -insight long before me. - -Another version of this also appeared in bstr: -https://github.com/BurntSushi/bstr/blob/a444256ca7407fe180ee32534688549655b7a38e/src/search/prefilter.rs#L83-L340 -*/ - -use crate::memmem::{ - prefilter::{PrefilterFnTy, PrefilterState}, - NeedleInfo, -}; - -// Check that the functions below satisfy the Prefilter function type. -const _: PrefilterFnTy = find; - -/// Look for a possible occurrence of needle. The position returned -/// corresponds to the beginning of the occurrence, if one exists. -/// -/// Callers may assume that this never returns false negatives (i.e., it -/// never misses an actual occurrence), but must check that the returned -/// position corresponds to a match. That is, it can return false -/// positives. -/// -/// This should only be used when Freqy is constructed for forward -/// searching. -pub(crate) fn find( - prestate: &mut PrefilterState, - ninfo: &NeedleInfo, - haystack: &[u8], - needle: &[u8], -) -> Option<usize> { - let mut i = 0; - let (rare1i, rare2i) = ninfo.rarebytes.as_rare_usize(); - let (rare1, rare2) = ninfo.rarebytes.as_rare_bytes(needle); - while prestate.is_effective() { - // Use a fast vectorized implementation to skip to the next - // occurrence of the rarest byte (heuristically chosen) in the - // needle. - let found = crate::memchr(rare1, &haystack[i..])?; - prestate.update(found); - i += found; - - // If we can't align our first match with the haystack, then a - // match is impossible. - if i < rare1i { - i += 1; - continue; - } - - // Align our rare2 byte with the haystack. A mismatch means that - // a match is impossible. - let aligned_rare2i = i - rare1i + rare2i; - if haystack.get(aligned_rare2i) != Some(&rare2) { - i += 1; - continue; - } - - // We've done what we can. There might be a match here. - return Some(i - rare1i); - } - // The only way we get here is if we believe our skipping heuristic - // has become ineffective. We're allowed to return false positives, - // so return the position at which we advanced to, aligned to the - // haystack. 
- Some(i.saturating_sub(rare1i)) -} - -#[cfg(all(test, feature = "std"))] -mod tests { - use super::*; - - fn freqy_find(haystack: &[u8], needle: &[u8]) -> Option<usize> { - let ninfo = NeedleInfo::new(needle); - let mut prestate = PrefilterState::new(); - find(&mut prestate, &ninfo, haystack, needle) - } - - #[test] - fn freqy_forward() { - assert_eq!(Some(0), freqy_find(b"BARFOO", b"BAR")); - assert_eq!(Some(3), freqy_find(b"FOOBAR", b"BAR")); - assert_eq!(Some(0), freqy_find(b"zyzz", b"zyzy")); - assert_eq!(Some(2), freqy_find(b"zzzy", b"zyzy")); - assert_eq!(None, freqy_find(b"zazb", b"zyzy")); - assert_eq!(Some(0), freqy_find(b"yzyy", b"yzyz")); - assert_eq!(Some(2), freqy_find(b"yyyz", b"yzyz")); - assert_eq!(None, freqy_find(b"yayb", b"yzyz")); - } - - #[test] - #[cfg(not(miri))] - fn prefilter_permutations() { - use crate::memmem::prefilter::tests::PrefilterTest; - - // SAFETY: super::find is safe to call for all inputs and on all - // platforms. - unsafe { PrefilterTest::run_all_tests(super::find) }; - } -} diff --git a/vendor/memchr/src/memmem/prefilter/genericsimd.rs b/vendor/memchr/src/memmem/prefilter/genericsimd.rs deleted file mode 100644 index 1a6e38734..000000000 --- a/vendor/memchr/src/memmem/prefilter/genericsimd.rs +++ /dev/null @@ -1,207 +0,0 @@ -use core::mem::size_of; - -use crate::memmem::{ - prefilter::{PrefilterFnTy, PrefilterState}, - vector::Vector, - NeedleInfo, -}; - -/// The implementation of the forward vector accelerated candidate finder. -/// -/// This is inspired by the "generic SIMD" algorithm described here: -/// http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd -/// -/// The main difference is that this is just a prefilter. That is, it reports -/// candidates once they are seen and doesn't attempt to confirm them. Also, -/// the bytes this routine uses to check for candidates are selected based on -/// an a priori background frequency distribution. This means that on most -/// haystacks, this will on average spend more time in vectorized code than you -/// would if you just selected the first and last bytes of the needle. -/// -/// Note that a non-prefilter variant of this algorithm can be found in the -/// parent module, but it only works on smaller needles. -/// -/// `prestate`, `ninfo`, `haystack` and `needle` are the four prefilter -/// function parameters. `fallback` is a prefilter that is used if the haystack -/// is too small to be handled with the given vector size. -/// -/// This routine is not safe because it is intended for callers to specialize -/// this with a particular vector (e.g., __m256i) and then call it with the -/// relevant target feature (e.g., avx2) enabled. -/// -/// # Panics -/// -/// If `needle.len() <= 1`, then this panics. -/// -/// # Safety -/// -/// Since this is meant to be used with vector functions, callers need to -/// specialize this inside of a function with a `target_feature` attribute. -/// Therefore, callers must ensure that whatever target feature is being used -/// supports the vector functions that this function is specialized for. (For -/// the specific vector functions used, see the Vector trait implementations.) 
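// --- The prefilter contract described in this module (false positives are
// allowed, false negatives never: a candidate may not skip past a real
// match) can be spot-checked against naive search. Editor's hedged test
// sketch; `prefilter` is any closure with the prefilter shape.
fn check_no_false_negative(
    mut prefilter: impl FnMut(&[u8], &[u8]) -> Option<usize>,
    haystack: &[u8],
    needle: &[u8],
) {
    assert!(needle.len() >= 2, "prefilters here require needles of 2+ bytes");
    if let Some(real) =
        haystack.windows(needle.len()).position(|w| w == needle)
    {
        let cand =
            prefilter(haystack, needle).expect("prefilter missed a real match");
        assert!(cand <= real, "prefilter skipped past the leftmost match");
    }
}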
-#[inline(always)] -pub(crate) unsafe fn find<V: Vector>( - prestate: &mut PrefilterState, - ninfo: &NeedleInfo, - haystack: &[u8], - needle: &[u8], - fallback: PrefilterFnTy, -) -> Option<usize> { - assert!(needle.len() >= 2, "needle must be at least 2 bytes"); - let (rare1i, rare2i) = ninfo.rarebytes.as_rare_ordered_usize(); - let min_haystack_len = rare2i + size_of::<V>(); - if haystack.len() < min_haystack_len { - return fallback(prestate, ninfo, haystack, needle); - } - - let start_ptr = haystack.as_ptr(); - let end_ptr = start_ptr.add(haystack.len()); - let max_ptr = end_ptr.sub(min_haystack_len); - let mut ptr = start_ptr; - - let rare1chunk = V::splat(needle[rare1i]); - let rare2chunk = V::splat(needle[rare2i]); - - // N.B. I did experiment with unrolling the loop to deal with size(V) - // bytes at a time and 2*size(V) bytes at a time. The double unroll - // was marginally faster while the quadruple unroll was unambiguously - // slower. In the end, I decided the complexity from unrolling wasn't - // worth it. I used the memmem/krate/prebuilt/huge-en/ benchmarks to - // compare. - while ptr <= max_ptr { - let m = find_in_chunk2(ptr, rare1i, rare2i, rare1chunk, rare2chunk); - if let Some(chunki) = m { - return Some(matched(prestate, start_ptr, ptr, chunki)); - } - ptr = ptr.add(size_of::<V>()); - } - if ptr < end_ptr { - // This routine immediately quits if a candidate match is found. - // That means that if we're here, no candidate matches have been - // found at or before 'ptr'. Thus, we don't need to mask anything - // out even though we might technically search part of the haystack - // that we've already searched (because we know it can't match). - ptr = max_ptr; - let m = find_in_chunk2(ptr, rare1i, rare2i, rare1chunk, rare2chunk); - if let Some(chunki) = m { - return Some(matched(prestate, start_ptr, ptr, chunki)); - } - } - prestate.update(haystack.len()); - None -} - -// Below are two different techniques for checking whether a candidate -// match exists in a given chunk or not. find_in_chunk2 checks two bytes -// where as find_in_chunk3 checks three bytes. The idea behind checking -// three bytes is that while we do a bit more work per iteration, we -// decrease the chances of a false positive match being reported and thus -// make the search faster overall. This actually works out for the -// memmem/krate/prebuilt/huge-en/never-all-common-bytes benchmark, where -// using find_in_chunk3 is about 25% faster than find_in_chunk2. However, -// it turns out that find_in_chunk2 is faster for all other benchmarks, so -// perhaps the extra check isn't worth it in practice. -// -// For now, we go with find_in_chunk2, but we leave find_in_chunk3 around -// to make it easy to switch to and benchmark when possible. - -/// Search for an occurrence of two rare bytes from the needle in the current -/// chunk pointed to by ptr. -/// -/// rare1chunk and rare2chunk correspond to vectors with the rare1 and rare2 -/// bytes repeated in each 8-bit lane, respectively. -/// -/// # Safety -/// -/// It must be safe to do an unaligned read of size(V) bytes starting at both -/// (ptr + rare1i) and (ptr + rare2i). 
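// --- Editor's sketch of the movemask bit trick used by the chunk checks in
// this diff: each set bit in a movemask marks a lane where the rare-byte
// comparisons matched, trailing_zeros finds the lowest candidate offset, and
// `mask &= mask - 1` clears it so the next candidate can be visited.
fn candidate_offsets(mut mask: u32) -> impl Iterator<Item = usize> {
    core::iter::from_fn(move || {
        if mask == 0 {
            return None;
        }
        let off = mask.trailing_zeros() as usize;
        mask &= mask - 1; // clear the lowest set bit
        Some(off)
    })
}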
-#[inline(always)] -unsafe fn find_in_chunk2<V: Vector>( - ptr: *const u8, - rare1i: usize, - rare2i: usize, - rare1chunk: V, - rare2chunk: V, -) -> Option<usize> { - let chunk0 = V::load_unaligned(ptr.add(rare1i)); - let chunk1 = V::load_unaligned(ptr.add(rare2i)); - - let eq0 = chunk0.cmpeq(rare1chunk); - let eq1 = chunk1.cmpeq(rare2chunk); - - let match_offsets = eq0.and(eq1).movemask(); - if match_offsets == 0 { - return None; - } - Some(match_offsets.trailing_zeros() as usize) -} - -/// Search for an occurrence of two rare bytes and the first byte (even if one -/// of the rare bytes is equivalent to the first byte) from the needle in the -/// current chunk pointed to by ptr. -/// -/// firstchunk, rare1chunk and rare2chunk correspond to vectors with the first, -/// rare1 and rare2 bytes repeated in each 8-bit lane, respectively. -/// -/// # Safety -/// -/// It must be safe to do an unaligned read of size(V) bytes starting at ptr, -/// (ptr + rare1i) and (ptr + rare2i). -#[allow(dead_code)] -#[inline(always)] -unsafe fn find_in_chunk3<V: Vector>( - ptr: *const u8, - rare1i: usize, - rare2i: usize, - firstchunk: V, - rare1chunk: V, - rare2chunk: V, -) -> Option<usize> { - let chunk0 = V::load_unaligned(ptr); - let chunk1 = V::load_unaligned(ptr.add(rare1i)); - let chunk2 = V::load_unaligned(ptr.add(rare2i)); - - let eq0 = chunk0.cmpeq(firstchunk); - let eq1 = chunk1.cmpeq(rare1chunk); - let eq2 = chunk2.cmpeq(rare2chunk); - - let match_offsets = eq0.and(eq1).and(eq2).movemask(); - if match_offsets == 0 { - return None; - } - Some(match_offsets.trailing_zeros() as usize) -} - -/// Accepts a chunk-relative offset and returns a haystack relative offset -/// after updating the prefilter state. -/// -/// Why do we use this unlineable function when a search completes? Well, -/// I don't know. Really. Obviously this function was not here initially. -/// When doing profiling, the codegen for the inner loop here looked bad and -/// I didn't know why. There were a couple extra 'add' instructions and an -/// extra 'lea' instruction that I couldn't explain. I hypothesized that the -/// optimizer was having trouble untangling the hot code in the loop from the -/// code that deals with a candidate match. By putting the latter into an -/// unlineable function, it kind of forces the issue and it had the intended -/// effect: codegen improved measurably. It's good for a ~10% improvement -/// across the board on the memmem/krate/prebuilt/huge-en/ benchmarks. -#[cold] -#[inline(never)] -fn matched( - prestate: &mut PrefilterState, - start_ptr: *const u8, - ptr: *const u8, - chunki: usize, -) -> usize { - let found = diff(ptr, start_ptr) + chunki; - prestate.update(found); - found -} - -/// Subtract `b` from `a` and return the difference. `a` must be greater than -/// or equal to `b`. -fn diff(a: *const u8, b: *const u8) -> usize { - debug_assert!(a >= b); - (a as usize) - (b as usize) -} diff --git a/vendor/memchr/src/memmem/prefilter/mod.rs b/vendor/memchr/src/memmem/prefilter/mod.rs deleted file mode 100644 index 015d3b27a..000000000 --- a/vendor/memchr/src/memmem/prefilter/mod.rs +++ /dev/null @@ -1,570 +0,0 @@ -use crate::memmem::{rarebytes::RareNeedleBytes, NeedleInfo}; - -mod fallback; -#[cfg(memchr_runtime_simd)] -mod genericsimd; -#[cfg(all(not(miri), target_arch = "wasm32", memchr_runtime_simd))] -mod wasm; -#[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] -mod x86; - -/// The maximum frequency rank permitted for the fallback prefilter. 
If the -/// rarest byte in the needle has a frequency rank above this value, then no -/// prefilter is used if the fallback prefilter would otherwise be selected. -const MAX_FALLBACK_RANK: usize = 250; - -/// A combination of prefilter effectiveness state, the prefilter function and -/// the needle info required to run a prefilter. -/// -/// For the most part, these are grouped into a single type for convenience, -/// instead of needing to pass around all three as distinct function -/// parameters. -pub(crate) struct Pre<'a> { - /// State that tracks the effectiveness of a prefilter. - pub(crate) state: &'a mut PrefilterState, - /// The actual prefilter function. - pub(crate) prefn: PrefilterFn, - /// Information about a needle, such as its RK hash and rare byte offsets. - pub(crate) ninfo: &'a NeedleInfo, -} - -impl<'a> Pre<'a> { - /// Call this prefilter on the given haystack with the given needle. - #[inline(always)] - pub(crate) fn call( - &mut self, - haystack: &[u8], - needle: &[u8], - ) -> Option<usize> { - self.prefn.call(self.state, self.ninfo, haystack, needle) - } - - /// Return true if and only if this prefilter should be used. - #[inline(always)] - pub(crate) fn should_call(&mut self) -> bool { - self.state.is_effective() - } -} - -/// A prefilter function. -/// -/// A prefilter function describes both forward and reverse searches. -/// (Although, we don't currently implement prefilters for reverse searching.) -/// In the case of a forward search, the position returned corresponds to -/// the starting offset of a match (confirmed or possible). Its minimum -/// value is `0`, and its maximum value is `haystack.len() - 1`. In the case -/// of a reverse search, the position returned corresponds to the position -/// immediately after a match (confirmed or possible). Its minimum value is `1` -/// and its maximum value is `haystack.len()`. -/// -/// In both cases, the position returned is the starting (or ending) point of a -/// _possible_ match. That is, returning a false positive is okay. A prefilter, -/// however, must never return any false negatives. That is, if a match exists -/// at a particular position `i`, then a prefilter _must_ return that position. -/// It cannot skip past it. -/// -/// # Safety -/// -/// A prefilter function is not safe to create, since not all prefilters are -/// safe to call in all contexts. (e.g., A prefilter that uses AVX instructions -/// may only be called on x86_64 CPUs with the relevant AVX feature enabled.) -/// Thus, callers must ensure that when a prefilter function is created that it -/// is safe to call for the current environment. -#[derive(Clone, Copy)] -pub(crate) struct PrefilterFn(PrefilterFnTy); - -/// The type of a prefilter function. All prefilters must satisfy this -/// signature. -/// -/// Using a function pointer like this does inhibit inlining, but it does -/// eliminate branching and the extra costs associated with copying a larger -/// enum. Note also, that using Box<dyn SomePrefilterTrait> can't really work -/// here, since we want to work in contexts that don't have dynamic memory -/// allocation. Moreover, in the default configuration of this crate on x86_64 -/// CPUs released in the past ~decade, we will use an AVX2-optimized prefilter, -/// which generally won't be inlineable into the surrounding code anyway. -/// (Unless AVX2 is enabled at compile time, but this is typically rare, since -/// it produces a non-portable binary.) 
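// [Editor's aside] A minimal sketch of the function-pointer dispatch pattern
// described above, under illustrative names: selection happens once at
// construction time, and every search thereafter is a single indirect call,
// with no enum branching and no heap allocation.
type FindFn = fn(haystack: &[u8], needle: &[u8]) -> Option<usize>;

fn naive_find(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    if needle.is_empty() {
        return Some(0);
    }
    haystack.windows(needle.len()).position(|w| w == needle)
}

struct ToySearcher {
    call: FindFn,
}

impl ToySearcher {
    fn new() -> ToySearcher {
        // A real implementation would select a vectorized routine here when
        // the CPU supports one; this sketch always picks the naive routine.
        ToySearcher { call: naive_find }
    }

    fn find(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> {
        (self.call)(haystack, needle)
    }
}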
-pub(crate) type PrefilterFnTy = unsafe fn( - prestate: &mut PrefilterState, - ninfo: &NeedleInfo, - haystack: &[u8], - needle: &[u8], -) -> Option<usize>; - -// If the haystack is too small for SSE2, then just run memchr on the -// rarest byte and be done with it. (It is likely that this code path is -// rarely exercised, since a higher level routine will probably dispatch to -// Rabin-Karp for such a small haystack.) -#[cfg(memchr_runtime_simd)] -fn simple_memchr_fallback( - _prestate: &mut PrefilterState, - ninfo: &NeedleInfo, - haystack: &[u8], - needle: &[u8], -) -> Option<usize> { - let (rare, _) = ninfo.rarebytes.as_rare_ordered_usize(); - crate::memchr(needle[rare], haystack).map(|i| i.saturating_sub(rare)) -} - -impl PrefilterFn { - /// Create a new prefilter function from the function pointer given. - /// - /// # Safety - /// - /// Callers must ensure that the given prefilter function is safe to call - /// for all inputs in the current environment. For example, if the given - /// prefilter function uses AVX instructions, then the caller must ensure - /// that the appropriate AVX CPU features are enabled. - pub(crate) unsafe fn new(prefn: PrefilterFnTy) -> PrefilterFn { - PrefilterFn(prefn) - } - - /// Call the underlying prefilter function with the given arguments. - pub fn call( - self, - prestate: &mut PrefilterState, - ninfo: &NeedleInfo, - haystack: &[u8], - needle: &[u8], - ) -> Option<usize> { - // SAFETY: Callers have the burden of ensuring that a prefilter - // function is safe to call for all inputs in the current environment. - unsafe { (self.0)(prestate, ninfo, haystack, needle) } - } -} - -impl core::fmt::Debug for PrefilterFn { - fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - "<prefilter-fn(...)>".fmt(f) - } -} - -/// Prefilter controls whether heuristics are used to accelerate searching. -/// -/// A prefilter refers to the idea of detecting candidate matches very quickly, -/// and then confirming whether those candidates are full matches. This -/// idea can be quite effective since it's often the case that looking for -/// candidates can be a lot faster than running a complete substring search -/// over the entire input. Namely, looking for candidates can be done with -/// extremely fast vectorized code. -/// -/// The downside of a prefilter is that it assumes false positives (which are -/// candidates generated by a prefilter that aren't matches) are somewhat rare -/// relative to the frequency of full matches. That is, if a lot of false -/// positives are generated, then it's possible for search time to be worse -/// than if the prefilter wasn't enabled in the first place. -/// -/// Another downside of a prefilter is that it can result in highly variable -/// performance, where some cases are extraordinarily fast and others aren't. -/// Typically, variable performance isn't a problem, but it may be for your use -/// case. -/// -/// The use of prefilters in this implementation does use a heuristic to detect -/// when a prefilter might not be carrying its weight, and will dynamically -/// disable its use. Nevertheless, this configuration option gives callers -/// the ability to disable prefilters if you have knowledge that they won't be -/// useful. -#[derive(Clone, Copy, Debug)] -#[non_exhaustive] -pub enum Prefilter { - /// Never use a prefilter in substring search. - None, - /// Automatically detect whether a heuristic prefilter should be used.
If - /// it is used, then heuristics will be used to dynamically disable the - /// prefilter if it is believed to not be carrying its weight. - Auto, -} - -impl Default for Prefilter { - fn default() -> Prefilter { - Prefilter::Auto - } -} - -impl Prefilter { - pub(crate) fn is_none(&self) -> bool { - match *self { - Prefilter::None => true, - _ => false, - } - } -} - -/// PrefilterState tracks state associated with the effectiveness of a -/// prefilter. It is used to track how many bytes, on average, are skipped by -/// the prefilter. If this average dips below a certain threshold over time, -/// then the state renders the prefilter inert and stops using it. -/// -/// A prefilter state should be created for each search. (Where creating an -/// iterator is treated as a single search.) A prefilter state should only be -/// created from a `Freqy`. e.g., An inert `Freqy` will produce an inert -/// `PrefilterState`. -#[derive(Clone, Debug)] -pub(crate) struct PrefilterState { - /// The number of skips that has been executed. This is always 1 greater - /// than the actual number of skips. The special sentinel value of 0 - /// indicates that the prefilter is inert. This is useful to avoid - /// additional checks to determine whether the prefilter is still - /// "effective." Once a prefilter becomes inert, it should no longer be - /// used (according to our heuristics). - skips: u32, - /// The total number of bytes that have been skipped. - skipped: u32, -} - -impl PrefilterState { - /// The minimum number of skip attempts to try before considering whether - /// a prefilter is effective or not. - const MIN_SKIPS: u32 = 50; - - /// The minimum amount of bytes that skipping must average. - /// - /// This value was chosen based on varying it and checking - /// the microbenchmarks. In particular, this can impact the - /// pathological/repeated-{huge,small} benchmarks quite a bit if it's set - /// too low. - const MIN_SKIP_BYTES: u32 = 8; - - /// Create a fresh prefilter state. - pub(crate) fn new() -> PrefilterState { - PrefilterState { skips: 1, skipped: 0 } - } - - /// Create a fresh prefilter state that is always inert. - pub(crate) fn inert() -> PrefilterState { - PrefilterState { skips: 0, skipped: 0 } - } - - /// Update this state with the number of bytes skipped on the last - /// invocation of the prefilter. - #[inline] - pub(crate) fn update(&mut self, skipped: usize) { - self.skips = self.skips.saturating_add(1); - // We need to do this dance since it's technically possible for - // `skipped` to overflow a `u32`. (And we use a `u32` to reduce the - // size of a prefilter state.) - if skipped > core::u32::MAX as usize { - self.skipped = core::u32::MAX; - } else { - self.skipped = self.skipped.saturating_add(skipped as u32); - } - } - - /// Return true if and only if this state indicates that a prefilter is - /// still effective. - #[inline] - pub(crate) fn is_effective(&mut self) -> bool { - if self.is_inert() { - return false; - } - if self.skips() < PrefilterState::MIN_SKIPS { - return true; - } - if self.skipped >= PrefilterState::MIN_SKIP_BYTES * self.skips() { - return true; - } - - // We're inert. - self.skips = 0; - false - } - - #[inline] - fn is_inert(&self) -> bool { - self.skips == 0 - } - - #[inline] - fn skips(&self) -> u32 { - self.skips.saturating_sub(1) - } -} - -/// Determine which prefilter function, if any, to use. -/// -/// This only applies to x86_64 when runtime SIMD detection is enabled (which -/// is the default). 
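// [Editor's aside] A self-contained model of the effectiveness heuristic
// implemented by PrefilterState above (names are illustrative). After
// MIN_SKIPS attempts, the prefilter stays enabled only while it averages at
// least MIN_SKIP_BYTES skipped bytes per attempt; otherwise it goes inert.
struct ToyPrefilterState {
    skips: u32,   // number of attempts + 1; 0 means inert
    skipped: u32, // total bytes skipped so far
}

impl ToyPrefilterState {
    const MIN_SKIPS: u32 = 50;
    const MIN_SKIP_BYTES: u32 = 8;

    fn is_effective(&mut self) -> bool {
        if self.skips == 0 {
            return false; // already inert
        }
        let attempts = self.skips - 1;
        if attempts < Self::MIN_SKIPS {
            return true; // not enough data to judge yet
        }
        if self.skipped >= Self::MIN_SKIP_BYTES * attempts {
            return true; // averaging enough skipped bytes per attempt
        }
        self.skips = 0; // below threshold: render inert
        false
    }
}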
In general, we try to use an AVX prefilter, followed by -/// SSE and then followed by a generic one based on memchr. -#[inline(always)] -pub(crate) fn forward( - config: &Prefilter, - rare: &RareNeedleBytes, - needle: &[u8], -) -> Option<PrefilterFn> { - if config.is_none() || needle.len() <= 1 { - return None; - } - - #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] - { - #[cfg(feature = "std")] - { - if cfg!(memchr_runtime_avx) { - if is_x86_feature_detected!("avx2") { - // SAFETY: x86::avx::find only requires the avx2 feature, - // which we've just checked above. - return unsafe { Some(PrefilterFn::new(x86::avx::find)) }; - } - } - } - if cfg!(memchr_runtime_sse2) { - // SAFETY: x86::sse::find only requires the sse2 feature, which is - // guaranteed to be available on x86_64. - return unsafe { Some(PrefilterFn::new(x86::sse::find)) }; - } - } - #[cfg(all(not(miri), target_arch = "wasm32", memchr_runtime_simd))] - { - // SAFETY: `wasm::find` is actually a safe function - // - // Also note that the `if true` is here to prevent, on wasm with simd, - // rustc warning about the code below being dead code. - if true { - return unsafe { Some(PrefilterFn::new(wasm::find)) }; - } - } - // Check that our rarest byte has a reasonably low rank. The main issue - // here is that the fallback prefilter can perform pretty poorly if it's - // given common bytes. So we try to avoid the worst cases here. - let (rare1_rank, _) = rare.as_ranks(needle); - if rare1_rank <= MAX_FALLBACK_RANK { - // SAFETY: fallback::find is safe to call in all environments. - return unsafe { Some(PrefilterFn::new(fallback::find)) }; - } - None -} - -/// Return the minimum length of the haystack in which a prefilter should be -/// used. If the haystack is below this length, then it's probably not worth -/// the overhead of running the prefilter. -/// -/// We used to look at the length of a haystack here. That is, if it was too -/// small, then don't bother with the prefilter. But two things changed: -/// the prefilter falls back to memchr for small haystacks, and, at the -/// meta-searcher level, Rabin-Karp is employed for tiny haystacks anyway. -/// -/// We keep it around for now in case we want to bring it back. -#[allow(dead_code)] -pub(crate) fn minimum_len(_haystack: &[u8], needle: &[u8]) -> usize { - // If the haystack length isn't greater than needle.len() * FACTOR, then - // no prefilter will be used. The presumption here is that since there - // are so few bytes to check, it's not worth running the prefilter since - // there will need to be a validation step anyway. Thus, the prefilter is - // largely redundant work. - // - // Increasing the factor noticeably hurts the - // memmem/krate/prebuilt/teeny-*/never-john-watson benchmarks. - const PREFILTER_LENGTH_FACTOR: usize = 2; - const VECTOR_MIN_LENGTH: usize = 16; - let min = core::cmp::max( - VECTOR_MIN_LENGTH, - PREFILTER_LENGTH_FACTOR * needle.len(), - ); - // For haystacks with length==min, we still want to avoid the prefilter, - // so add 1. - min + 1 -} - -#[cfg(all(test, feature = "std", not(miri)))] -pub(crate) mod tests { - use std::convert::{TryFrom, TryInto}; - - use super::*; - use crate::memmem::{ - prefilter::PrefilterFnTy, rabinkarp, rarebytes::RareNeedleBytes, - }; - - // Below is a small jig that generates prefilter tests. The main purpose - // of this jig is to generate tests of varying needle/haystack lengths - // in order to try to exercise all code paths in our prefilters.
And in - // particular, this is especially important for vectorized prefilters where - // certain code paths might only be exercised at certain lengths. - - /// A test that represents the input and expected output to a prefilter - /// function. The test should be able to run with any prefilter function - /// and get the expected output. - pub(crate) struct PrefilterTest { - // These fields represent the inputs and expected output of a forwards - // prefilter function. - pub(crate) ninfo: NeedleInfo, - pub(crate) haystack: Vec<u8>, - pub(crate) needle: Vec<u8>, - pub(crate) output: Option<usize>, - } - - impl PrefilterTest { - /// Run all generated forward prefilter tests on the given prefn. - /// - /// # Safety - /// - /// Callers must ensure that the given prefilter function pointer is - /// safe to call for all inputs in the current environment. - pub(crate) unsafe fn run_all_tests(prefn: PrefilterFnTy) { - PrefilterTest::run_all_tests_filter(prefn, |_| true) - } - - /// Run all generated forward prefilter tests that pass the given - /// predicate on the given prefn. - /// - /// # Safety - /// - /// Callers must ensure that the given prefilter function pointer is - /// safe to call for all inputs in the current environment. - pub(crate) unsafe fn run_all_tests_filter( - prefn: PrefilterFnTy, - mut predicate: impl FnMut(&PrefilterTest) -> bool, - ) { - for seed in PREFILTER_TEST_SEEDS { - for test in seed.generate() { - if predicate(&test) { - test.run(prefn); - } - } - } - } - - /// Create a new prefilter test from a seed and some chosen offsets to - /// rare bytes in the seed's needle. - /// - /// If a valid test could not be constructed, then None is returned. - /// (Currently, we take the approach of massaging tests to be valid - /// instead of rejecting them outright.) - fn new( - seed: PrefilterTestSeed, - rare1i: usize, - rare2i: usize, - haystack_len: usize, - needle_len: usize, - output: Option<usize>, - ) -> Option<PrefilterTest> { - let mut rare1i: u8 = rare1i.try_into().unwrap(); - let mut rare2i: u8 = rare2i.try_into().unwrap(); - // The '#' byte is never used in a haystack (unless we're expecting - // a match), while the '@' byte is never used in a needle. - let mut haystack = vec![b'@'; haystack_len]; - let mut needle = vec![b'#'; needle_len]; - needle[0] = seed.first; - needle[rare1i as usize] = seed.rare1; - needle[rare2i as usize] = seed.rare2; - // If we're expecting a match, then make sure the needle occurs - // in the haystack at the expected position. - if let Some(i) = output { - haystack[i..i + needle.len()].copy_from_slice(&needle); - } - // If the operations above lead to rare offsets pointing to the - // non-first occurrence of a byte, then adjust it. This might lead - // to redundant tests, but it's simpler than trying to change the - // generation process, I think. - if let Some(i) = crate::memchr(seed.rare1, &needle) { - rare1i = u8::try_from(i).unwrap(); - } - if let Some(i) = crate::memchr(seed.rare2, &needle) { - rare2i = u8::try_from(i).unwrap(); - } - let ninfo = NeedleInfo { - rarebytes: RareNeedleBytes::new(rare1i, rare2i), - nhash: rabinkarp::NeedleHash::forward(&needle), - }; - Some(PrefilterTest { ninfo, haystack, needle, output }) - } - - /// Run this specific test on the given prefilter function. If the - /// outputs do not match, then this routine panics with a failure - /// message. - /// - /// # Safety - /// - /// Callers must ensure that the given prefilter function pointer is - /// safe to call for all inputs in the current environment.
- unsafe fn run(&self, prefn: PrefilterFnTy) { - let mut prestate = PrefilterState::new(); - assert_eq!( - self.output, - prefn( - &mut prestate, - &self.ninfo, - &self.haystack, - &self.needle - ), - "ninfo: {:?}, haystack(len={}): {:?}, needle(len={}): {:?}", - self.ninfo, - self.haystack.len(), - std::str::from_utf8(&self.haystack).unwrap(), - self.needle.len(), - std::str::from_utf8(&self.needle).unwrap(), - ); - } - } - - /// A set of prefilter test seeds. Each seed serves as the base for the - /// generation of many other tests. In essence, the seed captures the - /// "rare" and first bytes among our needle. The tests generated from each - /// seed essentially vary the length of the needle and haystack, while - /// using the rare/first byte configuration from the seed. - /// - /// The purpose of this is to test many different needle/haystack lengths. - /// In particular, some of the vector optimizations might only have bugs - /// in haystacks of a certain size. - const PREFILTER_TEST_SEEDS: &[PrefilterTestSeed] = &[ - PrefilterTestSeed { first: b'x', rare1: b'y', rare2: b'z' }, - PrefilterTestSeed { first: b'x', rare1: b'x', rare2: b'z' }, - PrefilterTestSeed { first: b'x', rare1: b'y', rare2: b'x' }, - PrefilterTestSeed { first: b'x', rare1: b'x', rare2: b'x' }, - PrefilterTestSeed { first: b'x', rare1: b'y', rare2: b'y' }, - ]; - - /// Data that describes a single prefilter test seed. - #[derive(Clone, Copy)] - struct PrefilterTestSeed { - first: u8, - rare1: u8, - rare2: u8, - } - - impl PrefilterTestSeed { - /// Generate a series of prefilter tests from this seed. - fn generate(self) -> impl Iterator<Item = PrefilterTest> { - let len_start = 2; - // The iterator below generates *a lot* of tests. The number of - // tests was chosen somewhat empirically to be "bearable" when - // running the test suite. - // - // We use an iterator here because the collective haystacks of all - // these test cases add up to enough memory to OOM a conservative - // sandbox or a small laptop. - (len_start..=40).flat_map(move |needle_len| { - let rare_start = len_start - 1; - (rare_start..needle_len).flat_map(move |rare1i| { - (rare1i..needle_len).flat_map(move |rare2i| { - (needle_len..=66).flat_map(move |haystack_len| { - PrefilterTest::new( - self, - rare1i, - rare2i, - haystack_len, - needle_len, - None, - ) - .into_iter() - .chain( - (0..=(haystack_len - needle_len)).flat_map( - move |output| { - PrefilterTest::new( - self, - rare1i, - rare2i, - haystack_len, - needle_len, - Some(output), - ) - }, - ), - ) - }) - }) - }) - }) - } - } -} diff --git a/vendor/memchr/src/memmem/prefilter/wasm.rs b/vendor/memchr/src/memmem/prefilter/wasm.rs deleted file mode 100644 index 5470c922a..000000000 --- a/vendor/memchr/src/memmem/prefilter/wasm.rs +++ /dev/null @@ -1,39 +0,0 @@ -use core::arch::wasm32::v128; - -use crate::memmem::{ - prefilter::{PrefilterFnTy, PrefilterState}, - NeedleInfo, -}; - -// Check that the functions below satisfy the Prefilter function type. -const _: PrefilterFnTy = find; - -/// A `v128`-accelerated candidate finder for single-substring search. 
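// [Editor's aside] The `const _: PrefilterFnTy = find;` line above is a
// compile-time signature check: the build fails if `find` ever stops matching
// the prefilter function type. The same idiom in miniature:
type BinOp = fn(u32, u32) -> u32;

fn add(x: u32, y: u32) -> u32 {
    x + y
}

// Breaks the build if `add`'s signature drifts away from `BinOp`.
const _: BinOp = add;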
-#[target_feature(enable = "simd128")] -pub(crate) fn find( - prestate: &mut PrefilterState, - ninfo: &NeedleInfo, - haystack: &[u8], - needle: &[u8], -) -> Option<usize> { - unsafe { - super::genericsimd::find::<v128>( - prestate, - ninfo, - haystack, - needle, - super::simple_memchr_fallback, - ) - } -} - -#[cfg(all(test, feature = "std"))] -mod tests { - #[test] - #[cfg(not(miri))] - fn prefilter_permutations() { - use crate::memmem::prefilter::tests::PrefilterTest; - // SAFETY: super::find is safe to call for all inputs on wasm32. - unsafe { PrefilterTest::run_all_tests(super::find) }; - } -} diff --git a/vendor/memchr/src/memmem/prefilter/x86/avx.rs b/vendor/memchr/src/memmem/prefilter/x86/avx.rs deleted file mode 100644 index fb11f335b..000000000 --- a/vendor/memchr/src/memmem/prefilter/x86/avx.rs +++ /dev/null @@ -1,46 +0,0 @@ -use core::arch::x86_64::__m256i; - -use crate::memmem::{ - prefilter::{PrefilterFnTy, PrefilterState}, - NeedleInfo, -}; - -// Check that the functions below satisfy the Prefilter function type. -const _: PrefilterFnTy = find; - -/// An AVX2 accelerated candidate finder for single-substring search. -/// -/// # Safety -/// -/// Callers must ensure that the avx2 CPU feature is enabled in the current -/// environment. -#[target_feature(enable = "avx2")] -pub(crate) unsafe fn find( - prestate: &mut PrefilterState, - ninfo: &NeedleInfo, - haystack: &[u8], - needle: &[u8], -) -> Option<usize> { - super::super::genericsimd::find::<__m256i>( - prestate, - ninfo, - haystack, - needle, - super::sse::find, - ) -} - -#[cfg(test)] -mod tests { - #[test] - #[cfg(not(miri))] - fn prefilter_permutations() { - use crate::memmem::prefilter::tests::PrefilterTest; - if !is_x86_feature_detected!("avx2") { - return; - } - // SAFETY: The safety of super::find only requires that the current - // CPU support AVX2, which we checked above. - unsafe { PrefilterTest::run_all_tests(super::find) }; - } -} diff --git a/vendor/memchr/src/memmem/prefilter/x86/mod.rs b/vendor/memchr/src/memmem/prefilter/x86/mod.rs deleted file mode 100644 index 91381e516..000000000 --- a/vendor/memchr/src/memmem/prefilter/x86/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -// We only use AVX when we can detect at runtime whether it's available, which -// requires std. -#[cfg(feature = "std")] -pub(crate) mod avx; -pub(crate) mod sse; diff --git a/vendor/memchr/src/memmem/prefilter/x86/sse.rs b/vendor/memchr/src/memmem/prefilter/x86/sse.rs deleted file mode 100644 index b1c48e1e1..000000000 --- a/vendor/memchr/src/memmem/prefilter/x86/sse.rs +++ /dev/null @@ -1,42 +0,0 @@ -use core::arch::x86_64::__m128i; - -use crate::memmem::{ - prefilter::{PrefilterFnTy, PrefilterState}, - NeedleInfo, -}; - -// Check that the functions below satisfy the Prefilter function type. -const _: PrefilterFnTy = find; - -/// An SSE2 accelerated candidate finder for single-substring search. -/// -/// # Safety -/// -/// Callers must ensure that the sse2 CPU feature is enabled in the current -/// environment. This feature should be enabled in all x86_64 targets.
-#[target_feature(enable = "sse2")] -pub(crate) unsafe fn find( - prestate: &mut PrefilterState, - ninfo: &NeedleInfo, - haystack: &[u8], - needle: &[u8], -) -> Option<usize> { - super::super::genericsimd::find::<__m128i>( - prestate, - ninfo, - haystack, - needle, - super::super::simple_memchr_fallback, - ) -} - -#[cfg(all(test, feature = "std"))] -mod tests { - #[test] - #[cfg(not(miri))] - fn prefilter_permutations() { - use crate::memmem::prefilter::tests::PrefilterTest; - // SAFETY: super::find is safe to call for all inputs on x86. - unsafe { PrefilterTest::run_all_tests(super::find) }; - } -} diff --git a/vendor/memchr/src/memmem/rabinkarp.rs b/vendor/memchr/src/memmem/rabinkarp.rs deleted file mode 100644 index daa4015d5..000000000 --- a/vendor/memchr/src/memmem/rabinkarp.rs +++ /dev/null @@ -1,233 +0,0 @@ -/* -This module implements the classical Rabin-Karp substring search algorithm, -with no extra frills. While its use would seem to break our time complexity -guarantee of O(m+n) (RK's time complexity is O(mn)), we are careful to only -ever use RK on a constant subset of haystacks. The main point here is that -RK has good latency properties for small needles/haystacks. It's very quick -to compute a needle hash and zip through the haystack when compared to -initializing Two-Way, for example. And this is especially useful for cases -where the haystack is just too short for vector instructions to do much good. - -The hashing function used here is the same one recommended by ESMAJ. - -Another choice instead of Rabin-Karp would be Shift-Or. But its latency -isn't quite as good since its preprocessing time is a bit more expensive -(both in practice and in theory). However, perhaps Shift-Or has a place -somewhere else for short patterns. I think the main problem is that it -requires space proportional to the alphabet and the needle. If we, for -example, supported needles up to length 16, then the total table size would be -len(alphabet)*size_of::<u16>()==512 bytes. Which isn't exactly small, and it's -probably bad to put that on the stack. So ideally, we'd throw it on the heap, -but we'd really like to write as much code without using alloc/std as possible. -But maybe it's worth the special casing. It's a TODO to benchmark. - -Wikipedia has a decent explanation, if a bit heavy on the theory: -https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm - -But ESMAJ provides something a bit more concrete: -http://www-igm.univ-mlv.fr/~lecroq/string/node5.html - -Finally, aho-corasick uses Rabin-Karp for multiple pattern match in some cases: -https://github.com/BurntSushi/aho-corasick/blob/3852632f10587db0ff72ef29e88d58bf305a0946/src/packed/rabinkarp.rs -*/ - -/// Whether RK is believed to be very fast for the given needle/haystack. -pub(crate) fn is_fast(haystack: &[u8], _needle: &[u8]) -> bool { - haystack.len() < 16 -} - -/// Search for the first occurrence of needle in haystack using Rabin-Karp. -pub(crate) fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> { - find_with(&NeedleHash::forward(needle), haystack, needle) -} - -/// Search for the first occurrence of needle in haystack using Rabin-Karp with -/// a pre-computed needle hash. -pub(crate) fn find_with( - nhash: &NeedleHash, - mut haystack: &[u8], - needle: &[u8], -) -> Option<usize> { - if haystack.len() < needle.len() { - return None; - } - let start = haystack.as_ptr() as usize; - let mut hash = Hash::from_bytes_fwd(&haystack[..needle.len()]); - // N.B. 
I've experimented with unrolling this loop, but couldn't realize - // any obvious gains. - loop { - if nhash.eq(hash) && is_prefix(haystack, needle) { - return Some(haystack.as_ptr() as usize - start); - } - if needle.len() >= haystack.len() { - return None; - } - hash.roll(&nhash, haystack[0], haystack[needle.len()]); - haystack = &haystack[1..]; - } -} - -/// Search for the last occurrence of needle in haystack using Rabin-Karp. -pub(crate) fn rfind(haystack: &[u8], needle: &[u8]) -> Option<usize> { - rfind_with(&NeedleHash::reverse(needle), haystack, needle) -} - -/// Search for the last occurrence of needle in haystack using Rabin-Karp with -/// a pre-computed needle hash. -pub(crate) fn rfind_with( - nhash: &NeedleHash, - mut haystack: &[u8], - needle: &[u8], -) -> Option<usize> { - if haystack.len() < needle.len() { - return None; - } - let mut hash = - Hash::from_bytes_rev(&haystack[haystack.len() - needle.len()..]); - loop { - if nhash.eq(hash) && is_suffix(haystack, needle) { - return Some(haystack.len() - needle.len()); - } - if needle.len() >= haystack.len() { - return None; - } - hash.roll( - &nhash, - haystack[haystack.len() - 1], - haystack[haystack.len() - needle.len() - 1], - ); - haystack = &haystack[..haystack.len() - 1]; - } -} - -/// A hash derived from a needle. -#[derive(Clone, Copy, Debug, Default)] -pub(crate) struct NeedleHash { - /// The actual hash. - hash: Hash, - /// The factor needed to multiply a byte by in order to subtract it from - /// the hash. It is defined to be 2^(n-1) (using wrapping exponentiation), - /// where n is the length of the needle. This is how we "remove" a byte - /// from the hash once the hash window rolls past it. - hash_2pow: u32, -} - -impl NeedleHash { - /// Create a new Rabin-Karp hash for the given needle for use in forward - /// searching. - pub(crate) fn forward(needle: &[u8]) -> NeedleHash { - let mut nh = NeedleHash { hash: Hash::new(), hash_2pow: 1 }; - if needle.is_empty() { - return nh; - } - nh.hash.add(needle[0]); - for &b in needle.iter().skip(1) { - nh.hash.add(b); - nh.hash_2pow = nh.hash_2pow.wrapping_shl(1); - } - nh - } - - /// Create a new Rabin-Karp hash for the given needle for use in reverse - /// searching. - pub(crate) fn reverse(needle: &[u8]) -> NeedleHash { - let mut nh = NeedleHash { hash: Hash::new(), hash_2pow: 1 }; - if needle.is_empty() { - return nh; - } - nh.hash.add(needle[needle.len() - 1]); - for &b in needle.iter().rev().skip(1) { - nh.hash.add(b); - nh.hash_2pow = nh.hash_2pow.wrapping_shl(1); - } - nh - } - - /// Return true if the hashes are equivalent. - fn eq(&self, hash: Hash) -> bool { - self.hash == hash - } -} - -/// A Rabin-Karp hash. This might represent the hash of a needle, or the hash -/// of a rolling window in the haystack. -#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] -pub(crate) struct Hash(u32); - -impl Hash { - /// Create a new hash that represents the empty string. - pub(crate) fn new() -> Hash { - Hash(0) - } - - /// Create a new hash from the bytes given for use in forward searches. - pub(crate) fn from_bytes_fwd(bytes: &[u8]) -> Hash { - let mut hash = Hash::new(); - for &b in bytes { - hash.add(b); - } - hash - } - - /// Create a new hash from the bytes given for use in reverse searches. - fn from_bytes_rev(bytes: &[u8]) -> Hash { - let mut hash = Hash::new(); - for &b in bytes.iter().rev() { - hash.add(b); - } - hash - } - - /// Add 'new' and remove 'old' from this hash. 
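// [Editor's aside] A worked example of the rolling hash defined by add() and
// del() below, with illustrative names. For a 3-byte window, hash_2pow =
// 2^(3-1) = 4 and hash("abc") = 4*'a' + 2*'b' + 'c'. Rolling "abc" -> "bcd"
// first subtracts 'a' * hash_2pow, then shifts in 'd'.
fn roll_demo() {
    let (a, b, c, d) = (b'a' as u32, b'b' as u32, b'c' as u32, b'd' as u32);
    let hash_2pow = 1u32 << 2; // 2^(n-1) for n = 3
    let hash_abc = a.wrapping_mul(4).wrapping_add(b.wrapping_mul(2)).wrapping_add(c);
    // del('a'), then add('d'):
    let rolled = hash_abc
        .wrapping_sub(a.wrapping_mul(hash_2pow)) // remove the oldest byte
        .wrapping_shl(1)                         // shift the window
        .wrapping_add(d);                        // append the new byte
    let hash_bcd = b.wrapping_mul(4).wrapping_add(c.wrapping_mul(2)).wrapping_add(d);
    assert_eq!(rolled, hash_bcd);
}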
The given needle hash should - /// correspond to the hash computed for the needle being searched for. - /// - /// This is meant to be used when the rolling window of the haystack is - /// advanced. - fn roll(&mut self, nhash: &NeedleHash, old: u8, new: u8) { - self.del(nhash, old); - self.add(new); - } - - /// Add a byte to this hash. - fn add(&mut self, byte: u8) { - self.0 = self.0.wrapping_shl(1).wrapping_add(byte as u32); - } - - /// Remove a byte from this hash. The given needle hash should correspond - /// to the hash computed for the needle being searched for. - fn del(&mut self, nhash: &NeedleHash, byte: u8) { - let factor = nhash.hash_2pow; - self.0 = self.0.wrapping_sub((byte as u32).wrapping_mul(factor)); - } -} - -/// Returns true if the given needle is a prefix of the given haystack. -/// -/// We forcefully don't inline the is_prefix call and hint at the compiler that -/// it is unlikely to be called. This causes the inner rabinkarp loop above -/// to be a bit tighter and leads to some performance improvement. See the -/// memmem/krate/prebuilt/sliceslice-words/words benchmark. -#[cold] -#[inline(never)] -fn is_prefix(haystack: &[u8], needle: &[u8]) -> bool { - crate::memmem::util::is_prefix(haystack, needle) -} - -/// Returns true if the given needle is a suffix of the given haystack. -/// -/// See is_prefix for why this is forcefully not inlined. -#[cold] -#[inline(never)] -fn is_suffix(haystack: &[u8], needle: &[u8]) -> bool { - crate::memmem::util::is_suffix(haystack, needle) -} - -#[cfg(test)] -mod simpletests { - define_memmem_simple_tests!(super::find, super::rfind); -} - -#[cfg(all(test, feature = "std", not(miri)))] -mod proptests { - define_memmem_quickcheck_tests!(super::find, super::rfind); -} diff --git a/vendor/memchr/src/memmem/rarebytes.rs b/vendor/memchr/src/memmem/rarebytes.rs deleted file mode 100644 index fb33f6894..000000000 --- a/vendor/memchr/src/memmem/rarebytes.rs +++ /dev/null @@ -1,136 +0,0 @@ -/// A heuristic, frequency-based detection of rare bytes for substring search. -/// -/// This detector attempts to pick out two bytes in a needle that are predicted -/// to occur least frequently. The purpose is to use these bytes to implement -/// fast candidate search using vectorized code. -/// -/// A set of offsets is only computed for needles of length 2 or greater. -/// Smaller needles should be special cased by the substring search algorithm -/// in use. (e.g., Use memchr for single byte needles.) -/// -/// Note that we use `u8` to represent the offsets of the rare bytes in a -/// needle to reduce space usage. This means that a rare byte occurring after -/// the first 255 bytes in a needle will never be used. -#[derive(Clone, Copy, Debug, Default)] -pub(crate) struct RareNeedleBytes { - /// The leftmost offset of the rarest byte in the needle, according to - /// pre-computed frequency analysis. The "leftmost offset" means that - /// rare1i <= i for all i where needle[i] == needle[rare1i]. - rare1i: u8, - /// The leftmost offset of the second rarest byte in the needle, according - /// to pre-computed frequency analysis. The "leftmost offset" means that - /// rare2i <= i for all i where needle[i] == needle[rare2i]. - /// - /// The second rarest byte is used as a type of guard for quickly detecting - /// a mismatch if the first byte matches. This is a hedge against - /// pathological cases where the pre-computed frequency analysis may be - /// off. (But of course, does not prevent *all* pathological cases.)
- /// - /// In general, rare1i != rare2i by construction, although there is no hard - /// requirement that they be different. However, since the case of a single - /// byte needle is handled specially by memchr itself, rare2i generally - /// always should be different from rare1i since it would otherwise be - /// ineffective as a guard. - rare2i: u8, -} - -impl RareNeedleBytes { - /// Create a new pair of rare needle bytes with the given offsets. This is - /// only used in tests for generating input data. - #[cfg(all(test, feature = "std"))] - pub(crate) fn new(rare1i: u8, rare2i: u8) -> RareNeedleBytes { - RareNeedleBytes { rare1i, rare2i } - } - - /// Detect the leftmost offsets of the two rarest bytes in the given - /// needle. - pub(crate) fn forward(needle: &[u8]) -> RareNeedleBytes { - if needle.len() <= 1 || needle.len() > core::u8::MAX as usize { - // For needles bigger than u8::MAX, our offsets aren't big enough. - // (We make our offsets small to reduce stack copying.) - // If you have a use case for it, please file an issue. In that - // case, we should probably just adjust the routine below to pick - // some rare bytes from the first 255 bytes of the needle. - // - // Also note that for needles of size 0 or 1, they are special - // cased in Two-Way. - // - // TODO: Benchmark this. - return RareNeedleBytes { rare1i: 0, rare2i: 0 }; - } - - // Find the rarest two bytes. We make them distinct by construction. - let (mut rare1, mut rare1i) = (needle[0], 0); - let (mut rare2, mut rare2i) = (needle[1], 1); - if rank(rare2) < rank(rare1) { - core::mem::swap(&mut rare1, &mut rare2); - core::mem::swap(&mut rare1i, &mut rare2i); - } - for (i, &b) in needle.iter().enumerate().skip(2) { - if rank(b) < rank(rare1) { - rare2 = rare1; - rare2i = rare1i; - rare1 = b; - rare1i = i as u8; - } else if b != rare1 && rank(b) < rank(rare2) { - rare2 = b; - rare2i = i as u8; - } - } - // While not strictly required, we really don't want these to be - // equivalent. If they were, it would reduce the effectiveness of - // candidate searching using these rare bytes by increasing the rate of - // false positives. - assert_ne!(rare1i, rare2i); - RareNeedleBytes { rare1i, rare2i } - } - - /// Return the rare bytes in the given needle in the forward direction. - /// The needle given must be the same one given to the RareNeedleBytes - /// constructor. - pub(crate) fn as_rare_bytes(&self, needle: &[u8]) -> (u8, u8) { - (needle[self.rare1i as usize], needle[self.rare2i as usize]) - } - - /// Return the rare offsets such that the first offset is always <= the - /// second offset. This is useful when the caller doesn't care whether - /// rare1 is rarer than rare2, but just wants to ensure that they are - /// ordered with respect to one another. - #[cfg(memchr_runtime_simd)] - pub(crate) fn as_rare_ordered_usize(&self) -> (usize, usize) { - let (rare1i, rare2i) = self.as_rare_ordered_u8(); - (rare1i as usize, rare2i as usize) - } - - /// Like as_rare_ordered_usize, but returns the offsets as their native - /// u8 values. - #[cfg(memchr_runtime_simd)] - pub(crate) fn as_rare_ordered_u8(&self) -> (u8, u8) { - if self.rare1i <= self.rare2i { - (self.rare1i, self.rare2i) - } else { - (self.rare2i, self.rare1i) - } - } - - /// Return the rare offsets as usize values in the order in which they were - /// constructed. rare1, for example, is constructed as the "rarer" byte, - /// and thus, callers may want to treat it differently from rare2.
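// [Editor's aside] The selection loop in forward() above, condensed into a
// standalone sketch that returns the two chosen offsets. `rank` stands in
// for the crate's precomputed BYTE_FREQUENCIES table (lower = rarer).
fn rarest_two(needle: &[u8], rank: impl Fn(u8) -> usize) -> (usize, usize) {
    assert!(needle.len() >= 2);
    let (mut rare1i, mut rare2i) = (0usize, 1usize);
    if rank(needle[rare2i]) < rank(needle[rare1i]) {
        core::mem::swap(&mut rare1i, &mut rare2i);
    }
    for (i, &b) in needle.iter().enumerate().skip(2) {
        if rank(b) < rank(needle[rare1i]) {
            rare2i = rare1i; // old rarest becomes the guard byte
            rare1i = i;
        } else if b != needle[rare1i] && rank(b) < rank(needle[rare2i]) {
            rare2i = i;
        }
    }
    (rare1i, rare2i)
}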
- pub(crate) fn as_rare_usize(&self) -> (usize, usize) { - (self.rare1i as usize, self.rare2i as usize) - } - - /// Return the byte frequency rank of each byte. The higher the rank, the - /// more frequent the byte is predicted to be. The needle given must be - /// the same one given to the RareNeedleBytes constructor. - pub(crate) fn as_ranks(&self, needle: &[u8]) -> (usize, usize) { - let (b1, b2) = self.as_rare_bytes(needle); - (rank(b1), rank(b2)) - } -} - -/// Return the heuristic frequency rank of the given byte. A lower rank -/// means the byte is believed to occur less frequently. -fn rank(b: u8) -> usize { - crate::memmem::byte_frequencies::BYTE_FREQUENCIES[b as usize] as usize -} diff --git a/vendor/memchr/src/memmem/searcher.rs b/vendor/memchr/src/memmem/searcher.rs new file mode 100644 index 000000000..98b9bd614 --- /dev/null +++ b/vendor/memchr/src/memmem/searcher.rs @@ -0,0 +1,1030 @@ +use crate::arch::all::{ + packedpair::{HeuristicFrequencyRank, Pair}, + rabinkarp, twoway, +}; + +#[cfg(target_arch = "aarch64")] +use crate::arch::aarch64::neon::packedpair as neon; +#[cfg(target_arch = "wasm32")] +use crate::arch::wasm32::simd128::packedpair as simd128; +#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +use crate::arch::x86_64::{ + avx2::packedpair as avx2, sse2::packedpair as sse2, +}; + +/// A "meta" substring searcher. +/// +/// To a first approximation, this chooses what it believes to be the "best" +/// substring search implementation based on the needle at construction time. +/// Then, every call to `find` will execute that particular implementation. To +/// a second approximation, multiple substring search algorithms may be used, +/// depending on the haystack. For example, for supremely short haystacks, +/// Rabin-Karp is typically used. +/// +/// See the documentation on `Prefilter` for an explanation of the dispatching +/// mechanism. The quick summary is that an enum has too much overhead and +/// we can't use dynamic dispatch via traits because we need to work in a +/// core-only environment. (Dynamic dispatch works in core-only, but you +/// need `&dyn Trait` and we really need a `Box<dyn Trait>` here. The latter +/// requires `alloc`.) So instead, we use a union and an appropriately paired +/// free function to read from the correct field on the union and execute the +/// chosen substring search implementation. +#[derive(Clone)] +pub(crate) struct Searcher { + call: SearcherKindFn, + kind: SearcherKind, + rabinkarp: rabinkarp::Finder, +} + +impl Searcher { + /// Creates a new "meta" substring searcher that attempts to choose the + /// best algorithm based on the needle, heuristics and what the current + /// target supports.
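// [Editor's aside] The union-plus-function-pointer scheme described above,
// shrunk to two variants with illustrative names. The single safety
// invariant: whoever constructs the pair must set the union field that its
// paired function reads.
#[derive(Clone, Copy)]
union ToyKind {
    empty: (),
    one_byte: u8,
}

type ToyFn = unsafe fn(kind: &ToyKind, haystack: &[u8]) -> Option<usize>;

unsafe fn toy_empty(_kind: &ToyKind, _haystack: &[u8]) -> Option<usize> {
    Some(0)
}

unsafe fn toy_one_byte(kind: &ToyKind, haystack: &[u8]) -> Option<usize> {
    // SAFETY: only ever paired with a `ToyKind` whose `one_byte` field is set.
    let byte = unsafe { kind.one_byte };
    haystack.iter().position(|&b| b == byte)
}

fn build_toy(needle: &[u8]) -> (ToyFn, ToyKind) {
    if needle.is_empty() {
        (toy_empty, ToyKind { empty: () })
    } else {
        (toy_one_byte, ToyKind { one_byte: needle[0] })
    }
}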
+ #[inline] + pub(crate) fn new<R: HeuristicFrequencyRank>( + prefilter: PrefilterConfig, + ranker: R, + needle: &[u8], + ) -> Searcher { + let rabinkarp = rabinkarp::Finder::new(needle); + if needle.len() <= 1 { + return if needle.is_empty() { + trace!("building empty substring searcher"); + Searcher { + call: searcher_kind_empty, + kind: SearcherKind { empty: () }, + rabinkarp, + } + } else { + trace!("building one-byte substring searcher"); + debug_assert_eq!(1, needle.len()); + Searcher { + call: searcher_kind_one_byte, + kind: SearcherKind { one_byte: needle[0] }, + rabinkarp, + } + }; + } + let pair = match Pair::with_ranker(needle, &ranker) { + Some(pair) => pair, + None => return Searcher::twoway(needle, rabinkarp, None), + }; + debug_assert_ne!( + pair.index1(), + pair.index2(), + "pair offsets should not be equivalent" + ); + #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] + { + if let Some(pp) = avx2::Finder::with_pair(needle, pair) { + if do_packed_search(needle) { + trace!("building x86_64 AVX2 substring searcher"); + let kind = SearcherKind { avx2: pp }; + Searcher { call: searcher_kind_avx2, kind, rabinkarp } + } else if prefilter.is_none() { + Searcher::twoway(needle, rabinkarp, None) + } else { + let prestrat = Prefilter::avx2(pp, needle); + Searcher::twoway(needle, rabinkarp, Some(prestrat)) + } + } else if let Some(pp) = sse2::Finder::with_pair(needle, pair) { + if do_packed_search(needle) { + trace!("building x86_64 SSE2 substring searcher"); + let kind = SearcherKind { sse2: pp }; + Searcher { call: searcher_kind_sse2, kind, rabinkarp } + } else if prefilter.is_none() { + Searcher::twoway(needle, rabinkarp, None) + } else { + let prestrat = Prefilter::sse2(pp, needle); + Searcher::twoway(needle, rabinkarp, Some(prestrat)) + } + } else if prefilter.is_none() { + Searcher::twoway(needle, rabinkarp, None) + } else { + // We're pretty unlikely to get to this point, but it is + // possible to be running on x86_64 without SSE2. Namely, it's + // really up to the OS whether it wants to support vector + // registers or not. 
+ let prestrat = Prefilter::fallback(ranker, pair, needle); + Searcher::twoway(needle, rabinkarp, prestrat) + } + } + #[cfg(target_arch = "wasm32")] + { + if let Some(pp) = simd128::Finder::with_pair(needle, pair) { + if do_packed_search(needle) { + trace!("building wasm32 simd128 substring searcher"); + let kind = SearcherKind { simd128: pp }; + Searcher { call: searcher_kind_simd128, kind, rabinkarp } + } else if prefilter.is_none() { + Searcher::twoway(needle, rabinkarp, None) + } else { + let prestrat = Prefilter::simd128(pp, needle); + Searcher::twoway(needle, rabinkarp, Some(prestrat)) + } + } else if prefilter.is_none() { + Searcher::twoway(needle, rabinkarp, None) + } else { + let prestrat = Prefilter::fallback(ranker, pair, needle); + Searcher::twoway(needle, rabinkarp, prestrat) + } + } + #[cfg(target_arch = "aarch64")] + { + if let Some(pp) = neon::Finder::with_pair(needle, pair) { + if do_packed_search(needle) { + trace!("building aarch64 neon substring searcher"); + let kind = SearcherKind { neon: pp }; + Searcher { call: searcher_kind_neon, kind, rabinkarp } + } else if prefilter.is_none() { + Searcher::twoway(needle, rabinkarp, None) + } else { + let prestrat = Prefilter::neon(pp, needle); + Searcher::twoway(needle, rabinkarp, Some(prestrat)) + } + } else if prefilter.is_none() { + Searcher::twoway(needle, rabinkarp, None) + } else { + let prestrat = Prefilter::fallback(ranker, pair, needle); + Searcher::twoway(needle, rabinkarp, prestrat) + } + } + #[cfg(not(any( + all(target_arch = "x86_64", target_feature = "sse2"), + target_arch = "wasm32", + target_arch = "aarch64" + )))] + { + if prefilter.is_none() { + Searcher::twoway(needle, rabinkarp, None) + } else { + let prestrat = Prefilter::fallback(ranker, pair, needle); + Searcher::twoway(needle, rabinkarp, prestrat) + } + } + } + + /// Creates a new searcher that always uses the Two-Way algorithm. This is + /// typically used when vector algorithms are unavailable or inappropriate. + /// (For example, when the needle is "too long.") + /// + /// If a prefilter is given, then the searcher returned will be accelerated + /// by the prefilter. + #[inline] + fn twoway( + needle: &[u8], + rabinkarp: rabinkarp::Finder, + prestrat: Option<Prefilter>, + ) -> Searcher { + let finder = twoway::Finder::new(needle); + match prestrat { + None => { + trace!("building scalar two-way substring searcher"); + let kind = SearcherKind { two_way: finder }; + Searcher { call: searcher_kind_two_way, kind, rabinkarp } + } + Some(prestrat) => { + trace!( + "building scalar two-way \ + substring searcher with a prefilter" + ); + let two_way_with_prefilter = + TwoWayWithPrefilter { finder, prestrat }; + let kind = SearcherKind { two_way_with_prefilter }; + Searcher { + call: searcher_kind_two_way_with_prefilter, + kind, + rabinkarp, + } + } + } + } + + /// Searches the given haystack for the given needle. The needle given + /// should be the same as the needle that this finder was initialized + /// with. + /// + /// Inlining this can lead to big wins for latency, and #[inline] doesn't + /// seem to be enough in some cases. + #[inline(always)] + pub(crate) fn find( + &self, + prestate: &mut PrefilterState, + haystack: &[u8], + needle: &[u8], + ) -> Option<usize> { + if haystack.len() < needle.len() { + None + } else { + // SAFETY: By construction, we've ensured that the function + // in `self.call` is properly paired with the union used in + // `self.kind`. 
+ unsafe { (self.call)(self, prestate, haystack, needle) } + } + } +} + +impl core::fmt::Debug for Searcher { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_struct("Searcher") + .field("call", &"<searcher function>") + .field("kind", &"<searcher kind union>") + .field("rabinkarp", &self.rabinkarp) + .finish() + } +} + +/// A union indicating one of several possible substring search implementations +/// that are in active use. +/// +/// This union should only be read by one of the functions prefixed with +/// `searcher_kind_`. Namely, the correct function is meant to be paired with +/// the union by the caller, such that the function always reads from the +/// designated union field. +#[derive(Clone, Copy)] +union SearcherKind { + empty: (), + one_byte: u8, + two_way: twoway::Finder, + two_way_with_prefilter: TwoWayWithPrefilter, + #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] + sse2: crate::arch::x86_64::sse2::packedpair::Finder, + #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] + avx2: crate::arch::x86_64::avx2::packedpair::Finder, + #[cfg(target_arch = "wasm32")] + simd128: crate::arch::wasm32::simd128::packedpair::Finder, + #[cfg(target_arch = "aarch64")] + neon: crate::arch::aarch64::neon::packedpair::Finder, +} + +/// A two-way substring searcher with a prefilter. +#[derive(Copy, Clone, Debug)] +struct TwoWayWithPrefilter { + finder: twoway::Finder, + prestrat: Prefilter, +} + +/// The type of a substring search function. +/// +/// # Safety +/// +/// When using a function of this type, callers must ensure that the correct +/// function is paired with the value populated in `SearcherKind` union. +type SearcherKindFn = unsafe fn( + searcher: &Searcher, + prestate: &mut PrefilterState, + haystack: &[u8], + needle: &[u8], +) -> Option<usize>; + +/// Reads from the `empty` field of `SearcherKind` to handle the case of +/// searching for the empty needle. Works on all platforms. +/// +/// # Safety +/// +/// Callers must ensure that the `searcher.kind.empty` union field is set. +unsafe fn searcher_kind_empty( + _searcher: &Searcher, + _prestate: &mut PrefilterState, + _haystack: &[u8], + _needle: &[u8], +) -> Option<usize> { + Some(0) +} + +/// Reads from the `one_byte` field of `SearcherKind` to handle the case of +/// searching for a single byte needle. Works on all platforms. +/// +/// # Safety +/// +/// Callers must ensure that the `searcher.kind.one_byte` union field is set. +unsafe fn searcher_kind_one_byte( + searcher: &Searcher, + _prestate: &mut PrefilterState, + haystack: &[u8], + _needle: &[u8], +) -> Option<usize> { + let needle = searcher.kind.one_byte; + crate::memchr(needle, haystack) +} + +/// Reads from the `two_way` field of `SearcherKind` to handle the case of +/// searching for an arbitrary needle without prefilter acceleration. Works on +/// all platforms. +/// +/// # Safety +/// +/// Callers must ensure that the `searcher.kind.two_way` union field is set. +unsafe fn searcher_kind_two_way( + searcher: &Searcher, + _prestate: &mut PrefilterState, + haystack: &[u8], + needle: &[u8], +) -> Option<usize> { + if rabinkarp::is_fast(haystack, needle) { + searcher.rabinkarp.find(haystack, needle) + } else { + searcher.kind.two_way.find(haystack, needle) + } +} + +/// Reads from the `two_way_with_prefilter` field of `SearcherKind` to handle +/// the case of searching for an arbitrary needle with prefilter acceleration. +/// Works on all platforms. 
+/// +/// # Safety +/// +/// Callers must ensure that the `searcher.kind.two_way_with_prefilter` union +/// field is set. +unsafe fn searcher_kind_two_way_with_prefilter( + searcher: &Searcher, + prestate: &mut PrefilterState, + haystack: &[u8], + needle: &[u8], +) -> Option<usize> { + if rabinkarp::is_fast(haystack, needle) { + searcher.rabinkarp.find(haystack, needle) + } else { + let TwoWayWithPrefilter { ref finder, ref prestrat } = + searcher.kind.two_way_with_prefilter; + let pre = Pre { prestate, prestrat }; + finder.find_with_prefilter(Some(pre), haystack, needle) + } +} + +/// Reads from the `sse2` field of `SearcherKind` to execute the x86_64 SSE2 +/// vectorized substring search implementation. +/// +/// # Safety +/// +/// Callers must ensure that the `searcher.kind.sse2` union field is set. +#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +unsafe fn searcher_kind_sse2( + searcher: &Searcher, + _prestate: &mut PrefilterState, + haystack: &[u8], + needle: &[u8], +) -> Option<usize> { + let finder = &searcher.kind.sse2; + if haystack.len() < finder.min_haystack_len() { + searcher.rabinkarp.find(haystack, needle) + } else { + finder.find(haystack, needle) + } +} + +/// Reads from the `avx2` field of `SearcherKind` to execute the x86_64 AVX2 +/// vectorized substring search implementation. +/// +/// # Safety +/// +/// Callers must ensure that the `searcher.kind.avx2` union field is set. +#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +unsafe fn searcher_kind_avx2( + searcher: &Searcher, + _prestate: &mut PrefilterState, + haystack: &[u8], + needle: &[u8], +) -> Option<usize> { + let finder = &searcher.kind.avx2; + if haystack.len() < finder.min_haystack_len() { + searcher.rabinkarp.find(haystack, needle) + } else { + finder.find(haystack, needle) + } +} + +/// Reads from the `simd128` field of `SearcherKind` to execute the wasm32 +/// simd128 vectorized substring search implementation. +/// +/// # Safety +/// +/// Callers must ensure that the `searcher.kind.simd128` union field is set. +#[cfg(target_arch = "wasm32")] +unsafe fn searcher_kind_simd128( + searcher: &Searcher, + _prestate: &mut PrefilterState, + haystack: &[u8], + needle: &[u8], +) -> Option<usize> { + let finder = &searcher.kind.simd128; + if haystack.len() < finder.min_haystack_len() { + searcher.rabinkarp.find(haystack, needle) + } else { + finder.find(haystack, needle) + } +} + +/// Reads from the `neon` field of `SearcherKind` to execute the aarch64 neon +/// vectorized substring search implementation. +/// +/// # Safety +/// +/// Callers must ensure that the `searcher.kind.neon` union field is set. +#[cfg(target_arch = "aarch64")] +unsafe fn searcher_kind_neon( + searcher: &Searcher, + _prestate: &mut PrefilterState, + haystack: &[u8], + needle: &[u8], +) -> Option<usize> { + let finder = &searcher.kind.neon; + if haystack.len() < finder.min_haystack_len() { + searcher.rabinkarp.find(haystack, needle) + } else { + finder.find(haystack, needle) + } +} + +/// A reverse substring searcher. +#[derive(Clone, Debug)] +pub(crate) struct SearcherRev { + kind: SearcherRevKind, + rabinkarp: rabinkarp::FinderRev, +} + +/// The kind of the reverse searcher. +/// +/// For the reverse case, we don't do any SIMD acceleration or prefilters. +/// There is no specific technical reason why we don't, but rather don't do it +/// because it's not clear it's worth the extra code to do so. If you have a +/// use case for it, please file an issue. 
+/// +/// We also don't do the union trick as we do with the forward case and +/// prefilters. Basically for the same reason we don't have prefilters or +/// vector algorithms for reverse searching: it's not clear it's worth doing. +/// Please file an issue if you have a compelling use case for fast reverse +/// substring search. +#[derive(Clone, Debug)] +enum SearcherRevKind { + Empty, + OneByte { needle: u8 }, + TwoWay { finder: twoway::FinderRev }, +} + +impl SearcherRev { + /// Creates a new searcher for finding occurrences of the given needle in + /// reverse. That is, it reports the last (instead of the first) occurrence + /// of a needle in a haystack. + #[inline] + pub(crate) fn new(needle: &[u8]) -> SearcherRev { + let kind = if needle.len() <= 1 { + if needle.is_empty() { + trace!("building empty reverse substring searcher"); + SearcherRevKind::Empty + } else { + trace!("building one-byte reverse substring searcher"); + debug_assert_eq!(1, needle.len()); + SearcherRevKind::OneByte { needle: needle[0] } + } + } else { + trace!("building scalar two-way reverse substring searcher"); + let finder = twoway::FinderRev::new(needle); + SearcherRevKind::TwoWay { finder } + }; + let rabinkarp = rabinkarp::FinderRev::new(needle); + SearcherRev { kind, rabinkarp } + } + + /// Searches the given haystack for the last occurrence of the given + /// needle. The needle given should be the same as the needle that this + /// finder was initialized with. + #[inline] + pub(crate) fn rfind( + &self, + haystack: &[u8], + needle: &[u8], + ) -> Option<usize> { + if haystack.len() < needle.len() { + return None; + } + match self.kind { + SearcherRevKind::Empty => Some(haystack.len()), + SearcherRevKind::OneByte { needle } => { + crate::memrchr(needle, haystack) + } + SearcherRevKind::TwoWay { ref finder } => { + if rabinkarp::is_fast(haystack, needle) { + self.rabinkarp.rfind(haystack, needle) + } else { + finder.rfind(haystack, needle) + } + } + } + } +} + +/// Prefilter controls whether heuristics are used to accelerate searching. +/// +/// A prefilter refers to the idea of detecting candidate matches very quickly, +/// and then confirming whether those candidates are full matches. This +/// idea can be quite effective since it's often the case that looking for +/// candidates can be a lot faster than running a complete substring search +/// over the entire input. Namely, looking for candidates can be done with +/// extremely fast vectorized code. +/// +/// The downside of a prefilter is that it assumes false positives (which are +/// candidates generated by a prefilter that aren't matches) are somewhat rare +/// relative to the frequency of full matches. That is, if a lot of false +/// positives are generated, then it's possible for search time to be worse +/// than if the prefilter wasn't enabled in the first place. +/// +/// Another downside of a prefilter is that it can result in highly variable +/// performance, where some cases are extraordinarily fast and others aren't. +/// Typically, variable performance isn't a problem, but it may be for your use +/// case. +/// +/// The use of prefilters in this implementation does use a heuristic to detect +/// when a prefilter might not be carrying its weight, and will dynamically +/// disable its use. Nevertheless, this configuration option gives callers +/// the ability to disable prefilters if you have knowledge that they won't be +/// useful. 
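// [Editor's aside] How this knob reaches users, assuming the crate's public
// `memmem::FinderBuilder` API, which re-exports this configuration type as
// `memmem::Prefilter`; treat the exact paths and method names here as an
// assumption of this note rather than something this diff establishes.
use memchr::memmem::{FinderBuilder, Prefilter};

fn find_unaccelerated(haystack: &[u8]) -> Option<usize> {
    FinderBuilder::new()
        .prefilter(Prefilter::None) // opt out of heuristic acceleration
        .build_forward(b"needle")
        .find(haystack)
}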
+#[derive(Clone, Copy, Debug)] +#[non_exhaustive] +pub enum PrefilterConfig { + /// Never use a prefilter in substring search. + None, + /// Automatically detect whether a heuristic prefilter should be used. If + /// it is used, then heuristics will be used to dynamically disable the + /// prefilter if it is believed to not be carrying its weight. + Auto, +} + +impl Default for PrefilterConfig { + fn default() -> PrefilterConfig { + PrefilterConfig::Auto + } +} + +impl PrefilterConfig { + /// Returns true when this prefilter is set to the `None` variant. + fn is_none(&self) -> bool { + matches!(*self, PrefilterConfig::None) + } +} + +/// The implementation of a prefilter. +/// +/// This type encapsulates dispatch to one of several possible choices for a +/// prefilter. Generally speaking, all prefilters have the same approximate +/// algorithm: they choose a couple of bytes from the needle that are believed +/// to be rare, use a fast vector algorithm to look for those bytes and return +/// positions as candidates for some substring search algorithm (currently only +/// Two-Way) to confirm as a match or not. +/// +/// The differences between the algorithms are actually at the vector +/// implementation level. Namely, we need different routines based on both +/// which target architecture we're on and what CPU features are supported. +/// +/// The straightforwardly obvious approach here is to use an enum, and make +/// `Prefilter::find` do case analysis to determine which algorithm was +/// selected and invoke it. However, I've observed that this leads to poor +/// codegen in some cases, especially in latency-sensitive benchmarks. That is, +/// this approach comes with overhead that I wasn't able to eliminate. +/// +/// The second obvious approach is to use dynamic dispatch with traits. Doing +/// that in this context where `Prefilter` owns the selection generally +/// requires heap allocation, and this code is designed to run in core-only +/// environments. +/// +/// So we settle on using a union (that's `PrefilterKind`) and a function +/// pointer (that's `PrefilterKindFn`). We select the right function pointer +/// based on which field in the union we set, and that function in turn +/// knows which field of the union to access. The downside of this approach +/// is that it forces us to think about safety, but the upside is that +/// there are some nice latency improvements to benchmarks. (Especially the +/// `memmem/sliceslice/short` benchmark.) +/// +/// In cases where we've selected a vector algorithm and the haystack given +/// is too short, we fall back to the scalar version of `memchr` on the +/// `rarest_byte`. (The scalar version of `memchr` is still better than a naive +/// byte-at-a-time loop because it will read in `usize`-sized chunks at a +/// time.) +#[derive(Clone, Copy)] +struct Prefilter { + call: PrefilterKindFn, + kind: PrefilterKind, + rarest_byte: u8, + rarest_offset: u8, +} + +impl Prefilter { + /// Return a "fallback" prefilter, but only if it is believed to be + /// effective. + #[inline] + fn fallback<R: HeuristicFrequencyRank>( + ranker: R, + pair: Pair, + needle: &[u8], + ) -> Option<Prefilter> { + /// The maximum frequency rank permitted for the fallback prefilter. + /// If the rarest byte in the needle has a frequency rank above this + /// value, then no prefilter is used if the fallback prefilter would + /// otherwise be selected.
+        const MAX_FALLBACK_RANK: u8 = 250;
+
+        trace!("building fallback prefilter");
+        let rarest_offset = pair.index1();
+        let rarest_byte = needle[usize::from(rarest_offset)];
+        let rarest_rank = ranker.rank(rarest_byte);
+        if rarest_rank > MAX_FALLBACK_RANK {
+            None
+        } else {
+            let finder = crate::arch::all::packedpair::Finder::with_pair(
+                needle,
+                pair.clone(),
+            )?;
+            let call = prefilter_kind_fallback;
+            let kind = PrefilterKind { fallback: finder };
+            Some(Prefilter { call, kind, rarest_byte, rarest_offset })
+        }
+    }
+
+    /// Return a prefilter using an x86_64 SSE2 vector algorithm.
+    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
+    #[inline]
+    fn sse2(finder: sse2::Finder, needle: &[u8]) -> Prefilter {
+        trace!("building x86_64 SSE2 prefilter");
+        let rarest_offset = finder.pair().index1();
+        let rarest_byte = needle[usize::from(rarest_offset)];
+        Prefilter {
+            call: prefilter_kind_sse2,
+            kind: PrefilterKind { sse2: finder },
+            rarest_byte,
+            rarest_offset,
+        }
+    }
+
+    /// Return a prefilter using an x86_64 AVX2 vector algorithm.
+    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
+    #[inline]
+    fn avx2(finder: avx2::Finder, needle: &[u8]) -> Prefilter {
+        trace!("building x86_64 AVX2 prefilter");
+        let rarest_offset = finder.pair().index1();
+        let rarest_byte = needle[usize::from(rarest_offset)];
+        Prefilter {
+            call: prefilter_kind_avx2,
+            kind: PrefilterKind { avx2: finder },
+            rarest_byte,
+            rarest_offset,
+        }
+    }
+
+    /// Return a prefilter using a wasm32 simd128 vector algorithm.
+    #[cfg(target_arch = "wasm32")]
+    #[inline]
+    fn simd128(finder: simd128::Finder, needle: &[u8]) -> Prefilter {
+        trace!("building wasm32 simd128 prefilter");
+        let rarest_offset = finder.pair().index1();
+        let rarest_byte = needle[usize::from(rarest_offset)];
+        Prefilter {
+            call: prefilter_kind_simd128,
+            kind: PrefilterKind { simd128: finder },
+            rarest_byte,
+            rarest_offset,
+        }
+    }
+
+    /// Return a prefilter using an aarch64 neon vector algorithm.
+    #[cfg(target_arch = "aarch64")]
+    #[inline]
+    fn neon(finder: neon::Finder, needle: &[u8]) -> Prefilter {
+        trace!("building aarch64 neon prefilter");
+        let rarest_offset = finder.pair().index1();
+        let rarest_byte = needle[usize::from(rarest_offset)];
+        Prefilter {
+            call: prefilter_kind_neon,
+            kind: PrefilterKind { neon: finder },
+            rarest_byte,
+            rarest_offset,
+        }
+    }
+
+    /// Return a *candidate* position for a match.
+    ///
+    /// When this returns an offset, it implies that a match could begin at
+    /// that offset, but it may not. That is, it is possible for a false
+    /// positive to be returned.
+    ///
+    /// When `None` is returned, then it is guaranteed that there are no
+    /// matches for the needle in the given haystack. That is, it is impossible
+    /// for a false negative to be returned.
+    ///
+    /// The purpose of this routine is to look for candidate matching positions
+    /// as quickly as possible before running a (likely) slower confirmation
+    /// step.
+    #[inline]
+    fn find(&self, haystack: &[u8]) -> Option<usize> {
+        // SAFETY: By construction, we've ensured that the function in
+        // `self.call` is properly paired with the union used in `self.kind`.
+        unsafe { (self.call)(self, haystack) }
+    }
+
+    /// A "simple" prefilter that just looks for the occurrence of the rarest
+    /// byte from the needle. This is generally only used for very small
+    /// haystacks.
+ #[inline] + fn find_simple(&self, haystack: &[u8]) -> Option<usize> { + // We don't use crate::memchr here because the haystack should be small + // enough that memchr won't be able to use vector routines anyway. So + // we just skip straight to the fallback implementation which is likely + // faster. (A byte-at-a-time loop is only used when the haystack is + // smaller than `size_of::<usize>()`.) + crate::arch::all::memchr::One::new(self.rarest_byte) + .find(haystack) + .map(|i| i.saturating_sub(usize::from(self.rarest_offset))) + } +} + +impl core::fmt::Debug for Prefilter { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_struct("Prefilter") + .field("call", &"<prefilter function>") + .field("kind", &"<prefilter kind union>") + .field("rarest_byte", &self.rarest_byte) + .field("rarest_offset", &self.rarest_offset) + .finish() + } +} + +/// A union indicating one of several possible prefilters that are in active +/// use. +/// +/// This union should only be read by one of the functions prefixed with +/// `prefilter_kind_`. Namely, the correct function is meant to be paired with +/// the union by the caller, such that the function always reads from the +/// designated union field. +#[derive(Clone, Copy)] +union PrefilterKind { + fallback: crate::arch::all::packedpair::Finder, + #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] + sse2: crate::arch::x86_64::sse2::packedpair::Finder, + #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] + avx2: crate::arch::x86_64::avx2::packedpair::Finder, + #[cfg(target_arch = "wasm32")] + simd128: crate::arch::wasm32::simd128::packedpair::Finder, + #[cfg(target_arch = "aarch64")] + neon: crate::arch::aarch64::neon::packedpair::Finder, +} + +/// The type of a prefilter function. +/// +/// # Safety +/// +/// When using a function of this type, callers must ensure that the correct +/// function is paired with the value populated in `PrefilterKind` union. +type PrefilterKindFn = + unsafe fn(strat: &Prefilter, haystack: &[u8]) -> Option<usize>; + +/// Reads from the `fallback` field of `PrefilterKind` to execute the fallback +/// prefilter. Works on all platforms. +/// +/// # Safety +/// +/// Callers must ensure that the `strat.kind.fallback` union field is set. +unsafe fn prefilter_kind_fallback( + strat: &Prefilter, + haystack: &[u8], +) -> Option<usize> { + strat.kind.fallback.find_prefilter(haystack) +} + +/// Reads from the `sse2` field of `PrefilterKind` to execute the x86_64 SSE2 +/// prefilter. +/// +/// # Safety +/// +/// Callers must ensure that the `strat.kind.sse2` union field is set. +#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +unsafe fn prefilter_kind_sse2( + strat: &Prefilter, + haystack: &[u8], +) -> Option<usize> { + let finder = &strat.kind.sse2; + if haystack.len() < finder.min_haystack_len() { + strat.find_simple(haystack) + } else { + finder.find_prefilter(haystack) + } +} + +/// Reads from the `avx2` field of `PrefilterKind` to execute the x86_64 AVX2 +/// prefilter. +/// +/// # Safety +/// +/// Callers must ensure that the `strat.kind.avx2` union field is set. 
+#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
+unsafe fn prefilter_kind_avx2(
+    strat: &Prefilter,
+    haystack: &[u8],
+) -> Option<usize> {
+    let finder = &strat.kind.avx2;
+    if haystack.len() < finder.min_haystack_len() {
+        strat.find_simple(haystack)
+    } else {
+        finder.find_prefilter(haystack)
+    }
+}
+
+/// Reads from the `simd128` field of `PrefilterKind` to execute the wasm32
+/// simd128 prefilter.
+///
+/// # Safety
+///
+/// Callers must ensure that the `strat.kind.simd128` union field is set.
+#[cfg(target_arch = "wasm32")]
+unsafe fn prefilter_kind_simd128(
+    strat: &Prefilter,
+    haystack: &[u8],
+) -> Option<usize> {
+    let finder = &strat.kind.simd128;
+    if haystack.len() < finder.min_haystack_len() {
+        strat.find_simple(haystack)
+    } else {
+        finder.find_prefilter(haystack)
+    }
+}
+
+/// Reads from the `neon` field of `PrefilterKind` to execute the aarch64 neon
+/// prefilter.
+///
+/// # Safety
+///
+/// Callers must ensure that the `strat.kind.neon` union field is set.
+#[cfg(target_arch = "aarch64")]
+unsafe fn prefilter_kind_neon(
+    strat: &Prefilter,
+    haystack: &[u8],
+) -> Option<usize> {
+    let finder = &strat.kind.neon;
+    if haystack.len() < finder.min_haystack_len() {
+        strat.find_simple(haystack)
+    } else {
+        finder.find_prefilter(haystack)
+    }
+}
+
+/// PrefilterState tracks state associated with the effectiveness of a
+/// prefilter. It is used to track how many bytes, on average, are skipped by
+/// the prefilter. If this average dips below a certain threshold over time,
+/// then the state renders the prefilter inert and stops using it.
+///
+/// A prefilter state should be created for each search. (Where creating an
+/// iterator is treated as a single search.)
+#[derive(Clone, Copy, Debug)]
+pub(crate) struct PrefilterState {
+    /// The number of skips that have been executed. This is always 1 greater
+    /// than the actual number of skips. The special sentinel value of 0
+    /// indicates that the prefilter is inert. This is useful to avoid
+    /// additional checks to determine whether the prefilter is still
+    /// "effective." Once a prefilter becomes inert, it should no longer be
+    /// used (according to our heuristics).
+    skips: u32,
+    /// The total number of bytes that have been skipped.
+    skipped: u32,
+}
+
+impl PrefilterState {
+    /// The minimum number of skip attempts to try before considering whether
+    /// a prefilter is effective or not.
+    const MIN_SKIPS: u32 = 50;
+
+    /// The minimum amount of bytes that skipping must average.
+    ///
+    /// This value was chosen based on varying it and checking
+    /// the microbenchmarks. In particular, this can impact the
+    /// pathological/repeated-{huge,small} benchmarks quite a bit if it's set
+    /// too low.
+    const MIN_SKIP_BYTES: u32 = 8;
+
+    /// Create a fresh prefilter state.
+    #[inline]
+    pub(crate) fn new() -> PrefilterState {
+        PrefilterState { skips: 1, skipped: 0 }
+    }
+
+    /// Update this state with the number of bytes skipped on the last
+    /// invocation of the prefilter.
+    #[inline]
+    fn update(&mut self, skipped: usize) {
+        self.skips = self.skips.saturating_add(1);
+        // We need to do this dance since it's technically possible for
+        // `skipped` to overflow a `u32`. (And we use a `u32` to reduce the
+        // size of a prefilter state.)
+        self.skipped = match u32::try_from(skipped) {
+            Err(_) => core::u32::MAX,
+            Ok(skipped) => self.skipped.saturating_add(skipped),
+        };
+    }
+
+    /// Return true if and only if this state indicates that a prefilter is
+    /// still effective.
+    #[inline]
+    fn is_effective(&mut self) -> bool {
+        if self.is_inert() {
+            return false;
+        }
+        if self.skips() < PrefilterState::MIN_SKIPS {
+            return true;
+        }
+        if self.skipped >= PrefilterState::MIN_SKIP_BYTES * self.skips() {
+            return true;
+        }
+
+        // We're inert.
+        self.skips = 0;
+        false
+    }
+
+    /// Returns true if the prefilter this state represents should no longer
+    /// be used.
+    #[inline]
+    fn is_inert(&self) -> bool {
+        self.skips == 0
+    }
+
+    /// Returns the total number of times the prefilter has been used.
+    #[inline]
+    fn skips(&self) -> u32 {
+        // Remember, `0` is a sentinel value indicating inertness, so we
+        // always need to subtract `1` to get our actual number of skips.
+        self.skips.saturating_sub(1)
+    }
+}
+
+/// A combination of prefilter effectiveness state and the prefilter itself.
+#[derive(Debug)]
+pub(crate) struct Pre<'a> {
+    /// State that tracks the effectiveness of a prefilter.
+    prestate: &'a mut PrefilterState,
+    /// The actual prefilter.
+    prestrat: &'a Prefilter,
+}
+
+impl<'a> Pre<'a> {
+    /// Call this prefilter on the given haystack with the given needle.
+    #[inline]
+    pub(crate) fn find(&mut self, haystack: &[u8]) -> Option<usize> {
+        let result = self.prestrat.find(haystack);
+        self.prestate.update(result.unwrap_or(haystack.len()));
+        result
+    }
+
+    /// Return true if and only if this prefilter should be used.
+    #[inline]
+    pub(crate) fn is_effective(&mut self) -> bool {
+        self.prestate.is_effective()
+    }
+}
+
+/// Returns true if the needle has the right characteristics for a vector
+/// algorithm to handle the entirety of substring search.
+///
+/// Vector algorithms can be used for prefilters for other substring search
+/// algorithms (like Two-Way), but they can also be used for substring search
+/// on their own. When used for substring search, vector algorithms will
+/// quickly identify candidate match positions (just like in the prefilter
+/// case), but instead of returning the candidate position they will try to
+/// confirm the match themselves. Confirmation happens via `memcmp`. This
+/// works well for short needles, but can break down when many false candidate
+/// positions are generated for large needles. Thus, we only permit vector
+/// algorithms to own substring search when the needle is of a certain length.
+#[inline]
+fn do_packed_search(needle: &[u8]) -> bool {
+    /// The minimum length of a needle required for this algorithm. The minimum
+    /// is 2 since a length of 1 should just use memchr and a length of 0 isn't
+    /// a case handled by this searcher.
+    const MIN_LEN: usize = 2;
+
+    /// The maximum length of a needle permitted for this algorithm.
+    ///
+    /// In reality, there is no hard max here. The code below can handle any
+    /// length needle. (Perhaps that suggests there are missing optimizations.)
+    /// Instead, this is a heuristic and a bound guaranteeing our linear time
+    /// complexity.
+    ///
+    /// It is a heuristic because when a candidate match is found, memcmp is
+    /// run. For very large needles with lots of false positives, memcmp can
+    /// make the code run quite slow.
+    ///
+    /// It is a bound because the worst case behavior with memcmp is
+    /// multiplicative in the size of the needle and haystack, and we want
+    /// to keep that additive.
This bound ensures we still meet that bound + /// theoretically, since it's just a constant. We aren't acting in bad + /// faith here, memcmp on tiny needles is so fast that even in pathological + /// cases (see pathological vector benchmarks), this is still just as fast + /// or faster in practice. + /// + /// This specific number was chosen by tweaking a bit and running + /// benchmarks. The rare-medium-needle, for example, gets about 5% faster + /// by using this algorithm instead of a prefilter-accelerated Two-Way. + /// There's also a theoretical desire to keep this number reasonably + /// low, to mitigate the impact of pathological cases. I did try 64, and + /// some benchmarks got a little better, and others (particularly the + /// pathological ones), got a lot worse. So... 32 it is? + const MAX_LEN: usize = 32; + MIN_LEN <= needle.len() && needle.len() <= MAX_LEN +} diff --git a/vendor/memchr/src/memmem/twoway.rs b/vendor/memchr/src/memmem/twoway.rs deleted file mode 100644 index 7f82ed15d..000000000 --- a/vendor/memchr/src/memmem/twoway.rs +++ /dev/null @@ -1,878 +0,0 @@ -use core::cmp; - -use crate::memmem::{prefilter::Pre, util}; - -/// Two-Way search in the forward direction. -#[derive(Clone, Copy, Debug)] -pub(crate) struct Forward(TwoWay); - -/// Two-Way search in the reverse direction. -#[derive(Clone, Copy, Debug)] -pub(crate) struct Reverse(TwoWay); - -/// An implementation of the TwoWay substring search algorithm, with heuristics -/// for accelerating search based on frequency analysis. -/// -/// This searcher supports forward and reverse search, although not -/// simultaneously. It runs in O(n + m) time and O(1) space, where -/// `n ~ len(needle)` and `m ~ len(haystack)`. -/// -/// The implementation here roughly matches that which was developed by -/// Crochemore and Perrin in their 1991 paper "Two-way string-matching." The -/// changes in this implementation are 1) the use of zero-based indices, 2) a -/// heuristic skip table based on the last byte (borrowed from Rust's standard -/// library) and 3) the addition of heuristics for a fast skip loop. That is, -/// (3) this will detect bytes that are believed to be rare in the needle and -/// use fast vectorized instructions to find their occurrences quickly. The -/// Two-Way algorithm is then used to confirm whether a match at that location -/// occurred. -/// -/// The heuristic for fast skipping is automatically shut off if it's -/// detected to be ineffective at search time. Generally, this only occurs in -/// pathological cases. But this is generally necessary in order to preserve -/// a `O(n + m)` time bound. -/// -/// The code below is fairly complex and not obviously correct at all. It's -/// likely necessary to read the Two-Way paper cited above in order to fully -/// grok this code. The essence of it is: -/// -/// 1) Do something to detect a "critical" position in the needle. -/// 2) For the current position in the haystack, look if needle[critical..] -/// matches at that position. -/// 3) If so, look if needle[..critical] matches. -/// 4) If a mismatch occurs, shift the search by some amount based on the -/// critical position and a pre-computed shift. -/// -/// This type is wrapped in Forward and Reverse types that expose consistent -/// forward or reverse APIs. -#[derive(Clone, Copy, Debug)] -struct TwoWay { - /// A small bitset used as a quick prefilter (in addition to the faster - /// SIMD based prefilter). Namely, a bit 'i' is set if and only if b%64==i - /// for any b in the needle. 
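-    /// For example, the needle `"abc"` sets bits `97 % 64 = 33`,
-    /// `98 % 64 = 34` and `99 % 64 = 35`.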
-    ///
-    /// When used as a prefilter, if the last byte at the current candidate
-    /// position is NOT in this set, then we can skip that entire candidate
-    /// position (i.e., advance by the length of the needle). This is
-    /// essentially the shift trick found in Boyer-Moore, but only applied to
-    /// bytes that don't appear in the needle.
-    ///
-    /// N.B. This trick was inspired by something similar in std's
-    /// implementation of Two-Way.
-    byteset: ApproximateByteSet,
-    /// A critical position in needle. Specifically, this position corresponds
-    /// to the beginning of either the minimal or maximal suffix in needle.
-    /// (N.B. See SuffixKind below for why "minimal" isn't quite the correct
-    /// word here.)
-    ///
-    /// This is the position at which every search begins. Namely, search
-    /// starts by scanning text to the right of this position, and only if
-    /// there's a match does the text to the left of this position get scanned.
-    critical_pos: usize,
-    /// The amount we shift by in the Two-Way search algorithm. This
-    /// corresponds to the "small period" and "large period" cases.
-    shift: Shift,
-}
-
-impl Forward {
-    /// Create a searcher that uses the Two-Way algorithm by searching forwards
-    /// through any haystack.
-    pub(crate) fn new(needle: &[u8]) -> Forward {
-        if needle.is_empty() {
-            return Forward(TwoWay::empty());
-        }
-
-        let byteset = ApproximateByteSet::new(needle);
-        let min_suffix = Suffix::forward(needle, SuffixKind::Minimal);
-        let max_suffix = Suffix::forward(needle, SuffixKind::Maximal);
-        let (period_lower_bound, critical_pos) =
-            if min_suffix.pos > max_suffix.pos {
-                (min_suffix.period, min_suffix.pos)
-            } else {
-                (max_suffix.period, max_suffix.pos)
-            };
-        let shift = Shift::forward(needle, period_lower_bound, critical_pos);
-        Forward(TwoWay { byteset, critical_pos, shift })
-    }
-
-    /// Find the position of the first occurrence of this searcher's needle in
-    /// the given haystack. If one does not exist, then return None.
-    ///
-    /// This accepts prefilter state that is useful when using the same
-    /// searcher multiple times, such as in an iterator.
-    ///
-    /// Callers must guarantee that the needle is non-empty and its length is
-    /// <= the haystack's length.
-    #[inline(always)]
-    pub(crate) fn find(
-        &self,
-        pre: Option<&mut Pre<'_>>,
-        haystack: &[u8],
-        needle: &[u8],
-    ) -> Option<usize> {
-        debug_assert!(!needle.is_empty(), "needle should not be empty");
-        debug_assert!(needle.len() <= haystack.len(), "haystack too short");
-
-        match self.0.shift {
-            Shift::Small { period } => {
-                self.find_small_imp(pre, haystack, needle, period)
-            }
-            Shift::Large { shift } => {
-                self.find_large_imp(pre, haystack, needle, shift)
-            }
-        }
-    }
-
-    /// Like find, but handles the degenerate substring test cases. This is
-    /// only useful for conveniently testing this substring implementation in
-    /// isolation.
-    #[cfg(test)]
-    fn find_general(
-        &self,
-        pre: Option<&mut Pre<'_>>,
-        haystack: &[u8],
-        needle: &[u8],
-    ) -> Option<usize> {
-        if needle.is_empty() {
-            Some(0)
-        } else if haystack.len() < needle.len() {
-            None
-        } else {
-            self.find(pre, haystack, needle)
-        }
-    }
-
-    // Each of the two search implementations below can be accelerated by a
-    // prefilter, but it is not always enabled. To avoid its overhead when
-    // it's disabled, we explicitly inline each search implementation based on
-    // whether a prefilter will be used or not. The decision on which to use
-    // is made in the parent meta searcher.
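To make the byteset trick from the `byteset` field docs concrete outside the Two-Way machinery, here is a minimal self-contained sketch; `find_with_skip` is a hypothetical name and this is not the code in this file. When the last byte of the current window is definitely absent from the needle, no occurrence can start inside the window, so the scan hops a full needle length.

    fn find_with_skip(haystack: &[u8], needle: &[u8]) -> Option<usize> {
        let mut bits = 0u64;
        for &b in needle {
            bits |= 1 << (b % 64);
        }
        let last = needle.len().checked_sub(1)?;
        let mut pos = 0;
        while pos + needle.len() <= haystack.len() {
            // Any match starting in `pos..=pos + last` must contain the byte
            // at `pos + last`; if that byte can't be in the needle, skip the
            // whole window.
            if bits & (1 << (haystack[pos + last] % 64)) == 0 {
                pos += needle.len();
                continue;
            }
            if &haystack[pos..pos + needle.len()] == needle {
                return Some(pos);
            }
            pos += 1;
        }
        None
    }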
- - #[inline(always)] - fn find_small_imp( - &self, - mut pre: Option<&mut Pre<'_>>, - haystack: &[u8], - needle: &[u8], - period: usize, - ) -> Option<usize> { - let last_byte = needle.len() - 1; - let mut pos = 0; - let mut shift = 0; - while pos + needle.len() <= haystack.len() { - let mut i = cmp::max(self.0.critical_pos, shift); - if let Some(pre) = pre.as_mut() { - if pre.should_call() { - pos += pre.call(&haystack[pos..], needle)?; - shift = 0; - i = self.0.critical_pos; - if pos + needle.len() > haystack.len() { - return None; - } - } - } - if !self.0.byteset.contains(haystack[pos + last_byte]) { - pos += needle.len(); - shift = 0; - continue; - } - while i < needle.len() && needle[i] == haystack[pos + i] { - i += 1; - } - if i < needle.len() { - pos += i - self.0.critical_pos + 1; - shift = 0; - } else { - let mut j = self.0.critical_pos; - while j > shift && needle[j] == haystack[pos + j] { - j -= 1; - } - if j <= shift && needle[shift] == haystack[pos + shift] { - return Some(pos); - } - pos += period; - shift = needle.len() - period; - } - } - None - } - - #[inline(always)] - fn find_large_imp( - &self, - mut pre: Option<&mut Pre<'_>>, - haystack: &[u8], - needle: &[u8], - shift: usize, - ) -> Option<usize> { - let last_byte = needle.len() - 1; - let mut pos = 0; - 'outer: while pos + needle.len() <= haystack.len() { - if let Some(pre) = pre.as_mut() { - if pre.should_call() { - pos += pre.call(&haystack[pos..], needle)?; - if pos + needle.len() > haystack.len() { - return None; - } - } - } - - if !self.0.byteset.contains(haystack[pos + last_byte]) { - pos += needle.len(); - continue; - } - let mut i = self.0.critical_pos; - while i < needle.len() && needle[i] == haystack[pos + i] { - i += 1; - } - if i < needle.len() { - pos += i - self.0.critical_pos + 1; - } else { - for j in (0..self.0.critical_pos).rev() { - if needle[j] != haystack[pos + j] { - pos += shift; - continue 'outer; - } - } - return Some(pos); - } - } - None - } -} - -impl Reverse { - /// Create a searcher that uses the Two-Way algorithm by searching in - /// reverse through any haystack. - pub(crate) fn new(needle: &[u8]) -> Reverse { - if needle.is_empty() { - return Reverse(TwoWay::empty()); - } - - let byteset = ApproximateByteSet::new(needle); - let min_suffix = Suffix::reverse(needle, SuffixKind::Minimal); - let max_suffix = Suffix::reverse(needle, SuffixKind::Maximal); - let (period_lower_bound, critical_pos) = - if min_suffix.pos < max_suffix.pos { - (min_suffix.period, min_suffix.pos) - } else { - (max_suffix.period, max_suffix.pos) - }; - // let critical_pos = needle.len() - critical_pos; - let shift = Shift::reverse(needle, period_lower_bound, critical_pos); - Reverse(TwoWay { byteset, critical_pos, shift }) - } - - /// Find the position of the last occurrence of this searcher's needle - /// in the given haystack. If one does not exist, then return None. - /// - /// This will automatically initialize prefilter state. This should only - /// be used for one-off searches. - /// - /// Callers must guarantee that the needle is non-empty and its length is - /// <= the haystack's length. - #[inline(always)] - pub(crate) fn rfind( - &self, - haystack: &[u8], - needle: &[u8], - ) -> Option<usize> { - debug_assert!(!needle.is_empty(), "needle should not be empty"); - debug_assert!(needle.len() <= haystack.len(), "haystack too short"); - // For the reverse case, we don't use a prefilter. 
It's plausible that - // perhaps we should, but it's a lot of additional code to do it, and - // it's not clear that it's actually worth it. If you have a really - // compelling use case for this, please file an issue. - match self.0.shift { - Shift::Small { period } => { - self.rfind_small_imp(haystack, needle, period) - } - Shift::Large { shift } => { - self.rfind_large_imp(haystack, needle, shift) - } - } - } - - /// Like rfind, but handles the degenerate substring test cases. This is - /// only useful for conveniently testing this substring implementation in - /// isolation. - #[cfg(test)] - fn rfind_general(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> { - if needle.is_empty() { - Some(haystack.len()) - } else if haystack.len() < needle.len() { - None - } else { - self.rfind(haystack, needle) - } - } - - #[inline(always)] - fn rfind_small_imp( - &self, - haystack: &[u8], - needle: &[u8], - period: usize, - ) -> Option<usize> { - let nlen = needle.len(); - let mut pos = haystack.len(); - let mut shift = nlen; - while pos >= nlen { - if !self.0.byteset.contains(haystack[pos - nlen]) { - pos -= nlen; - shift = nlen; - continue; - } - let mut i = cmp::min(self.0.critical_pos, shift); - while i > 0 && needle[i - 1] == haystack[pos - nlen + i - 1] { - i -= 1; - } - if i > 0 || needle[0] != haystack[pos - nlen] { - pos -= self.0.critical_pos - i + 1; - shift = nlen; - } else { - let mut j = self.0.critical_pos; - while j < shift && needle[j] == haystack[pos - nlen + j] { - j += 1; - } - if j >= shift { - return Some(pos - nlen); - } - pos -= period; - shift = period; - } - } - None - } - - #[inline(always)] - fn rfind_large_imp( - &self, - haystack: &[u8], - needle: &[u8], - shift: usize, - ) -> Option<usize> { - let nlen = needle.len(); - let mut pos = haystack.len(); - while pos >= nlen { - if !self.0.byteset.contains(haystack[pos - nlen]) { - pos -= nlen; - continue; - } - let mut i = self.0.critical_pos; - while i > 0 && needle[i - 1] == haystack[pos - nlen + i - 1] { - i -= 1; - } - if i > 0 || needle[0] != haystack[pos - nlen] { - pos -= self.0.critical_pos - i + 1; - } else { - let mut j = self.0.critical_pos; - while j < nlen && needle[j] == haystack[pos - nlen + j] { - j += 1; - } - if j == nlen { - return Some(pos - nlen); - } - pos -= shift; - } - } - None - } -} - -impl TwoWay { - fn empty() -> TwoWay { - TwoWay { - byteset: ApproximateByteSet::new(b""), - critical_pos: 0, - shift: Shift::Large { shift: 0 }, - } - } -} - -/// A representation of the amount we're allowed to shift by during Two-Way -/// search. -/// -/// When computing a critical factorization of the needle, we find the position -/// of the critical factorization by finding the needle's maximal (or minimal) -/// suffix, along with the period of that suffix. It turns out that the period -/// of that suffix is a lower bound on the period of the needle itself. -/// -/// This lower bound is equivalent to the actual period of the needle in -/// some cases. To describe that case, we denote the needle as `x` where -/// `x = uv` and `v` is the lexicographic maximal suffix of `v`. The lower -/// bound given here is always the period of `v`, which is `<= period(x)`. The -/// case where `period(v) == period(x)` occurs when `len(u) < (len(x) / 2)` and -/// where `u` is a suffix of `v[0..period(v)]`. -/// -/// This case is important because the search algorithm for when the -/// periods are equivalent is slightly different than the search algorithm -/// for when the periods are not equivalent. 
In particular, when they aren't
-/// equivalent, we know that the period of the needle is no less than half its
-/// length. In this case, we shift by an amount less than or equal to the
-/// period of the needle (determined by the maximum length of the components
-/// of the critical factorization of `x`, i.e., `max(len(u), len(v))`).
-///
-/// The above two cases are represented by the variants below. Each entails
-/// a different instantiation of the Two-Way search algorithm.
-///
-/// N.B. If we could find a way to compute the exact period in all cases,
-/// then we could collapse this case analysis and simplify the algorithm. The
-/// Two-Way paper suggests this is possible, but more reading is required to
-/// grok why the authors didn't pursue that path.
-#[derive(Clone, Copy, Debug)]
-enum Shift {
-    Small { period: usize },
-    Large { shift: usize },
-}
-
-impl Shift {
-    /// Compute the shift for a given needle in the forward direction.
-    ///
-    /// This requires a lower bound on the period and a critical position.
-    /// These can be computed by extracting both the minimal and maximal
-    /// lexicographic suffixes, and choosing the right-most starting position.
-    /// The lower bound on the period is then the period of the chosen suffix.
-    fn forward(
-        needle: &[u8],
-        period_lower_bound: usize,
-        critical_pos: usize,
-    ) -> Shift {
-        let large = cmp::max(critical_pos, needle.len() - critical_pos);
-        if critical_pos * 2 >= needle.len() {
-            return Shift::Large { shift: large };
-        }
-
-        let (u, v) = needle.split_at(critical_pos);
-        if !util::is_suffix(&v[..period_lower_bound], u) {
-            return Shift::Large { shift: large };
-        }
-        Shift::Small { period: period_lower_bound }
-    }
-
-    /// Compute the shift for a given needle in the reverse direction.
-    ///
-    /// This requires a lower bound on the period and a critical position.
-    /// These can be computed by extracting both the minimal and maximal
-    /// lexicographic suffixes, and choosing the left-most starting position.
-    /// The lower bound on the period is then the period of the chosen suffix.
-    fn reverse(
-        needle: &[u8],
-        period_lower_bound: usize,
-        critical_pos: usize,
-    ) -> Shift {
-        let large = cmp::max(critical_pos, needle.len() - critical_pos);
-        if (needle.len() - critical_pos) * 2 >= needle.len() {
-            return Shift::Large { shift: large };
-        }
-
-        let (v, u) = needle.split_at(critical_pos);
-        if !util::is_prefix(&v[v.len() - period_lower_bound..], u) {
-            return Shift::Large { shift: large };
-        }
-        Shift::Small { period: period_lower_bound }
-    }
-}
-
-/// A suffix extracted from a needle along with its period.
-#[derive(Debug)]
-struct Suffix {
-    /// The starting position of this suffix.
-    ///
-    /// If this is a forward suffix, then `&bytes[pos..]` can be used. If this
-    /// is a reverse suffix, then `&bytes[..pos]` can be used. That is, for
-    /// forward suffixes, this is an inclusive starting position, whereas for
-    /// reverse suffixes, this is an exclusive ending position.
-    pos: usize,
-    /// The period of this suffix.
-    ///
-    /// Note that this is NOT necessarily the period of the string this suffix
-    /// comes from. (It is always less than or equal to the period of the
-    /// original string.)
-    period: usize,
-}
-
-impl Suffix {
-    fn forward(needle: &[u8], kind: SuffixKind) -> Suffix {
-        debug_assert!(!needle.is_empty());
-
-        // suffix represents our maximal (or minimal) suffix, along with
-        // its period.
-        let mut suffix = Suffix { pos: 0, period: 1 };
-        // The start of a suffix in `needle` that we are considering as a
-        // more maximal (or minimal) suffix than what's in `suffix`.
-        let mut candidate_start = 1;
-        // The current offset of our suffixes that we're comparing.
-        //
-        // When the characters at this offset are the same, then we move on
-        // to the next position since no decision is possible. When the
-        // candidate's character is greater (or lesser) than the corresponding
-        // character of our current maximal (or minimal) suffix, then the
-        // current suffix is changed over to the candidate and we restart our
-        // search. Otherwise, the candidate suffix is no good and we restart
-        // our search on the next candidate.
-        //
-        // The three cases above correspond to the three cases in the loop
-        // below.
-        let mut offset = 0;
-
-        while candidate_start + offset < needle.len() {
-            let current = needle[suffix.pos + offset];
-            let candidate = needle[candidate_start + offset];
-            match kind.cmp(current, candidate) {
-                SuffixOrdering::Accept => {
-                    suffix = Suffix { pos: candidate_start, period: 1 };
-                    candidate_start += 1;
-                    offset = 0;
-                }
-                SuffixOrdering::Skip => {
-                    candidate_start += offset + 1;
-                    offset = 0;
-                    suffix.period = candidate_start - suffix.pos;
-                }
-                SuffixOrdering::Push => {
-                    if offset + 1 == suffix.period {
-                        candidate_start += suffix.period;
-                        offset = 0;
-                    } else {
-                        offset += 1;
-                    }
-                }
-            }
-        }
-        suffix
-    }
-
-    fn reverse(needle: &[u8], kind: SuffixKind) -> Suffix {
-        debug_assert!(!needle.is_empty());
-
-        // See the comments in `forward` for how this works.
-        let mut suffix = Suffix { pos: needle.len(), period: 1 };
-        if needle.len() == 1 {
-            return suffix;
-        }
-        let mut candidate_start = needle.len() - 1;
-        let mut offset = 0;
-
-        while offset < candidate_start {
-            let current = needle[suffix.pos - offset - 1];
-            let candidate = needle[candidate_start - offset - 1];
-            match kind.cmp(current, candidate) {
-                SuffixOrdering::Accept => {
-                    suffix = Suffix { pos: candidate_start, period: 1 };
-                    candidate_start -= 1;
-                    offset = 0;
-                }
-                SuffixOrdering::Skip => {
-                    candidate_start -= offset + 1;
-                    offset = 0;
-                    suffix.period = suffix.pos - candidate_start;
-                }
-                SuffixOrdering::Push => {
-                    if offset + 1 == suffix.period {
-                        candidate_start -= suffix.period;
-                        offset = 0;
-                    } else {
-                        offset += 1;
-                    }
-                }
-            }
-        }
-        suffix
-    }
-}
-
-/// The kind of suffix to extract.
-#[derive(Clone, Copy, Debug)]
-enum SuffixKind {
-    /// Extract the smallest lexicographic suffix from a string.
-    ///
-    /// Technically, this doesn't actually pick the smallest lexicographic
-    /// suffix. e.g., Given the choice between `a` and `aa`, this will choose
-    /// the latter over the former, even though `a < aa`. The reasoning for
-    /// this isn't clear from the paper, but it still smells like a minimal
-    /// suffix.
-    Minimal,
-    /// Extract the largest lexicographic suffix from a string.
-    ///
-    /// Unlike `Minimal`, this really does pick the maximum suffix. e.g., Given
-    /// the choice between `z` and `zz`, this will choose the latter over the
-    /// former.
-    Maximal,
-}
-
-/// The result of comparing corresponding bytes between two suffixes.
-#[derive(Clone, Copy, Debug)]
-enum SuffixOrdering {
-    /// This occurs when the given candidate byte indicates that the candidate
-    /// suffix is better than the current maximal (or minimal) suffix. That is,
-    /// the current candidate suffix should supplant the current maximal (or
-    /// minimal) suffix.
- Accept, - /// This occurs when the given candidate byte excludes the candidate suffix - /// from being better than the current maximal (or minimal) suffix. That - /// is, the current candidate suffix should be dropped and the next one - /// should be considered. - Skip, - /// This occurs when no decision to accept or skip the candidate suffix - /// can be made, e.g., when corresponding bytes are equivalent. In this - /// case, the next corresponding bytes should be compared. - Push, -} - -impl SuffixKind { - /// Returns true if and only if the given candidate byte indicates that - /// it should replace the current suffix as the maximal (or minimal) - /// suffix. - fn cmp(self, current: u8, candidate: u8) -> SuffixOrdering { - use self::SuffixOrdering::*; - - match self { - SuffixKind::Minimal if candidate < current => Accept, - SuffixKind::Minimal if candidate > current => Skip, - SuffixKind::Minimal => Push, - SuffixKind::Maximal if candidate > current => Accept, - SuffixKind::Maximal if candidate < current => Skip, - SuffixKind::Maximal => Push, - } - } -} - -/// A bitset used to track whether a particular byte exists in a needle or not. -/// -/// Namely, bit 'i' is set if and only if byte%64==i for any byte in the -/// needle. If a particular byte in the haystack is NOT in this set, then one -/// can conclude that it is also not in the needle, and thus, one can advance -/// in the haystack by needle.len() bytes. -#[derive(Clone, Copy, Debug)] -struct ApproximateByteSet(u64); - -impl ApproximateByteSet { - /// Create a new set from the given needle. - fn new(needle: &[u8]) -> ApproximateByteSet { - let mut bits = 0; - for &b in needle { - bits |= 1 << (b % 64); - } - ApproximateByteSet(bits) - } - - /// Return true if and only if the given byte might be in this set. This - /// may return a false positive, but will never return a false negative. - #[inline(always)] - fn contains(&self, byte: u8) -> bool { - self.0 & (1 << (byte % 64)) != 0 - } -} - -#[cfg(all(test, feature = "std", not(miri)))] -mod tests { - use quickcheck::quickcheck; - - use super::*; - - define_memmem_quickcheck_tests!( - super::simpletests::twoway_find, - super::simpletests::twoway_rfind - ); - - /// Convenience wrapper for computing the suffix as a byte string. - fn get_suffix_forward(needle: &[u8], kind: SuffixKind) -> (&[u8], usize) { - let s = Suffix::forward(needle, kind); - (&needle[s.pos..], s.period) - } - - /// Convenience wrapper for computing the reverse suffix as a byte string. - fn get_suffix_reverse(needle: &[u8], kind: SuffixKind) -> (&[u8], usize) { - let s = Suffix::reverse(needle, kind); - (&needle[..s.pos], s.period) - } - - /// Return all of the non-empty suffixes in the given byte string. - fn suffixes(bytes: &[u8]) -> Vec<&[u8]> { - (0..bytes.len()).map(|i| &bytes[i..]).collect() - } - - /// Return the lexicographically maximal suffix of the given byte string. - fn naive_maximal_suffix_forward(needle: &[u8]) -> &[u8] { - let mut sufs = suffixes(needle); - sufs.sort(); - sufs.pop().unwrap() - } - - /// Return the lexicographically maximal suffix of the reverse of the given - /// byte string. - fn naive_maximal_suffix_reverse(needle: &[u8]) -> Vec<u8> { - let mut reversed = needle.to_vec(); - reversed.reverse(); - let mut got = naive_maximal_suffix_forward(&reversed).to_vec(); - got.reverse(); - got - } - - #[test] - fn suffix_forward() { - macro_rules! 
assert_suffix_min { - ($given:expr, $expected:expr, $period:expr) => { - let (got_suffix, got_period) = - get_suffix_forward($given.as_bytes(), SuffixKind::Minimal); - let got_suffix = std::str::from_utf8(got_suffix).unwrap(); - assert_eq!(($expected, $period), (got_suffix, got_period)); - }; - } - - macro_rules! assert_suffix_max { - ($given:expr, $expected:expr, $period:expr) => { - let (got_suffix, got_period) = - get_suffix_forward($given.as_bytes(), SuffixKind::Maximal); - let got_suffix = std::str::from_utf8(got_suffix).unwrap(); - assert_eq!(($expected, $period), (got_suffix, got_period)); - }; - } - - assert_suffix_min!("a", "a", 1); - assert_suffix_max!("a", "a", 1); - - assert_suffix_min!("ab", "ab", 2); - assert_suffix_max!("ab", "b", 1); - - assert_suffix_min!("ba", "a", 1); - assert_suffix_max!("ba", "ba", 2); - - assert_suffix_min!("abc", "abc", 3); - assert_suffix_max!("abc", "c", 1); - - assert_suffix_min!("acb", "acb", 3); - assert_suffix_max!("acb", "cb", 2); - - assert_suffix_min!("cba", "a", 1); - assert_suffix_max!("cba", "cba", 3); - - assert_suffix_min!("abcabc", "abcabc", 3); - assert_suffix_max!("abcabc", "cabc", 3); - - assert_suffix_min!("abcabcabc", "abcabcabc", 3); - assert_suffix_max!("abcabcabc", "cabcabc", 3); - - assert_suffix_min!("abczz", "abczz", 5); - assert_suffix_max!("abczz", "zz", 1); - - assert_suffix_min!("zzabc", "abc", 3); - assert_suffix_max!("zzabc", "zzabc", 5); - - assert_suffix_min!("aaa", "aaa", 1); - assert_suffix_max!("aaa", "aaa", 1); - - assert_suffix_min!("foobar", "ar", 2); - assert_suffix_max!("foobar", "r", 1); - } - - #[test] - fn suffix_reverse() { - macro_rules! assert_suffix_min { - ($given:expr, $expected:expr, $period:expr) => { - let (got_suffix, got_period) = - get_suffix_reverse($given.as_bytes(), SuffixKind::Minimal); - let got_suffix = std::str::from_utf8(got_suffix).unwrap(); - assert_eq!(($expected, $period), (got_suffix, got_period)); - }; - } - - macro_rules! assert_suffix_max { - ($given:expr, $expected:expr, $period:expr) => { - let (got_suffix, got_period) = - get_suffix_reverse($given.as_bytes(), SuffixKind::Maximal); - let got_suffix = std::str::from_utf8(got_suffix).unwrap(); - assert_eq!(($expected, $period), (got_suffix, got_period)); - }; - } - - assert_suffix_min!("a", "a", 1); - assert_suffix_max!("a", "a", 1); - - assert_suffix_min!("ab", "a", 1); - assert_suffix_max!("ab", "ab", 2); - - assert_suffix_min!("ba", "ba", 2); - assert_suffix_max!("ba", "b", 1); - - assert_suffix_min!("abc", "a", 1); - assert_suffix_max!("abc", "abc", 3); - - assert_suffix_min!("acb", "a", 1); - assert_suffix_max!("acb", "ac", 2); - - assert_suffix_min!("cba", "cba", 3); - assert_suffix_max!("cba", "c", 1); - - assert_suffix_min!("abcabc", "abca", 3); - assert_suffix_max!("abcabc", "abcabc", 3); - - assert_suffix_min!("abcabcabc", "abcabca", 3); - assert_suffix_max!("abcabcabc", "abcabcabc", 3); - - assert_suffix_min!("abczz", "a", 1); - assert_suffix_max!("abczz", "abczz", 5); - - assert_suffix_min!("zzabc", "zza", 3); - assert_suffix_max!("zzabc", "zz", 1); - - assert_suffix_min!("aaa", "aaa", 1); - assert_suffix_max!("aaa", "aaa", 1); - } - - quickcheck! 
{ - fn qc_suffix_forward_maximal(bytes: Vec<u8>) -> bool { - if bytes.is_empty() { - return true; - } - - let (got, _) = get_suffix_forward(&bytes, SuffixKind::Maximal); - let expected = naive_maximal_suffix_forward(&bytes); - got == expected - } - - fn qc_suffix_reverse_maximal(bytes: Vec<u8>) -> bool { - if bytes.is_empty() { - return true; - } - - let (got, _) = get_suffix_reverse(&bytes, SuffixKind::Maximal); - let expected = naive_maximal_suffix_reverse(&bytes); - expected == got - } - } -} - -#[cfg(test)] -mod simpletests { - use super::*; - - pub(crate) fn twoway_find( - haystack: &[u8], - needle: &[u8], - ) -> Option<usize> { - Forward::new(needle).find_general(None, haystack, needle) - } - - pub(crate) fn twoway_rfind( - haystack: &[u8], - needle: &[u8], - ) -> Option<usize> { - Reverse::new(needle).rfind_general(haystack, needle) - } - - define_memmem_simple_tests!(twoway_find, twoway_rfind); - - // This is a regression test caught by quickcheck that exercised a bug in - // the reverse small period handling. The bug was that we were using 'if j - // == shift' to determine if a match occurred, but the correct guard is 'if - // j >= shift', which matches the corresponding guard in the forward impl. - #[test] - fn regression_rev_small_period() { - let rfind = super::simpletests::twoway_rfind; - let haystack = "ababaz"; - let needle = "abab"; - assert_eq!(Some(0), rfind(haystack.as_bytes(), needle.as_bytes())); - } -} diff --git a/vendor/memchr/src/memmem/util.rs b/vendor/memchr/src/memmem/util.rs deleted file mode 100644 index de0e385e1..000000000 --- a/vendor/memchr/src/memmem/util.rs +++ /dev/null @@ -1,88 +0,0 @@ -// These routines are meant to be optimized specifically for low latency as -// compared to the equivalent routines offered by std. (Which may invoke the -// dynamic linker and call out to libc, which introduces a bit more latency -// than we'd like.) - -/// Returns true if and only if needle is a prefix of haystack. -#[inline(always)] -pub(crate) fn is_prefix(haystack: &[u8], needle: &[u8]) -> bool { - needle.len() <= haystack.len() && memcmp(&haystack[..needle.len()], needle) -} - -/// Returns true if and only if needle is a suffix of haystack. -#[inline(always)] -pub(crate) fn is_suffix(haystack: &[u8], needle: &[u8]) -> bool { - needle.len() <= haystack.len() - && memcmp(&haystack[haystack.len() - needle.len()..], needle) -} - -/// Return true if and only if x.len() == y.len() && x[i] == y[i] for all -/// 0 <= i < x.len(). -/// -/// Why not just use actual memcmp for this? Well, memcmp requires calling out -/// to libc, and this routine is called in fairly hot code paths. Other than -/// just calling out to libc, it also seems to result in worse codegen. By -/// rolling our own memcmp in pure Rust, it seems to appear more friendly to -/// the optimizer. -/// -/// We mark this as inline always, although, some callers may not want it -/// inlined for better codegen (like Rabin-Karp). In that case, callers are -/// advised to create a non-inlineable wrapper routine that calls memcmp. -#[inline(always)] -pub(crate) fn memcmp(x: &[u8], y: &[u8]) -> bool { - if x.len() != y.len() { - return false; - } - // If we don't have enough bytes to do 4-byte at a time loads, then - // fall back to the naive slow version. - // - // TODO: We could do a copy_nonoverlapping combined with a mask instead - // of a loop. Benchmark it. 
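-    // As a concrete example of the chunked strategy below: comparing two
-    // 7-byte slices does a 4-byte load at offset 0 and a final 4-byte load
-    // at offset 3 (i.e., len - 4). The two windows overlap by one byte,
-    // which is harmless, since both windows comparing equal still implies
-    // that all 7 bytes are equal.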
-    if x.len() < 4 {
-        for (&b1, &b2) in x.iter().zip(y) {
-            if b1 != b2 {
-                return false;
-            }
-        }
-        return true;
-    }
-    // When we have 4 or more bytes to compare, then proceed in chunks of 4 at
-    // a time using unaligned loads.
-    //
-    // Also, why do 4 byte loads instead of, say, 8 byte loads? The reason is
-    // that this particular version of memcmp is likely to be called with tiny
-    // needles. That means that if we do 8 byte loads, then a higher proportion
-    // of memcmp calls will use the slower variant above. With that said, this
-    // is a hypothesis and is only loosely supported by benchmarks. There's
-    // likely some improvement that could be made here. The main thing here
-    // though is to optimize for latency, not throughput.
-
-    // SAFETY: Via the conditional above, we know that both `px` and `py`
-    // have the same length, so `px < pxend` implies that `py < pyend`.
-    // Thus, dereferencing both `px` and `py` in the loop below is safe.
-    //
-    // Moreover, we set `pxend` and `pyend` to be 4 bytes before the actual
-    // end of `px` and `py`. Thus, the final dereference outside of the
-    // loop is guaranteed to be valid. (The final comparison will overlap with
-    // the last comparison done in the loop for lengths that aren't multiples
-    // of four.)
-    //
-    // Finally, we needn't worry about alignment here, since we do unaligned
-    // loads.
-    unsafe {
-        let (mut px, mut py) = (x.as_ptr(), y.as_ptr());
-        let (pxend, pyend) = (px.add(x.len() - 4), py.add(y.len() - 4));
-        while px < pxend {
-            let vx = (px as *const u32).read_unaligned();
-            let vy = (py as *const u32).read_unaligned();
-            if vx != vy {
-                return false;
-            }
-            px = px.add(4);
-            py = py.add(4);
-        }
-        let vx = (pxend as *const u32).read_unaligned();
-        let vy = (pyend as *const u32).read_unaligned();
-        vx == vy
-    }
-}
diff --git a/vendor/memchr/src/memmem/vector.rs b/vendor/memchr/src/memmem/vector.rs
deleted file mode 100644
index b81165f8b..000000000
--- a/vendor/memchr/src/memmem/vector.rs
+++ /dev/null
@@ -1,131 +0,0 @@
-/// A trait for describing vector operations used by vectorized searchers.
-///
-/// The trait is highly constrained to the low-level vector operations needed.
-/// In general, it was invented mostly to be generic over x86's __m128i and
-/// __m256i types. It's likely that once std::simd becomes a thing, we can
-/// migrate to that since the operations required are quite simple.
-///
-/// TODO: Consider moving this trait up a level and using it to implement
-/// memchr as well. The trait might need to grow one or two methods, but
-/// otherwise should be close to sufficient already.
-///
-/// # Safety
-///
-/// All methods are unsafe since they are intended to be implemented using
-/// vendor intrinsics, which are also not safe. Callers must ensure that the
-/// appropriate target features are enabled in the calling function, and that
-/// the current CPU supports them. All implementations should avoid marking the
-/// routines with #[target_feature] and instead mark them as #[inline(always)]
-/// to ensure they get appropriately inlined. (inline(always) cannot be used
-/// with target_feature.)
-pub(crate) trait Vector: Copy + core::fmt::Debug { - /// _mm_set1_epi8 or _mm256_set1_epi8 - unsafe fn splat(byte: u8) -> Self; - /// _mm_loadu_si128 or _mm256_loadu_si256 - unsafe fn load_unaligned(data: *const u8) -> Self; - /// _mm_movemask_epi8 or _mm256_movemask_epi8 - unsafe fn movemask(self) -> u32; - /// _mm_cmpeq_epi8 or _mm256_cmpeq_epi8 - unsafe fn cmpeq(self, vector2: Self) -> Self; - /// _mm_and_si128 or _mm256_and_si256 - unsafe fn and(self, vector2: Self) -> Self; -} - -#[cfg(target_arch = "x86_64")] -mod x86sse { - use super::Vector; - use core::arch::x86_64::*; - - impl Vector for __m128i { - #[inline(always)] - unsafe fn splat(byte: u8) -> __m128i { - _mm_set1_epi8(byte as i8) - } - - #[inline(always)] - unsafe fn load_unaligned(data: *const u8) -> __m128i { - _mm_loadu_si128(data as *const __m128i) - } - - #[inline(always)] - unsafe fn movemask(self) -> u32 { - _mm_movemask_epi8(self) as u32 - } - - #[inline(always)] - unsafe fn cmpeq(self, vector2: Self) -> __m128i { - _mm_cmpeq_epi8(self, vector2) - } - - #[inline(always)] - unsafe fn and(self, vector2: Self) -> __m128i { - _mm_and_si128(self, vector2) - } - } -} - -#[cfg(all(feature = "std", target_arch = "x86_64"))] -mod x86avx { - use super::Vector; - use core::arch::x86_64::*; - - impl Vector for __m256i { - #[inline(always)] - unsafe fn splat(byte: u8) -> __m256i { - _mm256_set1_epi8(byte as i8) - } - - #[inline(always)] - unsafe fn load_unaligned(data: *const u8) -> __m256i { - _mm256_loadu_si256(data as *const __m256i) - } - - #[inline(always)] - unsafe fn movemask(self) -> u32 { - _mm256_movemask_epi8(self) as u32 - } - - #[inline(always)] - unsafe fn cmpeq(self, vector2: Self) -> __m256i { - _mm256_cmpeq_epi8(self, vector2) - } - - #[inline(always)] - unsafe fn and(self, vector2: Self) -> __m256i { - _mm256_and_si256(self, vector2) - } - } -} - -#[cfg(target_arch = "wasm32")] -mod wasm_simd128 { - use super::Vector; - use core::arch::wasm32::*; - - impl Vector for v128 { - #[inline(always)] - unsafe fn splat(byte: u8) -> v128 { - u8x16_splat(byte) - } - - #[inline(always)] - unsafe fn load_unaligned(data: *const u8) -> v128 { - v128_load(data.cast()) - } - - #[inline(always)] - unsafe fn movemask(self) -> u32 { - u8x16_bitmask(self).into() - } - - #[inline(always)] - unsafe fn cmpeq(self, vector2: Self) -> v128 { - u8x16_eq(self, vector2) - } - - #[inline(always)] - unsafe fn and(self, vector2: Self) -> v128 { - v128_and(self, vector2) - } - } -} diff --git a/vendor/memchr/src/memmem/wasm.rs b/vendor/memchr/src/memmem/wasm.rs deleted file mode 100644 index 4e3ea985c..000000000 --- a/vendor/memchr/src/memmem/wasm.rs +++ /dev/null @@ -1,75 +0,0 @@ -use core::arch::wasm32::v128; - -use crate::memmem::{genericsimd, NeedleInfo}; - -/// A `v128` accelerated vectorized substring search routine that only works on -/// small needles. -#[derive(Clone, Copy, Debug)] -pub(crate) struct Forward(genericsimd::Forward); - -impl Forward { - /// Create a new "generic simd" forward searcher. If one could not be - /// created from the given inputs, then None is returned. - pub(crate) fn new(ninfo: &NeedleInfo, needle: &[u8]) -> Option<Forward> { - if !cfg!(memchr_runtime_simd) { - return None; - } - genericsimd::Forward::new(ninfo, needle).map(Forward) - } - - /// Returns the minimum length of haystack that is needed for this searcher - /// to work. Passing a haystack with a length smaller than this will cause - /// `find` to panic. 
- #[inline(always)] - pub(crate) fn min_haystack_len(&self) -> usize { - self.0.min_haystack_len::<v128>() - } - - #[inline(always)] - pub(crate) fn find( - &self, - haystack: &[u8], - needle: &[u8], - ) -> Option<usize> { - self.find_impl(haystack, needle) - } - - /// The implementation of find marked with the appropriate target feature. - #[target_feature(enable = "simd128")] - fn find_impl(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> { - unsafe { genericsimd::fwd_find::<v128>(&self.0, haystack, needle) } - } -} - -#[cfg(all(test, feature = "std", not(miri)))] -mod tests { - use crate::memmem::{prefilter::PrefilterState, NeedleInfo}; - - fn find( - _: &mut PrefilterState, - ninfo: &NeedleInfo, - haystack: &[u8], - needle: &[u8], - ) -> Option<usize> { - super::Forward::new(ninfo, needle).unwrap().find(haystack, needle) - } - - #[test] - fn prefilter_permutations() { - use crate::memmem::prefilter::tests::PrefilterTest; - - unsafe { - PrefilterTest::run_all_tests_filter(find, |t| { - // This substring searcher only works on certain configs, so - // filter our tests such that Forward::new will be guaranteed - // to succeed. (And also remove tests with a haystack that is - // too small.) - let fwd = match super::Forward::new(&t.ninfo, &t.needle) { - None => return false, - Some(fwd) => fwd, - }; - t.haystack.len() >= fwd.min_haystack_len() - }) - } - } -} diff --git a/vendor/memchr/src/memmem/x86/avx.rs b/vendor/memchr/src/memmem/x86/avx.rs deleted file mode 100644 index ce168dd37..000000000 --- a/vendor/memchr/src/memmem/x86/avx.rs +++ /dev/null @@ -1,139 +0,0 @@ -#[cfg(not(feature = "std"))] -pub(crate) use self::nostd::Forward; -#[cfg(feature = "std")] -pub(crate) use self::std::Forward; - -#[cfg(feature = "std")] -mod std { - use core::arch::x86_64::{__m128i, __m256i}; - - use crate::memmem::{genericsimd, NeedleInfo}; - - /// An AVX accelerated vectorized substring search routine that only works - /// on small needles. - #[derive(Clone, Copy, Debug)] - pub(crate) struct Forward(genericsimd::Forward); - - impl Forward { - /// Create a new "generic simd" forward searcher. If one could not be - /// created from the given inputs, then None is returned. - pub(crate) fn new( - ninfo: &NeedleInfo, - needle: &[u8], - ) -> Option<Forward> { - if !cfg!(memchr_runtime_avx) || !is_x86_feature_detected!("avx2") { - return None; - } - genericsimd::Forward::new(ninfo, needle).map(Forward) - } - - /// Returns the minimum length of haystack that is needed for this - /// searcher to work. Passing a haystack with a length smaller than - /// this will cause `find` to panic. - #[inline(always)] - pub(crate) fn min_haystack_len(&self) -> usize { - self.0.min_haystack_len::<__m128i>() - } - - #[inline(always)] - pub(crate) fn find( - &self, - haystack: &[u8], - needle: &[u8], - ) -> Option<usize> { - // SAFETY: The only way a Forward value can exist is if the avx2 - // target feature is enabled. This is the only safety requirement - // for calling the genericsimd searcher. - unsafe { self.find_impl(haystack, needle) } - } - - /// The implementation of find marked with the appropriate target - /// feature. - /// - /// # Safety - /// - /// Callers must ensure that the avx2 CPU feature is enabled in the - /// current environment. 
- #[target_feature(enable = "avx2")] - unsafe fn find_impl( - &self, - haystack: &[u8], - needle: &[u8], - ) -> Option<usize> { - if haystack.len() < self.0.min_haystack_len::<__m256i>() { - genericsimd::fwd_find::<__m128i>(&self.0, haystack, needle) - } else { - genericsimd::fwd_find::<__m256i>(&self.0, haystack, needle) - } - } - } -} - -// We still define the avx "forward" type on nostd to make caller code a bit -// simpler. This avoids needing a lot more conditional compilation. -#[cfg(not(feature = "std"))] -mod nostd { - use crate::memmem::NeedleInfo; - - #[derive(Clone, Copy, Debug)] - pub(crate) struct Forward(()); - - impl Forward { - pub(crate) fn new( - ninfo: &NeedleInfo, - needle: &[u8], - ) -> Option<Forward> { - None - } - - pub(crate) fn min_haystack_len(&self) -> usize { - unreachable!() - } - - pub(crate) fn find( - &self, - haystack: &[u8], - needle: &[u8], - ) -> Option<usize> { - unreachable!() - } - } -} - -#[cfg(all(test, feature = "std", not(miri)))] -mod tests { - use crate::memmem::{prefilter::PrefilterState, NeedleInfo}; - - fn find( - _: &mut PrefilterState, - ninfo: &NeedleInfo, - haystack: &[u8], - needle: &[u8], - ) -> Option<usize> { - super::Forward::new(ninfo, needle).unwrap().find(haystack, needle) - } - - #[test] - fn prefilter_permutations() { - use crate::memmem::prefilter::tests::PrefilterTest; - - if !is_x86_feature_detected!("avx2") { - return; - } - // SAFETY: The safety of find only requires that the current CPU - // support AVX2, which we checked above. - unsafe { - PrefilterTest::run_all_tests_filter(find, |t| { - // This substring searcher only works on certain configs, so - // filter our tests such that Forward::new will be guaranteed - // to succeed. (And also remove tests with a haystack that is - // too small.) - let fwd = match super::Forward::new(&t.ninfo, &t.needle) { - None => return false, - Some(fwd) => fwd, - }; - t.haystack.len() >= fwd.min_haystack_len() - }) - } - } -} diff --git a/vendor/memchr/src/memmem/x86/mod.rs b/vendor/memchr/src/memmem/x86/mod.rs deleted file mode 100644 index c1cc73fee..000000000 --- a/vendor/memchr/src/memmem/x86/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub(crate) mod avx; -pub(crate) mod sse; diff --git a/vendor/memchr/src/memmem/x86/sse.rs b/vendor/memchr/src/memmem/x86/sse.rs deleted file mode 100644 index 22e7d9933..000000000 --- a/vendor/memchr/src/memmem/x86/sse.rs +++ /dev/null @@ -1,89 +0,0 @@ -use core::arch::x86_64::__m128i; - -use crate::memmem::{genericsimd, NeedleInfo}; - -/// An SSE accelerated vectorized substring search routine that only works on -/// small needles. -#[derive(Clone, Copy, Debug)] -pub(crate) struct Forward(genericsimd::Forward); - -impl Forward { - /// Create a new "generic simd" forward searcher. If one could not be - /// created from the given inputs, then None is returned. - pub(crate) fn new(ninfo: &NeedleInfo, needle: &[u8]) -> Option<Forward> { - if !cfg!(memchr_runtime_sse2) { - return None; - } - genericsimd::Forward::new(ninfo, needle).map(Forward) - } - - /// Returns the minimum length of haystack that is needed for this searcher - /// to work. Passing a haystack with a length smaller than this will cause - /// `find` to panic. - #[inline(always)] - pub(crate) fn min_haystack_len(&self) -> usize { - self.0.min_haystack_len::<__m128i>() - } - - #[inline(always)] - pub(crate) fn find( - &self, - haystack: &[u8], - needle: &[u8], - ) -> Option<usize> { - // SAFETY: sse2 is enabled on all x86_64 targets, so this is always - // safe to call. 
- unsafe { self.find_impl(haystack, needle) } - } - - /// The implementation of find marked with the appropriate target feature. - /// - /// # Safety - /// - /// This is safe to call in all cases since sse2 is guaranteed to be part - /// of x86_64. It is marked as unsafe because of the target feature - /// attribute. - #[target_feature(enable = "sse2")] - unsafe fn find_impl( - &self, - haystack: &[u8], - needle: &[u8], - ) -> Option<usize> { - genericsimd::fwd_find::<__m128i>(&self.0, haystack, needle) - } -} - -#[cfg(all(test, feature = "std", not(miri)))] -mod tests { - use crate::memmem::{prefilter::PrefilterState, NeedleInfo}; - - fn find( - _: &mut PrefilterState, - ninfo: &NeedleInfo, - haystack: &[u8], - needle: &[u8], - ) -> Option<usize> { - super::Forward::new(ninfo, needle).unwrap().find(haystack, needle) - } - - #[test] - fn prefilter_permutations() { - use crate::memmem::prefilter::tests::PrefilterTest; - - // SAFETY: sse2 is enabled on all x86_64 targets, so this is always - // safe to call. - unsafe { - PrefilterTest::run_all_tests_filter(find, |t| { - // This substring searcher only works on certain configs, so - // filter our tests such that Forward::new will be guaranteed - // to succeed. (And also remove tests with a haystack that is - // too small.) - let fwd = match super::Forward::new(&t.ninfo, &t.needle) { - None => return false, - Some(fwd) => fwd, - }; - t.haystack.len() >= fwd.min_haystack_len() - }) - } - } -} |
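For orientation after this large rewrite: all of the machinery in this diff sits behind the crate's small public `memmem` API. A quick usage sketch of ordinary calls against the published API, not part of the diff itself:

    use memchr::memmem;

    fn main() {
        let haystack = b"foo bar foo baz foo";

        // Forward and reverse substring search.
        assert_eq!(Some(0), memmem::find(haystack, b"foo"));
        assert_eq!(Some(16), memmem::rfind(haystack, b"foo"));

        // A Finder amortizes searcher construction (including prefilter
        // selection) across many searches.
        let finder = memmem::Finder::new(b"foo");
        let all: Vec<usize> = finder.find_iter(haystack).collect();
        assert_eq!(vec![0, 8, 16], all);
    }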