diff options
Diffstat (limited to 'third_party/rust/memchr/src/memmem')
19 files changed, 4647 insertions, 0 deletions
diff --git a/third_party/rust/memchr/src/memmem/byte_frequencies.rs b/third_party/rust/memchr/src/memmem/byte_frequencies.rs new file mode 100644 index 0000000000..c313b629db --- /dev/null +++ b/third_party/rust/memchr/src/memmem/byte_frequencies.rs @@ -0,0 +1,258 @@ +pub const BYTE_FREQUENCIES: [u8; 256] = [ + 55, // '\x00' + 52, // '\x01' + 51, // '\x02' + 50, // '\x03' + 49, // '\x04' + 48, // '\x05' + 47, // '\x06' + 46, // '\x07' + 45, // '\x08' + 103, // '\t' + 242, // '\n' + 66, // '\x0b' + 67, // '\x0c' + 229, // '\r' + 44, // '\x0e' + 43, // '\x0f' + 42, // '\x10' + 41, // '\x11' + 40, // '\x12' + 39, // '\x13' + 38, // '\x14' + 37, // '\x15' + 36, // '\x16' + 35, // '\x17' + 34, // '\x18' + 33, // '\x19' + 56, // '\x1a' + 32, // '\x1b' + 31, // '\x1c' + 30, // '\x1d' + 29, // '\x1e' + 28, // '\x1f' + 255, // ' ' + 148, // '!' + 164, // '"' + 149, // '#' + 136, // '$' + 160, // '%' + 155, // '&' + 173, // "'" + 221, // '(' + 222, // ')' + 134, // '*' + 122, // '+' + 232, // ',' + 202, // '-' + 215, // '.' + 224, // '/' + 208, // '0' + 220, // '1' + 204, // '2' + 187, // '3' + 183, // '4' + 179, // '5' + 177, // '6' + 168, // '7' + 178, // '8' + 200, // '9' + 226, // ':' + 195, // ';' + 154, // '<' + 184, // '=' + 174, // '>' + 126, // '?' + 120, // '@' + 191, // 'A' + 157, // 'B' + 194, // 'C' + 170, // 'D' + 189, // 'E' + 162, // 'F' + 161, // 'G' + 150, // 'H' + 193, // 'I' + 142, // 'J' + 137, // 'K' + 171, // 'L' + 176, // 'M' + 185, // 'N' + 167, // 'O' + 186, // 'P' + 112, // 'Q' + 175, // 'R' + 192, // 'S' + 188, // 'T' + 156, // 'U' + 140, // 'V' + 143, // 'W' + 123, // 'X' + 133, // 'Y' + 128, // 'Z' + 147, // '[' + 138, // '\\' + 146, // ']' + 114, // '^' + 223, // '_' + 151, // '`' + 249, // 'a' + 216, // 'b' + 238, // 'c' + 236, // 'd' + 253, // 'e' + 227, // 'f' + 218, // 'g' + 230, // 'h' + 247, // 'i' + 135, // 'j' + 180, // 'k' + 241, // 'l' + 233, // 'm' + 246, // 'n' + 244, // 'o' + 231, // 'p' + 139, // 'q' + 245, // 'r' + 243, // 's' + 251, // 't' + 235, // 'u' + 201, // 'v' + 196, // 'w' + 240, // 'x' + 214, // 'y' + 152, // 'z' + 182, // '{' + 205, // '|' + 181, // '}' + 127, // '~' + 27, // '\x7f' + 212, // '\x80' + 211, // '\x81' + 210, // '\x82' + 213, // '\x83' + 228, // '\x84' + 197, // '\x85' + 169, // '\x86' + 159, // '\x87' + 131, // '\x88' + 172, // '\x89' + 105, // '\x8a' + 80, // '\x8b' + 98, // '\x8c' + 96, // '\x8d' + 97, // '\x8e' + 81, // '\x8f' + 207, // '\x90' + 145, // '\x91' + 116, // '\x92' + 115, // '\x93' + 144, // '\x94' + 130, // '\x95' + 153, // '\x96' + 121, // '\x97' + 107, // '\x98' + 132, // '\x99' + 109, // '\x9a' + 110, // '\x9b' + 124, // '\x9c' + 111, // '\x9d' + 82, // '\x9e' + 108, // '\x9f' + 118, // '\xa0' + 141, // '¡' + 113, // '¢' + 129, // '£' + 119, // '¤' + 125, // '¥' + 165, // '¦' + 117, // '§' + 92, // '¨' + 106, // '©' + 83, // 'ª' + 72, // '«' + 99, // '¬' + 93, // '\xad' + 65, // '®' + 79, // '¯' + 166, // '°' + 237, // '±' + 163, // '²' + 199, // '³' + 190, // '´' + 225, // 'µ' + 209, // '¶' + 203, // '·' + 198, // '¸' + 217, // '¹' + 219, // 'º' + 206, // '»' + 234, // '¼' + 248, // '½' + 158, // '¾' + 239, // '¿' + 255, // 'À' + 255, // 'Á' + 255, // 'Â' + 255, // 'Ã' + 255, // 'Ä' + 255, // 'Å' + 255, // 'Æ' + 255, // 'Ç' + 255, // 'È' + 255, // 'É' + 255, // 'Ê' + 255, // 'Ë' + 255, // 'Ì' + 255, // 'Í' + 255, // 'Î' + 255, // 'Ï' + 255, // 'Ð' + 255, // 'Ñ' + 255, // 'Ò' + 255, // 'Ó' + 255, // 'Ô' + 255, // 'Õ' + 255, // 'Ö' + 255, // '×' + 255, // 'Ø' + 255, // 'Ù' + 255, // 'Ú' + 255, // 'Û' + 255, // 'Ü' + 255, // 'Ý' + 255, // 'Þ' + 255, // 'ß' + 255, // 'à' + 255, // 'á' + 255, // 'â' + 255, // 'ã' + 255, // 'ä' + 255, // 'å' + 255, // 'æ' + 255, // 'ç' + 255, // 'è' + 255, // 'é' + 255, // 'ê' + 255, // 'ë' + 255, // 'ì' + 255, // 'í' + 255, // 'î' + 255, // 'ï' + 255, // 'ð' + 255, // 'ñ' + 255, // 'ò' + 255, // 'ó' + 255, // 'ô' + 255, // 'õ' + 255, // 'ö' + 255, // '÷' + 255, // 'ø' + 255, // 'ù' + 255, // 'ú' + 255, // 'û' + 255, // 'ü' + 255, // 'ý' + 255, // 'þ' + 255, // 'ÿ' +]; diff --git a/third_party/rust/memchr/src/memmem/genericsimd.rs b/third_party/rust/memchr/src/memmem/genericsimd.rs new file mode 100644 index 0000000000..28bfdab880 --- /dev/null +++ b/third_party/rust/memchr/src/memmem/genericsimd.rs @@ -0,0 +1,266 @@ +use core::mem::size_of; + +use crate::memmem::{util::memcmp, vector::Vector, NeedleInfo}; + +/// The minimum length of a needle required for this algorithm. The minimum +/// is 2 since a length of 1 should just use memchr and a length of 0 isn't +/// a case handled by this searcher. +pub(crate) const MIN_NEEDLE_LEN: usize = 2; + +/// The maximum length of a needle required for this algorithm. +/// +/// In reality, there is no hard max here. The code below can handle any +/// length needle. (Perhaps that suggests there are missing optimizations.) +/// Instead, this is a heuristic and a bound guaranteeing our linear time +/// complexity. +/// +/// It is a heuristic because when a candidate match is found, memcmp is run. +/// For very large needles with lots of false positives, memcmp can make the +/// code run quite slow. +/// +/// It is a bound because the worst case behavior with memcmp is multiplicative +/// in the size of the needle and haystack, and we want to keep that additive. +/// This bound ensures we still meet that bound theoretically, since it's just +/// a constant. We aren't acting in bad faith here, memcmp on tiny needles +/// is so fast that even in pathological cases (see pathological vector +/// benchmarks), this is still just as fast or faster in practice. +/// +/// This specific number was chosen by tweaking a bit and running benchmarks. +/// The rare-medium-needle, for example, gets about 5% faster by using this +/// algorithm instead of a prefilter-accelerated Two-Way. There's also a +/// theoretical desire to keep this number reasonably low, to mitigate the +/// impact of pathological cases. I did try 64, and some benchmarks got a +/// little better, and others (particularly the pathological ones), got a lot +/// worse. So... 32 it is? +pub(crate) const MAX_NEEDLE_LEN: usize = 32; + +/// The implementation of the forward vector accelerated substring search. +/// +/// This is extremely similar to the prefilter vector module by the same name. +/// The key difference is that this is not a prefilter. Instead, it handles +/// confirming its own matches. The trade off is that this only works with +/// smaller needles. The speed up here is that an inlined memcmp on a tiny +/// needle is very quick, even on pathological inputs. This is much better than +/// combining a prefilter with Two-Way, where using Two-Way to confirm the +/// match has higher latency. +/// +/// So why not use this for all needles? We could, and it would probably work +/// really well on most inputs. But its worst case is multiplicative and we +/// want to guarantee worst case additive time. Some of the benchmarks try to +/// justify this (see the pathological ones). +/// +/// The prefilter variant of this has more comments. Also note that we only +/// implement this for forward searches for now. If you have a compelling use +/// case for accelerated reverse search, please file an issue. +#[derive(Clone, Copy, Debug)] +pub(crate) struct Forward { + rare1i: u8, + rare2i: u8, +} + +impl Forward { + /// Create a new "generic simd" forward searcher. If one could not be + /// created from the given inputs, then None is returned. + pub(crate) fn new(ninfo: &NeedleInfo, needle: &[u8]) -> Option<Forward> { + let (rare1i, rare2i) = ninfo.rarebytes.as_rare_ordered_u8(); + // If the needle is too short or too long, give up. Also, give up + // if the rare bytes detected are at the same position. (It likely + // suggests a degenerate case, although it should technically not be + // possible.) + if needle.len() < MIN_NEEDLE_LEN + || needle.len() > MAX_NEEDLE_LEN + || rare1i == rare2i + { + return None; + } + Some(Forward { rare1i, rare2i }) + } + + /// Returns the minimum length of haystack that is needed for this searcher + /// to work for a particular vector. Passing a haystack with a length + /// smaller than this will cause `fwd_find` to panic. + #[inline(always)] + pub(crate) fn min_haystack_len<V: Vector>(&self) -> usize { + self.rare2i as usize + size_of::<V>() + } +} + +/// Searches the given haystack for the given needle. The needle given should +/// be the same as the needle that this searcher was initialized with. +/// +/// # Panics +/// +/// When the given haystack has a length smaller than `min_haystack_len`. +/// +/// # Safety +/// +/// Since this is meant to be used with vector functions, callers need to +/// specialize this inside of a function with a `target_feature` attribute. +/// Therefore, callers must ensure that whatever target feature is being used +/// supports the vector functions that this function is specialized for. (For +/// the specific vector functions used, see the Vector trait implementations.) +#[inline(always)] +pub(crate) unsafe fn fwd_find<V: Vector>( + fwd: &Forward, + haystack: &[u8], + needle: &[u8], +) -> Option<usize> { + // It would be nice if we didn't have this check here, since the meta + // searcher should handle it for us. But without this, I don't think we + // guarantee that end_ptr.sub(needle.len()) won't result in UB. We could + // put it as part of the safety contract, but it makes it more complicated + // than necessary. + if haystack.len() < needle.len() { + return None; + } + let min_haystack_len = fwd.min_haystack_len::<V>(); + assert!(haystack.len() >= min_haystack_len, "haystack too small"); + debug_assert!(needle.len() <= haystack.len()); + debug_assert!( + needle.len() >= MIN_NEEDLE_LEN, + "needle must be at least {} bytes", + MIN_NEEDLE_LEN, + ); + debug_assert!( + needle.len() <= MAX_NEEDLE_LEN, + "needle must be at most {} bytes", + MAX_NEEDLE_LEN, + ); + + let (rare1i, rare2i) = (fwd.rare1i as usize, fwd.rare2i as usize); + let rare1chunk = V::splat(needle[rare1i]); + let rare2chunk = V::splat(needle[rare2i]); + + let start_ptr = haystack.as_ptr(); + let end_ptr = start_ptr.add(haystack.len()); + let max_ptr = end_ptr.sub(min_haystack_len); + let mut ptr = start_ptr; + + // N.B. I did experiment with unrolling the loop to deal with size(V) + // bytes at a time and 2*size(V) bytes at a time. The double unroll was + // marginally faster while the quadruple unroll was unambiguously slower. + // In the end, I decided the complexity from unrolling wasn't worth it. I + // used the memmem/krate/prebuilt/huge-en/ benchmarks to compare. + while ptr <= max_ptr { + let m = fwd_find_in_chunk( + fwd, needle, ptr, end_ptr, rare1chunk, rare2chunk, !0, + ); + if let Some(chunki) = m { + return Some(matched(start_ptr, ptr, chunki)); + } + ptr = ptr.add(size_of::<V>()); + } + if ptr < end_ptr { + let remaining = diff(end_ptr, ptr); + debug_assert!( + remaining < min_haystack_len, + "remaining bytes should be smaller than the minimum haystack \ + length of {}, but there are {} bytes remaining", + min_haystack_len, + remaining, + ); + if remaining < needle.len() { + return None; + } + debug_assert!( + max_ptr < ptr, + "after main loop, ptr should have exceeded max_ptr", + ); + let overlap = diff(ptr, max_ptr); + debug_assert!( + overlap > 0, + "overlap ({}) must always be non-zero", + overlap, + ); + debug_assert!( + overlap < size_of::<V>(), + "overlap ({}) cannot possibly be >= than a vector ({})", + overlap, + size_of::<V>(), + ); + // The mask has all of its bits set except for the first N least + // significant bits, where N=overlap. This way, any matches that + // occur in find_in_chunk within the overlap are automatically + // ignored. + let mask = !((1 << overlap) - 1); + ptr = max_ptr; + let m = fwd_find_in_chunk( + fwd, needle, ptr, end_ptr, rare1chunk, rare2chunk, mask, + ); + if let Some(chunki) = m { + return Some(matched(start_ptr, ptr, chunki)); + } + } + None +} + +/// Search for an occurrence of two rare bytes from the needle in the chunk +/// pointed to by ptr, with the end of the haystack pointed to by end_ptr. When +/// an occurrence is found, memcmp is run to check if a match occurs at the +/// corresponding position. +/// +/// rare1chunk and rare2chunk correspond to vectors with the rare1 and rare2 +/// bytes repeated in each 8-bit lane, respectively. +/// +/// mask should have bits set corresponding the positions in the chunk in which +/// matches are considered. This is only used for the last vector load where +/// the beginning of the vector might have overlapped with the last load in +/// the main loop. The mask lets us avoid visiting positions that have already +/// been discarded as matches. +/// +/// # Safety +/// +/// It must be safe to do an unaligned read of size(V) bytes starting at both +/// (ptr + rare1i) and (ptr + rare2i). It must also be safe to do unaligned +/// loads on ptr up to (end_ptr - needle.len()). +#[inline(always)] +unsafe fn fwd_find_in_chunk<V: Vector>( + fwd: &Forward, + needle: &[u8], + ptr: *const u8, + end_ptr: *const u8, + rare1chunk: V, + rare2chunk: V, + mask: u32, +) -> Option<usize> { + let chunk0 = V::load_unaligned(ptr.add(fwd.rare1i as usize)); + let chunk1 = V::load_unaligned(ptr.add(fwd.rare2i as usize)); + + let eq0 = chunk0.cmpeq(rare1chunk); + let eq1 = chunk1.cmpeq(rare2chunk); + + let mut match_offsets = eq0.and(eq1).movemask() & mask; + while match_offsets != 0 { + let offset = match_offsets.trailing_zeros() as usize; + let ptr = ptr.add(offset); + if end_ptr.sub(needle.len()) < ptr { + return None; + } + let chunk = core::slice::from_raw_parts(ptr, needle.len()); + if memcmp(needle, chunk) { + return Some(offset); + } + match_offsets &= match_offsets - 1; + } + None +} + +/// Accepts a chunk-relative offset and returns a haystack relative offset +/// after updating the prefilter state. +/// +/// See the same function with the same name in the prefilter variant of this +/// algorithm to learned why it's tagged with inline(never). Even here, where +/// the function is simpler, inlining it leads to poorer codegen. (Although +/// it does improve some benchmarks, like prebuiltiter/huge-en/common-you.) +#[cold] +#[inline(never)] +fn matched(start_ptr: *const u8, ptr: *const u8, chunki: usize) -> usize { + diff(ptr, start_ptr) + chunki +} + +/// Subtract `b` from `a` and return the difference. `a` must be greater than +/// or equal to `b`. +fn diff(a: *const u8, b: *const u8) -> usize { + debug_assert!(a >= b); + (a as usize) - (b as usize) +} diff --git a/third_party/rust/memchr/src/memmem/mod.rs b/third_party/rust/memchr/src/memmem/mod.rs new file mode 100644 index 0000000000..e1cd1aec76 --- /dev/null +++ b/third_party/rust/memchr/src/memmem/mod.rs @@ -0,0 +1,1321 @@ +/*! +This module provides forward and reverse substring search routines. + +Unlike the standard library's substring search routines, these work on +arbitrary bytes. For all non-empty needles, these routines will report exactly +the same values as the corresponding routines in the standard library. For +the empty needle, the standard library reports matches only at valid UTF-8 +boundaries, where as these routines will report matches at every position. + +Other than being able to work on arbitrary bytes, the primary reason to prefer +these routines over the standard library routines is that these will generally +be faster. In some cases, significantly so. + +# Example: iterating over substring matches + +This example shows how to use [`find_iter`] to find occurrences of a substring +in a haystack. + +``` +use memchr::memmem; + +let haystack = b"foo bar foo baz foo"; + +let mut it = memmem::find_iter(haystack, "foo"); +assert_eq!(Some(0), it.next()); +assert_eq!(Some(8), it.next()); +assert_eq!(Some(16), it.next()); +assert_eq!(None, it.next()); +``` + +# Example: iterating over substring matches in reverse + +This example shows how to use [`rfind_iter`] to find occurrences of a substring +in a haystack starting from the end of the haystack. + +**NOTE:** This module does not implement double ended iterators, so reverse +searches aren't done by calling `rev` on a forward iterator. + +``` +use memchr::memmem; + +let haystack = b"foo bar foo baz foo"; + +let mut it = memmem::rfind_iter(haystack, "foo"); +assert_eq!(Some(16), it.next()); +assert_eq!(Some(8), it.next()); +assert_eq!(Some(0), it.next()); +assert_eq!(None, it.next()); +``` + +# Example: repeating a search for the same needle + +It may be possible for the overhead of constructing a substring searcher to be +measurable in some workloads. In cases where the same needle is used to search +many haystacks, it is possible to do construction once and thus to avoid it for +subsequent searches. This can be done with a [`Finder`] (or a [`FinderRev`] for +reverse searches). + +``` +use memchr::memmem; + +let finder = memmem::Finder::new("foo"); + +assert_eq!(Some(4), finder.find(b"baz foo quux")); +assert_eq!(None, finder.find(b"quux baz bar")); +``` +*/ + +pub use self::prefilter::Prefilter; + +use crate::{ + cow::CowBytes, + memmem::{ + prefilter::{Pre, PrefilterFn, PrefilterState}, + rabinkarp::NeedleHash, + rarebytes::RareNeedleBytes, + }, +}; + +/// Defines a suite of quickcheck properties for forward and reverse +/// substring searching. +/// +/// This is defined in this specific spot so that it can be used freely among +/// the different substring search implementations. I couldn't be bothered to +/// fight with the macro-visibility rules enough to figure out how to stuff it +/// somewhere more convenient. +#[cfg(all(test, feature = "std"))] +macro_rules! define_memmem_quickcheck_tests { + ($fwd:expr, $rev:expr) => { + use crate::memmem::proptests; + + quickcheck::quickcheck! { + fn qc_fwd_prefix_is_substring(bs: Vec<u8>) -> bool { + proptests::prefix_is_substring(false, &bs, $fwd) + } + + fn qc_fwd_suffix_is_substring(bs: Vec<u8>) -> bool { + proptests::suffix_is_substring(false, &bs, $fwd) + } + + fn qc_fwd_matches_naive( + haystack: Vec<u8>, + needle: Vec<u8> + ) -> bool { + proptests::matches_naive(false, &haystack, &needle, $fwd) + } + + fn qc_rev_prefix_is_substring(bs: Vec<u8>) -> bool { + proptests::prefix_is_substring(true, &bs, $rev) + } + + fn qc_rev_suffix_is_substring(bs: Vec<u8>) -> bool { + proptests::suffix_is_substring(true, &bs, $rev) + } + + fn qc_rev_matches_naive( + haystack: Vec<u8>, + needle: Vec<u8> + ) -> bool { + proptests::matches_naive(true, &haystack, &needle, $rev) + } + } + }; +} + +/// Defines a suite of "simple" hand-written tests for a substring +/// implementation. +/// +/// This is defined here for the same reason that +/// define_memmem_quickcheck_tests is defined here. +#[cfg(test)] +macro_rules! define_memmem_simple_tests { + ($fwd:expr, $rev:expr) => { + use crate::memmem::testsimples; + + #[test] + fn simple_forward() { + testsimples::run_search_tests_fwd($fwd); + } + + #[test] + fn simple_reverse() { + testsimples::run_search_tests_rev($rev); + } + }; +} + +mod byte_frequencies; +#[cfg(memchr_runtime_simd)] +mod genericsimd; +mod prefilter; +mod rabinkarp; +mod rarebytes; +mod twoway; +mod util; +#[cfg(memchr_runtime_simd)] +mod vector; +#[cfg(all(memchr_runtime_wasm128))] +mod wasm; +#[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] +mod x86; + +/// Returns an iterator over all non-overlapping occurrences of a substring in +/// a haystack. +/// +/// # Complexity +/// +/// This routine is guaranteed to have worst case linear time complexity +/// with respect to both the needle and the haystack. That is, this runs +/// in `O(needle.len() + haystack.len())` time. +/// +/// This routine is also guaranteed to have worst case constant space +/// complexity. +/// +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// use memchr::memmem; +/// +/// let haystack = b"foo bar foo baz foo"; +/// let mut it = memmem::find_iter(haystack, b"foo"); +/// assert_eq!(Some(0), it.next()); +/// assert_eq!(Some(8), it.next()); +/// assert_eq!(Some(16), it.next()); +/// assert_eq!(None, it.next()); +/// ``` +#[inline] +pub fn find_iter<'h, 'n, N: 'n + ?Sized + AsRef<[u8]>>( + haystack: &'h [u8], + needle: &'n N, +) -> FindIter<'h, 'n> { + FindIter::new(haystack, Finder::new(needle)) +} + +/// Returns a reverse iterator over all non-overlapping occurrences of a +/// substring in a haystack. +/// +/// # Complexity +/// +/// This routine is guaranteed to have worst case linear time complexity +/// with respect to both the needle and the haystack. That is, this runs +/// in `O(needle.len() + haystack.len())` time. +/// +/// This routine is also guaranteed to have worst case constant space +/// complexity. +/// +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// use memchr::memmem; +/// +/// let haystack = b"foo bar foo baz foo"; +/// let mut it = memmem::rfind_iter(haystack, b"foo"); +/// assert_eq!(Some(16), it.next()); +/// assert_eq!(Some(8), it.next()); +/// assert_eq!(Some(0), it.next()); +/// assert_eq!(None, it.next()); +/// ``` +#[inline] +pub fn rfind_iter<'h, 'n, N: 'n + ?Sized + AsRef<[u8]>>( + haystack: &'h [u8], + needle: &'n N, +) -> FindRevIter<'h, 'n> { + FindRevIter::new(haystack, FinderRev::new(needle)) +} + +/// Returns the index of the first occurrence of the given needle. +/// +/// Note that if you're are searching for the same needle in many different +/// small haystacks, it may be faster to initialize a [`Finder`] once, +/// and reuse it for each search. +/// +/// # Complexity +/// +/// This routine is guaranteed to have worst case linear time complexity +/// with respect to both the needle and the haystack. That is, this runs +/// in `O(needle.len() + haystack.len())` time. +/// +/// This routine is also guaranteed to have worst case constant space +/// complexity. +/// +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// use memchr::memmem; +/// +/// let haystack = b"foo bar baz"; +/// assert_eq!(Some(0), memmem::find(haystack, b"foo")); +/// assert_eq!(Some(4), memmem::find(haystack, b"bar")); +/// assert_eq!(None, memmem::find(haystack, b"quux")); +/// ``` +#[inline] +pub fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> { + if haystack.len() < 64 { + rabinkarp::find(haystack, needle) + } else { + Finder::new(needle).find(haystack) + } +} + +/// Returns the index of the last occurrence of the given needle. +/// +/// Note that if you're are searching for the same needle in many different +/// small haystacks, it may be faster to initialize a [`FinderRev`] once, +/// and reuse it for each search. +/// +/// # Complexity +/// +/// This routine is guaranteed to have worst case linear time complexity +/// with respect to both the needle and the haystack. That is, this runs +/// in `O(needle.len() + haystack.len())` time. +/// +/// This routine is also guaranteed to have worst case constant space +/// complexity. +/// +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// use memchr::memmem; +/// +/// let haystack = b"foo bar baz"; +/// assert_eq!(Some(0), memmem::rfind(haystack, b"foo")); +/// assert_eq!(Some(4), memmem::rfind(haystack, b"bar")); +/// assert_eq!(Some(8), memmem::rfind(haystack, b"ba")); +/// assert_eq!(None, memmem::rfind(haystack, b"quux")); +/// ``` +#[inline] +pub fn rfind(haystack: &[u8], needle: &[u8]) -> Option<usize> { + if haystack.len() < 64 { + rabinkarp::rfind(haystack, needle) + } else { + FinderRev::new(needle).rfind(haystack) + } +} + +/// An iterator over non-overlapping substring matches. +/// +/// Matches are reported by the byte offset at which they begin. +/// +/// `'h` is the lifetime of the haystack while `'n` is the lifetime of the +/// needle. +#[derive(Debug)] +pub struct FindIter<'h, 'n> { + haystack: &'h [u8], + prestate: PrefilterState, + finder: Finder<'n>, + pos: usize, +} + +impl<'h, 'n> FindIter<'h, 'n> { + #[inline(always)] + pub(crate) fn new( + haystack: &'h [u8], + finder: Finder<'n>, + ) -> FindIter<'h, 'n> { + let prestate = finder.searcher.prefilter_state(); + FindIter { haystack, prestate, finder, pos: 0 } + } + + /// Convert this iterator into its owned variant, such that it no longer + /// borrows the finder and needle. + /// + /// If this is already an owned iterator, then this is a no-op. Otherwise, + /// this copies the needle. + /// + /// This is only available when the `std` feature is enabled. + #[cfg(feature = "std")] + #[inline] + pub fn into_owned(self) -> FindIter<'h, 'static> { + FindIter { + haystack: self.haystack, + prestate: self.prestate, + finder: self.finder.into_owned(), + pos: self.pos, + } + } +} + +impl<'h, 'n> Iterator for FindIter<'h, 'n> { + type Item = usize; + + fn next(&mut self) -> Option<usize> { + if self.pos > self.haystack.len() { + return None; + } + let result = self + .finder + .searcher + .find(&mut self.prestate, &self.haystack[self.pos..]); + match result { + None => None, + Some(i) => { + let pos = self.pos + i; + self.pos = pos + core::cmp::max(1, self.finder.needle().len()); + Some(pos) + } + } + } +} + +/// An iterator over non-overlapping substring matches in reverse. +/// +/// Matches are reported by the byte offset at which they begin. +/// +/// `'h` is the lifetime of the haystack while `'n` is the lifetime of the +/// needle. +#[derive(Debug)] +pub struct FindRevIter<'h, 'n> { + haystack: &'h [u8], + finder: FinderRev<'n>, + /// When searching with an empty needle, this gets set to `None` after + /// we've yielded the last element at `0`. + pos: Option<usize>, +} + +impl<'h, 'n> FindRevIter<'h, 'n> { + #[inline(always)] + pub(crate) fn new( + haystack: &'h [u8], + finder: FinderRev<'n>, + ) -> FindRevIter<'h, 'n> { + let pos = Some(haystack.len()); + FindRevIter { haystack, finder, pos } + } + + /// Convert this iterator into its owned variant, such that it no longer + /// borrows the finder and needle. + /// + /// If this is already an owned iterator, then this is a no-op. Otherwise, + /// this copies the needle. + /// + /// This is only available when the `std` feature is enabled. + #[cfg(feature = "std")] + #[inline] + pub fn into_owned(self) -> FindRevIter<'h, 'static> { + FindRevIter { + haystack: self.haystack, + finder: self.finder.into_owned(), + pos: self.pos, + } + } +} + +impl<'h, 'n> Iterator for FindRevIter<'h, 'n> { + type Item = usize; + + fn next(&mut self) -> Option<usize> { + let pos = match self.pos { + None => return None, + Some(pos) => pos, + }; + let result = self.finder.rfind(&self.haystack[..pos]); + match result { + None => None, + Some(i) => { + if pos == i { + self.pos = pos.checked_sub(1); + } else { + self.pos = Some(i); + } + Some(i) + } + } + } +} + +/// A single substring searcher fixed to a particular needle. +/// +/// The purpose of this type is to permit callers to construct a substring +/// searcher that can be used to search haystacks without the overhead of +/// constructing the searcher in the first place. This is a somewhat niche +/// concern when it's necessary to re-use the same needle to search multiple +/// different haystacks with as little overhead as possible. In general, using +/// [`find`] is good enough, but `Finder` is useful when you can meaningfully +/// observe searcher construction time in a profile. +/// +/// When the `std` feature is enabled, then this type has an `into_owned` +/// version which permits building a `Finder` that is not connected to +/// the lifetime of its needle. +#[derive(Clone, Debug)] +pub struct Finder<'n> { + searcher: Searcher<'n>, +} + +impl<'n> Finder<'n> { + /// Create a new finder for the given needle. + #[inline] + pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'n B) -> Finder<'n> { + FinderBuilder::new().build_forward(needle) + } + + /// Returns the index of the first occurrence of this needle in the given + /// haystack. + /// + /// # Complexity + /// + /// This routine is guaranteed to have worst case linear time complexity + /// with respect to both the needle and the haystack. That is, this runs + /// in `O(needle.len() + haystack.len())` time. + /// + /// This routine is also guaranteed to have worst case constant space + /// complexity. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use memchr::memmem::Finder; + /// + /// let haystack = b"foo bar baz"; + /// assert_eq!(Some(0), Finder::new("foo").find(haystack)); + /// assert_eq!(Some(4), Finder::new("bar").find(haystack)); + /// assert_eq!(None, Finder::new("quux").find(haystack)); + /// ``` + pub fn find(&self, haystack: &[u8]) -> Option<usize> { + self.searcher.find(&mut self.searcher.prefilter_state(), haystack) + } + + /// Returns an iterator over all occurrences of a substring in a haystack. + /// + /// # Complexity + /// + /// This routine is guaranteed to have worst case linear time complexity + /// with respect to both the needle and the haystack. That is, this runs + /// in `O(needle.len() + haystack.len())` time. + /// + /// This routine is also guaranteed to have worst case constant space + /// complexity. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use memchr::memmem::Finder; + /// + /// let haystack = b"foo bar foo baz foo"; + /// let finder = Finder::new(b"foo"); + /// let mut it = finder.find_iter(haystack); + /// assert_eq!(Some(0), it.next()); + /// assert_eq!(Some(8), it.next()); + /// assert_eq!(Some(16), it.next()); + /// assert_eq!(None, it.next()); + /// ``` + #[inline] + pub fn find_iter<'a, 'h>( + &'a self, + haystack: &'h [u8], + ) -> FindIter<'h, 'a> { + FindIter::new(haystack, self.as_ref()) + } + + /// Convert this finder into its owned variant, such that it no longer + /// borrows the needle. + /// + /// If this is already an owned finder, then this is a no-op. Otherwise, + /// this copies the needle. + /// + /// This is only available when the `std` feature is enabled. + #[cfg(feature = "std")] + #[inline] + pub fn into_owned(self) -> Finder<'static> { + Finder { searcher: self.searcher.into_owned() } + } + + /// Convert this finder into its borrowed variant. + /// + /// This is primarily useful if your finder is owned and you'd like to + /// store its borrowed variant in some intermediate data structure. + /// + /// Note that the lifetime parameter of the returned finder is tied to the + /// lifetime of `self`, and may be shorter than the `'n` lifetime of the + /// needle itself. Namely, a finder's needle can be either borrowed or + /// owned, so the lifetime of the needle returned must necessarily be the + /// shorter of the two. + #[inline] + pub fn as_ref(&self) -> Finder<'_> { + Finder { searcher: self.searcher.as_ref() } + } + + /// Returns the needle that this finder searches for. + /// + /// Note that the lifetime of the needle returned is tied to the lifetime + /// of the finder, and may be shorter than the `'n` lifetime. Namely, a + /// finder's needle can be either borrowed or owned, so the lifetime of the + /// needle returned must necessarily be the shorter of the two. + #[inline] + pub fn needle(&self) -> &[u8] { + self.searcher.needle() + } +} + +/// A single substring reverse searcher fixed to a particular needle. +/// +/// The purpose of this type is to permit callers to construct a substring +/// searcher that can be used to search haystacks without the overhead of +/// constructing the searcher in the first place. This is a somewhat niche +/// concern when it's necessary to re-use the same needle to search multiple +/// different haystacks with as little overhead as possible. In general, +/// using [`rfind`] is good enough, but `FinderRev` is useful when you can +/// meaningfully observe searcher construction time in a profile. +/// +/// When the `std` feature is enabled, then this type has an `into_owned` +/// version which permits building a `FinderRev` that is not connected to +/// the lifetime of its needle. +#[derive(Clone, Debug)] +pub struct FinderRev<'n> { + searcher: SearcherRev<'n>, +} + +impl<'n> FinderRev<'n> { + /// Create a new reverse finder for the given needle. + #[inline] + pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'n B) -> FinderRev<'n> { + FinderBuilder::new().build_reverse(needle) + } + + /// Returns the index of the last occurrence of this needle in the given + /// haystack. + /// + /// The haystack may be any type that can be cheaply converted into a + /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`. + /// + /// # Complexity + /// + /// This routine is guaranteed to have worst case linear time complexity + /// with respect to both the needle and the haystack. That is, this runs + /// in `O(needle.len() + haystack.len())` time. + /// + /// This routine is also guaranteed to have worst case constant space + /// complexity. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use memchr::memmem::FinderRev; + /// + /// let haystack = b"foo bar baz"; + /// assert_eq!(Some(0), FinderRev::new("foo").rfind(haystack)); + /// assert_eq!(Some(4), FinderRev::new("bar").rfind(haystack)); + /// assert_eq!(None, FinderRev::new("quux").rfind(haystack)); + /// ``` + pub fn rfind<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> { + self.searcher.rfind(haystack.as_ref()) + } + + /// Returns a reverse iterator over all occurrences of a substring in a + /// haystack. + /// + /// # Complexity + /// + /// This routine is guaranteed to have worst case linear time complexity + /// with respect to both the needle and the haystack. That is, this runs + /// in `O(needle.len() + haystack.len())` time. + /// + /// This routine is also guaranteed to have worst case constant space + /// complexity. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use memchr::memmem::FinderRev; + /// + /// let haystack = b"foo bar foo baz foo"; + /// let finder = FinderRev::new(b"foo"); + /// let mut it = finder.rfind_iter(haystack); + /// assert_eq!(Some(16), it.next()); + /// assert_eq!(Some(8), it.next()); + /// assert_eq!(Some(0), it.next()); + /// assert_eq!(None, it.next()); + /// ``` + #[inline] + pub fn rfind_iter<'a, 'h>( + &'a self, + haystack: &'h [u8], + ) -> FindRevIter<'h, 'a> { + FindRevIter::new(haystack, self.as_ref()) + } + + /// Convert this finder into its owned variant, such that it no longer + /// borrows the needle. + /// + /// If this is already an owned finder, then this is a no-op. Otherwise, + /// this copies the needle. + /// + /// This is only available when the `std` feature is enabled. + #[cfg(feature = "std")] + #[inline] + pub fn into_owned(self) -> FinderRev<'static> { + FinderRev { searcher: self.searcher.into_owned() } + } + + /// Convert this finder into its borrowed variant. + /// + /// This is primarily useful if your finder is owned and you'd like to + /// store its borrowed variant in some intermediate data structure. + /// + /// Note that the lifetime parameter of the returned finder is tied to the + /// lifetime of `self`, and may be shorter than the `'n` lifetime of the + /// needle itself. Namely, a finder's needle can be either borrowed or + /// owned, so the lifetime of the needle returned must necessarily be the + /// shorter of the two. + #[inline] + pub fn as_ref(&self) -> FinderRev<'_> { + FinderRev { searcher: self.searcher.as_ref() } + } + + /// Returns the needle that this finder searches for. + /// + /// Note that the lifetime of the needle returned is tied to the lifetime + /// of the finder, and may be shorter than the `'n` lifetime. Namely, a + /// finder's needle can be either borrowed or owned, so the lifetime of the + /// needle returned must necessarily be the shorter of the two. + #[inline] + pub fn needle(&self) -> &[u8] { + self.searcher.needle() + } +} + +/// A builder for constructing non-default forward or reverse memmem finders. +/// +/// A builder is primarily useful for configuring a substring searcher. +/// Currently, the only configuration exposed is the ability to disable +/// heuristic prefilters used to speed up certain searches. +#[derive(Clone, Debug, Default)] +pub struct FinderBuilder { + config: SearcherConfig, +} + +impl FinderBuilder { + /// Create a new finder builder with default settings. + pub fn new() -> FinderBuilder { + FinderBuilder::default() + } + + /// Build a forward finder using the given needle from the current + /// settings. + pub fn build_forward<'n, B: ?Sized + AsRef<[u8]>>( + &self, + needle: &'n B, + ) -> Finder<'n> { + Finder { searcher: Searcher::new(self.config, needle.as_ref()) } + } + + /// Build a reverse finder using the given needle from the current + /// settings. + pub fn build_reverse<'n, B: ?Sized + AsRef<[u8]>>( + &self, + needle: &'n B, + ) -> FinderRev<'n> { + FinderRev { searcher: SearcherRev::new(needle.as_ref()) } + } + + /// Configure the prefilter setting for the finder. + /// + /// See the documentation for [`Prefilter`] for more discussion on why + /// you might want to configure this. + pub fn prefilter(&mut self, prefilter: Prefilter) -> &mut FinderBuilder { + self.config.prefilter = prefilter; + self + } +} + +/// The internal implementation of a forward substring searcher. +/// +/// The reality is that this is a "meta" searcher. Namely, depending on a +/// variety of parameters (CPU support, target, needle size, haystack size and +/// even dynamic properties such as prefilter effectiveness), the actual +/// algorithm employed to do substring search may change. +#[derive(Clone, Debug)] +struct Searcher<'n> { + /// The actual needle we're searching for. + /// + /// A CowBytes is like a Cow<[u8]>, except in no_std environments, it is + /// specialized to a single variant (the borrowed form). + needle: CowBytes<'n>, + /// A collection of facts computed on the needle that are useful for more + /// than one substring search algorithm. + ninfo: NeedleInfo, + /// A prefilter function, if it was deemed appropriate. + /// + /// Some substring search implementations (like Two-Way) benefit greatly + /// if we can quickly find candidate starting positions for a match. + prefn: Option<PrefilterFn>, + /// The actual substring implementation in use. + kind: SearcherKind, +} + +/// A collection of facts computed about a search needle. +/// +/// We group these things together because it's useful to be able to hand them +/// to prefilters or substring algorithms that want them. +#[derive(Clone, Copy, Debug)] +pub(crate) struct NeedleInfo { + /// The offsets of "rare" bytes detected in the needle. + /// + /// This is meant to be a heuristic in order to maximize the effectiveness + /// of vectorized code. Namely, vectorized code tends to focus on only + /// one or two bytes. If we pick bytes from the needle that occur + /// infrequently, then more time will be spent in the vectorized code and + /// will likely make the overall search (much) faster. + /// + /// Of course, this is only a heuristic based on a background frequency + /// distribution of bytes. But it tends to work very well in practice. + pub(crate) rarebytes: RareNeedleBytes, + /// A Rabin-Karp hash of the needle. + /// + /// This is store here instead of in a more specific Rabin-Karp search + /// since Rabin-Karp may be used even if another SearchKind corresponds + /// to some other search implementation. e.g., If measurements suggest RK + /// is faster in some cases or if a search implementation can't handle + /// particularly small haystack. (Moreover, we cannot use RK *generally*, + /// since its worst case time is multiplicative. Instead, we only use it + /// some small haystacks, where "small" is a constant.) + pub(crate) nhash: NeedleHash, +} + +/// Configuration for substring search. +#[derive(Clone, Copy, Debug, Default)] +struct SearcherConfig { + /// This permits changing the behavior of the prefilter, since it can have + /// a variable impact on performance. + prefilter: Prefilter, +} + +#[derive(Clone, Debug)] +enum SearcherKind { + /// A special case for empty needles. An empty needle always matches, even + /// in an empty haystack. + Empty, + /// This is used whenever the needle is a single byte. In this case, we + /// always use memchr. + OneByte(u8), + /// Two-Way is the generic work horse and is what provides our additive + /// linear time guarantee. In general, it's used when the needle is bigger + /// than 8 bytes or so. + TwoWay(twoway::Forward), + #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] + GenericSIMD128(x86::sse::Forward), + #[cfg(memchr_runtime_wasm128)] + GenericSIMD128(wasm::Forward), + #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] + GenericSIMD256(x86::avx::Forward), +} + +impl<'n> Searcher<'n> { + fn new(config: SearcherConfig, needle: &'n [u8]) -> Searcher<'n> { + use self::SearcherKind::*; + + let ninfo = NeedleInfo::new(needle); + let mk = |kind: SearcherKind| { + let prefn = prefilter::forward( + &config.prefilter, + &ninfo.rarebytes, + needle, + ); + Searcher { needle: CowBytes::new(needle), ninfo, prefn, kind } + }; + if needle.len() == 0 { + return mk(Empty); + } + if needle.len() == 1 { + return mk(OneByte(needle[0])); + } + #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] + { + if let Some(fwd) = x86::avx::Forward::new(&ninfo, needle) { + return mk(GenericSIMD256(fwd)); + } else if let Some(fwd) = x86::sse::Forward::new(&ninfo, needle) { + return mk(GenericSIMD128(fwd)); + } + } + #[cfg(all(target_arch = "wasm32", memchr_runtime_simd))] + { + if let Some(fwd) = wasm::Forward::new(&ninfo, needle) { + return mk(GenericSIMD128(fwd)); + } + } + + mk(TwoWay(twoway::Forward::new(needle))) + } + + /// Return a fresh prefilter state that can be used with this searcher. + /// A prefilter state is used to track the effectiveness of a searcher's + /// prefilter for speeding up searches. Therefore, the prefilter state + /// should generally be reused on subsequent searches (such as in an + /// iterator). For searches on a different haystack, then a new prefilter + /// state should be used. + /// + /// This always initializes a valid (but possibly inert) prefilter state + /// even if this searcher does not have a prefilter enabled. + fn prefilter_state(&self) -> PrefilterState { + if self.prefn.is_none() { + PrefilterState::inert() + } else { + PrefilterState::new() + } + } + + fn needle(&self) -> &[u8] { + self.needle.as_slice() + } + + fn as_ref(&self) -> Searcher<'_> { + use self::SearcherKind::*; + + let kind = match self.kind { + Empty => Empty, + OneByte(b) => OneByte(b), + TwoWay(tw) => TwoWay(tw), + #[cfg(all(not(miri), memchr_runtime_simd))] + GenericSIMD128(gs) => GenericSIMD128(gs), + #[cfg(all( + not(miri), + target_arch = "x86_64", + memchr_runtime_simd + ))] + GenericSIMD256(gs) => GenericSIMD256(gs), + }; + Searcher { + needle: CowBytes::new(self.needle()), + ninfo: self.ninfo, + prefn: self.prefn, + kind, + } + } + + #[cfg(feature = "std")] + fn into_owned(self) -> Searcher<'static> { + use self::SearcherKind::*; + + let kind = match self.kind { + Empty => Empty, + OneByte(b) => OneByte(b), + TwoWay(tw) => TwoWay(tw), + #[cfg(all(not(miri), memchr_runtime_simd))] + GenericSIMD128(gs) => GenericSIMD128(gs), + #[cfg(all( + not(miri), + target_arch = "x86_64", + memchr_runtime_simd + ))] + GenericSIMD256(gs) => GenericSIMD256(gs), + }; + Searcher { + needle: self.needle.into_owned(), + ninfo: self.ninfo, + prefn: self.prefn, + kind, + } + } + + /// Implements forward substring search by selecting the implementation + /// chosen at construction and executing it on the given haystack with the + /// prefilter's current state of effectiveness. + #[inline(always)] + fn find( + &self, + state: &mut PrefilterState, + haystack: &[u8], + ) -> Option<usize> { + use self::SearcherKind::*; + + let needle = self.needle(); + if haystack.len() < needle.len() { + return None; + } + match self.kind { + Empty => Some(0), + OneByte(b) => crate::memchr(b, haystack), + TwoWay(ref tw) => { + // For very short haystacks (e.g., where the prefilter probably + // can't run), it's faster to just run RK. + if rabinkarp::is_fast(haystack, needle) { + rabinkarp::find_with(&self.ninfo.nhash, haystack, needle) + } else { + self.find_tw(tw, state, haystack, needle) + } + } + #[cfg(all(not(miri), memchr_runtime_simd))] + GenericSIMD128(ref gs) => { + // The SIMD matcher can't handle particularly short haystacks, + // so we fall back to RK in these cases. + if haystack.len() < gs.min_haystack_len() { + rabinkarp::find_with(&self.ninfo.nhash, haystack, needle) + } else { + gs.find(haystack, needle) + } + } + #[cfg(all( + not(miri), + target_arch = "x86_64", + memchr_runtime_simd + ))] + GenericSIMD256(ref gs) => { + // The SIMD matcher can't handle particularly short haystacks, + // so we fall back to RK in these cases. + if haystack.len() < gs.min_haystack_len() { + rabinkarp::find_with(&self.ninfo.nhash, haystack, needle) + } else { + gs.find(haystack, needle) + } + } + } + } + + /// Calls Two-Way on the given haystack/needle. + /// + /// This is marked as unlineable since it seems to have a better overall + /// effect on benchmarks. However, this is one of those cases where + /// inlining it results an improvement in other benchmarks too, so I + /// suspect we just don't have enough data yet to make the right call here. + /// + /// I suspect the main problem is that this function contains two different + /// inlined copies of Two-Way: one with and one without prefilters enabled. + #[inline(never)] + fn find_tw( + &self, + tw: &twoway::Forward, + state: &mut PrefilterState, + haystack: &[u8], + needle: &[u8], + ) -> Option<usize> { + if let Some(prefn) = self.prefn { + // We used to look at the length of a haystack here. That is, if + // it was too small, then don't bother with the prefilter. But two + // things changed: the prefilter falls back to memchr for small + // haystacks, and, above, Rabin-Karp is employed for tiny haystacks + // anyway. + if state.is_effective() { + let mut pre = Pre { state, prefn, ninfo: &self.ninfo }; + return tw.find(Some(&mut pre), haystack, needle); + } + } + tw.find(None, haystack, needle) + } +} + +impl NeedleInfo { + pub(crate) fn new(needle: &[u8]) -> NeedleInfo { + NeedleInfo { + rarebytes: RareNeedleBytes::forward(needle), + nhash: NeedleHash::forward(needle), + } + } +} + +/// The internal implementation of a reverse substring searcher. +/// +/// See the forward searcher docs for more details. Currently, the reverse +/// searcher is considerably simpler since it lacks prefilter support. This +/// was done because it adds a lot of code, and more surface area to test. And +/// in particular, it's not clear whether a prefilter on reverse searching is +/// worth it. (If you have a compelling use case, please file an issue!) +#[derive(Clone, Debug)] +struct SearcherRev<'n> { + /// The actual needle we're searching for. + needle: CowBytes<'n>, + /// A Rabin-Karp hash of the needle. + nhash: NeedleHash, + /// The actual substring implementation in use. + kind: SearcherRevKind, +} + +#[derive(Clone, Debug)] +enum SearcherRevKind { + /// A special case for empty needles. An empty needle always matches, even + /// in an empty haystack. + Empty, + /// This is used whenever the needle is a single byte. In this case, we + /// always use memchr. + OneByte(u8), + /// Two-Way is the generic work horse and is what provides our additive + /// linear time guarantee. In general, it's used when the needle is bigger + /// than 8 bytes or so. + TwoWay(twoway::Reverse), +} + +impl<'n> SearcherRev<'n> { + fn new(needle: &'n [u8]) -> SearcherRev<'n> { + use self::SearcherRevKind::*; + + let kind = if needle.len() == 0 { + Empty + } else if needle.len() == 1 { + OneByte(needle[0]) + } else { + TwoWay(twoway::Reverse::new(needle)) + }; + SearcherRev { + needle: CowBytes::new(needle), + nhash: NeedleHash::reverse(needle), + kind, + } + } + + fn needle(&self) -> &[u8] { + self.needle.as_slice() + } + + fn as_ref(&self) -> SearcherRev<'_> { + use self::SearcherRevKind::*; + + let kind = match self.kind { + Empty => Empty, + OneByte(b) => OneByte(b), + TwoWay(tw) => TwoWay(tw), + }; + SearcherRev { + needle: CowBytes::new(self.needle()), + nhash: self.nhash, + kind, + } + } + + #[cfg(feature = "std")] + fn into_owned(self) -> SearcherRev<'static> { + use self::SearcherRevKind::*; + + let kind = match self.kind { + Empty => Empty, + OneByte(b) => OneByte(b), + TwoWay(tw) => TwoWay(tw), + }; + SearcherRev { + needle: self.needle.into_owned(), + nhash: self.nhash, + kind, + } + } + + /// Implements reverse substring search by selecting the implementation + /// chosen at construction and executing it on the given haystack with the + /// prefilter's current state of effectiveness. + #[inline(always)] + fn rfind(&self, haystack: &[u8]) -> Option<usize> { + use self::SearcherRevKind::*; + + let needle = self.needle(); + if haystack.len() < needle.len() { + return None; + } + match self.kind { + Empty => Some(haystack.len()), + OneByte(b) => crate::memrchr(b, haystack), + TwoWay(ref tw) => { + // For very short haystacks (e.g., where the prefilter probably + // can't run), it's faster to just run RK. + if rabinkarp::is_fast(haystack, needle) { + rabinkarp::rfind_with(&self.nhash, haystack, needle) + } else { + tw.rfind(haystack, needle) + } + } + } + } +} + +/// This module defines some generic quickcheck properties useful for testing +/// any substring search algorithm. It also runs those properties for the +/// top-level public API memmem routines. (The properties are also used to +/// test various substring search implementations more granularly elsewhere as +/// well.) +#[cfg(all(test, feature = "std", not(miri)))] +mod proptests { + // N.B. This defines the quickcheck tests using the properties defined + // below. Because of macro-visibility weirdness, the actual macro is + // defined at the top of this file. + define_memmem_quickcheck_tests!(super::find, super::rfind); + + /// Check that every prefix of the given byte string is a substring. + pub(crate) fn prefix_is_substring( + reverse: bool, + bs: &[u8], + mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>, + ) -> bool { + if bs.is_empty() { + return true; + } + for i in 0..(bs.len() - 1) { + let prefix = &bs[..i]; + if reverse { + assert_eq!(naive_rfind(bs, prefix), search(bs, prefix)); + } else { + assert_eq!(naive_find(bs, prefix), search(bs, prefix)); + } + } + true + } + + /// Check that every suffix of the given byte string is a substring. + pub(crate) fn suffix_is_substring( + reverse: bool, + bs: &[u8], + mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>, + ) -> bool { + if bs.is_empty() { + return true; + } + for i in 0..(bs.len() - 1) { + let suffix = &bs[i..]; + if reverse { + assert_eq!(naive_rfind(bs, suffix), search(bs, suffix)); + } else { + assert_eq!(naive_find(bs, suffix), search(bs, suffix)); + } + } + true + } + + /// Check that naive substring search matches the result of the given search + /// algorithm. + pub(crate) fn matches_naive( + reverse: bool, + haystack: &[u8], + needle: &[u8], + mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>, + ) -> bool { + if reverse { + naive_rfind(haystack, needle) == search(haystack, needle) + } else { + naive_find(haystack, needle) == search(haystack, needle) + } + } + + /// Naively search forwards for the given needle in the given haystack. + fn naive_find(haystack: &[u8], needle: &[u8]) -> Option<usize> { + if needle.is_empty() { + return Some(0); + } else if haystack.len() < needle.len() { + return None; + } + for i in 0..(haystack.len() - needle.len() + 1) { + if needle == &haystack[i..i + needle.len()] { + return Some(i); + } + } + None + } + + /// Naively search in reverse for the given needle in the given haystack. + fn naive_rfind(haystack: &[u8], needle: &[u8]) -> Option<usize> { + if needle.is_empty() { + return Some(haystack.len()); + } else if haystack.len() < needle.len() { + return None; + } + for i in (0..(haystack.len() - needle.len() + 1)).rev() { + if needle == &haystack[i..i + needle.len()] { + return Some(i); + } + } + None + } +} + +/// This module defines some hand-written "simple" substring tests. It +/// also provides routines for easily running them on any substring search +/// implementation. +#[cfg(test)] +mod testsimples { + define_memmem_simple_tests!(super::find, super::rfind); + + /// Each test is a (needle, haystack, expected_fwd, expected_rev) tuple. + type SearchTest = + (&'static str, &'static str, Option<usize>, Option<usize>); + + const SEARCH_TESTS: &'static [SearchTest] = &[ + ("", "", Some(0), Some(0)), + ("", "a", Some(0), Some(1)), + ("", "ab", Some(0), Some(2)), + ("", "abc", Some(0), Some(3)), + ("a", "", None, None), + ("a", "a", Some(0), Some(0)), + ("a", "aa", Some(0), Some(1)), + ("a", "ba", Some(1), Some(1)), + ("a", "bba", Some(2), Some(2)), + ("a", "bbba", Some(3), Some(3)), + ("a", "bbbab", Some(3), Some(3)), + ("a", "bbbabb", Some(3), Some(3)), + ("a", "bbbabbb", Some(3), Some(3)), + ("a", "bbbbbb", None, None), + ("ab", "", None, None), + ("ab", "a", None, None), + ("ab", "b", None, None), + ("ab", "ab", Some(0), Some(0)), + ("ab", "aab", Some(1), Some(1)), + ("ab", "aaab", Some(2), Some(2)), + ("ab", "abaab", Some(0), Some(3)), + ("ab", "baaab", Some(3), Some(3)), + ("ab", "acb", None, None), + ("ab", "abba", Some(0), Some(0)), + ("abc", "ab", None, None), + ("abc", "abc", Some(0), Some(0)), + ("abc", "abcz", Some(0), Some(0)), + ("abc", "abczz", Some(0), Some(0)), + ("abc", "zabc", Some(1), Some(1)), + ("abc", "zzabc", Some(2), Some(2)), + ("abc", "azbc", None, None), + ("abc", "abzc", None, None), + ("abczdef", "abczdefzzzzzzzzzzzzzzzzzzzz", Some(0), Some(0)), + ("abczdef", "zzzzzzzzzzzzzzzzzzzzabczdef", Some(20), Some(20)), + ("xyz", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaxyz", Some(32), Some(32)), + // Failures caught by quickcheck. + ("\u{0}\u{15}", "\u{0}\u{15}\u{15}\u{0}", Some(0), Some(0)), + ("\u{0}\u{1e}", "\u{1e}\u{0}", None, None), + ]; + + /// Run the substring search tests. `search` should be a closure that + /// accepts a haystack and a needle and returns the starting position + /// of the first occurrence of needle in the haystack, or `None` if one + /// doesn't exist. + pub(crate) fn run_search_tests_fwd( + mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>, + ) { + for &(needle, haystack, expected_fwd, _) in SEARCH_TESTS { + let (n, h) = (needle.as_bytes(), haystack.as_bytes()); + assert_eq!( + expected_fwd, + search(h, n), + "needle: {:?}, haystack: {:?}, expected: {:?}", + n, + h, + expected_fwd + ); + } + } + + /// Run the substring search tests. `search` should be a closure that + /// accepts a haystack and a needle and returns the starting position of + /// the last occurrence of needle in the haystack, or `None` if one doesn't + /// exist. + pub(crate) fn run_search_tests_rev( + mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>, + ) { + for &(needle, haystack, _, expected_rev) in SEARCH_TESTS { + let (n, h) = (needle.as_bytes(), haystack.as_bytes()); + assert_eq!( + expected_rev, + search(h, n), + "needle: {:?}, haystack: {:?}, expected: {:?}", + n, + h, + expected_rev + ); + } + } +} diff --git a/third_party/rust/memchr/src/memmem/prefilter/fallback.rs b/third_party/rust/memchr/src/memmem/prefilter/fallback.rs new file mode 100644 index 0000000000..ae1bbccb32 --- /dev/null +++ b/third_party/rust/memchr/src/memmem/prefilter/fallback.rs @@ -0,0 +1,122 @@ +/* +This module implements a "fallback" prefilter that only relies on memchr to +function. While memchr works best when it's explicitly vectorized, its +fallback implementations are fast enough to make a prefilter like this +worthwhile. + +The essence of this implementation is to identify two rare bytes in a needle +based on a background frequency distribution of bytes. We then run memchr on the +rarer byte. For each match, we use the second rare byte as a guard to quickly +check if a match is possible. If the position passes the guard test, then we do +a naive memcmp to confirm the match. + +In practice, this formulation works amazingly well, primarily because of the +heuristic use of a background frequency distribution. However, it does have a +number of weaknesses where it can get quite slow when its background frequency +distribution doesn't line up with the haystack being searched. This is why we +have specialized vector routines that essentially take this idea and move the +guard check into vectorized code. (Those specialized vector routines do still +make use of the background frequency distribution of bytes though.) + +This fallback implementation was originally formulated in regex many moons ago: +https://github.com/rust-lang/regex/blob/3db8722d0b204a85380fe2a65e13d7065d7dd968/src/literal/imp.rs#L370-L501 +Prior to that, I'm not aware of anyone using this technique in any prominent +substring search implementation. Although, I'm sure folks have had this same +insight long before me. + +Another version of this also appeared in bstr: +https://github.com/BurntSushi/bstr/blob/a444256ca7407fe180ee32534688549655b7a38e/src/search/prefilter.rs#L83-L340 +*/ + +use crate::memmem::{ + prefilter::{PrefilterFnTy, PrefilterState}, + NeedleInfo, +}; + +// Check that the functions below satisfy the Prefilter function type. +const _: PrefilterFnTy = find; + +/// Look for a possible occurrence of needle. The position returned +/// corresponds to the beginning of the occurrence, if one exists. +/// +/// Callers may assume that this never returns false negatives (i.e., it +/// never misses an actual occurrence), but must check that the returned +/// position corresponds to a match. That is, it can return false +/// positives. +/// +/// This should only be used when Freqy is constructed for forward +/// searching. +pub(crate) fn find( + prestate: &mut PrefilterState, + ninfo: &NeedleInfo, + haystack: &[u8], + needle: &[u8], +) -> Option<usize> { + let mut i = 0; + let (rare1i, rare2i) = ninfo.rarebytes.as_rare_usize(); + let (rare1, rare2) = ninfo.rarebytes.as_rare_bytes(needle); + while prestate.is_effective() { + // Use a fast vectorized implementation to skip to the next + // occurrence of the rarest byte (heuristically chosen) in the + // needle. + let found = crate::memchr(rare1, &haystack[i..])?; + prestate.update(found); + i += found; + + // If we can't align our first match with the haystack, then a + // match is impossible. + if i < rare1i { + i += 1; + continue; + } + + // Align our rare2 byte with the haystack. A mismatch means that + // a match is impossible. + let aligned_rare2i = i - rare1i + rare2i; + if haystack.get(aligned_rare2i) != Some(&rare2) { + i += 1; + continue; + } + + // We've done what we can. There might be a match here. + return Some(i - rare1i); + } + // The only way we get here is if we believe our skipping heuristic + // has become ineffective. We're allowed to return false positives, + // so return the position at which we advanced to, aligned to the + // haystack. + Some(i.saturating_sub(rare1i)) +} + +#[cfg(all(test, feature = "std"))] +mod tests { + use super::*; + + fn freqy_find(haystack: &[u8], needle: &[u8]) -> Option<usize> { + let ninfo = NeedleInfo::new(needle); + let mut prestate = PrefilterState::new(); + find(&mut prestate, &ninfo, haystack, needle) + } + + #[test] + fn freqy_forward() { + assert_eq!(Some(0), freqy_find(b"BARFOO", b"BAR")); + assert_eq!(Some(3), freqy_find(b"FOOBAR", b"BAR")); + assert_eq!(Some(0), freqy_find(b"zyzz", b"zyzy")); + assert_eq!(Some(2), freqy_find(b"zzzy", b"zyzy")); + assert_eq!(None, freqy_find(b"zazb", b"zyzy")); + assert_eq!(Some(0), freqy_find(b"yzyy", b"yzyz")); + assert_eq!(Some(2), freqy_find(b"yyyz", b"yzyz")); + assert_eq!(None, freqy_find(b"yayb", b"yzyz")); + } + + #[test] + #[cfg(not(miri))] + fn prefilter_permutations() { + use crate::memmem::prefilter::tests::PrefilterTest; + + // SAFETY: super::find is safe to call for all inputs and on all + // platforms. + unsafe { PrefilterTest::run_all_tests(super::find) }; + } +} diff --git a/third_party/rust/memchr/src/memmem/prefilter/genericsimd.rs b/third_party/rust/memchr/src/memmem/prefilter/genericsimd.rs new file mode 100644 index 0000000000..1a6e387348 --- /dev/null +++ b/third_party/rust/memchr/src/memmem/prefilter/genericsimd.rs @@ -0,0 +1,207 @@ +use core::mem::size_of; + +use crate::memmem::{ + prefilter::{PrefilterFnTy, PrefilterState}, + vector::Vector, + NeedleInfo, +}; + +/// The implementation of the forward vector accelerated candidate finder. +/// +/// This is inspired by the "generic SIMD" algorithm described here: +/// http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd +/// +/// The main difference is that this is just a prefilter. That is, it reports +/// candidates once they are seen and doesn't attempt to confirm them. Also, +/// the bytes this routine uses to check for candidates are selected based on +/// an a priori background frequency distribution. This means that on most +/// haystacks, this will on average spend more time in vectorized code than you +/// would if you just selected the first and last bytes of the needle. +/// +/// Note that a non-prefilter variant of this algorithm can be found in the +/// parent module, but it only works on smaller needles. +/// +/// `prestate`, `ninfo`, `haystack` and `needle` are the four prefilter +/// function parameters. `fallback` is a prefilter that is used if the haystack +/// is too small to be handled with the given vector size. +/// +/// This routine is not safe because it is intended for callers to specialize +/// this with a particular vector (e.g., __m256i) and then call it with the +/// relevant target feature (e.g., avx2) enabled. +/// +/// # Panics +/// +/// If `needle.len() <= 1`, then this panics. +/// +/// # Safety +/// +/// Since this is meant to be used with vector functions, callers need to +/// specialize this inside of a function with a `target_feature` attribute. +/// Therefore, callers must ensure that whatever target feature is being used +/// supports the vector functions that this function is specialized for. (For +/// the specific vector functions used, see the Vector trait implementations.) +#[inline(always)] +pub(crate) unsafe fn find<V: Vector>( + prestate: &mut PrefilterState, + ninfo: &NeedleInfo, + haystack: &[u8], + needle: &[u8], + fallback: PrefilterFnTy, +) -> Option<usize> { + assert!(needle.len() >= 2, "needle must be at least 2 bytes"); + let (rare1i, rare2i) = ninfo.rarebytes.as_rare_ordered_usize(); + let min_haystack_len = rare2i + size_of::<V>(); + if haystack.len() < min_haystack_len { + return fallback(prestate, ninfo, haystack, needle); + } + + let start_ptr = haystack.as_ptr(); + let end_ptr = start_ptr.add(haystack.len()); + let max_ptr = end_ptr.sub(min_haystack_len); + let mut ptr = start_ptr; + + let rare1chunk = V::splat(needle[rare1i]); + let rare2chunk = V::splat(needle[rare2i]); + + // N.B. I did experiment with unrolling the loop to deal with size(V) + // bytes at a time and 2*size(V) bytes at a time. The double unroll + // was marginally faster while the quadruple unroll was unambiguously + // slower. In the end, I decided the complexity from unrolling wasn't + // worth it. I used the memmem/krate/prebuilt/huge-en/ benchmarks to + // compare. + while ptr <= max_ptr { + let m = find_in_chunk2(ptr, rare1i, rare2i, rare1chunk, rare2chunk); + if let Some(chunki) = m { + return Some(matched(prestate, start_ptr, ptr, chunki)); + } + ptr = ptr.add(size_of::<V>()); + } + if ptr < end_ptr { + // This routine immediately quits if a candidate match is found. + // That means that if we're here, no candidate matches have been + // found at or before 'ptr'. Thus, we don't need to mask anything + // out even though we might technically search part of the haystack + // that we've already searched (because we know it can't match). + ptr = max_ptr; + let m = find_in_chunk2(ptr, rare1i, rare2i, rare1chunk, rare2chunk); + if let Some(chunki) = m { + return Some(matched(prestate, start_ptr, ptr, chunki)); + } + } + prestate.update(haystack.len()); + None +} + +// Below are two different techniques for checking whether a candidate +// match exists in a given chunk or not. find_in_chunk2 checks two bytes +// where as find_in_chunk3 checks three bytes. The idea behind checking +// three bytes is that while we do a bit more work per iteration, we +// decrease the chances of a false positive match being reported and thus +// make the search faster overall. This actually works out for the +// memmem/krate/prebuilt/huge-en/never-all-common-bytes benchmark, where +// using find_in_chunk3 is about 25% faster than find_in_chunk2. However, +// it turns out that find_in_chunk2 is faster for all other benchmarks, so +// perhaps the extra check isn't worth it in practice. +// +// For now, we go with find_in_chunk2, but we leave find_in_chunk3 around +// to make it easy to switch to and benchmark when possible. + +/// Search for an occurrence of two rare bytes from the needle in the current +/// chunk pointed to by ptr. +/// +/// rare1chunk and rare2chunk correspond to vectors with the rare1 and rare2 +/// bytes repeated in each 8-bit lane, respectively. +/// +/// # Safety +/// +/// It must be safe to do an unaligned read of size(V) bytes starting at both +/// (ptr + rare1i) and (ptr + rare2i). +#[inline(always)] +unsafe fn find_in_chunk2<V: Vector>( + ptr: *const u8, + rare1i: usize, + rare2i: usize, + rare1chunk: V, + rare2chunk: V, +) -> Option<usize> { + let chunk0 = V::load_unaligned(ptr.add(rare1i)); + let chunk1 = V::load_unaligned(ptr.add(rare2i)); + + let eq0 = chunk0.cmpeq(rare1chunk); + let eq1 = chunk1.cmpeq(rare2chunk); + + let match_offsets = eq0.and(eq1).movemask(); + if match_offsets == 0 { + return None; + } + Some(match_offsets.trailing_zeros() as usize) +} + +/// Search for an occurrence of two rare bytes and the first byte (even if one +/// of the rare bytes is equivalent to the first byte) from the needle in the +/// current chunk pointed to by ptr. +/// +/// firstchunk, rare1chunk and rare2chunk correspond to vectors with the first, +/// rare1 and rare2 bytes repeated in each 8-bit lane, respectively. +/// +/// # Safety +/// +/// It must be safe to do an unaligned read of size(V) bytes starting at ptr, +/// (ptr + rare1i) and (ptr + rare2i). +#[allow(dead_code)] +#[inline(always)] +unsafe fn find_in_chunk3<V: Vector>( + ptr: *const u8, + rare1i: usize, + rare2i: usize, + firstchunk: V, + rare1chunk: V, + rare2chunk: V, +) -> Option<usize> { + let chunk0 = V::load_unaligned(ptr); + let chunk1 = V::load_unaligned(ptr.add(rare1i)); + let chunk2 = V::load_unaligned(ptr.add(rare2i)); + + let eq0 = chunk0.cmpeq(firstchunk); + let eq1 = chunk1.cmpeq(rare1chunk); + let eq2 = chunk2.cmpeq(rare2chunk); + + let match_offsets = eq0.and(eq1).and(eq2).movemask(); + if match_offsets == 0 { + return None; + } + Some(match_offsets.trailing_zeros() as usize) +} + +/// Accepts a chunk-relative offset and returns a haystack relative offset +/// after updating the prefilter state. +/// +/// Why do we use this unlineable function when a search completes? Well, +/// I don't know. Really. Obviously this function was not here initially. +/// When doing profiling, the codegen for the inner loop here looked bad and +/// I didn't know why. There were a couple extra 'add' instructions and an +/// extra 'lea' instruction that I couldn't explain. I hypothesized that the +/// optimizer was having trouble untangling the hot code in the loop from the +/// code that deals with a candidate match. By putting the latter into an +/// unlineable function, it kind of forces the issue and it had the intended +/// effect: codegen improved measurably. It's good for a ~10% improvement +/// across the board on the memmem/krate/prebuilt/huge-en/ benchmarks. +#[cold] +#[inline(never)] +fn matched( + prestate: &mut PrefilterState, + start_ptr: *const u8, + ptr: *const u8, + chunki: usize, +) -> usize { + let found = diff(ptr, start_ptr) + chunki; + prestate.update(found); + found +} + +/// Subtract `b` from `a` and return the difference. `a` must be greater than +/// or equal to `b`. +fn diff(a: *const u8, b: *const u8) -> usize { + debug_assert!(a >= b); + (a as usize) - (b as usize) +} diff --git a/third_party/rust/memchr/src/memmem/prefilter/mod.rs b/third_party/rust/memchr/src/memmem/prefilter/mod.rs new file mode 100644 index 0000000000..015d3b27af --- /dev/null +++ b/third_party/rust/memchr/src/memmem/prefilter/mod.rs @@ -0,0 +1,570 @@ +use crate::memmem::{rarebytes::RareNeedleBytes, NeedleInfo}; + +mod fallback; +#[cfg(memchr_runtime_simd)] +mod genericsimd; +#[cfg(all(not(miri), target_arch = "wasm32", memchr_runtime_simd))] +mod wasm; +#[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] +mod x86; + +/// The maximum frequency rank permitted for the fallback prefilter. If the +/// rarest byte in the needle has a frequency rank above this value, then no +/// prefilter is used if the fallback prefilter would otherwise be selected. +const MAX_FALLBACK_RANK: usize = 250; + +/// A combination of prefilter effectiveness state, the prefilter function and +/// the needle info required to run a prefilter. +/// +/// For the most part, these are grouped into a single type for convenience, +/// instead of needing to pass around all three as distinct function +/// parameters. +pub(crate) struct Pre<'a> { + /// State that tracks the effectiveness of a prefilter. + pub(crate) state: &'a mut PrefilterState, + /// The actual prefilter function. + pub(crate) prefn: PrefilterFn, + /// Information about a needle, such as its RK hash and rare byte offsets. + pub(crate) ninfo: &'a NeedleInfo, +} + +impl<'a> Pre<'a> { + /// Call this prefilter on the given haystack with the given needle. + #[inline(always)] + pub(crate) fn call( + &mut self, + haystack: &[u8], + needle: &[u8], + ) -> Option<usize> { + self.prefn.call(self.state, self.ninfo, haystack, needle) + } + + /// Return true if and only if this prefilter should be used. + #[inline(always)] + pub(crate) fn should_call(&mut self) -> bool { + self.state.is_effective() + } +} + +/// A prefilter function. +/// +/// A prefilter function describes both forward and reverse searches. +/// (Although, we don't currently implement prefilters for reverse searching.) +/// In the case of a forward search, the position returned corresponds to +/// the starting offset of a match (confirmed or possible). Its minimum +/// value is `0`, and its maximum value is `haystack.len() - 1`. In the case +/// of a reverse search, the position returned corresponds to the position +/// immediately after a match (confirmed or possible). Its minimum value is `1` +/// and its maximum value is `haystack.len()`. +/// +/// In both cases, the position returned is the starting (or ending) point of a +/// _possible_ match. That is, returning a false positive is okay. A prefilter, +/// however, must never return any false negatives. That is, if a match exists +/// at a particular position `i`, then a prefilter _must_ return that position. +/// It cannot skip past it. +/// +/// # Safety +/// +/// A prefilter function is not safe to create, since not all prefilters are +/// safe to call in all contexts. (e.g., A prefilter that uses AVX instructions +/// may only be called on x86_64 CPUs with the relevant AVX feature enabled.) +/// Thus, callers must ensure that when a prefilter function is created that it +/// is safe to call for the current environment. +#[derive(Clone, Copy)] +pub(crate) struct PrefilterFn(PrefilterFnTy); + +/// The type of a prefilter function. All prefilters must satisfy this +/// signature. +/// +/// Using a function pointer like this does inhibit inlining, but it does +/// eliminate branching and the extra costs associated with copying a larger +/// enum. Note also, that using Box<dyn SomePrefilterTrait> can't really work +/// here, since we want to work in contexts that don't have dynamic memory +/// allocation. Moreover, in the default configuration of this crate on x86_64 +/// CPUs released in the past ~decade, we will use an AVX2-optimized prefilter, +/// which generally won't be inlineable into the surrounding code anyway. +/// (Unless AVX2 is enabled at compile time, but this is typically rare, since +/// it produces a non-portable binary.) +pub(crate) type PrefilterFnTy = unsafe fn( + prestate: &mut PrefilterState, + ninfo: &NeedleInfo, + haystack: &[u8], + needle: &[u8], +) -> Option<usize>; + +// If the haystack is too small for SSE2, then just run memchr on the +// rarest byte and be done with it. (It is likely that this code path is +// rarely exercised, since a higher level routine will probably dispatch to +// Rabin-Karp for such a small haystack.) +#[cfg(memchr_runtime_simd)] +fn simple_memchr_fallback( + _prestate: &mut PrefilterState, + ninfo: &NeedleInfo, + haystack: &[u8], + needle: &[u8], +) -> Option<usize> { + let (rare, _) = ninfo.rarebytes.as_rare_ordered_usize(); + crate::memchr(needle[rare], haystack).map(|i| i.saturating_sub(rare)) +} + +impl PrefilterFn { + /// Create a new prefilter function from the function pointer given. + /// + /// # Safety + /// + /// Callers must ensure that the given prefilter function is safe to call + /// for all inputs in the current environment. For example, if the given + /// prefilter function uses AVX instructions, then the caller must ensure + /// that the appropriate AVX CPU features are enabled. + pub(crate) unsafe fn new(prefn: PrefilterFnTy) -> PrefilterFn { + PrefilterFn(prefn) + } + + /// Call the underlying prefilter function with the given arguments. + pub fn call( + self, + prestate: &mut PrefilterState, + ninfo: &NeedleInfo, + haystack: &[u8], + needle: &[u8], + ) -> Option<usize> { + // SAFETY: Callers have the burden of ensuring that a prefilter + // function is safe to call for all inputs in the current environment. + unsafe { (self.0)(prestate, ninfo, haystack, needle) } + } +} + +impl core::fmt::Debug for PrefilterFn { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + "<prefilter-fn(...)>".fmt(f) + } +} + +/// Prefilter controls whether heuristics are used to accelerate searching. +/// +/// A prefilter refers to the idea of detecting candidate matches very quickly, +/// and then confirming whether those candidates are full matches. This +/// idea can be quite effective since it's often the case that looking for +/// candidates can be a lot faster than running a complete substring search +/// over the entire input. Namely, looking for candidates can be done with +/// extremely fast vectorized code. +/// +/// The downside of a prefilter is that it assumes false positives (which are +/// candidates generated by a prefilter that aren't matches) are somewhat rare +/// relative to the frequency of full matches. That is, if a lot of false +/// positives are generated, then it's possible for search time to be worse +/// than if the prefilter wasn't enabled in the first place. +/// +/// Another downside of a prefilter is that it can result in highly variable +/// performance, where some cases are extraordinarily fast and others aren't. +/// Typically, variable performance isn't a problem, but it may be for your use +/// case. +/// +/// The use of prefilters in this implementation does use a heuristic to detect +/// when a prefilter might not be carrying its weight, and will dynamically +/// disable its use. Nevertheless, this configuration option gives callers +/// the ability to disable prefilters if you have knowledge that they won't be +/// useful. +#[derive(Clone, Copy, Debug)] +#[non_exhaustive] +pub enum Prefilter { + /// Never used a prefilter in substring search. + None, + /// Automatically detect whether a heuristic prefilter should be used. If + /// it is used, then heuristics will be used to dynamically disable the + /// prefilter if it is believed to not be carrying its weight. + Auto, +} + +impl Default for Prefilter { + fn default() -> Prefilter { + Prefilter::Auto + } +} + +impl Prefilter { + pub(crate) fn is_none(&self) -> bool { + match *self { + Prefilter::None => true, + _ => false, + } + } +} + +/// PrefilterState tracks state associated with the effectiveness of a +/// prefilter. It is used to track how many bytes, on average, are skipped by +/// the prefilter. If this average dips below a certain threshold over time, +/// then the state renders the prefilter inert and stops using it. +/// +/// A prefilter state should be created for each search. (Where creating an +/// iterator is treated as a single search.) A prefilter state should only be +/// created from a `Freqy`. e.g., An inert `Freqy` will produce an inert +/// `PrefilterState`. +#[derive(Clone, Debug)] +pub(crate) struct PrefilterState { + /// The number of skips that has been executed. This is always 1 greater + /// than the actual number of skips. The special sentinel value of 0 + /// indicates that the prefilter is inert. This is useful to avoid + /// additional checks to determine whether the prefilter is still + /// "effective." Once a prefilter becomes inert, it should no longer be + /// used (according to our heuristics). + skips: u32, + /// The total number of bytes that have been skipped. + skipped: u32, +} + +impl PrefilterState { + /// The minimum number of skip attempts to try before considering whether + /// a prefilter is effective or not. + const MIN_SKIPS: u32 = 50; + + /// The minimum amount of bytes that skipping must average. + /// + /// This value was chosen based on varying it and checking + /// the microbenchmarks. In particular, this can impact the + /// pathological/repeated-{huge,small} benchmarks quite a bit if it's set + /// too low. + const MIN_SKIP_BYTES: u32 = 8; + + /// Create a fresh prefilter state. + pub(crate) fn new() -> PrefilterState { + PrefilterState { skips: 1, skipped: 0 } + } + + /// Create a fresh prefilter state that is always inert. + pub(crate) fn inert() -> PrefilterState { + PrefilterState { skips: 0, skipped: 0 } + } + + /// Update this state with the number of bytes skipped on the last + /// invocation of the prefilter. + #[inline] + pub(crate) fn update(&mut self, skipped: usize) { + self.skips = self.skips.saturating_add(1); + // We need to do this dance since it's technically possible for + // `skipped` to overflow a `u32`. (And we use a `u32` to reduce the + // size of a prefilter state.) + if skipped > core::u32::MAX as usize { + self.skipped = core::u32::MAX; + } else { + self.skipped = self.skipped.saturating_add(skipped as u32); + } + } + + /// Return true if and only if this state indicates that a prefilter is + /// still effective. + #[inline] + pub(crate) fn is_effective(&mut self) -> bool { + if self.is_inert() { + return false; + } + if self.skips() < PrefilterState::MIN_SKIPS { + return true; + } + if self.skipped >= PrefilterState::MIN_SKIP_BYTES * self.skips() { + return true; + } + + // We're inert. + self.skips = 0; + false + } + + #[inline] + fn is_inert(&self) -> bool { + self.skips == 0 + } + + #[inline] + fn skips(&self) -> u32 { + self.skips.saturating_sub(1) + } +} + +/// Determine which prefilter function, if any, to use. +/// +/// This only applies to x86_64 when runtime SIMD detection is enabled (which +/// is the default). In general, we try to use an AVX prefilter, followed by +/// SSE and then followed by a generic one based on memchr. +#[inline(always)] +pub(crate) fn forward( + config: &Prefilter, + rare: &RareNeedleBytes, + needle: &[u8], +) -> Option<PrefilterFn> { + if config.is_none() || needle.len() <= 1 { + return None; + } + + #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] + { + #[cfg(feature = "std")] + { + if cfg!(memchr_runtime_avx) { + if is_x86_feature_detected!("avx2") { + // SAFETY: x86::avx::find only requires the avx2 feature, + // which we've just checked above. + return unsafe { Some(PrefilterFn::new(x86::avx::find)) }; + } + } + } + if cfg!(memchr_runtime_sse2) { + // SAFETY: x86::sse::find only requires the sse2 feature, which is + // guaranteed to be available on x86_64. + return unsafe { Some(PrefilterFn::new(x86::sse::find)) }; + } + } + #[cfg(all(not(miri), target_arch = "wasm32", memchr_runtime_simd))] + { + // SAFETY: `wasm::find` is actually a safe function + // + // Also note that the `if true` is here to prevent, on wasm with simd, + // rustc warning about the code below being dead code. + if true { + return unsafe { Some(PrefilterFn::new(wasm::find)) }; + } + } + // Check that our rarest byte has a reasonably low rank. The main issue + // here is that the fallback prefilter can perform pretty poorly if it's + // given common bytes. So we try to avoid the worst cases here. + let (rare1_rank, _) = rare.as_ranks(needle); + if rare1_rank <= MAX_FALLBACK_RANK { + // SAFETY: fallback::find is safe to call in all environments. + return unsafe { Some(PrefilterFn::new(fallback::find)) }; + } + None +} + +/// Return the minimum length of the haystack in which a prefilter should be +/// used. If the haystack is below this length, then it's probably not worth +/// the overhead of running the prefilter. +/// +/// We used to look at the length of a haystack here. That is, if it was too +/// small, then don't bother with the prefilter. But two things changed: +/// the prefilter falls back to memchr for small haystacks, and, at the +/// meta-searcher level, Rabin-Karp is employed for tiny haystacks anyway. +/// +/// We keep it around for now in case we want to bring it back. +#[allow(dead_code)] +pub(crate) fn minimum_len(_haystack: &[u8], needle: &[u8]) -> usize { + // If the haystack length isn't greater than needle.len() * FACTOR, then + // no prefilter will be used. The presumption here is that since there + // are so few bytes to check, it's not worth running the prefilter since + // there will need to be a validation step anyway. Thus, the prefilter is + // largely redundant work. + // + // Increase the factor noticeably hurts the + // memmem/krate/prebuilt/teeny-*/never-john-watson benchmarks. + const PREFILTER_LENGTH_FACTOR: usize = 2; + const VECTOR_MIN_LENGTH: usize = 16; + let min = core::cmp::max( + VECTOR_MIN_LENGTH, + PREFILTER_LENGTH_FACTOR * needle.len(), + ); + // For haystacks with length==min, we still want to avoid the prefilter, + // so add 1. + min + 1 +} + +#[cfg(all(test, feature = "std", not(miri)))] +pub(crate) mod tests { + use std::convert::{TryFrom, TryInto}; + + use super::*; + use crate::memmem::{ + prefilter::PrefilterFnTy, rabinkarp, rarebytes::RareNeedleBytes, + }; + + // Below is a small jig that generates prefilter tests. The main purpose + // of this jig is to generate tests of varying needle/haystack lengths + // in order to try and exercise all code paths in our prefilters. And in + // particular, this is especially important for vectorized prefilters where + // certain code paths might only be exercised at certain lengths. + + /// A test that represents the input and expected output to a prefilter + /// function. The test should be able to run with any prefilter function + /// and get the expected output. + pub(crate) struct PrefilterTest { + // These fields represent the inputs and expected output of a forwards + // prefilter function. + pub(crate) ninfo: NeedleInfo, + pub(crate) haystack: Vec<u8>, + pub(crate) needle: Vec<u8>, + pub(crate) output: Option<usize>, + } + + impl PrefilterTest { + /// Run all generated forward prefilter tests on the given prefn. + /// + /// # Safety + /// + /// Callers must ensure that the given prefilter function pointer is + /// safe to call for all inputs in the current environment. + pub(crate) unsafe fn run_all_tests(prefn: PrefilterFnTy) { + PrefilterTest::run_all_tests_filter(prefn, |_| true) + } + + /// Run all generated forward prefilter tests that pass the given + /// predicate on the given prefn. + /// + /// # Safety + /// + /// Callers must ensure that the given prefilter function pointer is + /// safe to call for all inputs in the current environment. + pub(crate) unsafe fn run_all_tests_filter( + prefn: PrefilterFnTy, + mut predicate: impl FnMut(&PrefilterTest) -> bool, + ) { + for seed in PREFILTER_TEST_SEEDS { + for test in seed.generate() { + if predicate(&test) { + test.run(prefn); + } + } + } + } + + /// Create a new prefilter test from a seed and some chose offsets to + /// rare bytes in the seed's needle. + /// + /// If a valid test could not be constructed, then None is returned. + /// (Currently, we take the approach of massaging tests to be valid + /// instead of rejecting them outright.) + fn new( + seed: PrefilterTestSeed, + rare1i: usize, + rare2i: usize, + haystack_len: usize, + needle_len: usize, + output: Option<usize>, + ) -> Option<PrefilterTest> { + let mut rare1i: u8 = rare1i.try_into().unwrap(); + let mut rare2i: u8 = rare2i.try_into().unwrap(); + // The '#' byte is never used in a haystack (unless we're expecting + // a match), while the '@' byte is never used in a needle. + let mut haystack = vec![b'@'; haystack_len]; + let mut needle = vec![b'#'; needle_len]; + needle[0] = seed.first; + needle[rare1i as usize] = seed.rare1; + needle[rare2i as usize] = seed.rare2; + // If we're expecting a match, then make sure the needle occurs + // in the haystack at the expected position. + if let Some(i) = output { + haystack[i..i + needle.len()].copy_from_slice(&needle); + } + // If the operations above lead to rare offsets pointing to the + // non-first occurrence of a byte, then adjust it. This might lead + // to redundant tests, but it's simpler than trying to change the + // generation process I think. + if let Some(i) = crate::memchr(seed.rare1, &needle) { + rare1i = u8::try_from(i).unwrap(); + } + if let Some(i) = crate::memchr(seed.rare2, &needle) { + rare2i = u8::try_from(i).unwrap(); + } + let ninfo = NeedleInfo { + rarebytes: RareNeedleBytes::new(rare1i, rare2i), + nhash: rabinkarp::NeedleHash::forward(&needle), + }; + Some(PrefilterTest { ninfo, haystack, needle, output }) + } + + /// Run this specific test on the given prefilter function. If the + /// outputs do no match, then this routine panics with a failure + /// message. + /// + /// # Safety + /// + /// Callers must ensure that the given prefilter function pointer is + /// safe to call for all inputs in the current environment. + unsafe fn run(&self, prefn: PrefilterFnTy) { + let mut prestate = PrefilterState::new(); + assert_eq!( + self.output, + prefn( + &mut prestate, + &self.ninfo, + &self.haystack, + &self.needle + ), + "ninfo: {:?}, haystack(len={}): {:?}, needle(len={}): {:?}", + self.ninfo, + self.haystack.len(), + std::str::from_utf8(&self.haystack).unwrap(), + self.needle.len(), + std::str::from_utf8(&self.needle).unwrap(), + ); + } + } + + /// A set of prefilter test seeds. Each seed serves as the base for the + /// generation of many other tests. In essence, the seed captures the + /// "rare" and first bytes among our needle. The tests generated from each + /// seed essentially vary the length of the needle and haystack, while + /// using the rare/first byte configuration from the seed. + /// + /// The purpose of this is to test many different needle/haystack lengths. + /// In particular, some of the vector optimizations might only have bugs + /// in haystacks of a certain size. + const PREFILTER_TEST_SEEDS: &[PrefilterTestSeed] = &[ + PrefilterTestSeed { first: b'x', rare1: b'y', rare2: b'z' }, + PrefilterTestSeed { first: b'x', rare1: b'x', rare2: b'z' }, + PrefilterTestSeed { first: b'x', rare1: b'y', rare2: b'x' }, + PrefilterTestSeed { first: b'x', rare1: b'x', rare2: b'x' }, + PrefilterTestSeed { first: b'x', rare1: b'y', rare2: b'y' }, + ]; + + /// Data that describes a single prefilter test seed. + #[derive(Clone, Copy)] + struct PrefilterTestSeed { + first: u8, + rare1: u8, + rare2: u8, + } + + impl PrefilterTestSeed { + /// Generate a series of prefilter tests from this seed. + fn generate(self) -> impl Iterator<Item = PrefilterTest> { + let len_start = 2; + // The iterator below generates *a lot* of tests. The number of + // tests was chosen somewhat empirically to be "bearable" when + // running the test suite. + // + // We use an iterator here because the collective haystacks of all + // these test cases add up to enough memory to OOM a conservative + // sandbox or a small laptop. + (len_start..=40).flat_map(move |needle_len| { + let rare_start = len_start - 1; + (rare_start..needle_len).flat_map(move |rare1i| { + (rare1i..needle_len).flat_map(move |rare2i| { + (needle_len..=66).flat_map(move |haystack_len| { + PrefilterTest::new( + self, + rare1i, + rare2i, + haystack_len, + needle_len, + None, + ) + .into_iter() + .chain( + (0..=(haystack_len - needle_len)).flat_map( + move |output| { + PrefilterTest::new( + self, + rare1i, + rare2i, + haystack_len, + needle_len, + Some(output), + ) + }, + ), + ) + }) + }) + }) + }) + } + } +} diff --git a/third_party/rust/memchr/src/memmem/prefilter/wasm.rs b/third_party/rust/memchr/src/memmem/prefilter/wasm.rs new file mode 100644 index 0000000000..5470c922a3 --- /dev/null +++ b/third_party/rust/memchr/src/memmem/prefilter/wasm.rs @@ -0,0 +1,39 @@ +use core::arch::wasm32::v128; + +use crate::memmem::{ + prefilter::{PrefilterFnTy, PrefilterState}, + NeedleInfo, +}; + +// Check that the functions below satisfy the Prefilter function type. +const _: PrefilterFnTy = find; + +/// A `v128`-accelerated candidate finder for single-substring search. +#[target_feature(enable = "simd128")] +pub(crate) fn find( + prestate: &mut PrefilterState, + ninfo: &NeedleInfo, + haystack: &[u8], + needle: &[u8], +) -> Option<usize> { + unsafe { + super::genericsimd::find::<v128>( + prestate, + ninfo, + haystack, + needle, + super::simple_memchr_fallback, + ) + } +} + +#[cfg(all(test, feature = "std"))] +mod tests { + #[test] + #[cfg(not(miri))] + fn prefilter_permutations() { + use crate::memmem::prefilter::tests::PrefilterTest; + // SAFETY: super::find is safe to call for all inputs on x86. + unsafe { PrefilterTest::run_all_tests(super::find) }; + } +} diff --git a/third_party/rust/memchr/src/memmem/prefilter/x86/avx.rs b/third_party/rust/memchr/src/memmem/prefilter/x86/avx.rs new file mode 100644 index 0000000000..fb11f335ba --- /dev/null +++ b/third_party/rust/memchr/src/memmem/prefilter/x86/avx.rs @@ -0,0 +1,46 @@ +use core::arch::x86_64::__m256i; + +use crate::memmem::{ + prefilter::{PrefilterFnTy, PrefilterState}, + NeedleInfo, +}; + +// Check that the functions below satisfy the Prefilter function type. +const _: PrefilterFnTy = find; + +/// An AVX2 accelerated candidate finder for single-substring search. +/// +/// # Safety +/// +/// Callers must ensure that the avx2 CPU feature is enabled in the current +/// environment. +#[target_feature(enable = "avx2")] +pub(crate) unsafe fn find( + prestate: &mut PrefilterState, + ninfo: &NeedleInfo, + haystack: &[u8], + needle: &[u8], +) -> Option<usize> { + super::super::genericsimd::find::<__m256i>( + prestate, + ninfo, + haystack, + needle, + super::sse::find, + ) +} + +#[cfg(test)] +mod tests { + #[test] + #[cfg(not(miri))] + fn prefilter_permutations() { + use crate::memmem::prefilter::tests::PrefilterTest; + if !is_x86_feature_detected!("avx2") { + return; + } + // SAFETY: The safety of super::find only requires that the current + // CPU support AVX2, which we checked above. + unsafe { PrefilterTest::run_all_tests(super::find) }; + } +} diff --git a/third_party/rust/memchr/src/memmem/prefilter/x86/mod.rs b/third_party/rust/memchr/src/memmem/prefilter/x86/mod.rs new file mode 100644 index 0000000000..91381e5162 --- /dev/null +++ b/third_party/rust/memchr/src/memmem/prefilter/x86/mod.rs @@ -0,0 +1,5 @@ +// We only use AVX when we can detect at runtime whether it's available, which +// requires std. +#[cfg(feature = "std")] +pub(crate) mod avx; +pub(crate) mod sse; diff --git a/third_party/rust/memchr/src/memmem/prefilter/x86/sse.rs b/third_party/rust/memchr/src/memmem/prefilter/x86/sse.rs new file mode 100644 index 0000000000..b1c48e1e1c --- /dev/null +++ b/third_party/rust/memchr/src/memmem/prefilter/x86/sse.rs @@ -0,0 +1,42 @@ +use core::arch::x86_64::__m128i; + +use crate::memmem::{ + prefilter::{PrefilterFnTy, PrefilterState}, + NeedleInfo, +}; + +// Check that the functions below satisfy the Prefilter function type. +const _: PrefilterFnTy = find; + +/// An SSE2 accelerated candidate finder for single-substring search. +/// +/// # Safety +/// +/// Callers must ensure that the sse2 CPU feature is enabled in the current +/// environment. This feature should be enabled in all x86_64 targets. +#[target_feature(enable = "sse2")] +pub(crate) unsafe fn find( + prestate: &mut PrefilterState, + ninfo: &NeedleInfo, + haystack: &[u8], + needle: &[u8], +) -> Option<usize> { + super::super::genericsimd::find::<__m128i>( + prestate, + ninfo, + haystack, + needle, + super::super::simple_memchr_fallback, + ) +} + +#[cfg(all(test, feature = "std"))] +mod tests { + #[test] + #[cfg(not(miri))] + fn prefilter_permutations() { + use crate::memmem::prefilter::tests::PrefilterTest; + // SAFETY: super::find is safe to call for all inputs on x86. + unsafe { PrefilterTest::run_all_tests(super::find) }; + } +} diff --git a/third_party/rust/memchr/src/memmem/rabinkarp.rs b/third_party/rust/memchr/src/memmem/rabinkarp.rs new file mode 100644 index 0000000000..daa4015d5f --- /dev/null +++ b/third_party/rust/memchr/src/memmem/rabinkarp.rs @@ -0,0 +1,233 @@ +/* +This module implements the classical Rabin-Karp substring search algorithm, +with no extra frills. While its use would seem to break our time complexity +guarantee of O(m+n) (RK's time complexity is O(mn)), we are careful to only +ever use RK on a constant subset of haystacks. The main point here is that +RK has good latency properties for small needles/haystacks. It's very quick +to compute a needle hash and zip through the haystack when compared to +initializing Two-Way, for example. And this is especially useful for cases +where the haystack is just too short for vector instructions to do much good. + +The hashing function used here is the same one recommended by ESMAJ. + +Another choice instead of Rabin-Karp would be Shift-Or. But its latency +isn't quite as good since its preprocessing time is a bit more expensive +(both in practice and in theory). However, perhaps Shift-Or has a place +somewhere else for short patterns. I think the main problem is that it +requires space proportional to the alphabet and the needle. If we, for +example, supported needles up to length 16, then the total table size would be +len(alphabet)*size_of::<u16>()==512 bytes. Which isn't exactly small, and it's +probably bad to put that on the stack. So ideally, we'd throw it on the heap, +but we'd really like to write as much code without using alloc/std as possible. +But maybe it's worth the special casing. It's a TODO to benchmark. + +Wikipedia has a decent explanation, if a bit heavy on the theory: +https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm + +But ESMAJ provides something a bit more concrete: +http://www-igm.univ-mlv.fr/~lecroq/string/node5.html + +Finally, aho-corasick uses Rabin-Karp for multiple pattern match in some cases: +https://github.com/BurntSushi/aho-corasick/blob/3852632f10587db0ff72ef29e88d58bf305a0946/src/packed/rabinkarp.rs +*/ + +/// Whether RK is believed to be very fast for the given needle/haystack. +pub(crate) fn is_fast(haystack: &[u8], _needle: &[u8]) -> bool { + haystack.len() < 16 +} + +/// Search for the first occurrence of needle in haystack using Rabin-Karp. +pub(crate) fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> { + find_with(&NeedleHash::forward(needle), haystack, needle) +} + +/// Search for the first occurrence of needle in haystack using Rabin-Karp with +/// a pre-computed needle hash. +pub(crate) fn find_with( + nhash: &NeedleHash, + mut haystack: &[u8], + needle: &[u8], +) -> Option<usize> { + if haystack.len() < needle.len() { + return None; + } + let start = haystack.as_ptr() as usize; + let mut hash = Hash::from_bytes_fwd(&haystack[..needle.len()]); + // N.B. I've experimented with unrolling this loop, but couldn't realize + // any obvious gains. + loop { + if nhash.eq(hash) && is_prefix(haystack, needle) { + return Some(haystack.as_ptr() as usize - start); + } + if needle.len() >= haystack.len() { + return None; + } + hash.roll(&nhash, haystack[0], haystack[needle.len()]); + haystack = &haystack[1..]; + } +} + +/// Search for the last occurrence of needle in haystack using Rabin-Karp. +pub(crate) fn rfind(haystack: &[u8], needle: &[u8]) -> Option<usize> { + rfind_with(&NeedleHash::reverse(needle), haystack, needle) +} + +/// Search for the last occurrence of needle in haystack using Rabin-Karp with +/// a pre-computed needle hash. +pub(crate) fn rfind_with( + nhash: &NeedleHash, + mut haystack: &[u8], + needle: &[u8], +) -> Option<usize> { + if haystack.len() < needle.len() { + return None; + } + let mut hash = + Hash::from_bytes_rev(&haystack[haystack.len() - needle.len()..]); + loop { + if nhash.eq(hash) && is_suffix(haystack, needle) { + return Some(haystack.len() - needle.len()); + } + if needle.len() >= haystack.len() { + return None; + } + hash.roll( + &nhash, + haystack[haystack.len() - 1], + haystack[haystack.len() - needle.len() - 1], + ); + haystack = &haystack[..haystack.len() - 1]; + } +} + +/// A hash derived from a needle. +#[derive(Clone, Copy, Debug, Default)] +pub(crate) struct NeedleHash { + /// The actual hash. + hash: Hash, + /// The factor needed to multiply a byte by in order to subtract it from + /// the hash. It is defined to be 2^(n-1) (using wrapping exponentiation), + /// where n is the length of the needle. This is how we "remove" a byte + /// from the hash once the hash window rolls past it. + hash_2pow: u32, +} + +impl NeedleHash { + /// Create a new Rabin-Karp hash for the given needle for use in forward + /// searching. + pub(crate) fn forward(needle: &[u8]) -> NeedleHash { + let mut nh = NeedleHash { hash: Hash::new(), hash_2pow: 1 }; + if needle.is_empty() { + return nh; + } + nh.hash.add(needle[0]); + for &b in needle.iter().skip(1) { + nh.hash.add(b); + nh.hash_2pow = nh.hash_2pow.wrapping_shl(1); + } + nh + } + + /// Create a new Rabin-Karp hash for the given needle for use in reverse + /// searching. + pub(crate) fn reverse(needle: &[u8]) -> NeedleHash { + let mut nh = NeedleHash { hash: Hash::new(), hash_2pow: 1 }; + if needle.is_empty() { + return nh; + } + nh.hash.add(needle[needle.len() - 1]); + for &b in needle.iter().rev().skip(1) { + nh.hash.add(b); + nh.hash_2pow = nh.hash_2pow.wrapping_shl(1); + } + nh + } + + /// Return true if the hashes are equivalent. + fn eq(&self, hash: Hash) -> bool { + self.hash == hash + } +} + +/// A Rabin-Karp hash. This might represent the hash of a needle, or the hash +/// of a rolling window in the haystack. +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] +pub(crate) struct Hash(u32); + +impl Hash { + /// Create a new hash that represents the empty string. + pub(crate) fn new() -> Hash { + Hash(0) + } + + /// Create a new hash from the bytes given for use in forward searches. + pub(crate) fn from_bytes_fwd(bytes: &[u8]) -> Hash { + let mut hash = Hash::new(); + for &b in bytes { + hash.add(b); + } + hash + } + + /// Create a new hash from the bytes given for use in reverse searches. + fn from_bytes_rev(bytes: &[u8]) -> Hash { + let mut hash = Hash::new(); + for &b in bytes.iter().rev() { + hash.add(b); + } + hash + } + + /// Add 'new' and remove 'old' from this hash. The given needle hash should + /// correspond to the hash computed for the needle being searched for. + /// + /// This is meant to be used when the rolling window of the haystack is + /// advanced. + fn roll(&mut self, nhash: &NeedleHash, old: u8, new: u8) { + self.del(nhash, old); + self.add(new); + } + + /// Add a byte to this hash. + fn add(&mut self, byte: u8) { + self.0 = self.0.wrapping_shl(1).wrapping_add(byte as u32); + } + + /// Remove a byte from this hash. The given needle hash should correspond + /// to the hash computed for the needle being searched for. + fn del(&mut self, nhash: &NeedleHash, byte: u8) { + let factor = nhash.hash_2pow; + self.0 = self.0.wrapping_sub((byte as u32).wrapping_mul(factor)); + } +} + +/// Returns true if the given needle is a prefix of the given haystack. +/// +/// We forcefully don't inline the is_prefix call and hint at the compiler that +/// it is unlikely to be called. This causes the inner rabinkarp loop above +/// to be a bit tighter and leads to some performance improvement. See the +/// memmem/krate/prebuilt/sliceslice-words/words benchmark. +#[cold] +#[inline(never)] +fn is_prefix(haystack: &[u8], needle: &[u8]) -> bool { + crate::memmem::util::is_prefix(haystack, needle) +} + +/// Returns true if the given needle is a suffix of the given haystack. +/// +/// See is_prefix for why this is forcefully not inlined. +#[cold] +#[inline(never)] +fn is_suffix(haystack: &[u8], needle: &[u8]) -> bool { + crate::memmem::util::is_suffix(haystack, needle) +} + +#[cfg(test)] +mod simpletests { + define_memmem_simple_tests!(super::find, super::rfind); +} + +#[cfg(all(test, feature = "std", not(miri)))] +mod proptests { + define_memmem_quickcheck_tests!(super::find, super::rfind); +} diff --git a/third_party/rust/memchr/src/memmem/rarebytes.rs b/third_party/rust/memchr/src/memmem/rarebytes.rs new file mode 100644 index 0000000000..fb33f68945 --- /dev/null +++ b/third_party/rust/memchr/src/memmem/rarebytes.rs @@ -0,0 +1,136 @@ +/// A heuristic frequency based detection of rare bytes for substring search. +/// +/// This detector attempts to pick out two bytes in a needle that are predicted +/// to occur least frequently. The purpose is to use these bytes to implement +/// fast candidate search using vectorized code. +/// +/// A set of offsets is only computed for needles of length 2 or greater. +/// Smaller needles should be special cased by the substring search algorithm +/// in use. (e.g., Use memchr for single byte needles.) +/// +/// Note that we use `u8` to represent the offsets of the rare bytes in a +/// needle to reduce space usage. This means that rare byte occurring after the +/// first 255 bytes in a needle will never be used. +#[derive(Clone, Copy, Debug, Default)] +pub(crate) struct RareNeedleBytes { + /// The leftmost offset of the rarest byte in the needle, according to + /// pre-computed frequency analysis. The "leftmost offset" means that + /// rare1i <= i for all i where needle[i] == needle[rare1i]. + rare1i: u8, + /// The leftmost offset of the second rarest byte in the needle, according + /// to pre-computed frequency analysis. The "leftmost offset" means that + /// rare2i <= i for all i where needle[i] == needle[rare2i]. + /// + /// The second rarest byte is used as a type of guard for quickly detecting + /// a mismatch if the first byte matches. This is a hedge against + /// pathological cases where the pre-computed frequency analysis may be + /// off. (But of course, does not prevent *all* pathological cases.) + /// + /// In general, rare1i != rare2i by construction, although there is no hard + /// requirement that they be different. However, since the case of a single + /// byte needle is handled specially by memchr itself, rare2i generally + /// always should be different from rare1i since it would otherwise be + /// ineffective as a guard. + rare2i: u8, +} + +impl RareNeedleBytes { + /// Create a new pair of rare needle bytes with the given offsets. This is + /// only used in tests for generating input data. + #[cfg(all(test, feature = "std"))] + pub(crate) fn new(rare1i: u8, rare2i: u8) -> RareNeedleBytes { + RareNeedleBytes { rare1i, rare2i } + } + + /// Detect the leftmost offsets of the two rarest bytes in the given + /// needle. + pub(crate) fn forward(needle: &[u8]) -> RareNeedleBytes { + if needle.len() <= 1 || needle.len() > core::u8::MAX as usize { + // For needles bigger than u8::MAX, our offsets aren't big enough. + // (We make our offsets small to reduce stack copying.) + // If you have a use case for it, please file an issue. In that + // case, we should probably just adjust the routine below to pick + // some rare bytes from the first 255 bytes of the needle. + // + // Also note that for needles of size 0 or 1, they are special + // cased in Two-Way. + // + // TODO: Benchmar this. + return RareNeedleBytes { rare1i: 0, rare2i: 0 }; + } + + // Find the rarest two bytes. We make them distinct by construction. + let (mut rare1, mut rare1i) = (needle[0], 0); + let (mut rare2, mut rare2i) = (needle[1], 1); + if rank(rare2) < rank(rare1) { + core::mem::swap(&mut rare1, &mut rare2); + core::mem::swap(&mut rare1i, &mut rare2i); + } + for (i, &b) in needle.iter().enumerate().skip(2) { + if rank(b) < rank(rare1) { + rare2 = rare1; + rare2i = rare1i; + rare1 = b; + rare1i = i as u8; + } else if b != rare1 && rank(b) < rank(rare2) { + rare2 = b; + rare2i = i as u8; + } + } + // While not strictly required, we really don't want these to be + // equivalent. If they were, it would reduce the effectiveness of + // candidate searching using these rare bytes by increasing the rate of + // false positives. + assert_ne!(rare1i, rare2i); + RareNeedleBytes { rare1i, rare2i } + } + + /// Return the rare bytes in the given needle in the forward direction. + /// The needle given must be the same one given to the RareNeedleBytes + /// constructor. + pub(crate) fn as_rare_bytes(&self, needle: &[u8]) -> (u8, u8) { + (needle[self.rare1i as usize], needle[self.rare2i as usize]) + } + + /// Return the rare offsets such that the first offset is always <= to the + /// second offset. This is useful when the caller doesn't care whether + /// rare1 is rarer than rare2, but just wants to ensure that they are + /// ordered with respect to one another. + #[cfg(memchr_runtime_simd)] + pub(crate) fn as_rare_ordered_usize(&self) -> (usize, usize) { + let (rare1i, rare2i) = self.as_rare_ordered_u8(); + (rare1i as usize, rare2i as usize) + } + + /// Like as_rare_ordered_usize, but returns the offsets as their native + /// u8 values. + #[cfg(memchr_runtime_simd)] + pub(crate) fn as_rare_ordered_u8(&self) -> (u8, u8) { + if self.rare1i <= self.rare2i { + (self.rare1i, self.rare2i) + } else { + (self.rare2i, self.rare1i) + } + } + + /// Return the rare offsets as usize values in the order in which they were + /// constructed. rare1, for example, is constructed as the "rarer" byte, + /// and thus, callers may want to treat it differently from rare2. + pub(crate) fn as_rare_usize(&self) -> (usize, usize) { + (self.rare1i as usize, self.rare2i as usize) + } + + /// Return the byte frequency rank of each byte. The higher the rank, the + /// more frequency the byte is predicted to be. The needle given must be + /// the same one given to the RareNeedleBytes constructor. + pub(crate) fn as_ranks(&self, needle: &[u8]) -> (usize, usize) { + let (b1, b2) = self.as_rare_bytes(needle); + (rank(b1), rank(b2)) + } +} + +/// Return the heuristical frequency rank of the given byte. A lower rank +/// means the byte is believed to occur less frequently. +fn rank(b: u8) -> usize { + crate::memmem::byte_frequencies::BYTE_FREQUENCIES[b as usize] as usize +} diff --git a/third_party/rust/memchr/src/memmem/twoway.rs b/third_party/rust/memchr/src/memmem/twoway.rs new file mode 100644 index 0000000000..7f82ed15d9 --- /dev/null +++ b/third_party/rust/memchr/src/memmem/twoway.rs @@ -0,0 +1,878 @@ +use core::cmp; + +use crate::memmem::{prefilter::Pre, util}; + +/// Two-Way search in the forward direction. +#[derive(Clone, Copy, Debug)] +pub(crate) struct Forward(TwoWay); + +/// Two-Way search in the reverse direction. +#[derive(Clone, Copy, Debug)] +pub(crate) struct Reverse(TwoWay); + +/// An implementation of the TwoWay substring search algorithm, with heuristics +/// for accelerating search based on frequency analysis. +/// +/// This searcher supports forward and reverse search, although not +/// simultaneously. It runs in O(n + m) time and O(1) space, where +/// `n ~ len(needle)` and `m ~ len(haystack)`. +/// +/// The implementation here roughly matches that which was developed by +/// Crochemore and Perrin in their 1991 paper "Two-way string-matching." The +/// changes in this implementation are 1) the use of zero-based indices, 2) a +/// heuristic skip table based on the last byte (borrowed from Rust's standard +/// library) and 3) the addition of heuristics for a fast skip loop. That is, +/// (3) this will detect bytes that are believed to be rare in the needle and +/// use fast vectorized instructions to find their occurrences quickly. The +/// Two-Way algorithm is then used to confirm whether a match at that location +/// occurred. +/// +/// The heuristic for fast skipping is automatically shut off if it's +/// detected to be ineffective at search time. Generally, this only occurs in +/// pathological cases. But this is generally necessary in order to preserve +/// a `O(n + m)` time bound. +/// +/// The code below is fairly complex and not obviously correct at all. It's +/// likely necessary to read the Two-Way paper cited above in order to fully +/// grok this code. The essence of it is: +/// +/// 1) Do something to detect a "critical" position in the needle. +/// 2) For the current position in the haystack, look if needle[critical..] +/// matches at that position. +/// 3) If so, look if needle[..critical] matches. +/// 4) If a mismatch occurs, shift the search by some amount based on the +/// critical position and a pre-computed shift. +/// +/// This type is wrapped in Forward and Reverse types that expose consistent +/// forward or reverse APIs. +#[derive(Clone, Copy, Debug)] +struct TwoWay { + /// A small bitset used as a quick prefilter (in addition to the faster + /// SIMD based prefilter). Namely, a bit 'i' is set if and only if b%64==i + /// for any b in the needle. + /// + /// When used as a prefilter, if the last byte at the current candidate + /// position is NOT in this set, then we can skip that entire candidate + /// position (the length of the needle). This is essentially the shift + /// trick found in Boyer-Moore, but only applied to bytes that don't appear + /// in the needle. + /// + /// N.B. This trick was inspired by something similar in std's + /// implementation of Two-Way. + byteset: ApproximateByteSet, + /// A critical position in needle. Specifically, this position corresponds + /// to beginning of either the minimal or maximal suffix in needle. (N.B. + /// See SuffixType below for why "minimal" isn't quite the correct word + /// here.) + /// + /// This is the position at which every search begins. Namely, search + /// starts by scanning text to the right of this position, and only if + /// there's a match does the text to the left of this position get scanned. + critical_pos: usize, + /// The amount we shift by in the Two-Way search algorithm. This + /// corresponds to the "small period" and "large period" cases. + shift: Shift, +} + +impl Forward { + /// Create a searcher that uses the Two-Way algorithm by searching forwards + /// through any haystack. + pub(crate) fn new(needle: &[u8]) -> Forward { + if needle.is_empty() { + return Forward(TwoWay::empty()); + } + + let byteset = ApproximateByteSet::new(needle); + let min_suffix = Suffix::forward(needle, SuffixKind::Minimal); + let max_suffix = Suffix::forward(needle, SuffixKind::Maximal); + let (period_lower_bound, critical_pos) = + if min_suffix.pos > max_suffix.pos { + (min_suffix.period, min_suffix.pos) + } else { + (max_suffix.period, max_suffix.pos) + }; + let shift = Shift::forward(needle, period_lower_bound, critical_pos); + Forward(TwoWay { byteset, critical_pos, shift }) + } + + /// Find the position of the first occurrence of this searcher's needle in + /// the given haystack. If one does not exist, then return None. + /// + /// This accepts prefilter state that is useful when using the same + /// searcher multiple times, such as in an iterator. + /// + /// Callers must guarantee that the needle is non-empty and its length is + /// <= the haystack's length. + #[inline(always)] + pub(crate) fn find( + &self, + pre: Option<&mut Pre<'_>>, + haystack: &[u8], + needle: &[u8], + ) -> Option<usize> { + debug_assert!(!needle.is_empty(), "needle should not be empty"); + debug_assert!(needle.len() <= haystack.len(), "haystack too short"); + + match self.0.shift { + Shift::Small { period } => { + self.find_small_imp(pre, haystack, needle, period) + } + Shift::Large { shift } => { + self.find_large_imp(pre, haystack, needle, shift) + } + } + } + + /// Like find, but handles the degenerate substring test cases. This is + /// only useful for conveniently testing this substring implementation in + /// isolation. + #[cfg(test)] + fn find_general( + &self, + pre: Option<&mut Pre<'_>>, + haystack: &[u8], + needle: &[u8], + ) -> Option<usize> { + if needle.is_empty() { + Some(0) + } else if haystack.len() < needle.len() { + None + } else { + self.find(pre, haystack, needle) + } + } + + // Each of the two search implementations below can be accelerated by a + // prefilter, but it is not always enabled. To avoid its overhead when + // its disabled, we explicitly inline each search implementation based on + // whether a prefilter will be used or not. The decision on which to use + // is made in the parent meta searcher. + + #[inline(always)] + fn find_small_imp( + &self, + mut pre: Option<&mut Pre<'_>>, + haystack: &[u8], + needle: &[u8], + period: usize, + ) -> Option<usize> { + let last_byte = needle.len() - 1; + let mut pos = 0; + let mut shift = 0; + while pos + needle.len() <= haystack.len() { + let mut i = cmp::max(self.0.critical_pos, shift); + if let Some(pre) = pre.as_mut() { + if pre.should_call() { + pos += pre.call(&haystack[pos..], needle)?; + shift = 0; + i = self.0.critical_pos; + if pos + needle.len() > haystack.len() { + return None; + } + } + } + if !self.0.byteset.contains(haystack[pos + last_byte]) { + pos += needle.len(); + shift = 0; + continue; + } + while i < needle.len() && needle[i] == haystack[pos + i] { + i += 1; + } + if i < needle.len() { + pos += i - self.0.critical_pos + 1; + shift = 0; + } else { + let mut j = self.0.critical_pos; + while j > shift && needle[j] == haystack[pos + j] { + j -= 1; + } + if j <= shift && needle[shift] == haystack[pos + shift] { + return Some(pos); + } + pos += period; + shift = needle.len() - period; + } + } + None + } + + #[inline(always)] + fn find_large_imp( + &self, + mut pre: Option<&mut Pre<'_>>, + haystack: &[u8], + needle: &[u8], + shift: usize, + ) -> Option<usize> { + let last_byte = needle.len() - 1; + let mut pos = 0; + 'outer: while pos + needle.len() <= haystack.len() { + if let Some(pre) = pre.as_mut() { + if pre.should_call() { + pos += pre.call(&haystack[pos..], needle)?; + if pos + needle.len() > haystack.len() { + return None; + } + } + } + + if !self.0.byteset.contains(haystack[pos + last_byte]) { + pos += needle.len(); + continue; + } + let mut i = self.0.critical_pos; + while i < needle.len() && needle[i] == haystack[pos + i] { + i += 1; + } + if i < needle.len() { + pos += i - self.0.critical_pos + 1; + } else { + for j in (0..self.0.critical_pos).rev() { + if needle[j] != haystack[pos + j] { + pos += shift; + continue 'outer; + } + } + return Some(pos); + } + } + None + } +} + +impl Reverse { + /// Create a searcher that uses the Two-Way algorithm by searching in + /// reverse through any haystack. + pub(crate) fn new(needle: &[u8]) -> Reverse { + if needle.is_empty() { + return Reverse(TwoWay::empty()); + } + + let byteset = ApproximateByteSet::new(needle); + let min_suffix = Suffix::reverse(needle, SuffixKind::Minimal); + let max_suffix = Suffix::reverse(needle, SuffixKind::Maximal); + let (period_lower_bound, critical_pos) = + if min_suffix.pos < max_suffix.pos { + (min_suffix.period, min_suffix.pos) + } else { + (max_suffix.period, max_suffix.pos) + }; + // let critical_pos = needle.len() - critical_pos; + let shift = Shift::reverse(needle, period_lower_bound, critical_pos); + Reverse(TwoWay { byteset, critical_pos, shift }) + } + + /// Find the position of the last occurrence of this searcher's needle + /// in the given haystack. If one does not exist, then return None. + /// + /// This will automatically initialize prefilter state. This should only + /// be used for one-off searches. + /// + /// Callers must guarantee that the needle is non-empty and its length is + /// <= the haystack's length. + #[inline(always)] + pub(crate) fn rfind( + &self, + haystack: &[u8], + needle: &[u8], + ) -> Option<usize> { + debug_assert!(!needle.is_empty(), "needle should not be empty"); + debug_assert!(needle.len() <= haystack.len(), "haystack too short"); + // For the reverse case, we don't use a prefilter. It's plausible that + // perhaps we should, but it's a lot of additional code to do it, and + // it's not clear that it's actually worth it. If you have a really + // compelling use case for this, please file an issue. + match self.0.shift { + Shift::Small { period } => { + self.rfind_small_imp(haystack, needle, period) + } + Shift::Large { shift } => { + self.rfind_large_imp(haystack, needle, shift) + } + } + } + + /// Like rfind, but handles the degenerate substring test cases. This is + /// only useful for conveniently testing this substring implementation in + /// isolation. + #[cfg(test)] + fn rfind_general(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> { + if needle.is_empty() { + Some(haystack.len()) + } else if haystack.len() < needle.len() { + None + } else { + self.rfind(haystack, needle) + } + } + + #[inline(always)] + fn rfind_small_imp( + &self, + haystack: &[u8], + needle: &[u8], + period: usize, + ) -> Option<usize> { + let nlen = needle.len(); + let mut pos = haystack.len(); + let mut shift = nlen; + while pos >= nlen { + if !self.0.byteset.contains(haystack[pos - nlen]) { + pos -= nlen; + shift = nlen; + continue; + } + let mut i = cmp::min(self.0.critical_pos, shift); + while i > 0 && needle[i - 1] == haystack[pos - nlen + i - 1] { + i -= 1; + } + if i > 0 || needle[0] != haystack[pos - nlen] { + pos -= self.0.critical_pos - i + 1; + shift = nlen; + } else { + let mut j = self.0.critical_pos; + while j < shift && needle[j] == haystack[pos - nlen + j] { + j += 1; + } + if j >= shift { + return Some(pos - nlen); + } + pos -= period; + shift = period; + } + } + None + } + + #[inline(always)] + fn rfind_large_imp( + &self, + haystack: &[u8], + needle: &[u8], + shift: usize, + ) -> Option<usize> { + let nlen = needle.len(); + let mut pos = haystack.len(); + while pos >= nlen { + if !self.0.byteset.contains(haystack[pos - nlen]) { + pos -= nlen; + continue; + } + let mut i = self.0.critical_pos; + while i > 0 && needle[i - 1] == haystack[pos - nlen + i - 1] { + i -= 1; + } + if i > 0 || needle[0] != haystack[pos - nlen] { + pos -= self.0.critical_pos - i + 1; + } else { + let mut j = self.0.critical_pos; + while j < nlen && needle[j] == haystack[pos - nlen + j] { + j += 1; + } + if j == nlen { + return Some(pos - nlen); + } + pos -= shift; + } + } + None + } +} + +impl TwoWay { + fn empty() -> TwoWay { + TwoWay { + byteset: ApproximateByteSet::new(b""), + critical_pos: 0, + shift: Shift::Large { shift: 0 }, + } + } +} + +/// A representation of the amount we're allowed to shift by during Two-Way +/// search. +/// +/// When computing a critical factorization of the needle, we find the position +/// of the critical factorization by finding the needle's maximal (or minimal) +/// suffix, along with the period of that suffix. It turns out that the period +/// of that suffix is a lower bound on the period of the needle itself. +/// +/// This lower bound is equivalent to the actual period of the needle in +/// some cases. To describe that case, we denote the needle as `x` where +/// `x = uv` and `v` is the lexicographic maximal suffix of `v`. The lower +/// bound given here is always the period of `v`, which is `<= period(x)`. The +/// case where `period(v) == period(x)` occurs when `len(u) < (len(x) / 2)` and +/// where `u` is a suffix of `v[0..period(v)]`. +/// +/// This case is important because the search algorithm for when the +/// periods are equivalent is slightly different than the search algorithm +/// for when the periods are not equivalent. In particular, when they aren't +/// equivalent, we know that the period of the needle is no less than half its +/// length. In this case, we shift by an amount less than or equal to the +/// period of the needle (determined by the maximum length of the components +/// of the critical factorization of `x`, i.e., `max(len(u), len(v))`).. +/// +/// The above two cases are represented by the variants below. Each entails +/// a different instantiation of the Two-Way search algorithm. +/// +/// N.B. If we could find a way to compute the exact period in all cases, +/// then we could collapse this case analysis and simplify the algorithm. The +/// Two-Way paper suggests this is possible, but more reading is required to +/// grok why the authors didn't pursue that path. +#[derive(Clone, Copy, Debug)] +enum Shift { + Small { period: usize }, + Large { shift: usize }, +} + +impl Shift { + /// Compute the shift for a given needle in the forward direction. + /// + /// This requires a lower bound on the period and a critical position. + /// These can be computed by extracting both the minimal and maximal + /// lexicographic suffixes, and choosing the right-most starting position. + /// The lower bound on the period is then the period of the chosen suffix. + fn forward( + needle: &[u8], + period_lower_bound: usize, + critical_pos: usize, + ) -> Shift { + let large = cmp::max(critical_pos, needle.len() - critical_pos); + if critical_pos * 2 >= needle.len() { + return Shift::Large { shift: large }; + } + + let (u, v) = needle.split_at(critical_pos); + if !util::is_suffix(&v[..period_lower_bound], u) { + return Shift::Large { shift: large }; + } + Shift::Small { period: period_lower_bound } + } + + /// Compute the shift for a given needle in the reverse direction. + /// + /// This requires a lower bound on the period and a critical position. + /// These can be computed by extracting both the minimal and maximal + /// lexicographic suffixes, and choosing the left-most starting position. + /// The lower bound on the period is then the period of the chosen suffix. + fn reverse( + needle: &[u8], + period_lower_bound: usize, + critical_pos: usize, + ) -> Shift { + let large = cmp::max(critical_pos, needle.len() - critical_pos); + if (needle.len() - critical_pos) * 2 >= needle.len() { + return Shift::Large { shift: large }; + } + + let (v, u) = needle.split_at(critical_pos); + if !util::is_prefix(&v[v.len() - period_lower_bound..], u) { + return Shift::Large { shift: large }; + } + Shift::Small { period: period_lower_bound } + } +} + +/// A suffix extracted from a needle along with its period. +#[derive(Debug)] +struct Suffix { + /// The starting position of this suffix. + /// + /// If this is a forward suffix, then `&bytes[pos..]` can be used. If this + /// is a reverse suffix, then `&bytes[..pos]` can be used. That is, for + /// forward suffixes, this is an inclusive starting position, where as for + /// reverse suffixes, this is an exclusive ending position. + pos: usize, + /// The period of this suffix. + /// + /// Note that this is NOT necessarily the period of the string from which + /// this suffix comes from. (It is always less than or equal to the period + /// of the original string.) + period: usize, +} + +impl Suffix { + fn forward(needle: &[u8], kind: SuffixKind) -> Suffix { + debug_assert!(!needle.is_empty()); + + // suffix represents our maximal (or minimal) suffix, along with + // its period. + let mut suffix = Suffix { pos: 0, period: 1 }; + // The start of a suffix in `needle` that we are considering as a + // more maximal (or minimal) suffix than what's in `suffix`. + let mut candidate_start = 1; + // The current offset of our suffixes that we're comparing. + // + // When the characters at this offset are the same, then we mush on + // to the next position since no decision is possible. When the + // candidate's character is greater (or lesser) than the corresponding + // character than our current maximal (or minimal) suffix, then the + // current suffix is changed over to the candidate and we restart our + // search. Otherwise, the candidate suffix is no good and we restart + // our search on the next candidate. + // + // The three cases above correspond to the three cases in the loop + // below. + let mut offset = 0; + + while candidate_start + offset < needle.len() { + let current = needle[suffix.pos + offset]; + let candidate = needle[candidate_start + offset]; + match kind.cmp(current, candidate) { + SuffixOrdering::Accept => { + suffix = Suffix { pos: candidate_start, period: 1 }; + candidate_start += 1; + offset = 0; + } + SuffixOrdering::Skip => { + candidate_start += offset + 1; + offset = 0; + suffix.period = candidate_start - suffix.pos; + } + SuffixOrdering::Push => { + if offset + 1 == suffix.period { + candidate_start += suffix.period; + offset = 0; + } else { + offset += 1; + } + } + } + } + suffix + } + + fn reverse(needle: &[u8], kind: SuffixKind) -> Suffix { + debug_assert!(!needle.is_empty()); + + // See the comments in `forward` for how this works. + let mut suffix = Suffix { pos: needle.len(), period: 1 }; + if needle.len() == 1 { + return suffix; + } + let mut candidate_start = needle.len() - 1; + let mut offset = 0; + + while offset < candidate_start { + let current = needle[suffix.pos - offset - 1]; + let candidate = needle[candidate_start - offset - 1]; + match kind.cmp(current, candidate) { + SuffixOrdering::Accept => { + suffix = Suffix { pos: candidate_start, period: 1 }; + candidate_start -= 1; + offset = 0; + } + SuffixOrdering::Skip => { + candidate_start -= offset + 1; + offset = 0; + suffix.period = suffix.pos - candidate_start; + } + SuffixOrdering::Push => { + if offset + 1 == suffix.period { + candidate_start -= suffix.period; + offset = 0; + } else { + offset += 1; + } + } + } + } + suffix + } +} + +/// The kind of suffix to extract. +#[derive(Clone, Copy, Debug)] +enum SuffixKind { + /// Extract the smallest lexicographic suffix from a string. + /// + /// Technically, this doesn't actually pick the smallest lexicographic + /// suffix. e.g., Given the choice between `a` and `aa`, this will choose + /// the latter over the former, even though `a < aa`. The reasoning for + /// this isn't clear from the paper, but it still smells like a minimal + /// suffix. + Minimal, + /// Extract the largest lexicographic suffix from a string. + /// + /// Unlike `Minimal`, this really does pick the maximum suffix. e.g., Given + /// the choice between `z` and `zz`, this will choose the latter over the + /// former. + Maximal, +} + +/// The result of comparing corresponding bytes between two suffixes. +#[derive(Clone, Copy, Debug)] +enum SuffixOrdering { + /// This occurs when the given candidate byte indicates that the candidate + /// suffix is better than the current maximal (or minimal) suffix. That is, + /// the current candidate suffix should supplant the current maximal (or + /// minimal) suffix. + Accept, + /// This occurs when the given candidate byte excludes the candidate suffix + /// from being better than the current maximal (or minimal) suffix. That + /// is, the current candidate suffix should be dropped and the next one + /// should be considered. + Skip, + /// This occurs when no decision to accept or skip the candidate suffix + /// can be made, e.g., when corresponding bytes are equivalent. In this + /// case, the next corresponding bytes should be compared. + Push, +} + +impl SuffixKind { + /// Returns true if and only if the given candidate byte indicates that + /// it should replace the current suffix as the maximal (or minimal) + /// suffix. + fn cmp(self, current: u8, candidate: u8) -> SuffixOrdering { + use self::SuffixOrdering::*; + + match self { + SuffixKind::Minimal if candidate < current => Accept, + SuffixKind::Minimal if candidate > current => Skip, + SuffixKind::Minimal => Push, + SuffixKind::Maximal if candidate > current => Accept, + SuffixKind::Maximal if candidate < current => Skip, + SuffixKind::Maximal => Push, + } + } +} + +/// A bitset used to track whether a particular byte exists in a needle or not. +/// +/// Namely, bit 'i' is set if and only if byte%64==i for any byte in the +/// needle. If a particular byte in the haystack is NOT in this set, then one +/// can conclude that it is also not in the needle, and thus, one can advance +/// in the haystack by needle.len() bytes. +#[derive(Clone, Copy, Debug)] +struct ApproximateByteSet(u64); + +impl ApproximateByteSet { + /// Create a new set from the given needle. + fn new(needle: &[u8]) -> ApproximateByteSet { + let mut bits = 0; + for &b in needle { + bits |= 1 << (b % 64); + } + ApproximateByteSet(bits) + } + + /// Return true if and only if the given byte might be in this set. This + /// may return a false positive, but will never return a false negative. + #[inline(always)] + fn contains(&self, byte: u8) -> bool { + self.0 & (1 << (byte % 64)) != 0 + } +} + +#[cfg(all(test, feature = "std", not(miri)))] +mod tests { + use quickcheck::quickcheck; + + use super::*; + + define_memmem_quickcheck_tests!( + super::simpletests::twoway_find, + super::simpletests::twoway_rfind + ); + + /// Convenience wrapper for computing the suffix as a byte string. + fn get_suffix_forward(needle: &[u8], kind: SuffixKind) -> (&[u8], usize) { + let s = Suffix::forward(needle, kind); + (&needle[s.pos..], s.period) + } + + /// Convenience wrapper for computing the reverse suffix as a byte string. + fn get_suffix_reverse(needle: &[u8], kind: SuffixKind) -> (&[u8], usize) { + let s = Suffix::reverse(needle, kind); + (&needle[..s.pos], s.period) + } + + /// Return all of the non-empty suffixes in the given byte string. + fn suffixes(bytes: &[u8]) -> Vec<&[u8]> { + (0..bytes.len()).map(|i| &bytes[i..]).collect() + } + + /// Return the lexicographically maximal suffix of the given byte string. + fn naive_maximal_suffix_forward(needle: &[u8]) -> &[u8] { + let mut sufs = suffixes(needle); + sufs.sort(); + sufs.pop().unwrap() + } + + /// Return the lexicographically maximal suffix of the reverse of the given + /// byte string. + fn naive_maximal_suffix_reverse(needle: &[u8]) -> Vec<u8> { + let mut reversed = needle.to_vec(); + reversed.reverse(); + let mut got = naive_maximal_suffix_forward(&reversed).to_vec(); + got.reverse(); + got + } + + #[test] + fn suffix_forward() { + macro_rules! assert_suffix_min { + ($given:expr, $expected:expr, $period:expr) => { + let (got_suffix, got_period) = + get_suffix_forward($given.as_bytes(), SuffixKind::Minimal); + let got_suffix = std::str::from_utf8(got_suffix).unwrap(); + assert_eq!(($expected, $period), (got_suffix, got_period)); + }; + } + + macro_rules! assert_suffix_max { + ($given:expr, $expected:expr, $period:expr) => { + let (got_suffix, got_period) = + get_suffix_forward($given.as_bytes(), SuffixKind::Maximal); + let got_suffix = std::str::from_utf8(got_suffix).unwrap(); + assert_eq!(($expected, $period), (got_suffix, got_period)); + }; + } + + assert_suffix_min!("a", "a", 1); + assert_suffix_max!("a", "a", 1); + + assert_suffix_min!("ab", "ab", 2); + assert_suffix_max!("ab", "b", 1); + + assert_suffix_min!("ba", "a", 1); + assert_suffix_max!("ba", "ba", 2); + + assert_suffix_min!("abc", "abc", 3); + assert_suffix_max!("abc", "c", 1); + + assert_suffix_min!("acb", "acb", 3); + assert_suffix_max!("acb", "cb", 2); + + assert_suffix_min!("cba", "a", 1); + assert_suffix_max!("cba", "cba", 3); + + assert_suffix_min!("abcabc", "abcabc", 3); + assert_suffix_max!("abcabc", "cabc", 3); + + assert_suffix_min!("abcabcabc", "abcabcabc", 3); + assert_suffix_max!("abcabcabc", "cabcabc", 3); + + assert_suffix_min!("abczz", "abczz", 5); + assert_suffix_max!("abczz", "zz", 1); + + assert_suffix_min!("zzabc", "abc", 3); + assert_suffix_max!("zzabc", "zzabc", 5); + + assert_suffix_min!("aaa", "aaa", 1); + assert_suffix_max!("aaa", "aaa", 1); + + assert_suffix_min!("foobar", "ar", 2); + assert_suffix_max!("foobar", "r", 1); + } + + #[test] + fn suffix_reverse() { + macro_rules! assert_suffix_min { + ($given:expr, $expected:expr, $period:expr) => { + let (got_suffix, got_period) = + get_suffix_reverse($given.as_bytes(), SuffixKind::Minimal); + let got_suffix = std::str::from_utf8(got_suffix).unwrap(); + assert_eq!(($expected, $period), (got_suffix, got_period)); + }; + } + + macro_rules! assert_suffix_max { + ($given:expr, $expected:expr, $period:expr) => { + let (got_suffix, got_period) = + get_suffix_reverse($given.as_bytes(), SuffixKind::Maximal); + let got_suffix = std::str::from_utf8(got_suffix).unwrap(); + assert_eq!(($expected, $period), (got_suffix, got_period)); + }; + } + + assert_suffix_min!("a", "a", 1); + assert_suffix_max!("a", "a", 1); + + assert_suffix_min!("ab", "a", 1); + assert_suffix_max!("ab", "ab", 2); + + assert_suffix_min!("ba", "ba", 2); + assert_suffix_max!("ba", "b", 1); + + assert_suffix_min!("abc", "a", 1); + assert_suffix_max!("abc", "abc", 3); + + assert_suffix_min!("acb", "a", 1); + assert_suffix_max!("acb", "ac", 2); + + assert_suffix_min!("cba", "cba", 3); + assert_suffix_max!("cba", "c", 1); + + assert_suffix_min!("abcabc", "abca", 3); + assert_suffix_max!("abcabc", "abcabc", 3); + + assert_suffix_min!("abcabcabc", "abcabca", 3); + assert_suffix_max!("abcabcabc", "abcabcabc", 3); + + assert_suffix_min!("abczz", "a", 1); + assert_suffix_max!("abczz", "abczz", 5); + + assert_suffix_min!("zzabc", "zza", 3); + assert_suffix_max!("zzabc", "zz", 1); + + assert_suffix_min!("aaa", "aaa", 1); + assert_suffix_max!("aaa", "aaa", 1); + } + + quickcheck! { + fn qc_suffix_forward_maximal(bytes: Vec<u8>) -> bool { + if bytes.is_empty() { + return true; + } + + let (got, _) = get_suffix_forward(&bytes, SuffixKind::Maximal); + let expected = naive_maximal_suffix_forward(&bytes); + got == expected + } + + fn qc_suffix_reverse_maximal(bytes: Vec<u8>) -> bool { + if bytes.is_empty() { + return true; + } + + let (got, _) = get_suffix_reverse(&bytes, SuffixKind::Maximal); + let expected = naive_maximal_suffix_reverse(&bytes); + expected == got + } + } +} + +#[cfg(test)] +mod simpletests { + use super::*; + + pub(crate) fn twoway_find( + haystack: &[u8], + needle: &[u8], + ) -> Option<usize> { + Forward::new(needle).find_general(None, haystack, needle) + } + + pub(crate) fn twoway_rfind( + haystack: &[u8], + needle: &[u8], + ) -> Option<usize> { + Reverse::new(needle).rfind_general(haystack, needle) + } + + define_memmem_simple_tests!(twoway_find, twoway_rfind); + + // This is a regression test caught by quickcheck that exercised a bug in + // the reverse small period handling. The bug was that we were using 'if j + // == shift' to determine if a match occurred, but the correct guard is 'if + // j >= shift', which matches the corresponding guard in the forward impl. + #[test] + fn regression_rev_small_period() { + let rfind = super::simpletests::twoway_rfind; + let haystack = "ababaz"; + let needle = "abab"; + assert_eq!(Some(0), rfind(haystack.as_bytes(), needle.as_bytes())); + } +} diff --git a/third_party/rust/memchr/src/memmem/util.rs b/third_party/rust/memchr/src/memmem/util.rs new file mode 100644 index 0000000000..de0e385e18 --- /dev/null +++ b/third_party/rust/memchr/src/memmem/util.rs @@ -0,0 +1,88 @@ +// These routines are meant to be optimized specifically for low latency as +// compared to the equivalent routines offered by std. (Which may invoke the +// dynamic linker and call out to libc, which introduces a bit more latency +// than we'd like.) + +/// Returns true if and only if needle is a prefix of haystack. +#[inline(always)] +pub(crate) fn is_prefix(haystack: &[u8], needle: &[u8]) -> bool { + needle.len() <= haystack.len() && memcmp(&haystack[..needle.len()], needle) +} + +/// Returns true if and only if needle is a suffix of haystack. +#[inline(always)] +pub(crate) fn is_suffix(haystack: &[u8], needle: &[u8]) -> bool { + needle.len() <= haystack.len() + && memcmp(&haystack[haystack.len() - needle.len()..], needle) +} + +/// Return true if and only if x.len() == y.len() && x[i] == y[i] for all +/// 0 <= i < x.len(). +/// +/// Why not just use actual memcmp for this? Well, memcmp requires calling out +/// to libc, and this routine is called in fairly hot code paths. Other than +/// just calling out to libc, it also seems to result in worse codegen. By +/// rolling our own memcmp in pure Rust, it seems to appear more friendly to +/// the optimizer. +/// +/// We mark this as inline always, although, some callers may not want it +/// inlined for better codegen (like Rabin-Karp). In that case, callers are +/// advised to create a non-inlineable wrapper routine that calls memcmp. +#[inline(always)] +pub(crate) fn memcmp(x: &[u8], y: &[u8]) -> bool { + if x.len() != y.len() { + return false; + } + // If we don't have enough bytes to do 4-byte at a time loads, then + // fall back to the naive slow version. + // + // TODO: We could do a copy_nonoverlapping combined with a mask instead + // of a loop. Benchmark it. + if x.len() < 4 { + for (&b1, &b2) in x.iter().zip(y) { + if b1 != b2 { + return false; + } + } + return true; + } + // When we have 4 or more bytes to compare, then proceed in chunks of 4 at + // a time using unaligned loads. + // + // Also, why do 4 byte loads instead of, say, 8 byte loads? The reason is + // that this particular version of memcmp is likely to be called with tiny + // needles. That means that if we do 8 byte loads, then a higher proportion + // of memcmp calls will use the slower variant above. With that said, this + // is a hypothesis and is only loosely supported by benchmarks. There's + // likely some improvement that could be made here. The main thing here + // though is to optimize for latency, not throughput. + + // SAFETY: Via the conditional above, we know that both `px` and `py` + // have the same length, so `px < pxend` implies that `py < pyend`. + // Thus, derefencing both `px` and `py` in the loop below is safe. + // + // Moreover, we set `pxend` and `pyend` to be 4 bytes before the actual + // end of of `px` and `py`. Thus, the final dereference outside of the + // loop is guaranteed to be valid. (The final comparison will overlap with + // the last comparison done in the loop for lengths that aren't multiples + // of four.) + // + // Finally, we needn't worry about alignment here, since we do unaligned + // loads. + unsafe { + let (mut px, mut py) = (x.as_ptr(), y.as_ptr()); + let (pxend, pyend) = (px.add(x.len() - 4), py.add(y.len() - 4)); + while px < pxend { + let vx = (px as *const u32).read_unaligned(); + let vy = (py as *const u32).read_unaligned(); + if vx != vy { + return false; + } + px = px.add(4); + py = py.add(4); + } + let vx = (pxend as *const u32).read_unaligned(); + let vy = (pyend as *const u32).read_unaligned(); + vx == vy + } +} diff --git a/third_party/rust/memchr/src/memmem/vector.rs b/third_party/rust/memchr/src/memmem/vector.rs new file mode 100644 index 0000000000..b81165f8bc --- /dev/null +++ b/third_party/rust/memchr/src/memmem/vector.rs @@ -0,0 +1,131 @@ +/// A trait for describing vector operations used by vectorized searchers. +/// +/// The trait is highly constrained to low level vector operations needed. In +/// general, it was invented mostly to be generic over x86's __m128i and +/// __m256i types. It's likely that once std::simd becomes a thing, we can +/// migrate to that since the operations required are quite simple. +/// +/// TODO: Consider moving this trait up a level and using it to implement +/// memchr as well. The trait might need to grow one or two methods, but +/// otherwise should be close to sufficient already. +/// +/// # Safety +/// +/// All methods are not safe since they are intended to be implemented using +/// vendor intrinsics, which are also not safe. Callers must ensure that the +/// appropriate target features are enabled in the calling function, and that +/// the current CPU supports them. All implementations should avoid marking the +/// routines with #[target_feature] and instead mark them as #[inline(always)] +/// to ensure they get appropriately inlined. (inline(always) cannot be used +/// with target_feature.) +pub(crate) trait Vector: Copy + core::fmt::Debug { + /// _mm_set1_epi8 or _mm256_set1_epi8 + unsafe fn splat(byte: u8) -> Self; + /// _mm_loadu_si128 or _mm256_loadu_si256 + unsafe fn load_unaligned(data: *const u8) -> Self; + /// _mm_movemask_epi8 or _mm256_movemask_epi8 + unsafe fn movemask(self) -> u32; + /// _mm_cmpeq_epi8 or _mm256_cmpeq_epi8 + unsafe fn cmpeq(self, vector2: Self) -> Self; + /// _mm_and_si128 or _mm256_and_si256 + unsafe fn and(self, vector2: Self) -> Self; +} + +#[cfg(target_arch = "x86_64")] +mod x86sse { + use super::Vector; + use core::arch::x86_64::*; + + impl Vector for __m128i { + #[inline(always)] + unsafe fn splat(byte: u8) -> __m128i { + _mm_set1_epi8(byte as i8) + } + + #[inline(always)] + unsafe fn load_unaligned(data: *const u8) -> __m128i { + _mm_loadu_si128(data as *const __m128i) + } + + #[inline(always)] + unsafe fn movemask(self) -> u32 { + _mm_movemask_epi8(self) as u32 + } + + #[inline(always)] + unsafe fn cmpeq(self, vector2: Self) -> __m128i { + _mm_cmpeq_epi8(self, vector2) + } + + #[inline(always)] + unsafe fn and(self, vector2: Self) -> __m128i { + _mm_and_si128(self, vector2) + } + } +} + +#[cfg(all(feature = "std", target_arch = "x86_64"))] +mod x86avx { + use super::Vector; + use core::arch::x86_64::*; + + impl Vector for __m256i { + #[inline(always)] + unsafe fn splat(byte: u8) -> __m256i { + _mm256_set1_epi8(byte as i8) + } + + #[inline(always)] + unsafe fn load_unaligned(data: *const u8) -> __m256i { + _mm256_loadu_si256(data as *const __m256i) + } + + #[inline(always)] + unsafe fn movemask(self) -> u32 { + _mm256_movemask_epi8(self) as u32 + } + + #[inline(always)] + unsafe fn cmpeq(self, vector2: Self) -> __m256i { + _mm256_cmpeq_epi8(self, vector2) + } + + #[inline(always)] + unsafe fn and(self, vector2: Self) -> __m256i { + _mm256_and_si256(self, vector2) + } + } +} + +#[cfg(target_arch = "wasm32")] +mod wasm_simd128 { + use super::Vector; + use core::arch::wasm32::*; + + impl Vector for v128 { + #[inline(always)] + unsafe fn splat(byte: u8) -> v128 { + u8x16_splat(byte) + } + + #[inline(always)] + unsafe fn load_unaligned(data: *const u8) -> v128 { + v128_load(data.cast()) + } + + #[inline(always)] + unsafe fn movemask(self) -> u32 { + u8x16_bitmask(self).into() + } + + #[inline(always)] + unsafe fn cmpeq(self, vector2: Self) -> v128 { + u8x16_eq(self, vector2) + } + + #[inline(always)] + unsafe fn and(self, vector2: Self) -> v128 { + v128_and(self, vector2) + } + } +} diff --git a/third_party/rust/memchr/src/memmem/wasm.rs b/third_party/rust/memchr/src/memmem/wasm.rs new file mode 100644 index 0000000000..4e3ea985c1 --- /dev/null +++ b/third_party/rust/memchr/src/memmem/wasm.rs @@ -0,0 +1,75 @@ +use core::arch::wasm32::v128; + +use crate::memmem::{genericsimd, NeedleInfo}; + +/// A `v128` accelerated vectorized substring search routine that only works on +/// small needles. +#[derive(Clone, Copy, Debug)] +pub(crate) struct Forward(genericsimd::Forward); + +impl Forward { + /// Create a new "generic simd" forward searcher. If one could not be + /// created from the given inputs, then None is returned. + pub(crate) fn new(ninfo: &NeedleInfo, needle: &[u8]) -> Option<Forward> { + if !cfg!(memchr_runtime_simd) { + return None; + } + genericsimd::Forward::new(ninfo, needle).map(Forward) + } + + /// Returns the minimum length of haystack that is needed for this searcher + /// to work. Passing a haystack with a length smaller than this will cause + /// `find` to panic. + #[inline(always)] + pub(crate) fn min_haystack_len(&self) -> usize { + self.0.min_haystack_len::<v128>() + } + + #[inline(always)] + pub(crate) fn find( + &self, + haystack: &[u8], + needle: &[u8], + ) -> Option<usize> { + self.find_impl(haystack, needle) + } + + /// The implementation of find marked with the appropriate target feature. + #[target_feature(enable = "simd128")] + fn find_impl(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> { + unsafe { genericsimd::fwd_find::<v128>(&self.0, haystack, needle) } + } +} + +#[cfg(all(test, feature = "std", not(miri)))] +mod tests { + use crate::memmem::{prefilter::PrefilterState, NeedleInfo}; + + fn find( + _: &mut PrefilterState, + ninfo: &NeedleInfo, + haystack: &[u8], + needle: &[u8], + ) -> Option<usize> { + super::Forward::new(ninfo, needle).unwrap().find(haystack, needle) + } + + #[test] + fn prefilter_permutations() { + use crate::memmem::prefilter::tests::PrefilterTest; + + unsafe { + PrefilterTest::run_all_tests_filter(find, |t| { + // This substring searcher only works on certain configs, so + // filter our tests such that Forward::new will be guaranteed + // to succeed. (And also remove tests with a haystack that is + // too small.) + let fwd = match super::Forward::new(&t.ninfo, &t.needle) { + None => return false, + Some(fwd) => fwd, + }; + t.haystack.len() >= fwd.min_haystack_len() + }) + } + } +} diff --git a/third_party/rust/memchr/src/memmem/x86/avx.rs b/third_party/rust/memchr/src/memmem/x86/avx.rs new file mode 100644 index 0000000000..ce168dd377 --- /dev/null +++ b/third_party/rust/memchr/src/memmem/x86/avx.rs @@ -0,0 +1,139 @@ +#[cfg(not(feature = "std"))] +pub(crate) use self::nostd::Forward; +#[cfg(feature = "std")] +pub(crate) use self::std::Forward; + +#[cfg(feature = "std")] +mod std { + use core::arch::x86_64::{__m128i, __m256i}; + + use crate::memmem::{genericsimd, NeedleInfo}; + + /// An AVX accelerated vectorized substring search routine that only works + /// on small needles. + #[derive(Clone, Copy, Debug)] + pub(crate) struct Forward(genericsimd::Forward); + + impl Forward { + /// Create a new "generic simd" forward searcher. If one could not be + /// created from the given inputs, then None is returned. + pub(crate) fn new( + ninfo: &NeedleInfo, + needle: &[u8], + ) -> Option<Forward> { + if !cfg!(memchr_runtime_avx) || !is_x86_feature_detected!("avx2") { + return None; + } + genericsimd::Forward::new(ninfo, needle).map(Forward) + } + + /// Returns the minimum length of haystack that is needed for this + /// searcher to work. Passing a haystack with a length smaller than + /// this will cause `find` to panic. + #[inline(always)] + pub(crate) fn min_haystack_len(&self) -> usize { + self.0.min_haystack_len::<__m128i>() + } + + #[inline(always)] + pub(crate) fn find( + &self, + haystack: &[u8], + needle: &[u8], + ) -> Option<usize> { + // SAFETY: The only way a Forward value can exist is if the avx2 + // target feature is enabled. This is the only safety requirement + // for calling the genericsimd searcher. + unsafe { self.find_impl(haystack, needle) } + } + + /// The implementation of find marked with the appropriate target + /// feature. + /// + /// # Safety + /// + /// Callers must ensure that the avx2 CPU feature is enabled in the + /// current environment. + #[target_feature(enable = "avx2")] + unsafe fn find_impl( + &self, + haystack: &[u8], + needle: &[u8], + ) -> Option<usize> { + if haystack.len() < self.0.min_haystack_len::<__m256i>() { + genericsimd::fwd_find::<__m128i>(&self.0, haystack, needle) + } else { + genericsimd::fwd_find::<__m256i>(&self.0, haystack, needle) + } + } + } +} + +// We still define the avx "forward" type on nostd to make caller code a bit +// simpler. This avoids needing a lot more conditional compilation. +#[cfg(not(feature = "std"))] +mod nostd { + use crate::memmem::NeedleInfo; + + #[derive(Clone, Copy, Debug)] + pub(crate) struct Forward(()); + + impl Forward { + pub(crate) fn new( + ninfo: &NeedleInfo, + needle: &[u8], + ) -> Option<Forward> { + None + } + + pub(crate) fn min_haystack_len(&self) -> usize { + unreachable!() + } + + pub(crate) fn find( + &self, + haystack: &[u8], + needle: &[u8], + ) -> Option<usize> { + unreachable!() + } + } +} + +#[cfg(all(test, feature = "std", not(miri)))] +mod tests { + use crate::memmem::{prefilter::PrefilterState, NeedleInfo}; + + fn find( + _: &mut PrefilterState, + ninfo: &NeedleInfo, + haystack: &[u8], + needle: &[u8], + ) -> Option<usize> { + super::Forward::new(ninfo, needle).unwrap().find(haystack, needle) + } + + #[test] + fn prefilter_permutations() { + use crate::memmem::prefilter::tests::PrefilterTest; + + if !is_x86_feature_detected!("avx2") { + return; + } + // SAFETY: The safety of find only requires that the current CPU + // support AVX2, which we checked above. + unsafe { + PrefilterTest::run_all_tests_filter(find, |t| { + // This substring searcher only works on certain configs, so + // filter our tests such that Forward::new will be guaranteed + // to succeed. (And also remove tests with a haystack that is + // too small.) + let fwd = match super::Forward::new(&t.ninfo, &t.needle) { + None => return false, + Some(fwd) => fwd, + }; + t.haystack.len() >= fwd.min_haystack_len() + }) + } + } +} diff --git a/third_party/rust/memchr/src/memmem/x86/mod.rs b/third_party/rust/memchr/src/memmem/x86/mod.rs new file mode 100644 index 0000000000..c1cc73feea --- /dev/null +++ b/third_party/rust/memchr/src/memmem/x86/mod.rs @@ -0,0 +1,2 @@ +pub(crate) mod avx; +pub(crate) mod sse; diff --git a/third_party/rust/memchr/src/memmem/x86/sse.rs b/third_party/rust/memchr/src/memmem/x86/sse.rs new file mode 100644 index 0000000000..22e7d9933a --- /dev/null +++ b/third_party/rust/memchr/src/memmem/x86/sse.rs @@ -0,0 +1,89 @@ +use core::arch::x86_64::__m128i; + +use crate::memmem::{genericsimd, NeedleInfo}; + +/// An SSE accelerated vectorized substring search routine that only works on +/// small needles. +#[derive(Clone, Copy, Debug)] +pub(crate) struct Forward(genericsimd::Forward); + +impl Forward { + /// Create a new "generic simd" forward searcher. If one could not be + /// created from the given inputs, then None is returned. + pub(crate) fn new(ninfo: &NeedleInfo, needle: &[u8]) -> Option<Forward> { + if !cfg!(memchr_runtime_sse2) { + return None; + } + genericsimd::Forward::new(ninfo, needle).map(Forward) + } + + /// Returns the minimum length of haystack that is needed for this searcher + /// to work. Passing a haystack with a length smaller than this will cause + /// `find` to panic. + #[inline(always)] + pub(crate) fn min_haystack_len(&self) -> usize { + self.0.min_haystack_len::<__m128i>() + } + + #[inline(always)] + pub(crate) fn find( + &self, + haystack: &[u8], + needle: &[u8], + ) -> Option<usize> { + // SAFETY: sse2 is enabled on all x86_64 targets, so this is always + // safe to call. + unsafe { self.find_impl(haystack, needle) } + } + + /// The implementation of find marked with the appropriate target feature. + /// + /// # Safety + /// + /// This is safe to call in all cases since sse2 is guaranteed to be part + /// of x86_64. It is marked as unsafe because of the target feature + /// attribute. + #[target_feature(enable = "sse2")] + unsafe fn find_impl( + &self, + haystack: &[u8], + needle: &[u8], + ) -> Option<usize> { + genericsimd::fwd_find::<__m128i>(&self.0, haystack, needle) + } +} + +#[cfg(all(test, feature = "std", not(miri)))] +mod tests { + use crate::memmem::{prefilter::PrefilterState, NeedleInfo}; + + fn find( + _: &mut PrefilterState, + ninfo: &NeedleInfo, + haystack: &[u8], + needle: &[u8], + ) -> Option<usize> { + super::Forward::new(ninfo, needle).unwrap().find(haystack, needle) + } + + #[test] + fn prefilter_permutations() { + use crate::memmem::prefilter::tests::PrefilterTest; + + // SAFETY: sse2 is enabled on all x86_64 targets, so this is always + // safe to call. + unsafe { + PrefilterTest::run_all_tests_filter(find, |t| { + // This substring searcher only works on certain configs, so + // filter our tests such that Forward::new will be guaranteed + // to succeed. (And also remove tests with a haystack that is + // too small.) + let fwd = match super::Forward::new(&t.ninfo, &t.needle) { + None => return false, + Some(fwd) => fwd, + }; + t.haystack.len() >= fwd.min_haystack_len() + }) + } + } +} |