diff options
Diffstat (limited to 'vendor/memchr/src/arch/x86_64')
-rw-r--r-- | vendor/memchr/src/arch/x86_64/avx2/memchr.rs | 1352 | ||||
-rw-r--r-- | vendor/memchr/src/arch/x86_64/avx2/mod.rs | 6 | ||||
-rw-r--r-- | vendor/memchr/src/arch/x86_64/avx2/packedpair.rs | 272 | ||||
-rw-r--r-- | vendor/memchr/src/arch/x86_64/memchr.rs | 335 | ||||
-rw-r--r-- | vendor/memchr/src/arch/x86_64/mod.rs | 8 | ||||
-rw-r--r-- | vendor/memchr/src/arch/x86_64/sse2/memchr.rs | 1077 | ||||
-rw-r--r-- | vendor/memchr/src/arch/x86_64/sse2/mod.rs | 6 | ||||
-rw-r--r-- | vendor/memchr/src/arch/x86_64/sse2/packedpair.rs | 232 |
8 files changed, 3288 insertions, 0 deletions
diff --git a/vendor/memchr/src/arch/x86_64/avx2/memchr.rs b/vendor/memchr/src/arch/x86_64/avx2/memchr.rs new file mode 100644 index 000000000..59f8c7f73 --- /dev/null +++ b/vendor/memchr/src/arch/x86_64/avx2/memchr.rs @@ -0,0 +1,1352 @@ +/*! +This module defines 256-bit vector implementations of `memchr` and friends. + +The main types in this module are [`One`], [`Two`] and [`Three`]. They are for +searching for one, two or three distinct bytes, respectively, in a haystack. +Each type also has corresponding double ended iterators. These searchers are +typically much faster than scalar routines accomplishing the same task. + +The `One` searcher also provides a [`One::count`] routine for efficiently +counting the number of times a single byte occurs in a haystack. This is +useful, for example, for counting the number of lines in a haystack. This +routine exists because it is usually faster, especially with a high match +count, then using [`One::find`] repeatedly. ([`OneIter`] specializes its +`Iterator::count` implementation to use this routine.) + +Only one, two and three bytes are supported because three bytes is about +the point where one sees diminishing returns. Beyond this point and it's +probably (but not necessarily) better to just use a simple `[bool; 256]` array +or similar. However, it depends mightily on the specific work-load and the +expected match frequency. +*/ + +use core::arch::x86_64::{__m128i, __m256i}; + +use crate::{arch::generic::memchr as generic, ext::Pointer, vector::Vector}; + +/// Finds all occurrences of a single byte in a haystack. +#[derive(Clone, Copy, Debug)] +pub struct One { + /// Used for haystacks less than 32 bytes. + sse2: generic::One<__m128i>, + /// Used for haystacks bigger than 32 bytes. + avx2: generic::One<__m256i>, +} + +impl One { + /// Create a new searcher that finds occurrences of the needle byte given. + /// + /// This particular searcher is specialized to use AVX2 vector instructions + /// that typically make it quite fast. (SSE2 is used for haystacks that + /// are too short to accommodate an AVX2 vector.) + /// + /// If either SSE2 or AVX2 is unavailable in the current environment, then + /// `None` is returned. + #[inline] + pub fn new(needle: u8) -> Option<One> { + if One::is_available() { + // SAFETY: we check that sse2 and avx2 are available above. + unsafe { Some(One::new_unchecked(needle)) } + } else { + None + } + } + + /// Create a new finder specific to AVX2 vectors and routines without + /// checking that either SSE2 or AVX2 is available. + /// + /// # Safety + /// + /// Callers must guarantee that it is safe to execute both `sse2` and + /// `avx2` instructions in the current environment. + /// + /// Note that it is a common misconception that if one compiles for an + /// `x86_64` target, then they therefore automatically have access to SSE2 + /// instructions. While this is almost always the case, it isn't true in + /// 100% of cases. + #[target_feature(enable = "sse2", enable = "avx2")] + #[inline] + pub unsafe fn new_unchecked(needle: u8) -> One { + One { + sse2: generic::One::new(needle), + avx2: generic::One::new(needle), + } + } + + /// Returns true when this implementation is available in the current + /// environment. + /// + /// When this is true, it is guaranteed that [`One::new`] will return + /// a `Some` value. Similarly, when it is false, it is guaranteed that + /// `One::new` will return a `None` value. + /// + /// Note also that for the lifetime of a single program, if this returns + /// true then it will always return true. + #[inline] + pub fn is_available() -> bool { + #[cfg(not(target_feature = "sse2"))] + { + false + } + #[cfg(target_feature = "sse2")] + { + #[cfg(target_feature = "avx2")] + { + true + } + #[cfg(not(target_feature = "avx2"))] + { + #[cfg(feature = "std")] + { + std::is_x86_feature_detected!("avx2") + } + #[cfg(not(feature = "std"))] + { + false + } + } + } + } + + /// Return the first occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn find(&self, haystack: &[u8]) -> Option<usize> { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.find_raw(s, e) + }) + } + } + + /// Return the last occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.rfind_raw(s, e) + }) + } + } + + /// Counts all occurrences of this byte in the given haystack. + #[inline] + pub fn count(&self, haystack: &[u8]) -> usize { + // SAFETY: All of our pointers are derived directly from a borrowed + // slice, which is guaranteed to be valid. + unsafe { + let start = haystack.as_ptr(); + let end = start.add(haystack.len()); + self.count_raw(start, end) + } + } + + /// Like `find`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn find_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + let len = end.distance(start); + if len < __m256i::BYTES { + return if len < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end + // pointers. + generic::fwd_byte_by_byte(start, end, |b| { + b == self.sse2.needle1() + }) + } else { + // SAFETY: We require the caller to pass valid start/end + // pointers. + self.find_raw_sse2(start, end) + }; + } + // SAFETY: Building a `One` means it's safe to call both 'sse2' and + // 'avx2' routines. Also, we've checked that our haystack is big + // enough to run on the vector routine. Pointer validity is caller's + // responsibility. + // + // Note that we could call `self.avx2.find_raw` directly here. But that + // means we'd have to annotate this routine with `target_feature`. + // Which is fine, because this routine is `unsafe` anyway and the + // `target_feature` obligation is met by virtue of building a `One`. + // The real problem is that a routine with a `target_feature` + // annotation generally can't be inlined into caller code unless + // the caller code has the same target feature annotations. Namely, + // the common case (at time of writing) is for calling code to not + // have the `avx2` target feature enabled *at compile time*. Without + // `target_feature` on this routine, it can be inlined which will + // handle some of the short-haystack cases above without touching the + // architecture specific code. + self.find_raw_avx2(start, end) + } + + /// Like `rfind`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + let len = end.distance(start); + if len < __m256i::BYTES { + return if len < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end + // pointers. + generic::rev_byte_by_byte(start, end, |b| { + b == self.sse2.needle1() + }) + } else { + // SAFETY: We require the caller to pass valid start/end + // pointers. + self.rfind_raw_sse2(start, end) + }; + } + // SAFETY: Building a `One` means it's safe to call both 'sse2' and + // 'avx2' routines. Also, we've checked that our haystack is big + // enough to run on the vector routine. Pointer validity is caller's + // responsibility. + // + // See note in forward routine above for why we don't just call + // `self.avx2.rfind_raw` directly here. + self.rfind_raw_avx2(start, end) + } + + /// Counts all occurrences of this byte in the given haystack represented + /// by raw pointers. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `0` will always be returned. + #[inline] + pub unsafe fn count_raw(&self, start: *const u8, end: *const u8) -> usize { + if start >= end { + return 0; + } + let len = end.distance(start); + if len < __m256i::BYTES { + return if len < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end + // pointers. + generic::count_byte_by_byte(start, end, |b| { + b == self.sse2.needle1() + }) + } else { + // SAFETY: We require the caller to pass valid start/end + // pointers. + self.count_raw_sse2(start, end) + }; + } + // SAFETY: Building a `One` means it's safe to call both 'sse2' and + // 'avx2' routines. Also, we've checked that our haystack is big + // enough to run on the vector routine. Pointer validity is caller's + // responsibility. + self.count_raw_avx2(start, end) + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`One::find_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `One`, which can only be constructed + /// when it is safe to call `sse2`/`avx2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn find_raw_sse2( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.sse2.find_raw(start, end) + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`One::rfind_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `One`, which can only be constructed + /// when it is safe to call `sse2`/`avx2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn rfind_raw_sse2( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.sse2.rfind_raw(start, end) + } + + /// Execute a count using SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`One::count_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `One`, which can only be constructed + /// when it is safe to call `sse2`/`avx2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn count_raw_sse2( + &self, + start: *const u8, + end: *const u8, + ) -> usize { + self.sse2.count_raw(start, end) + } + + /// Execute a search using AVX2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`One::find_raw`], except the distance between `start` and + /// `end` must be at least the size of an AVX2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `One`, which can only be constructed + /// when it is safe to call `sse2`/`avx2` routines.) + #[target_feature(enable = "avx2")] + #[inline] + unsafe fn find_raw_avx2( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.avx2.find_raw(start, end) + } + + /// Execute a search using AVX2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`One::rfind_raw`], except the distance between `start` and + /// `end` must be at least the size of an AVX2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `One`, which can only be constructed + /// when it is safe to call `sse2`/`avx2` routines.) + #[target_feature(enable = "avx2")] + #[inline] + unsafe fn rfind_raw_avx2( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.avx2.rfind_raw(start, end) + } + + /// Execute a count using AVX2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`One::count_raw`], except the distance between `start` and + /// `end` must be at least the size of an AVX2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `One`, which can only be constructed + /// when it is safe to call `sse2`/`avx2` routines.) + #[target_feature(enable = "avx2")] + #[inline] + unsafe fn count_raw_avx2( + &self, + start: *const u8, + end: *const u8, + ) -> usize { + self.avx2.count_raw(start, end) + } + + /// Returns an iterator over all occurrences of the needle byte in the + /// given haystack. + /// + /// The iterator returned implements `DoubleEndedIterator`. This means it + /// can also be used to find occurrences in reverse order. + #[inline] + pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h> { + OneIter { searcher: self, it: generic::Iter::new(haystack) } + } +} + +/// An iterator over all occurrences of a single byte in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`One::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`One`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct OneIter<'a, 'h> { + searcher: &'a One, + it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for OneIter<'a, 'h> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option<usize> { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'find_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } + } + + #[inline] + fn count(self) -> usize { + self.it.count(|s, e| { + // SAFETY: We rely on our generic iterator to return valid start + // and end pointers. + unsafe { self.searcher.count_raw(s, e) } + }) + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + self.it.size_hint() + } +} + +impl<'a, 'h> DoubleEndedIterator for OneIter<'a, 'h> { + #[inline] + fn next_back(&mut self) -> Option<usize> { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'rfind_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } + } +} + +impl<'a, 'h> core::iter::FusedIterator for OneIter<'a, 'h> {} + +/// Finds all occurrences of two bytes in a haystack. +/// +/// That is, this reports matches of one of two possible bytes. For example, +/// searching for `a` or `b` in `afoobar` would report matches at offsets `0`, +/// `4` and `5`. +#[derive(Clone, Copy, Debug)] +pub struct Two { + /// Used for haystacks less than 32 bytes. + sse2: generic::Two<__m128i>, + /// Used for haystacks bigger than 32 bytes. + avx2: generic::Two<__m256i>, +} + +impl Two { + /// Create a new searcher that finds occurrences of the needle bytes given. + /// + /// This particular searcher is specialized to use AVX2 vector instructions + /// that typically make it quite fast. (SSE2 is used for haystacks that + /// are too short to accommodate an AVX2 vector.) + /// + /// If either SSE2 or AVX2 is unavailable in the current environment, then + /// `None` is returned. + #[inline] + pub fn new(needle1: u8, needle2: u8) -> Option<Two> { + if Two::is_available() { + // SAFETY: we check that sse2 and avx2 are available above. + unsafe { Some(Two::new_unchecked(needle1, needle2)) } + } else { + None + } + } + + /// Create a new finder specific to AVX2 vectors and routines without + /// checking that either SSE2 or AVX2 is available. + /// + /// # Safety + /// + /// Callers must guarantee that it is safe to execute both `sse2` and + /// `avx2` instructions in the current environment. + /// + /// Note that it is a common misconception that if one compiles for an + /// `x86_64` target, then they therefore automatically have access to SSE2 + /// instructions. While this is almost always the case, it isn't true in + /// 100% of cases. + #[target_feature(enable = "sse2", enable = "avx2")] + #[inline] + pub unsafe fn new_unchecked(needle1: u8, needle2: u8) -> Two { + Two { + sse2: generic::Two::new(needle1, needle2), + avx2: generic::Two::new(needle1, needle2), + } + } + + /// Returns true when this implementation is available in the current + /// environment. + /// + /// When this is true, it is guaranteed that [`Two::new`] will return + /// a `Some` value. Similarly, when it is false, it is guaranteed that + /// `Two::new` will return a `None` value. + /// + /// Note also that for the lifetime of a single program, if this returns + /// true then it will always return true. + #[inline] + pub fn is_available() -> bool { + #[cfg(not(target_feature = "sse2"))] + { + false + } + #[cfg(target_feature = "sse2")] + { + #[cfg(target_feature = "avx2")] + { + true + } + #[cfg(not(target_feature = "avx2"))] + { + #[cfg(feature = "std")] + { + std::is_x86_feature_detected!("avx2") + } + #[cfg(not(feature = "std"))] + { + false + } + } + } + } + + /// Return the first occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn find(&self, haystack: &[u8]) -> Option<usize> { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.find_raw(s, e) + }) + } + } + + /// Return the last occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.rfind_raw(s, e) + }) + } + } + + /// Like `find`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn find_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + let len = end.distance(start); + if len < __m256i::BYTES { + return if len < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end + // pointers. + generic::fwd_byte_by_byte(start, end, |b| { + b == self.sse2.needle1() || b == self.sse2.needle2() + }) + } else { + // SAFETY: We require the caller to pass valid start/end + // pointers. + self.find_raw_sse2(start, end) + }; + } + // SAFETY: Building a `Two` means it's safe to call both 'sse2' and + // 'avx2' routines. Also, we've checked that our haystack is big + // enough to run on the vector routine. Pointer validity is caller's + // responsibility. + // + // Note that we could call `self.avx2.find_raw` directly here. But that + // means we'd have to annotate this routine with `target_feature`. + // Which is fine, because this routine is `unsafe` anyway and the + // `target_feature` obligation is met by virtue of building a `Two`. + // The real problem is that a routine with a `target_feature` + // annotation generally can't be inlined into caller code unless + // the caller code has the same target feature annotations. Namely, + // the common case (at time of writing) is for calling code to not + // have the `avx2` target feature enabled *at compile time*. Without + // `target_feature` on this routine, it can be inlined which will + // handle some of the short-haystack cases above without touching the + // architecture specific code. + self.find_raw_avx2(start, end) + } + + /// Like `rfind`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + let len = end.distance(start); + if len < __m256i::BYTES { + return if len < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end + // pointers. + generic::rev_byte_by_byte(start, end, |b| { + b == self.sse2.needle1() || b == self.sse2.needle2() + }) + } else { + // SAFETY: We require the caller to pass valid start/end + // pointers. + self.rfind_raw_sse2(start, end) + }; + } + // SAFETY: Building a `Two` means it's safe to call both 'sse2' and + // 'avx2' routines. Also, we've checked that our haystack is big + // enough to run on the vector routine. Pointer validity is caller's + // responsibility. + // + // See note in forward routine above for why we don't just call + // `self.avx2.rfind_raw` directly here. + self.rfind_raw_avx2(start, end) + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`Two::find_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Two`, which can only be constructed + /// when it is safe to call `sse2`/`avx2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn find_raw_sse2( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.sse2.find_raw(start, end) + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`Two::rfind_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Two`, which can only be constructed + /// when it is safe to call `sse2`/`avx2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn rfind_raw_sse2( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.sse2.rfind_raw(start, end) + } + + /// Execute a search using AVX2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`Two::find_raw`], except the distance between `start` and + /// `end` must be at least the size of an AVX2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Two`, which can only be constructed + /// when it is safe to call `sse2`/`avx2` routines.) + #[target_feature(enable = "avx2")] + #[inline] + unsafe fn find_raw_avx2( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.avx2.find_raw(start, end) + } + + /// Execute a search using AVX2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`Two::rfind_raw`], except the distance between `start` and + /// `end` must be at least the size of an AVX2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Two`, which can only be constructed + /// when it is safe to call `sse2`/`avx2` routines.) + #[target_feature(enable = "avx2")] + #[inline] + unsafe fn rfind_raw_avx2( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.avx2.rfind_raw(start, end) + } + + /// Returns an iterator over all occurrences of the needle bytes in the + /// given haystack. + /// + /// The iterator returned implements `DoubleEndedIterator`. This means it + /// can also be used to find occurrences in reverse order. + #[inline] + pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h> { + TwoIter { searcher: self, it: generic::Iter::new(haystack) } + } +} + +/// An iterator over all occurrences of two possible bytes in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`Two::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`Two`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct TwoIter<'a, 'h> { + searcher: &'a Two, + it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for TwoIter<'a, 'h> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option<usize> { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'find_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + self.it.size_hint() + } +} + +impl<'a, 'h> DoubleEndedIterator for TwoIter<'a, 'h> { + #[inline] + fn next_back(&mut self) -> Option<usize> { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'rfind_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } + } +} + +impl<'a, 'h> core::iter::FusedIterator for TwoIter<'a, 'h> {} + +/// Finds all occurrences of three bytes in a haystack. +/// +/// That is, this reports matches of one of three possible bytes. For example, +/// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets +/// `0`, `2`, `3`, `4` and `5`. +#[derive(Clone, Copy, Debug)] +pub struct Three { + /// Used for haystacks less than 32 bytes. + sse2: generic::Three<__m128i>, + /// Used for haystacks bigger than 32 bytes. + avx2: generic::Three<__m256i>, +} + +impl Three { + /// Create a new searcher that finds occurrences of the needle bytes given. + /// + /// This particular searcher is specialized to use AVX2 vector instructions + /// that typically make it quite fast. (SSE2 is used for haystacks that + /// are too short to accommodate an AVX2 vector.) + /// + /// If either SSE2 or AVX2 is unavailable in the current environment, then + /// `None` is returned. + #[inline] + pub fn new(needle1: u8, needle2: u8, needle3: u8) -> Option<Three> { + if Three::is_available() { + // SAFETY: we check that sse2 and avx2 are available above. + unsafe { Some(Three::new_unchecked(needle1, needle2, needle3)) } + } else { + None + } + } + + /// Create a new finder specific to AVX2 vectors and routines without + /// checking that either SSE2 or AVX2 is available. + /// + /// # Safety + /// + /// Callers must guarantee that it is safe to execute both `sse2` and + /// `avx2` instructions in the current environment. + /// + /// Note that it is a common misconception that if one compiles for an + /// `x86_64` target, then they therefore automatically have access to SSE2 + /// instructions. While this is almost always the case, it isn't true in + /// 100% of cases. + #[target_feature(enable = "sse2", enable = "avx2")] + #[inline] + pub unsafe fn new_unchecked( + needle1: u8, + needle2: u8, + needle3: u8, + ) -> Three { + Three { + sse2: generic::Three::new(needle1, needle2, needle3), + avx2: generic::Three::new(needle1, needle2, needle3), + } + } + + /// Returns true when this implementation is available in the current + /// environment. + /// + /// When this is true, it is guaranteed that [`Three::new`] will return + /// a `Some` value. Similarly, when it is false, it is guaranteed that + /// `Three::new` will return a `None` value. + /// + /// Note also that for the lifetime of a single program, if this returns + /// true then it will always return true. + #[inline] + pub fn is_available() -> bool { + #[cfg(not(target_feature = "sse2"))] + { + false + } + #[cfg(target_feature = "sse2")] + { + #[cfg(target_feature = "avx2")] + { + true + } + #[cfg(not(target_feature = "avx2"))] + { + #[cfg(feature = "std")] + { + std::is_x86_feature_detected!("avx2") + } + #[cfg(not(feature = "std"))] + { + false + } + } + } + } + + /// Return the first occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn find(&self, haystack: &[u8]) -> Option<usize> { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.find_raw(s, e) + }) + } + } + + /// Return the last occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.rfind_raw(s, e) + }) + } + } + + /// Like `find`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn find_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + let len = end.distance(start); + if len < __m256i::BYTES { + return if len < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end + // pointers. + generic::fwd_byte_by_byte(start, end, |b| { + b == self.sse2.needle1() + || b == self.sse2.needle2() + || b == self.sse2.needle3() + }) + } else { + // SAFETY: We require the caller to pass valid start/end + // pointers. + self.find_raw_sse2(start, end) + }; + } + // SAFETY: Building a `Three` means it's safe to call both 'sse2' and + // 'avx2' routines. Also, we've checked that our haystack is big + // enough to run on the vector routine. Pointer validity is caller's + // responsibility. + // + // Note that we could call `self.avx2.find_raw` directly here. But that + // means we'd have to annotate this routine with `target_feature`. + // Which is fine, because this routine is `unsafe` anyway and the + // `target_feature` obligation is met by virtue of building a `Three`. + // The real problem is that a routine with a `target_feature` + // annotation generally can't be inlined into caller code unless + // the caller code has the same target feature annotations. Namely, + // the common case (at time of writing) is for calling code to not + // have the `avx2` target feature enabled *at compile time*. Without + // `target_feature` on this routine, it can be inlined which will + // handle some of the short-haystack cases above without touching the + // architecture specific code. + self.find_raw_avx2(start, end) + } + + /// Like `rfind`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + let len = end.distance(start); + if len < __m256i::BYTES { + return if len < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end + // pointers. + generic::rev_byte_by_byte(start, end, |b| { + b == self.sse2.needle1() + || b == self.sse2.needle2() + || b == self.sse2.needle3() + }) + } else { + // SAFETY: We require the caller to pass valid start/end + // pointers. + self.rfind_raw_sse2(start, end) + }; + } + // SAFETY: Building a `Three` means it's safe to call both 'sse2' and + // 'avx2' routines. Also, we've checked that our haystack is big + // enough to run on the vector routine. Pointer validity is caller's + // responsibility. + // + // See note in forward routine above for why we don't just call + // `self.avx2.rfind_raw` directly here. + self.rfind_raw_avx2(start, end) + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`Three::find_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Three`, which can only be constructed + /// when it is safe to call `sse2`/`avx2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn find_raw_sse2( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.sse2.find_raw(start, end) + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`Three::rfind_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Three`, which can only be constructed + /// when it is safe to call `sse2`/`avx2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn rfind_raw_sse2( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.sse2.rfind_raw(start, end) + } + + /// Execute a search using AVX2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`Three::find_raw`], except the distance between `start` and + /// `end` must be at least the size of an AVX2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Three`, which can only be constructed + /// when it is safe to call `sse2`/`avx2` routines.) + #[target_feature(enable = "avx2")] + #[inline] + unsafe fn find_raw_avx2( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.avx2.find_raw(start, end) + } + + /// Execute a search using AVX2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`Three::rfind_raw`], except the distance between `start` and + /// `end` must be at least the size of an AVX2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Three`, which can only be constructed + /// when it is safe to call `sse2`/`avx2` routines.) + #[target_feature(enable = "avx2")] + #[inline] + unsafe fn rfind_raw_avx2( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.avx2.rfind_raw(start, end) + } + + /// Returns an iterator over all occurrences of the needle bytes in the + /// given haystack. + /// + /// The iterator returned implements `DoubleEndedIterator`. This means it + /// can also be used to find occurrences in reverse order. + #[inline] + pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h> { + ThreeIter { searcher: self, it: generic::Iter::new(haystack) } + } +} + +/// An iterator over all occurrences of three possible bytes in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`Three::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`Three`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct ThreeIter<'a, 'h> { + searcher: &'a Three, + it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for ThreeIter<'a, 'h> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option<usize> { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'find_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + self.it.size_hint() + } +} + +impl<'a, 'h> DoubleEndedIterator for ThreeIter<'a, 'h> { + #[inline] + fn next_back(&mut self) -> Option<usize> { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'rfind_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } + } +} + +impl<'a, 'h> core::iter::FusedIterator for ThreeIter<'a, 'h> {} + +#[cfg(test)] +mod tests { + use super::*; + + define_memchr_quickcheck!(super); + + #[test] + fn forward_one() { + crate::tests::memchr::Runner::new(1).forward_iter( + |haystack, needles| { + Some(One::new(needles[0])?.iter(haystack).collect()) + }, + ) + } + + #[test] + fn reverse_one() { + crate::tests::memchr::Runner::new(1).reverse_iter( + |haystack, needles| { + Some(One::new(needles[0])?.iter(haystack).rev().collect()) + }, + ) + } + + #[test] + fn count_one() { + crate::tests::memchr::Runner::new(1).count_iter(|haystack, needles| { + Some(One::new(needles[0])?.iter(haystack).count()) + }) + } + + #[test] + fn forward_two() { + crate::tests::memchr::Runner::new(2).forward_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + Some(Two::new(n1, n2)?.iter(haystack).collect()) + }, + ) + } + + #[test] + fn reverse_two() { + crate::tests::memchr::Runner::new(2).reverse_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + Some(Two::new(n1, n2)?.iter(haystack).rev().collect()) + }, + ) + } + + #[test] + fn forward_three() { + crate::tests::memchr::Runner::new(3).forward_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + let n3 = needles.get(2).copied()?; + Some(Three::new(n1, n2, n3)?.iter(haystack).collect()) + }, + ) + } + + #[test] + fn reverse_three() { + crate::tests::memchr::Runner::new(3).reverse_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + let n3 = needles.get(2).copied()?; + Some(Three::new(n1, n2, n3)?.iter(haystack).rev().collect()) + }, + ) + } +} diff --git a/vendor/memchr/src/arch/x86_64/avx2/mod.rs b/vendor/memchr/src/arch/x86_64/avx2/mod.rs new file mode 100644 index 000000000..ee4097d6f --- /dev/null +++ b/vendor/memchr/src/arch/x86_64/avx2/mod.rs @@ -0,0 +1,6 @@ +/*! +Algorithms for the `x86_64` target using 256-bit vectors via AVX2. +*/ + +pub mod memchr; +pub mod packedpair; diff --git a/vendor/memchr/src/arch/x86_64/avx2/packedpair.rs b/vendor/memchr/src/arch/x86_64/avx2/packedpair.rs new file mode 100644 index 000000000..efae7b66c --- /dev/null +++ b/vendor/memchr/src/arch/x86_64/avx2/packedpair.rs @@ -0,0 +1,272 @@ +/*! +A 256-bit vector implementation of the "packed pair" SIMD algorithm. + +The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main +difference is that it (by default) uses a background distribution of byte +frequencies to heuristically select the pair of bytes to search for. + +[generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last +*/ + +use core::arch::x86_64::{__m128i, __m256i}; + +use crate::arch::{all::packedpair::Pair, generic::packedpair}; + +/// A "packed pair" finder that uses 256-bit vector operations. +/// +/// This finder picks two bytes that it believes have high predictive power +/// for indicating an overall match of a needle. Depending on whether +/// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets +/// where the needle matches or could match. In the prefilter case, candidates +/// are reported whenever the [`Pair`] of bytes given matches. +#[derive(Clone, Copy, Debug)] +pub struct Finder { + sse2: packedpair::Finder<__m128i>, + avx2: packedpair::Finder<__m256i>, +} + +impl Finder { + /// Create a new pair searcher. The searcher returned can either report + /// exact matches of `needle` or act as a prefilter and report candidate + /// positions of `needle`. + /// + /// If AVX2 is unavailable in the current environment or if a [`Pair`] + /// could not be constructed from the needle given, then `None` is + /// returned. + #[inline] + pub fn new(needle: &[u8]) -> Option<Finder> { + Finder::with_pair(needle, Pair::new(needle)?) + } + + /// Create a new "packed pair" finder using the pair of bytes given. + /// + /// This constructor permits callers to control precisely which pair of + /// bytes is used as a predicate. + /// + /// If AVX2 is unavailable in the current environment, then `None` is + /// returned. + #[inline] + pub fn with_pair(needle: &[u8], pair: Pair) -> Option<Finder> { + if Finder::is_available() { + // SAFETY: we check that sse2/avx2 is available above. We are also + // guaranteed to have needle.len() > 1 because we have a valid + // Pair. + unsafe { Some(Finder::with_pair_impl(needle, pair)) } + } else { + None + } + } + + /// Create a new `Finder` specific to SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as the safety for `packedpair::Finder::new`, and callers must also + /// ensure that both SSE2 and AVX2 are available. + #[target_feature(enable = "sse2", enable = "avx2")] + #[inline] + unsafe fn with_pair_impl(needle: &[u8], pair: Pair) -> Finder { + let sse2 = packedpair::Finder::<__m128i>::new(needle, pair); + let avx2 = packedpair::Finder::<__m256i>::new(needle, pair); + Finder { sse2, avx2 } + } + + /// Returns true when this implementation is available in the current + /// environment. + /// + /// When this is true, it is guaranteed that [`Finder::with_pair`] will + /// return a `Some` value. Similarly, when it is false, it is guaranteed + /// that `Finder::with_pair` will return a `None` value. Notice that this + /// does not guarantee that [`Finder::new`] will return a `Finder`. Namely, + /// even when `Finder::is_available` is true, it is not guaranteed that a + /// valid [`Pair`] can be found from the needle given. + /// + /// Note also that for the lifetime of a single program, if this returns + /// true then it will always return true. + #[inline] + pub fn is_available() -> bool { + #[cfg(not(target_feature = "sse2"))] + { + false + } + #[cfg(target_feature = "sse2")] + { + #[cfg(target_feature = "avx2")] + { + true + } + #[cfg(not(target_feature = "avx2"))] + { + #[cfg(feature = "std")] + { + std::is_x86_feature_detected!("avx2") + } + #[cfg(not(feature = "std"))] + { + false + } + } + } + } + + /// Execute a search using AVX2 vectors and routines. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. + #[inline] + pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> { + // SAFETY: Building a `Finder` means it's safe to call 'sse2' routines. + unsafe { self.find_impl(haystack, needle) } + } + + /// Run this finder on the given haystack as a prefilter. + /// + /// If a candidate match is found, then an offset where the needle *could* + /// begin in the haystack is returned. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. + #[inline] + pub fn find_prefilter(&self, haystack: &[u8]) -> Option<usize> { + // SAFETY: Building a `Finder` means it's safe to call 'sse2' routines. + unsafe { self.find_prefilter_impl(haystack) } + } + + /// Execute a search using AVX2 vectors and routines. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. + /// + /// # Safety + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Finder`, which can only be constructed + /// when it is safe to call `sse2` and `avx2` routines.) + #[target_feature(enable = "sse2", enable = "avx2")] + #[inline] + unsafe fn find_impl( + &self, + haystack: &[u8], + needle: &[u8], + ) -> Option<usize> { + if haystack.len() < self.avx2.min_haystack_len() { + self.sse2.find(haystack, needle) + } else { + self.avx2.find(haystack, needle) + } + } + + /// Execute a prefilter search using AVX2 vectors and routines. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. + /// + /// # Safety + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Finder`, which can only be constructed + /// when it is safe to call `sse2` and `avx2` routines.) + #[target_feature(enable = "sse2", enable = "avx2")] + #[inline] + unsafe fn find_prefilter_impl(&self, haystack: &[u8]) -> Option<usize> { + if haystack.len() < self.avx2.min_haystack_len() { + self.sse2.find_prefilter(haystack) + } else { + self.avx2.find_prefilter(haystack) + } + } + + /// Returns the pair of offsets (into the needle) used to check as a + /// predicate before confirming whether a needle exists at a particular + /// position. + #[inline] + pub fn pair(&self) -> &Pair { + self.avx2.pair() + } + + /// Returns the minimum haystack length that this `Finder` can search. + /// + /// Using a haystack with length smaller than this in a search will result + /// in a panic. The reason for this restriction is that this finder is + /// meant to be a low-level component that is part of a larger substring + /// strategy. In that sense, it avoids trying to handle all cases and + /// instead only handles the cases that it can handle very well. + #[inline] + pub fn min_haystack_len(&self) -> usize { + // The caller doesn't need to care about AVX2's min_haystack_len + // since this implementation will automatically switch to the SSE2 + // implementation if the haystack is too short for AVX2. Therefore, the + // caller only needs to care about SSE2's min_haystack_len. + // + // This does assume that SSE2's min_haystack_len is less than or + // equal to AVX2's min_haystack_len. In practice, this is true and + // there is no way it could be false based on how this Finder is + // implemented. Namely, both SSE2 and AVX2 use the same `Pair`. If + // they used different pairs, then it's possible (although perhaps + // pathological) for SSE2's min_haystack_len to be bigger than AVX2's. + self.sse2.min_haystack_len() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn find(haystack: &[u8], needle: &[u8]) -> Option<Option<usize>> { + let f = Finder::new(needle)?; + if haystack.len() < f.min_haystack_len() { + return None; + } + Some(f.find(haystack, needle)) + } + + define_substring_forward_quickcheck!(find); + + #[test] + fn forward_substring() { + crate::tests::substring::Runner::new().fwd(find).run() + } + + #[test] + fn forward_packedpair() { + fn find( + haystack: &[u8], + needle: &[u8], + index1: u8, + index2: u8, + ) -> Option<Option<usize>> { + let pair = Pair::with_indices(needle, index1, index2)?; + let f = Finder::with_pair(needle, pair)?; + if haystack.len() < f.min_haystack_len() { + return None; + } + Some(f.find(haystack, needle)) + } + crate::tests::packedpair::Runner::new().fwd(find).run() + } + + #[test] + fn forward_packedpair_prefilter() { + fn find( + haystack: &[u8], + needle: &[u8], + index1: u8, + index2: u8, + ) -> Option<Option<usize>> { + if !cfg!(target_feature = "sse2") { + return None; + } + let pair = Pair::with_indices(needle, index1, index2)?; + let f = Finder::with_pair(needle, pair)?; + if haystack.len() < f.min_haystack_len() { + return None; + } + Some(f.find_prefilter(haystack)) + } + crate::tests::packedpair::Runner::new().fwd(find).run() + } +} diff --git a/vendor/memchr/src/arch/x86_64/memchr.rs b/vendor/memchr/src/arch/x86_64/memchr.rs new file mode 100644 index 000000000..fcb13992f --- /dev/null +++ b/vendor/memchr/src/arch/x86_64/memchr.rs @@ -0,0 +1,335 @@ +/*! +Wrapper routines for `memchr` and friends. + +These routines efficiently dispatch to the best implementation based on what +the CPU supports. +*/ + +/// Provides a way to run a memchr-like function while amortizing the cost of +/// runtime CPU feature detection. +/// +/// This works by loading a function pointer from an atomic global. Initially, +/// this global is set to a function that does CPU feature detection. For +/// example, if AVX2 is enabled, then the AVX2 implementation is used. +/// Otherwise, at least on x86_64, the SSE2 implementation is used. (And +/// in some niche cases, if SSE2 isn't available, then the architecture +/// independent fallback implementation is used.) +/// +/// After the first call to this function, the atomic global is replaced with +/// the specific AVX2, SSE2 or fallback routine chosen. Subsequent calls then +/// will directly call the chosen routine instead of needing to go through the +/// CPU feature detection branching again. +/// +/// This particular macro is specifically written to provide the implementation +/// of functions with the following signature: +/// +/// ```ignore +/// fn memchr(needle1: u8, start: *const u8, end: *const u8) -> Option<usize>; +/// ``` +/// +/// Where you can also have `memchr2` and `memchr3`, but with `needle2` and +/// `needle3`, respectively. The `start` and `end` parameters correspond to the +/// start and end of the haystack, respectively. +/// +/// We use raw pointers here instead of the more obvious `haystack: &[u8]` so +/// that the function is compatible with our lower level iterator logic that +/// operates on raw pointers. We use this macro to implement "raw" memchr +/// routines with the signature above, and then define memchr routines using +/// regular slices on top of them. +/// +/// Note that we use `#[cfg(target_feature = "sse2")]` below even though +/// it shouldn't be strictly necessary because without it, it seems to +/// cause the compiler to blow up. I guess it can't handle a function +/// pointer being created with a sse target feature? Dunno. See the +/// `build-for-x86-64-but-non-sse-target` CI job if you want to experiment with +/// this. +/// +/// # Safety +/// +/// Primarily callers must that `$fnty` is a correct function pointer type and +/// not something else. +/// +/// Callers must also ensure that `$memchrty::$memchrfind` corresponds to a +/// routine that returns a valid function pointer when a match is found. That +/// is, a pointer that is `>= start` and `< end`. +/// +/// Callers must also ensure that the `$hay_start` and `$hay_end` identifiers +/// correspond to valid pointers. +macro_rules! unsafe_ifunc { + ( + $memchrty:ident, + $memchrfind:ident, + $fnty:ty, + $retty:ty, + $hay_start:ident, + $hay_end:ident, + $($needle:ident),+ + ) => {{ + #![allow(unused_unsafe)] + + use core::sync::atomic::{AtomicPtr, Ordering}; + + type Fn = *mut (); + type RealFn = $fnty; + static FN: AtomicPtr<()> = AtomicPtr::new(detect as Fn); + + #[cfg(target_feature = "sse2")] + #[target_feature(enable = "sse2", enable = "avx2")] + unsafe fn find_avx2( + $($needle: u8),+, + $hay_start: *const u8, + $hay_end: *const u8, + ) -> $retty { + use crate::arch::x86_64::avx2::memchr::$memchrty; + $memchrty::new_unchecked($($needle),+) + .$memchrfind($hay_start, $hay_end) + } + + #[cfg(target_feature = "sse2")] + #[target_feature(enable = "sse2")] + unsafe fn find_sse2( + $($needle: u8),+, + $hay_start: *const u8, + $hay_end: *const u8, + ) -> $retty { + use crate::arch::x86_64::sse2::memchr::$memchrty; + $memchrty::new_unchecked($($needle),+) + .$memchrfind($hay_start, $hay_end) + } + + unsafe fn find_fallback( + $($needle: u8),+, + $hay_start: *const u8, + $hay_end: *const u8, + ) -> $retty { + use crate::arch::all::memchr::$memchrty; + $memchrty::new($($needle),+).$memchrfind($hay_start, $hay_end) + } + + unsafe fn detect( + $($needle: u8),+, + $hay_start: *const u8, + $hay_end: *const u8, + ) -> $retty { + let fun = { + #[cfg(not(target_feature = "sse2"))] + { + debug!( + "no sse2 feature available, using fallback for {}", + stringify!($memchrty), + ); + find_fallback as RealFn + } + #[cfg(target_feature = "sse2")] + { + use crate::arch::x86_64::{sse2, avx2}; + if avx2::memchr::$memchrty::is_available() { + debug!("chose AVX2 for {}", stringify!($memchrty)); + find_avx2 as RealFn + } else if sse2::memchr::$memchrty::is_available() { + debug!("chose SSE2 for {}", stringify!($memchrty)); + find_sse2 as RealFn + } else { + debug!("chose fallback for {}", stringify!($memchrty)); + find_fallback as RealFn + } + } + }; + FN.store(fun as Fn, Ordering::Relaxed); + // SAFETY: The only thing we need to uphold here is the + // `#[target_feature]` requirements. Since we check is_available + // above before using the corresponding implementation, we are + // guaranteed to only call code that is supported on the current + // CPU. + fun($($needle),+, $hay_start, $hay_end) + } + + // SAFETY: By virtue of the caller contract, RealFn is a function + // pointer, which is always safe to transmute with a *mut (). Also, + // since we use $memchrty::is_available, it is guaranteed to be safe + // to call $memchrty::$memchrfind. + unsafe { + let fun = FN.load(Ordering::Relaxed); + core::mem::transmute::<Fn, RealFn>(fun)( + $($needle),+, + $hay_start, + $hay_end, + ) + } + }}; +} + +// The routines below dispatch to AVX2, SSE2 or a fallback routine based on +// what's available in the current environment. The secret sauce here is that +// we only check for which one to use approximately once, and then "cache" that +// choice into a global function pointer. Subsequent invocations then just call +// the appropriate function directly. + +/// memchr, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `One::find_raw`. +#[inline(always)] +pub(crate) fn memchr_raw( + n1: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + // SAFETY: We provide a valid function pointer type. + unsafe_ifunc!( + One, + find_raw, + unsafe fn(u8, *const u8, *const u8) -> Option<*const u8>, + Option<*const u8>, + start, + end, + n1 + ) +} + +/// memrchr, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `One::rfind_raw`. +#[inline(always)] +pub(crate) fn memrchr_raw( + n1: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + // SAFETY: We provide a valid function pointer type. + unsafe_ifunc!( + One, + rfind_raw, + unsafe fn(u8, *const u8, *const u8) -> Option<*const u8>, + Option<*const u8>, + start, + end, + n1 + ) +} + +/// memchr2, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Two::find_raw`. +#[inline(always)] +pub(crate) fn memchr2_raw( + n1: u8, + n2: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + // SAFETY: We provide a valid function pointer type. + unsafe_ifunc!( + Two, + find_raw, + unsafe fn(u8, u8, *const u8, *const u8) -> Option<*const u8>, + Option<*const u8>, + start, + end, + n1, + n2 + ) +} + +/// memrchr2, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Two::rfind_raw`. +#[inline(always)] +pub(crate) fn memrchr2_raw( + n1: u8, + n2: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + // SAFETY: We provide a valid function pointer type. + unsafe_ifunc!( + Two, + rfind_raw, + unsafe fn(u8, u8, *const u8, *const u8) -> Option<*const u8>, + Option<*const u8>, + start, + end, + n1, + n2 + ) +} + +/// memchr3, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Three::find_raw`. +#[inline(always)] +pub(crate) fn memchr3_raw( + n1: u8, + n2: u8, + n3: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + // SAFETY: We provide a valid function pointer type. + unsafe_ifunc!( + Three, + find_raw, + unsafe fn(u8, u8, u8, *const u8, *const u8) -> Option<*const u8>, + Option<*const u8>, + start, + end, + n1, + n2, + n3 + ) +} + +/// memrchr3, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Three::rfind_raw`. +#[inline(always)] +pub(crate) fn memrchr3_raw( + n1: u8, + n2: u8, + n3: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + // SAFETY: We provide a valid function pointer type. + unsafe_ifunc!( + Three, + rfind_raw, + unsafe fn(u8, u8, u8, *const u8, *const u8) -> Option<*const u8>, + Option<*const u8>, + start, + end, + n1, + n2, + n3 + ) +} + +/// Count all matching bytes, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `One::count_raw`. +#[inline(always)] +pub(crate) fn count_raw(n1: u8, start: *const u8, end: *const u8) -> usize { + // SAFETY: We provide a valid function pointer type. + unsafe_ifunc!( + One, + count_raw, + unsafe fn(u8, *const u8, *const u8) -> usize, + usize, + start, + end, + n1 + ) +} diff --git a/vendor/memchr/src/arch/x86_64/mod.rs b/vendor/memchr/src/arch/x86_64/mod.rs new file mode 100644 index 000000000..5dad72182 --- /dev/null +++ b/vendor/memchr/src/arch/x86_64/mod.rs @@ -0,0 +1,8 @@ +/*! +Vector algorithms for the `x86_64` target. +*/ + +pub mod avx2; +pub mod sse2; + +pub(crate) mod memchr; diff --git a/vendor/memchr/src/arch/x86_64/sse2/memchr.rs b/vendor/memchr/src/arch/x86_64/sse2/memchr.rs new file mode 100644 index 000000000..c6f75df4a --- /dev/null +++ b/vendor/memchr/src/arch/x86_64/sse2/memchr.rs @@ -0,0 +1,1077 @@ +/*! +This module defines 128-bit vector implementations of `memchr` and friends. + +The main types in this module are [`One`], [`Two`] and [`Three`]. They are for +searching for one, two or three distinct bytes, respectively, in a haystack. +Each type also has corresponding double ended iterators. These searchers are +typically much faster than scalar routines accomplishing the same task. + +The `One` searcher also provides a [`One::count`] routine for efficiently +counting the number of times a single byte occurs in a haystack. This is +useful, for example, for counting the number of lines in a haystack. This +routine exists because it is usually faster, especially with a high match +count, then using [`One::find`] repeatedly. ([`OneIter`] specializes its +`Iterator::count` implementation to use this routine.) + +Only one, two and three bytes are supported because three bytes is about +the point where one sees diminishing returns. Beyond this point and it's +probably (but not necessarily) better to just use a simple `[bool; 256]` array +or similar. However, it depends mightily on the specific work-load and the +expected match frequency. +*/ + +use core::arch::x86_64::__m128i; + +use crate::{arch::generic::memchr as generic, ext::Pointer, vector::Vector}; + +/// Finds all occurrences of a single byte in a haystack. +#[derive(Clone, Copy, Debug)] +pub struct One(generic::One<__m128i>); + +impl One { + /// Create a new searcher that finds occurrences of the needle byte given. + /// + /// This particular searcher is specialized to use SSE2 vector instructions + /// that typically make it quite fast. + /// + /// If SSE2 is unavailable in the current environment, then `None` is + /// returned. + #[inline] + pub fn new(needle: u8) -> Option<One> { + if One::is_available() { + // SAFETY: we check that sse2 is available above. + unsafe { Some(One::new_unchecked(needle)) } + } else { + None + } + } + + /// Create a new finder specific to SSE2 vectors and routines without + /// checking that SSE2 is available. + /// + /// # Safety + /// + /// Callers must guarantee that it is safe to execute `sse2` instructions + /// in the current environment. + /// + /// Note that it is a common misconception that if one compiles for an + /// `x86_64` target, then they therefore automatically have access to SSE2 + /// instructions. While this is almost always the case, it isn't true in + /// 100% of cases. + #[target_feature(enable = "sse2")] + #[inline] + pub unsafe fn new_unchecked(needle: u8) -> One { + One(generic::One::new(needle)) + } + + /// Returns true when this implementation is available in the current + /// environment. + /// + /// When this is true, it is guaranteed that [`One::new`] will return + /// a `Some` value. Similarly, when it is false, it is guaranteed that + /// `One::new` will return a `None` value. + /// + /// Note also that for the lifetime of a single program, if this returns + /// true then it will always return true. + #[inline] + pub fn is_available() -> bool { + #[cfg(target_feature = "sse2")] + { + true + } + #[cfg(not(target_feature = "sse2"))] + { + false + } + } + + /// Return the first occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn find(&self, haystack: &[u8]) -> Option<usize> { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.find_raw(s, e) + }) + } + } + + /// Return the last occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { + // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.rfind_raw(s, e) + }) + } + } + + /// Counts all occurrences of this byte in the given haystack. + #[inline] + pub fn count(&self, haystack: &[u8]) -> usize { + // SAFETY: All of our pointers are derived directly from a borrowed + // slice, which is guaranteed to be valid. + unsafe { + let start = haystack.as_ptr(); + let end = start.add(haystack.len()); + self.count_raw(start, end) + } + } + + /// Like `find`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn find_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::fwd_byte_by_byte(start, end, |b| { + b == self.0.needle1() + }); + } + // SAFETY: Building a `One` means it's safe to call 'sse2' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + // + // Note that we could call `self.0.find_raw` directly here. But that + // means we'd have to annotate this routine with `target_feature`. + // Which is fine, because this routine is `unsafe` anyway and the + // `target_feature` obligation is met by virtue of building a `One`. + // The real problem is that a routine with a `target_feature` + // annotation generally can't be inlined into caller code unless the + // caller code has the same target feature annotations. Which is maybe + // okay for SSE2, but we do the same thing for AVX2 where caller code + // probably usually doesn't have AVX2 enabled. That means that this + // routine can be inlined which will handle some of the short-haystack + // cases above without touching the architecture specific code. + self.find_raw_impl(start, end) + } + + /// Like `rfind`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::rev_byte_by_byte(start, end, |b| { + b == self.0.needle1() + }); + } + // SAFETY: Building a `One` means it's safe to call 'sse2' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + // + // See note in forward routine above for why we don't just call + // `self.0.rfind_raw` directly here. + self.rfind_raw_impl(start, end) + } + + /// Counts all occurrences of this byte in the given haystack represented + /// by raw pointers. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `0` will always be returned. + #[inline] + pub unsafe fn count_raw(&self, start: *const u8, end: *const u8) -> usize { + if start >= end { + return 0; + } + if end.distance(start) < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::count_byte_by_byte(start, end, |b| { + b == self.0.needle1() + }); + } + // SAFETY: Building a `One` means it's safe to call 'sse2' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + self.count_raw_impl(start, end) + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`One::find_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `One`, which can only be constructed + /// when it is safe to call `sse2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn find_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.find_raw(start, end) + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`One::rfind_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `One`, which can only be constructed + /// when it is safe to call `sse2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn rfind_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.rfind_raw(start, end) + } + + /// Execute a count using SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`One::count_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `One`, which can only be constructed + /// when it is safe to call `sse2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn count_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> usize { + self.0.count_raw(start, end) + } + + /// Returns an iterator over all occurrences of the needle byte in the + /// given haystack. + /// + /// The iterator returned implements `DoubleEndedIterator`. This means it + /// can also be used to find occurrences in reverse order. + #[inline] + pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h> { + OneIter { searcher: self, it: generic::Iter::new(haystack) } + } +} + +/// An iterator over all occurrences of a single byte in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`One::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`One`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct OneIter<'a, 'h> { + searcher: &'a One, + it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for OneIter<'a, 'h> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option<usize> { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'find_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } + } + + #[inline] + fn count(self) -> usize { + self.it.count(|s, e| { + // SAFETY: We rely on our generic iterator to return valid start + // and end pointers. + unsafe { self.searcher.count_raw(s, e) } + }) + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + self.it.size_hint() + } +} + +impl<'a, 'h> DoubleEndedIterator for OneIter<'a, 'h> { + #[inline] + fn next_back(&mut self) -> Option<usize> { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'rfind_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } + } +} + +impl<'a, 'h> core::iter::FusedIterator for OneIter<'a, 'h> {} + +/// Finds all occurrences of two bytes in a haystack. +/// +/// That is, this reports matches of one of two possible bytes. For example, +/// searching for `a` or `b` in `afoobar` would report matches at offsets `0`, +/// `4` and `5`. +#[derive(Clone, Copy, Debug)] +pub struct Two(generic::Two<__m128i>); + +impl Two { + /// Create a new searcher that finds occurrences of the needle bytes given. + /// + /// This particular searcher is specialized to use SSE2 vector instructions + /// that typically make it quite fast. + /// + /// If SSE2 is unavailable in the current environment, then `None` is + /// returned. + #[inline] + pub fn new(needle1: u8, needle2: u8) -> Option<Two> { + if Two::is_available() { + // SAFETY: we check that sse2 is available above. + unsafe { Some(Two::new_unchecked(needle1, needle2)) } + } else { + None + } + } + + /// Create a new finder specific to SSE2 vectors and routines without + /// checking that SSE2 is available. + /// + /// # Safety + /// + /// Callers must guarantee that it is safe to execute `sse2` instructions + /// in the current environment. + /// + /// Note that it is a common misconception that if one compiles for an + /// `x86_64` target, then they therefore automatically have access to SSE2 + /// instructions. While this is almost always the case, it isn't true in + /// 100% of cases. + #[target_feature(enable = "sse2")] + #[inline] + pub unsafe fn new_unchecked(needle1: u8, needle2: u8) -> Two { + Two(generic::Two::new(needle1, needle2)) + } + + /// Returns true when this implementation is available in the current + /// environment. + /// + /// When this is true, it is guaranteed that [`Two::new`] will return + /// a `Some` value. Similarly, when it is false, it is guaranteed that + /// `Two::new` will return a `None` value. + /// + /// Note also that for the lifetime of a single program, if this returns + /// true then it will always return true. + #[inline] + pub fn is_available() -> bool { + #[cfg(target_feature = "sse2")] + { + true + } + #[cfg(not(target_feature = "sse2"))] + { + false + } + } + + /// Return the first occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn find(&self, haystack: &[u8]) -> Option<usize> { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.find_raw(s, e) + }) + } + } + + /// Return the last occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { + // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.rfind_raw(s, e) + }) + } + } + + /// Like `find`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn find_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::fwd_byte_by_byte(start, end, |b| { + b == self.0.needle1() || b == self.0.needle2() + }); + } + // SAFETY: Building a `Two` means it's safe to call 'sse2' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + // + // Note that we could call `self.0.find_raw` directly here. But that + // means we'd have to annotate this routine with `target_feature`. + // Which is fine, because this routine is `unsafe` anyway and the + // `target_feature` obligation is met by virtue of building a `Two`. + // The real problem is that a routine with a `target_feature` + // annotation generally can't be inlined into caller code unless the + // caller code has the same target feature annotations. Which is maybe + // okay for SSE2, but we do the same thing for AVX2 where caller code + // probably usually doesn't have AVX2 enabled. That means that this + // routine can be inlined which will handle some of the short-haystack + // cases above without touching the architecture specific code. + self.find_raw_impl(start, end) + } + + /// Like `rfind`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::rev_byte_by_byte(start, end, |b| { + b == self.0.needle1() || b == self.0.needle2() + }); + } + // SAFETY: Building a `Two` means it's safe to call 'sse2' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + // + // See note in forward routine above for why we don't just call + // `self.0.rfind_raw` directly here. + self.rfind_raw_impl(start, end) + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`Two::find_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Two`, which can only be constructed + /// when it is safe to call `sse2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn find_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.find_raw(start, end) + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`Two::rfind_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Two`, which can only be constructed + /// when it is safe to call `sse2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn rfind_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.rfind_raw(start, end) + } + + /// Returns an iterator over all occurrences of the needle bytes in the + /// given haystack. + /// + /// The iterator returned implements `DoubleEndedIterator`. This means it + /// can also be used to find occurrences in reverse order. + #[inline] + pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h> { + TwoIter { searcher: self, it: generic::Iter::new(haystack) } + } +} + +/// An iterator over all occurrences of two possible bytes in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`Two::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`Two`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct TwoIter<'a, 'h> { + searcher: &'a Two, + it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for TwoIter<'a, 'h> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option<usize> { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'find_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + self.it.size_hint() + } +} + +impl<'a, 'h> DoubleEndedIterator for TwoIter<'a, 'h> { + #[inline] + fn next_back(&mut self) -> Option<usize> { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'rfind_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } + } +} + +impl<'a, 'h> core::iter::FusedIterator for TwoIter<'a, 'h> {} + +/// Finds all occurrences of three bytes in a haystack. +/// +/// That is, this reports matches of one of three possible bytes. For example, +/// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets +/// `0`, `2`, `3`, `4` and `5`. +#[derive(Clone, Copy, Debug)] +pub struct Three(generic::Three<__m128i>); + +impl Three { + /// Create a new searcher that finds occurrences of the needle bytes given. + /// + /// This particular searcher is specialized to use SSE2 vector instructions + /// that typically make it quite fast. + /// + /// If SSE2 is unavailable in the current environment, then `None` is + /// returned. + #[inline] + pub fn new(needle1: u8, needle2: u8, needle3: u8) -> Option<Three> { + if Three::is_available() { + // SAFETY: we check that sse2 is available above. + unsafe { Some(Three::new_unchecked(needle1, needle2, needle3)) } + } else { + None + } + } + + /// Create a new finder specific to SSE2 vectors and routines without + /// checking that SSE2 is available. + /// + /// # Safety + /// + /// Callers must guarantee that it is safe to execute `sse2` instructions + /// in the current environment. + /// + /// Note that it is a common misconception that if one compiles for an + /// `x86_64` target, then they therefore automatically have access to SSE2 + /// instructions. While this is almost always the case, it isn't true in + /// 100% of cases. + #[target_feature(enable = "sse2")] + #[inline] + pub unsafe fn new_unchecked( + needle1: u8, + needle2: u8, + needle3: u8, + ) -> Three { + Three(generic::Three::new(needle1, needle2, needle3)) + } + + /// Returns true when this implementation is available in the current + /// environment. + /// + /// When this is true, it is guaranteed that [`Three::new`] will return + /// a `Some` value. Similarly, when it is false, it is guaranteed that + /// `Three::new` will return a `None` value. + /// + /// Note also that for the lifetime of a single program, if this returns + /// true then it will always return true. + #[inline] + pub fn is_available() -> bool { + #[cfg(target_feature = "sse2")] + { + true + } + #[cfg(not(target_feature = "sse2"))] + { + false + } + } + + /// Return the first occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn find(&self, haystack: &[u8]) -> Option<usize> { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.find_raw(s, e) + }) + } + } + + /// Return the last occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { + // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.rfind_raw(s, e) + }) + } + } + + /// Like `find`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn find_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::fwd_byte_by_byte(start, end, |b| { + b == self.0.needle1() + || b == self.0.needle2() + || b == self.0.needle3() + }); + } + // SAFETY: Building a `Three` means it's safe to call 'sse2' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + // + // Note that we could call `self.0.find_raw` directly here. But that + // means we'd have to annotate this routine with `target_feature`. + // Which is fine, because this routine is `unsafe` anyway and the + // `target_feature` obligation is met by virtue of building a `Three`. + // The real problem is that a routine with a `target_feature` + // annotation generally can't be inlined into caller code unless the + // caller code has the same target feature annotations. Which is maybe + // okay for SSE2, but we do the same thing for AVX2 where caller code + // probably usually doesn't have AVX2 enabled. That means that this + // routine can be inlined which will handle some of the short-haystack + // cases above without touching the architecture specific code. + self.find_raw_impl(start, end) + } + + /// Like `rfind`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::rev_byte_by_byte(start, end, |b| { + b == self.0.needle1() + || b == self.0.needle2() + || b == self.0.needle3() + }); + } + // SAFETY: Building a `Three` means it's safe to call 'sse2' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + // + // See note in forward routine above for why we don't just call + // `self.0.rfind_raw` directly here. + self.rfind_raw_impl(start, end) + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`Three::find_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Three`, which can only be constructed + /// when it is safe to call `sse2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn find_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.find_raw(start, end) + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`Three::rfind_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Three`, which can only be constructed + /// when it is safe to call `sse2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn rfind_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.rfind_raw(start, end) + } + + /// Returns an iterator over all occurrences of the needle byte in the + /// given haystack. + /// + /// The iterator returned implements `DoubleEndedIterator`. This means it + /// can also be used to find occurrences in reverse order. + #[inline] + pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h> { + ThreeIter { searcher: self, it: generic::Iter::new(haystack) } + } +} + +/// An iterator over all occurrences of three possible bytes in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`Three::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`Three`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct ThreeIter<'a, 'h> { + searcher: &'a Three, + it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for ThreeIter<'a, 'h> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option<usize> { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'find_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + self.it.size_hint() + } +} + +impl<'a, 'h> DoubleEndedIterator for ThreeIter<'a, 'h> { + #[inline] + fn next_back(&mut self) -> Option<usize> { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'rfind_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } + } +} + +impl<'a, 'h> core::iter::FusedIterator for ThreeIter<'a, 'h> {} + +#[cfg(test)] +mod tests { + use super::*; + + define_memchr_quickcheck!(super); + + #[test] + fn forward_one() { + crate::tests::memchr::Runner::new(1).forward_iter( + |haystack, needles| { + Some(One::new(needles[0])?.iter(haystack).collect()) + }, + ) + } + + #[test] + fn reverse_one() { + crate::tests::memchr::Runner::new(1).reverse_iter( + |haystack, needles| { + Some(One::new(needles[0])?.iter(haystack).rev().collect()) + }, + ) + } + + #[test] + fn count_one() { + crate::tests::memchr::Runner::new(1).count_iter(|haystack, needles| { + Some(One::new(needles[0])?.iter(haystack).count()) + }) + } + + #[test] + fn forward_two() { + crate::tests::memchr::Runner::new(2).forward_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + Some(Two::new(n1, n2)?.iter(haystack).collect()) + }, + ) + } + + #[test] + fn reverse_two() { + crate::tests::memchr::Runner::new(2).reverse_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + Some(Two::new(n1, n2)?.iter(haystack).rev().collect()) + }, + ) + } + + #[test] + fn forward_three() { + crate::tests::memchr::Runner::new(3).forward_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + let n3 = needles.get(2).copied()?; + Some(Three::new(n1, n2, n3)?.iter(haystack).collect()) + }, + ) + } + + #[test] + fn reverse_three() { + crate::tests::memchr::Runner::new(3).reverse_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + let n3 = needles.get(2).copied()?; + Some(Three::new(n1, n2, n3)?.iter(haystack).rev().collect()) + }, + ) + } +} diff --git a/vendor/memchr/src/arch/x86_64/sse2/mod.rs b/vendor/memchr/src/arch/x86_64/sse2/mod.rs new file mode 100644 index 000000000..bcb830790 --- /dev/null +++ b/vendor/memchr/src/arch/x86_64/sse2/mod.rs @@ -0,0 +1,6 @@ +/*! +Algorithms for the `x86_64` target using 128-bit vectors via SSE2. +*/ + +pub mod memchr; +pub mod packedpair; diff --git a/vendor/memchr/src/arch/x86_64/sse2/packedpair.rs b/vendor/memchr/src/arch/x86_64/sse2/packedpair.rs new file mode 100644 index 000000000..c8b5b9999 --- /dev/null +++ b/vendor/memchr/src/arch/x86_64/sse2/packedpair.rs @@ -0,0 +1,232 @@ +/*! +A 128-bit vector implementation of the "packed pair" SIMD algorithm. + +The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main +difference is that it (by default) uses a background distribution of byte +frequencies to heuristically select the pair of bytes to search for. + +[generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last +*/ + +use core::arch::x86_64::__m128i; + +use crate::arch::{all::packedpair::Pair, generic::packedpair}; + +/// A "packed pair" finder that uses 128-bit vector operations. +/// +/// This finder picks two bytes that it believes have high predictive power +/// for indicating an overall match of a needle. Depending on whether +/// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets +/// where the needle matches or could match. In the prefilter case, candidates +/// are reported whenever the [`Pair`] of bytes given matches. +#[derive(Clone, Copy, Debug)] +pub struct Finder(packedpair::Finder<__m128i>); + +impl Finder { + /// Create a new pair searcher. The searcher returned can either report + /// exact matches of `needle` or act as a prefilter and report candidate + /// positions of `needle`. + /// + /// If SSE2 is unavailable in the current environment or if a [`Pair`] + /// could not be constructed from the needle given, then `None` is + /// returned. + #[inline] + pub fn new(needle: &[u8]) -> Option<Finder> { + Finder::with_pair(needle, Pair::new(needle)?) + } + + /// Create a new "packed pair" finder using the pair of bytes given. + /// + /// This constructor permits callers to control precisely which pair of + /// bytes is used as a predicate. + /// + /// If SSE2 is unavailable in the current environment, then `None` is + /// returned. + #[inline] + pub fn with_pair(needle: &[u8], pair: Pair) -> Option<Finder> { + if Finder::is_available() { + // SAFETY: we check that sse2 is available above. We are also + // guaranteed to have needle.len() > 1 because we have a valid + // Pair. + unsafe { Some(Finder::with_pair_impl(needle, pair)) } + } else { + None + } + } + + /// Create a new `Finder` specific to SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as the safety for `packedpair::Finder::new`, and callers must also + /// ensure that SSE2 is available. + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn with_pair_impl(needle: &[u8], pair: Pair) -> Finder { + let finder = packedpair::Finder::<__m128i>::new(needle, pair); + Finder(finder) + } + + /// Returns true when this implementation is available in the current + /// environment. + /// + /// When this is true, it is guaranteed that [`Finder::with_pair`] will + /// return a `Some` value. Similarly, when it is false, it is guaranteed + /// that `Finder::with_pair` will return a `None` value. Notice that this + /// does not guarantee that [`Finder::new`] will return a `Finder`. Namely, + /// even when `Finder::is_available` is true, it is not guaranteed that a + /// valid [`Pair`] can be found from the needle given. + /// + /// Note also that for the lifetime of a single program, if this returns + /// true then it will always return true. + #[inline] + pub fn is_available() -> bool { + #[cfg(not(target_feature = "sse2"))] + { + false + } + #[cfg(target_feature = "sse2")] + { + true + } + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. + #[inline] + pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> { + // SAFETY: Building a `Finder` means it's safe to call 'sse2' routines. + unsafe { self.find_impl(haystack, needle) } + } + + /// Run this finder on the given haystack as a prefilter. + /// + /// If a candidate match is found, then an offset where the needle *could* + /// begin in the haystack is returned. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. + #[inline] + pub fn find_prefilter(&self, haystack: &[u8]) -> Option<usize> { + // SAFETY: Building a `Finder` means it's safe to call 'sse2' routines. + unsafe { self.find_prefilter_impl(haystack) } + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. + /// + /// # Safety + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Finder`, which can only be constructed + /// when it is safe to call `sse2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn find_impl( + &self, + haystack: &[u8], + needle: &[u8], + ) -> Option<usize> { + self.0.find(haystack, needle) + } + + /// Execute a prefilter search using SSE2 vectors and routines. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. + /// + /// # Safety + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Finder`, which can only be constructed + /// when it is safe to call `sse2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn find_prefilter_impl(&self, haystack: &[u8]) -> Option<usize> { + self.0.find_prefilter(haystack) + } + + /// Returns the pair of offsets (into the needle) used to check as a + /// predicate before confirming whether a needle exists at a particular + /// position. + #[inline] + pub fn pair(&self) -> &Pair { + self.0.pair() + } + + /// Returns the minimum haystack length that this `Finder` can search. + /// + /// Using a haystack with length smaller than this in a search will result + /// in a panic. The reason for this restriction is that this finder is + /// meant to be a low-level component that is part of a larger substring + /// strategy. In that sense, it avoids trying to handle all cases and + /// instead only handles the cases that it can handle very well. + #[inline] + pub fn min_haystack_len(&self) -> usize { + self.0.min_haystack_len() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn find(haystack: &[u8], needle: &[u8]) -> Option<Option<usize>> { + let f = Finder::new(needle)?; + if haystack.len() < f.min_haystack_len() { + return None; + } + Some(f.find(haystack, needle)) + } + + define_substring_forward_quickcheck!(find); + + #[test] + fn forward_substring() { + crate::tests::substring::Runner::new().fwd(find).run() + } + + #[test] + fn forward_packedpair() { + fn find( + haystack: &[u8], + needle: &[u8], + index1: u8, + index2: u8, + ) -> Option<Option<usize>> { + let pair = Pair::with_indices(needle, index1, index2)?; + let f = Finder::with_pair(needle, pair)?; + if haystack.len() < f.min_haystack_len() { + return None; + } + Some(f.find(haystack, needle)) + } + crate::tests::packedpair::Runner::new().fwd(find).run() + } + + #[test] + fn forward_packedpair_prefilter() { + fn find( + haystack: &[u8], + needle: &[u8], + index1: u8, + index2: u8, + ) -> Option<Option<usize>> { + let pair = Pair::with_indices(needle, index1, index2)?; + let f = Finder::with_pair(needle, pair)?; + if haystack.len() < f.min_haystack_len() { + return None; + } + Some(f.find_prefilter(haystack)) + } + crate::tests::packedpair::Runner::new().fwd(find).run() + } +} |