Diffstat (limited to 'vendor/memchr/src/memchr')
-rw-r--r--  vendor/memchr/src/memchr/c.rs          44
-rw-r--r--  vendor/memchr/src/memchr/fallback.rs  329
-rw-r--r--  vendor/memchr/src/memchr/iter.rs      173
-rw-r--r--  vendor/memchr/src/memchr/mod.rs       410
-rw-r--r--  vendor/memchr/src/memchr/naive.rs      25
-rw-r--r--  vendor/memchr/src/memchr/x86/avx.rs   755
-rw-r--r--  vendor/memchr/src/memchr/x86/mod.rs   148
-rw-r--r--  vendor/memchr/src/memchr/x86/sse2.rs  791
-rw-r--r--  vendor/memchr/src/memchr/x86/sse42.rs  72
9 files changed, 0 insertions, 2747 deletions
diff --git a/vendor/memchr/src/memchr/c.rs b/vendor/memchr/src/memchr/c.rs
deleted file mode 100644
index 608aabc98..000000000
--- a/vendor/memchr/src/memchr/c.rs
+++ /dev/null
@@ -1,44 +0,0 @@
-// This module defines safe wrappers around memchr (POSIX) and memrchr (GNU
-// extension).
-
-#![allow(dead_code)]
-
-use libc::{c_int, c_void, size_t};
-
-pub fn memchr(needle: u8, haystack: &[u8]) -> Option<usize> {
- // SAFETY: This is safe to call since all pointers are valid.
- let p = unsafe {
- libc::memchr(
- haystack.as_ptr() as *const c_void,
- needle as c_int,
- haystack.len() as size_t,
- )
- };
- if p.is_null() {
- None
- } else {
- Some(p as usize - (haystack.as_ptr() as usize))
- }
-}
-
-// memrchr is a GNU extension. We know it's available on Linux at least.
-#[cfg(target_os = "linux")]
-pub fn memrchr(needle: u8, haystack: &[u8]) -> Option<usize> {
- // GNU's memrchr() will - unlike memchr() - error if haystack is empty.
- if haystack.is_empty() {
- return None;
- }
- // SAFETY: This is safe to call since all pointers are valid.
- let p = unsafe {
- libc::memrchr(
- haystack.as_ptr() as *const c_void,
- needle as c_int,
- haystack.len() as size_t,
- )
- };
- if p.is_null() {
- None
- } else {
- Some(p as usize - (haystack.as_ptr() as usize))
- }
-}
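
A standalone illustration (not part of the deleted file) of the pointer arithmetic the c.rs wrapper uses to turn the pointer returned by libc::memchr into a slice index; safe iteration stands in for the libc call here.

fn index_from_ptr(haystack: &[u8], found: *const u8) -> usize {
    // Same arithmetic as `p as usize - haystack.as_ptr() as usize` in the wrapper above.
    found as usize - haystack.as_ptr() as usize
}

fn main() {
    let haystack = b"the quick brown fox";
    // Locate b'k' with safe code, then recover its index from the raw pointer.
    let found = haystack.iter().find(|&&b| b == b'k').map(|b| b as *const u8);
    assert_eq!(found.map(|p| index_from_ptr(haystack, p)), Some(8));
}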
diff --git a/vendor/memchr/src/memchr/fallback.rs b/vendor/memchr/src/memchr/fallback.rs
deleted file mode 100644
index b01f224fa..000000000
--- a/vendor/memchr/src/memchr/fallback.rs
+++ /dev/null
@@ -1,329 +0,0 @@
-// This module defines pure Rust platform independent implementations of all
-// the memchr routines. We do our best to make them fast. Some of them may even
-// get auto-vectorized.
-
-use core::{cmp, usize};
-
-#[cfg(target_pointer_width = "16")]
-const USIZE_BYTES: usize = 2;
-
-#[cfg(target_pointer_width = "32")]
-const USIZE_BYTES: usize = 4;
-
-#[cfg(target_pointer_width = "64")]
-const USIZE_BYTES: usize = 8;
-
-// The number of bytes to loop at in one iteration of memchr/memrchr.
-const LOOP_SIZE: usize = 2 * USIZE_BYTES;
-
-/// Return `true` if `x` contains any zero byte.
-///
-/// From *Matters Computational*, J. Arndt
-///
-/// "The idea is to subtract one from each of the bytes and then look for
-/// bytes where the borrow propagated all the way to the most significant
-/// bit."
-#[inline(always)]
-fn contains_zero_byte(x: usize) -> bool {
- const LO_U64: u64 = 0x0101010101010101;
- const HI_U64: u64 = 0x8080808080808080;
-
- const LO_USIZE: usize = LO_U64 as usize;
- const HI_USIZE: usize = HI_U64 as usize;
-
- x.wrapping_sub(LO_USIZE) & !x & HI_USIZE != 0
-}
-
-/// Repeat the given byte into a word size number. That is, every 8 bits
-/// is equivalent to the given byte. For example, if `b` is `\x4E` or
-/// `01001110` in binary, then the returned value on a 32-bit system would be:
-/// `01001110_01001110_01001110_01001110`.
-#[inline(always)]
-fn repeat_byte(b: u8) -> usize {
- (b as usize) * (usize::MAX / 255)
-}
-
-pub fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
- let vn1 = repeat_byte(n1);
- let confirm = |byte| byte == n1;
- let loop_size = cmp::min(LOOP_SIZE, haystack.len());
- let align = USIZE_BYTES - 1;
- let start_ptr = haystack.as_ptr();
- let mut ptr = start_ptr;
-
- unsafe {
- let end_ptr = start_ptr.add(haystack.len());
- if haystack.len() < USIZE_BYTES {
- return forward_search(start_ptr, end_ptr, ptr, confirm);
- }
-
- let chunk = (ptr as *const usize).read_unaligned();
- if contains_zero_byte(chunk ^ vn1) {
- return forward_search(start_ptr, end_ptr, ptr, confirm);
- }
-
- ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & align));
- debug_assert!(ptr > start_ptr);
- debug_assert!(end_ptr.sub(USIZE_BYTES) >= start_ptr);
- while loop_size == LOOP_SIZE && ptr <= end_ptr.sub(loop_size) {
- debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
-
- let a = *(ptr as *const usize);
- let b = *(ptr.add(USIZE_BYTES) as *const usize);
- let eqa = contains_zero_byte(a ^ vn1);
- let eqb = contains_zero_byte(b ^ vn1);
- if eqa || eqb {
- break;
- }
- ptr = ptr.add(LOOP_SIZE);
- }
- forward_search(start_ptr, end_ptr, ptr, confirm)
- }
-}
-
-/// Like `memchr`, but searches for two bytes instead of one.
-pub fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
- let vn1 = repeat_byte(n1);
- let vn2 = repeat_byte(n2);
- let confirm = |byte| byte == n1 || byte == n2;
- let align = USIZE_BYTES - 1;
- let start_ptr = haystack.as_ptr();
- let mut ptr = start_ptr;
-
- unsafe {
- let end_ptr = start_ptr.add(haystack.len());
- if haystack.len() < USIZE_BYTES {
- return forward_search(start_ptr, end_ptr, ptr, confirm);
- }
-
- let chunk = (ptr as *const usize).read_unaligned();
- let eq1 = contains_zero_byte(chunk ^ vn1);
- let eq2 = contains_zero_byte(chunk ^ vn2);
- if eq1 || eq2 {
- return forward_search(start_ptr, end_ptr, ptr, confirm);
- }
-
- ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & align));
- debug_assert!(ptr > start_ptr);
- debug_assert!(end_ptr.sub(USIZE_BYTES) >= start_ptr);
- while ptr <= end_ptr.sub(USIZE_BYTES) {
- debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
-
- let chunk = *(ptr as *const usize);
- let eq1 = contains_zero_byte(chunk ^ vn1);
- let eq2 = contains_zero_byte(chunk ^ vn2);
- if eq1 || eq2 {
- break;
- }
- ptr = ptr.add(USIZE_BYTES);
- }
- forward_search(start_ptr, end_ptr, ptr, confirm)
- }
-}
-
-/// Like `memchr`, but searches for three bytes instead of one.
-pub fn memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
- let vn1 = repeat_byte(n1);
- let vn2 = repeat_byte(n2);
- let vn3 = repeat_byte(n3);
- let confirm = |byte| byte == n1 || byte == n2 || byte == n3;
- let align = USIZE_BYTES - 1;
- let start_ptr = haystack.as_ptr();
- let mut ptr = start_ptr;
-
- unsafe {
- let end_ptr = start_ptr.add(haystack.len());
- if haystack.len() < USIZE_BYTES {
- return forward_search(start_ptr, end_ptr, ptr, confirm);
- }
-
- let chunk = (ptr as *const usize).read_unaligned();
- let eq1 = contains_zero_byte(chunk ^ vn1);
- let eq2 = contains_zero_byte(chunk ^ vn2);
- let eq3 = contains_zero_byte(chunk ^ vn3);
- if eq1 || eq2 || eq3 {
- return forward_search(start_ptr, end_ptr, ptr, confirm);
- }
-
- ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & align));
- debug_assert!(ptr > start_ptr);
- debug_assert!(end_ptr.sub(USIZE_BYTES) >= start_ptr);
- while ptr <= end_ptr.sub(USIZE_BYTES) {
- debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
-
- let chunk = *(ptr as *const usize);
- let eq1 = contains_zero_byte(chunk ^ vn1);
- let eq2 = contains_zero_byte(chunk ^ vn2);
- let eq3 = contains_zero_byte(chunk ^ vn3);
- if eq1 || eq2 || eq3 {
- break;
- }
- ptr = ptr.add(USIZE_BYTES);
- }
- forward_search(start_ptr, end_ptr, ptr, confirm)
- }
-}
-
-/// Return the last index matching the byte `n1` in `haystack`.
-pub fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
- let vn1 = repeat_byte(n1);
- let confirm = |byte| byte == n1;
- let loop_size = cmp::min(LOOP_SIZE, haystack.len());
- let align = USIZE_BYTES - 1;
- let start_ptr = haystack.as_ptr();
-
- unsafe {
- let end_ptr = start_ptr.add(haystack.len());
- let mut ptr = end_ptr;
- if haystack.len() < USIZE_BYTES {
- return reverse_search(start_ptr, end_ptr, ptr, confirm);
- }
-
- let chunk = (ptr.sub(USIZE_BYTES) as *const usize).read_unaligned();
- if contains_zero_byte(chunk ^ vn1) {
- return reverse_search(start_ptr, end_ptr, ptr, confirm);
- }
-
- ptr = (end_ptr as usize & !align) as *const u8;
- debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
- while loop_size == LOOP_SIZE && ptr >= start_ptr.add(loop_size) {
- debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
-
- let a = *(ptr.sub(2 * USIZE_BYTES) as *const usize);
- let b = *(ptr.sub(1 * USIZE_BYTES) as *const usize);
- let eqa = contains_zero_byte(a ^ vn1);
- let eqb = contains_zero_byte(b ^ vn1);
- if eqa || eqb {
- break;
- }
- ptr = ptr.sub(loop_size);
- }
- reverse_search(start_ptr, end_ptr, ptr, confirm)
- }
-}
-
-/// Like `memrchr`, but searches for two bytes instead of one.
-pub fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
- let vn1 = repeat_byte(n1);
- let vn2 = repeat_byte(n2);
- let confirm = |byte| byte == n1 || byte == n2;
- let align = USIZE_BYTES - 1;
- let start_ptr = haystack.as_ptr();
-
- unsafe {
- let end_ptr = start_ptr.add(haystack.len());
- let mut ptr = end_ptr;
- if haystack.len() < USIZE_BYTES {
- return reverse_search(start_ptr, end_ptr, ptr, confirm);
- }
-
- let chunk = (ptr.sub(USIZE_BYTES) as *const usize).read_unaligned();
- let eq1 = contains_zero_byte(chunk ^ vn1);
- let eq2 = contains_zero_byte(chunk ^ vn2);
- if eq1 || eq2 {
- return reverse_search(start_ptr, end_ptr, ptr, confirm);
- }
-
- ptr = (end_ptr as usize & !align) as *const u8;
- debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
- while ptr >= start_ptr.add(USIZE_BYTES) {
- debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
-
- let chunk = *(ptr.sub(USIZE_BYTES) as *const usize);
- let eq1 = contains_zero_byte(chunk ^ vn1);
- let eq2 = contains_zero_byte(chunk ^ vn2);
- if eq1 || eq2 {
- break;
- }
- ptr = ptr.sub(USIZE_BYTES);
- }
- reverse_search(start_ptr, end_ptr, ptr, confirm)
- }
-}
-
-/// Like `memrchr`, but searches for three bytes instead of one.
-pub fn memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
- let vn1 = repeat_byte(n1);
- let vn2 = repeat_byte(n2);
- let vn3 = repeat_byte(n3);
- let confirm = |byte| byte == n1 || byte == n2 || byte == n3;
- let align = USIZE_BYTES - 1;
- let start_ptr = haystack.as_ptr();
-
- unsafe {
- let end_ptr = start_ptr.add(haystack.len());
- let mut ptr = end_ptr;
- if haystack.len() < USIZE_BYTES {
- return reverse_search(start_ptr, end_ptr, ptr, confirm);
- }
-
- let chunk = (ptr.sub(USIZE_BYTES) as *const usize).read_unaligned();
- let eq1 = contains_zero_byte(chunk ^ vn1);
- let eq2 = contains_zero_byte(chunk ^ vn2);
- let eq3 = contains_zero_byte(chunk ^ vn3);
- if eq1 || eq2 || eq3 {
- return reverse_search(start_ptr, end_ptr, ptr, confirm);
- }
-
- ptr = (end_ptr as usize & !align) as *const u8;
- debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
- while ptr >= start_ptr.add(USIZE_BYTES) {
- debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
-
- let chunk = *(ptr.sub(USIZE_BYTES) as *const usize);
- let eq1 = contains_zero_byte(chunk ^ vn1);
- let eq2 = contains_zero_byte(chunk ^ vn2);
- let eq3 = contains_zero_byte(chunk ^ vn3);
- if eq1 || eq2 || eq3 {
- break;
- }
- ptr = ptr.sub(USIZE_BYTES);
- }
- reverse_search(start_ptr, end_ptr, ptr, confirm)
- }
-}
-
-#[inline(always)]
-unsafe fn forward_search<F: Fn(u8) -> bool>(
- start_ptr: *const u8,
- end_ptr: *const u8,
- mut ptr: *const u8,
- confirm: F,
-) -> Option<usize> {
- debug_assert!(start_ptr <= ptr);
- debug_assert!(ptr <= end_ptr);
-
- while ptr < end_ptr {
- if confirm(*ptr) {
- return Some(sub(ptr, start_ptr));
- }
- ptr = ptr.offset(1);
- }
- None
-}
-
-#[inline(always)]
-unsafe fn reverse_search<F: Fn(u8) -> bool>(
- start_ptr: *const u8,
- end_ptr: *const u8,
- mut ptr: *const u8,
- confirm: F,
-) -> Option<usize> {
- debug_assert!(start_ptr <= ptr);
- debug_assert!(ptr <= end_ptr);
-
- while ptr > start_ptr {
- ptr = ptr.offset(-1);
- if confirm(*ptr) {
- return Some(sub(ptr, start_ptr));
- }
- }
- None
-}
-
-/// Subtract `b` from `a` and return the difference. `a` should be greater than
-/// or equal to `b`.
-fn sub(a: *const u8, b: *const u8) -> usize {
- debug_assert!(a >= b);
- (a as usize) - (b as usize)
-}
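
A self-contained sketch of the SWAR technique fallback.rs builds on: broadcast the needle into a word, XOR it against a chunk of the haystack, and apply the borrow trick quoted above to test for a resulting zero byte. This is illustrative code, not the crate's; u64 is used so it behaves the same on 32-bit and 64-bit targets.

fn repeat_byte(b: u8) -> u64 {
    (b as u64) * (u64::MAX / 255)
}

fn contains_zero_byte(x: u64) -> bool {
    const LO: u64 = 0x0101010101010101;
    const HI: u64 = 0x8080808080808080;
    // Subtract one from every byte; the borrow reaches the high bit only for 0x00.
    x.wrapping_sub(LO) & !x & HI != 0
}

fn main() {
    let vn = repeat_byte(b'q');
    // "the quic" contains the needle, so XORing produces a zero byte.
    assert!(contains_zero_byte(u64::from_le_bytes(*b"the quic") ^ vn));
    // "brown fo" does not, so the test reports no match.
    assert!(!contains_zero_byte(u64::from_le_bytes(*b"brown fo") ^ vn));
}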
diff --git a/vendor/memchr/src/memchr/iter.rs b/vendor/memchr/src/memchr/iter.rs
deleted file mode 100644
index 16e203f63..000000000
--- a/vendor/memchr/src/memchr/iter.rs
+++ /dev/null
@@ -1,173 +0,0 @@
-use crate::{memchr, memchr2, memchr3, memrchr, memrchr2, memrchr3};
-
-macro_rules! iter_next {
- // Common code for the memchr iterators:
- // update haystack and position and produce the index
- //
- // self: &mut Self where Self is the iterator
- // search_result: Option<usize> which is the result of the corresponding
- // memchr function.
- //
- // Returns Option<usize> (the next iterator element)
- ($self_:expr, $search_result:expr) => {
- $search_result.map(move |index| {
- // split and take the remaining back half
- $self_.haystack = $self_.haystack.split_at(index + 1).1;
- let found_position = $self_.position + index;
- $self_.position = found_position + 1;
- found_position
- })
- };
-}
-
-macro_rules! iter_next_back {
- ($self_:expr, $search_result:expr) => {
- $search_result.map(move |index| {
- // split and take the remaining front half
- $self_.haystack = $self_.haystack.split_at(index).0;
- $self_.position + index
- })
- };
-}
-
-/// An iterator for `memchr`.
-pub struct Memchr<'a> {
- needle: u8,
- // The haystack to iterate over
- haystack: &'a [u8],
- // The index
- position: usize,
-}
-
-impl<'a> Memchr<'a> {
- /// Creates a new iterator that yields all positions of needle in haystack.
- #[inline]
- pub fn new(needle: u8, haystack: &[u8]) -> Memchr<'_> {
- Memchr { needle: needle, haystack: haystack, position: 0 }
- }
-}
-
-impl<'a> Iterator for Memchr<'a> {
- type Item = usize;
-
- #[inline]
- fn next(&mut self) -> Option<usize> {
- iter_next!(self, memchr(self.needle, self.haystack))
- }
-
- #[inline]
- fn size_hint(&self) -> (usize, Option<usize>) {
- (0, Some(self.haystack.len()))
- }
-}
-
-impl<'a> DoubleEndedIterator for Memchr<'a> {
- #[inline]
- fn next_back(&mut self) -> Option<Self::Item> {
- iter_next_back!(self, memrchr(self.needle, self.haystack))
- }
-}
-
-/// An iterator for `memchr2`.
-pub struct Memchr2<'a> {
- needle1: u8,
- needle2: u8,
- // The haystack to iterate over
- haystack: &'a [u8],
- // The index
- position: usize,
-}
-
-impl<'a> Memchr2<'a> {
- /// Creates a new iterator that yields all positions of either needle in the haystack.
- #[inline]
- pub fn new(needle1: u8, needle2: u8, haystack: &[u8]) -> Memchr2<'_> {
- Memchr2 {
- needle1: needle1,
- needle2: needle2,
- haystack: haystack,
- position: 0,
- }
- }
-}
-
-impl<'a> Iterator for Memchr2<'a> {
- type Item = usize;
-
- #[inline]
- fn next(&mut self) -> Option<usize> {
- iter_next!(self, memchr2(self.needle1, self.needle2, self.haystack))
- }
-
- #[inline]
- fn size_hint(&self) -> (usize, Option<usize>) {
- (0, Some(self.haystack.len()))
- }
-}
-
-impl<'a> DoubleEndedIterator for Memchr2<'a> {
- #[inline]
- fn next_back(&mut self) -> Option<Self::Item> {
- iter_next_back!(
- self,
- memrchr2(self.needle1, self.needle2, self.haystack)
- )
- }
-}
-
-/// An iterator for `memchr3`.
-pub struct Memchr3<'a> {
- needle1: u8,
- needle2: u8,
- needle3: u8,
- // The haystack to iterate over
- haystack: &'a [u8],
- // The index
- position: usize,
-}
-
-impl<'a> Memchr3<'a> {
- /// Creates a new iterator that yields all positions of any of the three needles in the haystack.
- #[inline]
- pub fn new(
- needle1: u8,
- needle2: u8,
- needle3: u8,
- haystack: &[u8],
- ) -> Memchr3<'_> {
- Memchr3 {
- needle1: needle1,
- needle2: needle2,
- needle3: needle3,
- haystack: haystack,
- position: 0,
- }
- }
-}
-
-impl<'a> Iterator for Memchr3<'a> {
- type Item = usize;
-
- #[inline]
- fn next(&mut self) -> Option<usize> {
- iter_next!(
- self,
- memchr3(self.needle1, self.needle2, self.needle3, self.haystack)
- )
- }
-
- #[inline]
- fn size_hint(&self) -> (usize, Option<usize>) {
- (0, Some(self.haystack.len()))
- }
-}
-
-impl<'a> DoubleEndedIterator for Memchr3<'a> {
- #[inline]
- fn next_back(&mut self) -> Option<Self::Item> {
- iter_next_back!(
- self,
- memrchr3(self.needle1, self.needle2, self.needle3, self.haystack)
- )
- }
-}
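
A hedged sketch of the bookkeeping the iter.rs macros above perform: after each hit the haystack is split just past the match and the local index is added to a running absolute position. The type and names below are illustrative only, and std's position() stands in for memchr().

struct ByteFinder<'a> {
    needle: u8,
    haystack: &'a [u8],
    position: usize,
}

impl<'a> Iterator for ByteFinder<'a> {
    type Item = usize;

    fn next(&mut self) -> Option<usize> {
        let index = self.haystack.iter().position(|&b| b == self.needle)?;
        // Split and keep the back half, exactly as iter_next! does.
        self.haystack = &self.haystack[index + 1..];
        let found = self.position + index;
        self.position = found + 1;
        Some(found)
    }
}

fn main() {
    let it = ByteFinder { needle: b'o', haystack: b"the quick brown fox", position: 0 };
    assert_eq!(it.collect::<Vec<usize>>(), vec![12, 17]);
}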
diff --git a/vendor/memchr/src/memchr/mod.rs b/vendor/memchr/src/memchr/mod.rs
deleted file mode 100644
index 09ce6ef3c..000000000
--- a/vendor/memchr/src/memchr/mod.rs
+++ /dev/null
@@ -1,410 +0,0 @@
-use core::iter::Rev;
-
-pub use self::iter::{Memchr, Memchr2, Memchr3};
-
-// N.B. If you're looking for the cfg knobs for libc, see build.rs.
-#[cfg(memchr_libc)]
-mod c;
-#[allow(dead_code)]
-pub mod fallback;
-mod iter;
-pub mod naive;
-#[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))]
-mod x86;
-
-/// An iterator over all occurrences of the needle in a haystack.
-#[inline]
-pub fn memchr_iter(needle: u8, haystack: &[u8]) -> Memchr<'_> {
- Memchr::new(needle, haystack)
-}
-
-/// An iterator over all occurrences of the needles in a haystack.
-#[inline]
-pub fn memchr2_iter(needle1: u8, needle2: u8, haystack: &[u8]) -> Memchr2<'_> {
- Memchr2::new(needle1, needle2, haystack)
-}
-
-/// An iterator over all occurrences of the needles in a haystack.
-#[inline]
-pub fn memchr3_iter(
- needle1: u8,
- needle2: u8,
- needle3: u8,
- haystack: &[u8],
-) -> Memchr3<'_> {
- Memchr3::new(needle1, needle2, needle3, haystack)
-}
-
-/// An iterator over all occurrences of the needle in a haystack, in reverse.
-#[inline]
-pub fn memrchr_iter(needle: u8, haystack: &[u8]) -> Rev<Memchr<'_>> {
- Memchr::new(needle, haystack).rev()
-}
-
-/// An iterator over all occurrences of the needles in a haystack, in reverse.
-#[inline]
-pub fn memrchr2_iter(
- needle1: u8,
- needle2: u8,
- haystack: &[u8],
-) -> Rev<Memchr2<'_>> {
- Memchr2::new(needle1, needle2, haystack).rev()
-}
-
-/// An iterator over all occurrences of the needles in a haystack, in reverse.
-#[inline]
-pub fn memrchr3_iter(
- needle1: u8,
- needle2: u8,
- needle3: u8,
- haystack: &[u8],
-) -> Rev<Memchr3<'_>> {
- Memchr3::new(needle1, needle2, needle3, haystack).rev()
-}
-
-/// Search for the first occurrence of a byte in a slice.
-///
-/// This returns the index corresponding to the first occurrence of `needle` in
-/// `haystack`, or `None` if one is not found. If an index is returned, it is
-/// guaranteed to be less than `usize::MAX`.
-///
-/// While this is operationally the same as something like
-/// `haystack.iter().position(|&b| b == needle)`, `memchr` will use a highly
-/// optimized routine that can be up to an order of magnitude faster in some
-/// cases.
-///
-/// # Example
-///
-/// This shows how to find the first position of a byte in a byte string.
-///
-/// ```
-/// use memchr::memchr;
-///
-/// let haystack = b"the quick brown fox";
-/// assert_eq!(memchr(b'k', haystack), Some(8));
-/// ```
-#[inline]
-pub fn memchr(needle: u8, haystack: &[u8]) -> Option<usize> {
- #[cfg(miri)]
- #[inline(always)]
- fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
- naive::memchr(n1, haystack)
- }
-
- #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))]
- #[inline(always)]
- fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
- x86::memchr(n1, haystack)
- }
-
- #[cfg(all(
- memchr_libc,
- not(all(target_arch = "x86_64", memchr_runtime_simd)),
- not(miri),
- ))]
- #[inline(always)]
- fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
- c::memchr(n1, haystack)
- }
-
- #[cfg(all(
- not(memchr_libc),
- not(all(target_arch = "x86_64", memchr_runtime_simd)),
- not(miri),
- ))]
- #[inline(always)]
- fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
- fallback::memchr(n1, haystack)
- }
-
- if haystack.is_empty() {
- None
- } else {
- imp(needle, haystack)
- }
-}
-
-/// Like `memchr`, but searches for either of two bytes instead of just one.
-///
-/// This returns the index corresponding to the first occurrence of `needle1`
-/// or the first occurrence of `needle2` in `haystack` (whichever occurs
-/// earlier), or `None` if neither one is found. If an index is returned, it is
-/// guaranteed to be less than `usize::MAX`.
-///
-/// While this is operationally the same as something like
-/// `haystack.iter().position(|&b| b == needle1 || b == needle2)`, `memchr2`
-/// will use a highly optimized routine that can be up to an order of magnitude
-/// faster in some cases.
-///
-/// # Example
-///
-/// This shows how to find the first position of either of two bytes in a byte
-/// string.
-///
-/// ```
-/// use memchr::memchr2;
-///
-/// let haystack = b"the quick brown fox";
-/// assert_eq!(memchr2(b'k', b'q', haystack), Some(4));
-/// ```
-#[inline]
-pub fn memchr2(needle1: u8, needle2: u8, haystack: &[u8]) -> Option<usize> {
- #[cfg(miri)]
- #[inline(always)]
- fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
- naive::memchr2(n1, n2, haystack)
- }
-
- #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))]
- #[inline(always)]
- fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
- x86::memchr2(n1, n2, haystack)
- }
-
- #[cfg(all(
- not(all(target_arch = "x86_64", memchr_runtime_simd)),
- not(miri),
- ))]
- #[inline(always)]
- fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
- fallback::memchr2(n1, n2, haystack)
- }
-
- if haystack.is_empty() {
- None
- } else {
- imp(needle1, needle2, haystack)
- }
-}
-
-/// Like `memchr`, but searches for any of three bytes instead of just one.
-///
-/// This returns the index corresponding to the first occurrence of `needle1`,
-/// the first occurrence of `needle2`, or the first occurrence of `needle3` in
-/// `haystack` (whichever occurs earliest), or `None` if none are found. If an
-/// index is returned, it is guaranteed to be less than `usize::MAX`.
-///
-/// While this is operationally the same as something like
-/// `haystack.iter().position(|&b| b == needle1 || b == needle2 ||
-/// b == needle3)`, `memchr3` will use a highly optimized routine that can be
-/// up to an order of magnitude faster in some cases.
-///
-/// # Example
-///
-/// This shows how to find the first position of any of three bytes in a byte
-/// string.
-///
-/// ```
-/// use memchr::memchr3;
-///
-/// let haystack = b"the quick brown fox";
-/// assert_eq!(memchr3(b'k', b'q', b'e', haystack), Some(2));
-/// ```
-#[inline]
-pub fn memchr3(
- needle1: u8,
- needle2: u8,
- needle3: u8,
- haystack: &[u8],
-) -> Option<usize> {
- #[cfg(miri)]
- #[inline(always)]
- fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
- naive::memchr3(n1, n2, n3, haystack)
- }
-
- #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))]
- #[inline(always)]
- fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
- x86::memchr3(n1, n2, n3, haystack)
- }
-
- #[cfg(all(
- not(all(target_arch = "x86_64", memchr_runtime_simd)),
- not(miri),
- ))]
- #[inline(always)]
- fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
- fallback::memchr3(n1, n2, n3, haystack)
- }
-
- if haystack.is_empty() {
- None
- } else {
- imp(needle1, needle2, needle3, haystack)
- }
-}
-
-/// Search for the last occurrence of a byte in a slice.
-///
-/// This returns the index corresponding to the last occurrence of `needle` in
-/// `haystack`, or `None` if one is not found. If an index is returned, it is
-/// guaranteed to be less than `usize::MAX`.
-///
-/// While this is operationally the same as something like
-/// `haystack.iter().rposition(|&b| b == needle)`, `memrchr` will use a highly
-/// optimized routine that can be up to an order of magnitude faster in some
-/// cases.
-///
-/// # Example
-///
-/// This shows how to find the last position of a byte in a byte string.
-///
-/// ```
-/// use memchr::memrchr;
-///
-/// let haystack = b"the quick brown fox";
-/// assert_eq!(memrchr(b'o', haystack), Some(17));
-/// ```
-#[inline]
-pub fn memrchr(needle: u8, haystack: &[u8]) -> Option<usize> {
- #[cfg(miri)]
- #[inline(always)]
- fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
- naive::memrchr(n1, haystack)
- }
-
- #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))]
- #[inline(always)]
- fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
- x86::memrchr(n1, haystack)
- }
-
- #[cfg(all(
- memchr_libc,
- target_os = "linux",
- not(all(target_arch = "x86_64", memchr_runtime_simd)),
- not(miri)
- ))]
- #[inline(always)]
- fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
- c::memrchr(n1, haystack)
- }
-
- #[cfg(all(
- not(all(memchr_libc, target_os = "linux")),
- not(all(target_arch = "x86_64", memchr_runtime_simd)),
- not(miri),
- ))]
- #[inline(always)]
- fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
- fallback::memrchr(n1, haystack)
- }
-
- if haystack.is_empty() {
- None
- } else {
- imp(needle, haystack)
- }
-}
-
-/// Like `memrchr`, but searches for either of two bytes instead of just one.
-///
-/// This returns the index corresponding to the last occurrence of `needle1` or
-/// the last occurrence of `needle2` in `haystack` (whichever occurs later), or
-/// `None` if neither one is found. If an index is returned, it is guaranteed
-/// to be less than `usize::MAX`.
-///
-/// While this is operationally the same as something like
-/// `haystack.iter().rposition(|&b| b == needle1 || b == needle2)`, `memrchr2`
-/// will use a highly optimized routine that can be up to an order of magnitude
-/// faster in some cases.
-///
-/// # Example
-///
-/// This shows how to find the last position of either of two bytes in a byte
-/// string.
-///
-/// ```
-/// use memchr::memrchr2;
-///
-/// let haystack = b"the quick brown fox";
-/// assert_eq!(memrchr2(b'k', b'q', haystack), Some(8));
-/// ```
-#[inline]
-pub fn memrchr2(needle1: u8, needle2: u8, haystack: &[u8]) -> Option<usize> {
- #[cfg(miri)]
- #[inline(always)]
- fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
- naive::memrchr2(n1, n2, haystack)
- }
-
- #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))]
- #[inline(always)]
- fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
- x86::memrchr2(n1, n2, haystack)
- }
-
- #[cfg(all(
- not(all(target_arch = "x86_64", memchr_runtime_simd)),
- not(miri),
- ))]
- #[inline(always)]
- fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
- fallback::memrchr2(n1, n2, haystack)
- }
-
- if haystack.is_empty() {
- None
- } else {
- imp(needle1, needle2, haystack)
- }
-}
-
-/// Like `memrchr`, but searches for any of three bytes instead of just one.
-///
-/// This returns the index corresponding to the last occurrence of `needle1`,
-/// the last occurrence of `needle2`, or the last occurrence of `needle3` in
-/// `haystack` (whichever occurs latest), or `None` if none are found. If an
-/// index is returned, it is guaranteed to be less than `usize::MAX`.
-///
-/// While this is operationally the same as something like
-/// `haystack.iter().rposition(|&b| b == needle1 || b == needle2 ||
-/// b == needle3)`, `memrchr3` will use a highly optimized routine that can be
-/// up to an order of magnitude faster in some cases.
-///
-/// # Example
-///
-/// This shows how to find the last position of any of three bytes in a byte
-/// string.
-///
-/// ```
-/// use memchr::memrchr3;
-///
-/// let haystack = b"the quick brown fox";
-/// assert_eq!(memrchr3(b'k', b'q', b'e', haystack), Some(8));
-/// ```
-#[inline]
-pub fn memrchr3(
- needle1: u8,
- needle2: u8,
- needle3: u8,
- haystack: &[u8],
-) -> Option<usize> {
- #[cfg(miri)]
- #[inline(always)]
- fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
- naive::memrchr3(n1, n2, n3, haystack)
- }
-
- #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))]
- #[inline(always)]
- fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
- x86::memrchr3(n1, n2, n3, haystack)
- }
-
- #[cfg(all(
- not(all(target_arch = "x86_64", memchr_runtime_simd)),
- not(miri),
- ))]
- #[inline(always)]
- fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
- fallback::memrchr3(n1, n2, n3, haystack)
- }
-
- if haystack.is_empty() {
- None
- } else {
- imp(needle1, needle2, needle3, haystack)
- }
-}
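
A brief usage sketch of the public API that mod.rs defines; it assumes the memchr crate as a dependency and reuses the haystack from the doc examples above.

fn main() {
    use memchr::{memchr, memchr2, memchr_iter, memrchr};

    let haystack = b"the quick brown fox";
    assert_eq!(memchr(b'k', haystack), Some(8));
    assert_eq!(memchr2(b'k', b'q', haystack), Some(4));
    assert_eq!(memrchr(b'o', haystack), Some(17));
    // The iterator wrappers yield every occurrence, front to back.
    assert_eq!(memchr_iter(b'o', haystack).collect::<Vec<usize>>(), vec![12, 17]);
}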
diff --git a/vendor/memchr/src/memchr/naive.rs b/vendor/memchr/src/memchr/naive.rs
deleted file mode 100644
index 3f3053d48..000000000
--- a/vendor/memchr/src/memchr/naive.rs
+++ /dev/null
@@ -1,25 +0,0 @@
-#![allow(dead_code)]
-
-pub fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
- haystack.iter().position(|&b| b == n1)
-}
-
-pub fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
- haystack.iter().position(|&b| b == n1 || b == n2)
-}
-
-pub fn memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
- haystack.iter().position(|&b| b == n1 || b == n2 || b == n3)
-}
-
-pub fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
- haystack.iter().rposition(|&b| b == n1)
-}
-
-pub fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
- haystack.iter().rposition(|&b| b == n1 || b == n2)
-}
-
-pub fn memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
- haystack.iter().rposition(|&b| b == n1 || b == n2 || b == n3)
-}
diff --git a/vendor/memchr/src/memchr/x86/avx.rs b/vendor/memchr/src/memchr/x86/avx.rs
deleted file mode 100644
index 535123097..000000000
--- a/vendor/memchr/src/memchr/x86/avx.rs
+++ /dev/null
@@ -1,755 +0,0 @@
-use core::{arch::x86_64::*, cmp, mem::size_of};
-
-use super::sse2;
-
-const VECTOR_SIZE: usize = size_of::<__m256i>();
-const VECTOR_ALIGN: usize = VECTOR_SIZE - 1;
-
-// The number of bytes to loop at in one iteration of memchr/memrchr.
-const LOOP_SIZE: usize = 4 * VECTOR_SIZE;
-
-// The number of bytes to loop at in one iteration of memchr2/memrchr2 and
-// memchr3/memrchr3. There was no observable difference between 128 and 64
-// bytes in benchmarks. memchr3 in particular only gets a very slight speed up
-// from the loop unrolling.
-const LOOP_SIZE2: usize = 2 * VECTOR_SIZE;
-
-#[target_feature(enable = "avx2")]
-pub unsafe fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
- // For a high level explanation for how this algorithm works, see the
- // sse2 implementation. The avx implementation here is the same, but with
- // 256-bit vectors instead of 128-bit vectors.
-
- // This routine is called whenever a match is detected. It is specifically
- // marked as uninlineable because it improves the codegen of the unrolled
- // loop below. Inlining this seems to cause codegen with some extra adds
- // and a load that aren't necessary. This seems to result in about a 10%
- // improvement for the memchr1/crate/huge/never benchmark.
- //
- // Interestingly, I couldn't observe a similar improvement for memrchr.
- #[cold]
- #[inline(never)]
- #[target_feature(enable = "avx2")]
- unsafe fn matched(
- start_ptr: *const u8,
- ptr: *const u8,
- eqa: __m256i,
- eqb: __m256i,
- eqc: __m256i,
- eqd: __m256i,
- ) -> usize {
- let mut at = sub(ptr, start_ptr);
- let mask = _mm256_movemask_epi8(eqa);
- if mask != 0 {
- return at + forward_pos(mask);
- }
-
- at += VECTOR_SIZE;
- let mask = _mm256_movemask_epi8(eqb);
- if mask != 0 {
- return at + forward_pos(mask);
- }
-
- at += VECTOR_SIZE;
- let mask = _mm256_movemask_epi8(eqc);
- if mask != 0 {
- return at + forward_pos(mask);
- }
-
- at += VECTOR_SIZE;
- let mask = _mm256_movemask_epi8(eqd);
- debug_assert!(mask != 0);
- at + forward_pos(mask)
- }
-
- let start_ptr = haystack.as_ptr();
- let end_ptr = start_ptr.add(haystack.len());
- let mut ptr = start_ptr;
-
- if haystack.len() < VECTOR_SIZE {
- // For small haystacks, defer to the SSE2 implementation. Codegen
- // suggests this completely avoids touching the AVX vectors.
- return sse2::memchr(n1, haystack);
- }
-
- let vn1 = _mm256_set1_epi8(n1 as i8);
- let loop_size = cmp::min(LOOP_SIZE, haystack.len());
- if let Some(i) = forward_search1(start_ptr, end_ptr, ptr, vn1) {
- return Some(i);
- }
-
- ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN));
- debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr);
- while loop_size == LOOP_SIZE && ptr <= end_ptr.sub(loop_size) {
- debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
-
- let a = _mm256_load_si256(ptr as *const __m256i);
- let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i);
- let c = _mm256_load_si256(ptr.add(2 * VECTOR_SIZE) as *const __m256i);
- let d = _mm256_load_si256(ptr.add(3 * VECTOR_SIZE) as *const __m256i);
- let eqa = _mm256_cmpeq_epi8(vn1, a);
- let eqb = _mm256_cmpeq_epi8(vn1, b);
- let eqc = _mm256_cmpeq_epi8(vn1, c);
- let eqd = _mm256_cmpeq_epi8(vn1, d);
- let or1 = _mm256_or_si256(eqa, eqb);
- let or2 = _mm256_or_si256(eqc, eqd);
- let or3 = _mm256_or_si256(or1, or2);
-
- if _mm256_movemask_epi8(or3) != 0 {
- return Some(matched(start_ptr, ptr, eqa, eqb, eqc, eqd));
- }
- ptr = ptr.add(loop_size);
- }
- while ptr <= end_ptr.sub(VECTOR_SIZE) {
- debug_assert!(sub(end_ptr, ptr) >= VECTOR_SIZE);
-
- if let Some(i) = forward_search1(start_ptr, end_ptr, ptr, vn1) {
- return Some(i);
- }
- ptr = ptr.add(VECTOR_SIZE);
- }
- if ptr < end_ptr {
- debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE);
- ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr));
- debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE);
-
- return forward_search1(start_ptr, end_ptr, ptr, vn1);
- }
- None
-}
-
-#[target_feature(enable = "avx2")]
-pub unsafe fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
- #[cold]
- #[inline(never)]
- #[target_feature(enable = "avx2")]
- unsafe fn matched(
- start_ptr: *const u8,
- ptr: *const u8,
- eqa1: __m256i,
- eqa2: __m256i,
- eqb1: __m256i,
- eqb2: __m256i,
- ) -> usize {
- let mut at = sub(ptr, start_ptr);
- let mask1 = _mm256_movemask_epi8(eqa1);
- let mask2 = _mm256_movemask_epi8(eqa2);
- if mask1 != 0 || mask2 != 0 {
- return at + forward_pos2(mask1, mask2);
- }
-
- at += VECTOR_SIZE;
- let mask1 = _mm256_movemask_epi8(eqb1);
- let mask2 = _mm256_movemask_epi8(eqb2);
- at + forward_pos2(mask1, mask2)
- }
-
- let vn1 = _mm256_set1_epi8(n1 as i8);
- let vn2 = _mm256_set1_epi8(n2 as i8);
- let len = haystack.len();
- let loop_size = cmp::min(LOOP_SIZE2, len);
- let start_ptr = haystack.as_ptr();
- let end_ptr = start_ptr.add(haystack.len());
- let mut ptr = start_ptr;
-
- if haystack.len() < VECTOR_SIZE {
- while ptr < end_ptr {
- if *ptr == n1 || *ptr == n2 {
- return Some(sub(ptr, start_ptr));
- }
- ptr = ptr.offset(1);
- }
- return None;
- }
-
- if let Some(i) = forward_search2(start_ptr, end_ptr, ptr, vn1, vn2) {
- return Some(i);
- }
-
- ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN));
- debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr);
- while loop_size == LOOP_SIZE2 && ptr <= end_ptr.sub(loop_size) {
- debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
-
- let a = _mm256_load_si256(ptr as *const __m256i);
- let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i);
- let eqa1 = _mm256_cmpeq_epi8(vn1, a);
- let eqb1 = _mm256_cmpeq_epi8(vn1, b);
- let eqa2 = _mm256_cmpeq_epi8(vn2, a);
- let eqb2 = _mm256_cmpeq_epi8(vn2, b);
- let or1 = _mm256_or_si256(eqa1, eqb1);
- let or2 = _mm256_or_si256(eqa2, eqb2);
- let or3 = _mm256_or_si256(or1, or2);
- if _mm256_movemask_epi8(or3) != 0 {
- return Some(matched(start_ptr, ptr, eqa1, eqa2, eqb1, eqb2));
- }
- ptr = ptr.add(loop_size);
- }
- while ptr <= end_ptr.sub(VECTOR_SIZE) {
- if let Some(i) = forward_search2(start_ptr, end_ptr, ptr, vn1, vn2) {
- return Some(i);
- }
- ptr = ptr.add(VECTOR_SIZE);
- }
- if ptr < end_ptr {
- debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE);
- ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr));
- debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE);
-
- return forward_search2(start_ptr, end_ptr, ptr, vn1, vn2);
- }
- None
-}
-
-#[target_feature(enable = "avx2")]
-pub unsafe fn memchr3(
- n1: u8,
- n2: u8,
- n3: u8,
- haystack: &[u8],
-) -> Option<usize> {
- #[cold]
- #[inline(never)]
- #[target_feature(enable = "avx2")]
- unsafe fn matched(
- start_ptr: *const u8,
- ptr: *const u8,
- eqa1: __m256i,
- eqa2: __m256i,
- eqa3: __m256i,
- eqb1: __m256i,
- eqb2: __m256i,
- eqb3: __m256i,
- ) -> usize {
- let mut at = sub(ptr, start_ptr);
- let mask1 = _mm256_movemask_epi8(eqa1);
- let mask2 = _mm256_movemask_epi8(eqa2);
- let mask3 = _mm256_movemask_epi8(eqa3);
- if mask1 != 0 || mask2 != 0 || mask3 != 0 {
- return at + forward_pos3(mask1, mask2, mask3);
- }
-
- at += VECTOR_SIZE;
- let mask1 = _mm256_movemask_epi8(eqb1);
- let mask2 = _mm256_movemask_epi8(eqb2);
- let mask3 = _mm256_movemask_epi8(eqb3);
- at + forward_pos3(mask1, mask2, mask3)
- }
-
- let vn1 = _mm256_set1_epi8(n1 as i8);
- let vn2 = _mm256_set1_epi8(n2 as i8);
- let vn3 = _mm256_set1_epi8(n3 as i8);
- let len = haystack.len();
- let loop_size = cmp::min(LOOP_SIZE2, len);
- let start_ptr = haystack.as_ptr();
- let end_ptr = start_ptr.add(haystack.len());
- let mut ptr = start_ptr;
-
- if haystack.len() < VECTOR_SIZE {
- while ptr < end_ptr {
- if *ptr == n1 || *ptr == n2 || *ptr == n3 {
- return Some(sub(ptr, start_ptr));
- }
- ptr = ptr.offset(1);
- }
- return None;
- }
-
- if let Some(i) = forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) {
- return Some(i);
- }
-
- ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN));
- debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr);
- while loop_size == LOOP_SIZE2 && ptr <= end_ptr.sub(loop_size) {
- debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
-
- let a = _mm256_load_si256(ptr as *const __m256i);
- let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i);
- let eqa1 = _mm256_cmpeq_epi8(vn1, a);
- let eqb1 = _mm256_cmpeq_epi8(vn1, b);
- let eqa2 = _mm256_cmpeq_epi8(vn2, a);
- let eqb2 = _mm256_cmpeq_epi8(vn2, b);
- let eqa3 = _mm256_cmpeq_epi8(vn3, a);
- let eqb3 = _mm256_cmpeq_epi8(vn3, b);
- let or1 = _mm256_or_si256(eqa1, eqb1);
- let or2 = _mm256_or_si256(eqa2, eqb2);
- let or3 = _mm256_or_si256(eqa3, eqb3);
- let or4 = _mm256_or_si256(or1, or2);
- let or5 = _mm256_or_si256(or3, or4);
- if _mm256_movemask_epi8(or5) != 0 {
- return Some(matched(
- start_ptr, ptr, eqa1, eqa2, eqa3, eqb1, eqb2, eqb3,
- ));
- }
- ptr = ptr.add(loop_size);
- }
- while ptr <= end_ptr.sub(VECTOR_SIZE) {
- if let Some(i) =
- forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3)
- {
- return Some(i);
- }
- ptr = ptr.add(VECTOR_SIZE);
- }
- if ptr < end_ptr {
- debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE);
- ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr));
- debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE);
-
- return forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3);
- }
- None
-}
-
-#[target_feature(enable = "avx2")]
-pub unsafe fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
- let vn1 = _mm256_set1_epi8(n1 as i8);
- let len = haystack.len();
- let loop_size = cmp::min(LOOP_SIZE, len);
- let start_ptr = haystack.as_ptr();
- let end_ptr = start_ptr.add(haystack.len());
- let mut ptr = end_ptr;
-
- if haystack.len() < VECTOR_SIZE {
- while ptr > start_ptr {
- ptr = ptr.offset(-1);
- if *ptr == n1 {
- return Some(sub(ptr, start_ptr));
- }
- }
- return None;
- }
-
- ptr = ptr.sub(VECTOR_SIZE);
- if let Some(i) = reverse_search1(start_ptr, end_ptr, ptr, vn1) {
- return Some(i);
- }
-
- ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8;
- debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
- while loop_size == LOOP_SIZE && ptr >= start_ptr.add(loop_size) {
- debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
-
- ptr = ptr.sub(loop_size);
- let a = _mm256_load_si256(ptr as *const __m256i);
- let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i);
- let c = _mm256_load_si256(ptr.add(2 * VECTOR_SIZE) as *const __m256i);
- let d = _mm256_load_si256(ptr.add(3 * VECTOR_SIZE) as *const __m256i);
- let eqa = _mm256_cmpeq_epi8(vn1, a);
- let eqb = _mm256_cmpeq_epi8(vn1, b);
- let eqc = _mm256_cmpeq_epi8(vn1, c);
- let eqd = _mm256_cmpeq_epi8(vn1, d);
- let or1 = _mm256_or_si256(eqa, eqb);
- let or2 = _mm256_or_si256(eqc, eqd);
- let or3 = _mm256_or_si256(or1, or2);
- if _mm256_movemask_epi8(or3) != 0 {
- let mut at = sub(ptr.add(3 * VECTOR_SIZE), start_ptr);
- let mask = _mm256_movemask_epi8(eqd);
- if mask != 0 {
- return Some(at + reverse_pos(mask));
- }
-
- at -= VECTOR_SIZE;
- let mask = _mm256_movemask_epi8(eqc);
- if mask != 0 {
- return Some(at + reverse_pos(mask));
- }
-
- at -= VECTOR_SIZE;
- let mask = _mm256_movemask_epi8(eqb);
- if mask != 0 {
- return Some(at + reverse_pos(mask));
- }
-
- at -= VECTOR_SIZE;
- let mask = _mm256_movemask_epi8(eqa);
- debug_assert!(mask != 0);
- return Some(at + reverse_pos(mask));
- }
- }
- while ptr >= start_ptr.add(VECTOR_SIZE) {
- ptr = ptr.sub(VECTOR_SIZE);
- if let Some(i) = reverse_search1(start_ptr, end_ptr, ptr, vn1) {
- return Some(i);
- }
- }
- if ptr > start_ptr {
- debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE);
- return reverse_search1(start_ptr, end_ptr, start_ptr, vn1);
- }
- None
-}
-
-#[target_feature(enable = "avx2")]
-pub unsafe fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
- let vn1 = _mm256_set1_epi8(n1 as i8);
- let vn2 = _mm256_set1_epi8(n2 as i8);
- let len = haystack.len();
- let loop_size = cmp::min(LOOP_SIZE2, len);
- let start_ptr = haystack.as_ptr();
- let end_ptr = start_ptr.add(haystack.len());
- let mut ptr = end_ptr;
-
- if haystack.len() < VECTOR_SIZE {
- while ptr > start_ptr {
- ptr = ptr.offset(-1);
- if *ptr == n1 || *ptr == n2 {
- return Some(sub(ptr, start_ptr));
- }
- }
- return None;
- }
-
- ptr = ptr.sub(VECTOR_SIZE);
- if let Some(i) = reverse_search2(start_ptr, end_ptr, ptr, vn1, vn2) {
- return Some(i);
- }
-
- ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8;
- debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
- while loop_size == LOOP_SIZE2 && ptr >= start_ptr.add(loop_size) {
- debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
-
- ptr = ptr.sub(loop_size);
- let a = _mm256_load_si256(ptr as *const __m256i);
- let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i);
- let eqa1 = _mm256_cmpeq_epi8(vn1, a);
- let eqb1 = _mm256_cmpeq_epi8(vn1, b);
- let eqa2 = _mm256_cmpeq_epi8(vn2, a);
- let eqb2 = _mm256_cmpeq_epi8(vn2, b);
- let or1 = _mm256_or_si256(eqa1, eqb1);
- let or2 = _mm256_or_si256(eqa2, eqb2);
- let or3 = _mm256_or_si256(or1, or2);
- if _mm256_movemask_epi8(or3) != 0 {
- let mut at = sub(ptr.add(VECTOR_SIZE), start_ptr);
- let mask1 = _mm256_movemask_epi8(eqb1);
- let mask2 = _mm256_movemask_epi8(eqb2);
- if mask1 != 0 || mask2 != 0 {
- return Some(at + reverse_pos2(mask1, mask2));
- }
-
- at -= VECTOR_SIZE;
- let mask1 = _mm256_movemask_epi8(eqa1);
- let mask2 = _mm256_movemask_epi8(eqa2);
- return Some(at + reverse_pos2(mask1, mask2));
- }
- }
- while ptr >= start_ptr.add(VECTOR_SIZE) {
- ptr = ptr.sub(VECTOR_SIZE);
- if let Some(i) = reverse_search2(start_ptr, end_ptr, ptr, vn1, vn2) {
- return Some(i);
- }
- }
- if ptr > start_ptr {
- debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE);
- return reverse_search2(start_ptr, end_ptr, start_ptr, vn1, vn2);
- }
- None
-}
-
-#[target_feature(enable = "avx2")]
-pub unsafe fn memrchr3(
- n1: u8,
- n2: u8,
- n3: u8,
- haystack: &[u8],
-) -> Option<usize> {
- let vn1 = _mm256_set1_epi8(n1 as i8);
- let vn2 = _mm256_set1_epi8(n2 as i8);
- let vn3 = _mm256_set1_epi8(n3 as i8);
- let len = haystack.len();
- let loop_size = cmp::min(LOOP_SIZE2, len);
- let start_ptr = haystack.as_ptr();
- let end_ptr = start_ptr.add(haystack.len());
- let mut ptr = end_ptr;
-
- if haystack.len() < VECTOR_SIZE {
- while ptr > start_ptr {
- ptr = ptr.offset(-1);
- if *ptr == n1 || *ptr == n2 || *ptr == n3 {
- return Some(sub(ptr, start_ptr));
- }
- }
- return None;
- }
-
- ptr = ptr.sub(VECTOR_SIZE);
- if let Some(i) = reverse_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) {
- return Some(i);
- }
-
- ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8;
- debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
- while loop_size == LOOP_SIZE2 && ptr >= start_ptr.add(loop_size) {
- debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
-
- ptr = ptr.sub(loop_size);
- let a = _mm256_load_si256(ptr as *const __m256i);
- let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i);
- let eqa1 = _mm256_cmpeq_epi8(vn1, a);
- let eqb1 = _mm256_cmpeq_epi8(vn1, b);
- let eqa2 = _mm256_cmpeq_epi8(vn2, a);
- let eqb2 = _mm256_cmpeq_epi8(vn2, b);
- let eqa3 = _mm256_cmpeq_epi8(vn3, a);
- let eqb3 = _mm256_cmpeq_epi8(vn3, b);
- let or1 = _mm256_or_si256(eqa1, eqb1);
- let or2 = _mm256_or_si256(eqa2, eqb2);
- let or3 = _mm256_or_si256(eqa3, eqb3);
- let or4 = _mm256_or_si256(or1, or2);
- let or5 = _mm256_or_si256(or3, or4);
- if _mm256_movemask_epi8(or5) != 0 {
- let mut at = sub(ptr.add(VECTOR_SIZE), start_ptr);
- let mask1 = _mm256_movemask_epi8(eqb1);
- let mask2 = _mm256_movemask_epi8(eqb2);
- let mask3 = _mm256_movemask_epi8(eqb3);
- if mask1 != 0 || mask2 != 0 || mask3 != 0 {
- return Some(at + reverse_pos3(mask1, mask2, mask3));
- }
-
- at -= VECTOR_SIZE;
- let mask1 = _mm256_movemask_epi8(eqa1);
- let mask2 = _mm256_movemask_epi8(eqa2);
- let mask3 = _mm256_movemask_epi8(eqa3);
- return Some(at + reverse_pos3(mask1, mask2, mask3));
- }
- }
- while ptr >= start_ptr.add(VECTOR_SIZE) {
- ptr = ptr.sub(VECTOR_SIZE);
- if let Some(i) =
- reverse_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3)
- {
- return Some(i);
- }
- }
- if ptr > start_ptr {
- debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE);
- return reverse_search3(start_ptr, end_ptr, start_ptr, vn1, vn2, vn3);
- }
- None
-}
-
-#[target_feature(enable = "avx2")]
-unsafe fn forward_search1(
- start_ptr: *const u8,
- end_ptr: *const u8,
- ptr: *const u8,
- vn1: __m256i,
-) -> Option<usize> {
- debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
- debug_assert!(start_ptr <= ptr);
- debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
-
- let chunk = _mm256_loadu_si256(ptr as *const __m256i);
- let mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, vn1));
- if mask != 0 {
- Some(sub(ptr, start_ptr) + forward_pos(mask))
- } else {
- None
- }
-}
-
-#[target_feature(enable = "avx2")]
-unsafe fn forward_search2(
- start_ptr: *const u8,
- end_ptr: *const u8,
- ptr: *const u8,
- vn1: __m256i,
- vn2: __m256i,
-) -> Option<usize> {
- debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
- debug_assert!(start_ptr <= ptr);
- debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
-
- let chunk = _mm256_loadu_si256(ptr as *const __m256i);
- let eq1 = _mm256_cmpeq_epi8(chunk, vn1);
- let eq2 = _mm256_cmpeq_epi8(chunk, vn2);
- if _mm256_movemask_epi8(_mm256_or_si256(eq1, eq2)) != 0 {
- let mask1 = _mm256_movemask_epi8(eq1);
- let mask2 = _mm256_movemask_epi8(eq2);
- Some(sub(ptr, start_ptr) + forward_pos2(mask1, mask2))
- } else {
- None
- }
-}
-
-#[target_feature(enable = "avx2")]
-unsafe fn forward_search3(
- start_ptr: *const u8,
- end_ptr: *const u8,
- ptr: *const u8,
- vn1: __m256i,
- vn2: __m256i,
- vn3: __m256i,
-) -> Option<usize> {
- debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
- debug_assert!(start_ptr <= ptr);
- debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
-
- let chunk = _mm256_loadu_si256(ptr as *const __m256i);
- let eq1 = _mm256_cmpeq_epi8(chunk, vn1);
- let eq2 = _mm256_cmpeq_epi8(chunk, vn2);
- let eq3 = _mm256_cmpeq_epi8(chunk, vn3);
- let or = _mm256_or_si256(eq1, eq2);
- if _mm256_movemask_epi8(_mm256_or_si256(or, eq3)) != 0 {
- let mask1 = _mm256_movemask_epi8(eq1);
- let mask2 = _mm256_movemask_epi8(eq2);
- let mask3 = _mm256_movemask_epi8(eq3);
- Some(sub(ptr, start_ptr) + forward_pos3(mask1, mask2, mask3))
- } else {
- None
- }
-}
-
-#[target_feature(enable = "avx2")]
-unsafe fn reverse_search1(
- start_ptr: *const u8,
- end_ptr: *const u8,
- ptr: *const u8,
- vn1: __m256i,
-) -> Option<usize> {
- debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
- debug_assert!(start_ptr <= ptr);
- debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
-
- let chunk = _mm256_loadu_si256(ptr as *const __m256i);
- let mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(vn1, chunk));
- if mask != 0 {
- Some(sub(ptr, start_ptr) + reverse_pos(mask))
- } else {
- None
- }
-}
-
-#[target_feature(enable = "avx2")]
-unsafe fn reverse_search2(
- start_ptr: *const u8,
- end_ptr: *const u8,
- ptr: *const u8,
- vn1: __m256i,
- vn2: __m256i,
-) -> Option<usize> {
- debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
- debug_assert!(start_ptr <= ptr);
- debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
-
- let chunk = _mm256_loadu_si256(ptr as *const __m256i);
- let eq1 = _mm256_cmpeq_epi8(chunk, vn1);
- let eq2 = _mm256_cmpeq_epi8(chunk, vn2);
- if _mm256_movemask_epi8(_mm256_or_si256(eq1, eq2)) != 0 {
- let mask1 = _mm256_movemask_epi8(eq1);
- let mask2 = _mm256_movemask_epi8(eq2);
- Some(sub(ptr, start_ptr) + reverse_pos2(mask1, mask2))
- } else {
- None
- }
-}
-
-#[target_feature(enable = "avx2")]
-unsafe fn reverse_search3(
- start_ptr: *const u8,
- end_ptr: *const u8,
- ptr: *const u8,
- vn1: __m256i,
- vn2: __m256i,
- vn3: __m256i,
-) -> Option<usize> {
- debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
- debug_assert!(start_ptr <= ptr);
- debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
-
- let chunk = _mm256_loadu_si256(ptr as *const __m256i);
- let eq1 = _mm256_cmpeq_epi8(chunk, vn1);
- let eq2 = _mm256_cmpeq_epi8(chunk, vn2);
- let eq3 = _mm256_cmpeq_epi8(chunk, vn3);
- let or = _mm256_or_si256(eq1, eq2);
- if _mm256_movemask_epi8(_mm256_or_si256(or, eq3)) != 0 {
- let mask1 = _mm256_movemask_epi8(eq1);
- let mask2 = _mm256_movemask_epi8(eq2);
- let mask3 = _mm256_movemask_epi8(eq3);
- Some(sub(ptr, start_ptr) + reverse_pos3(mask1, mask2, mask3))
- } else {
- None
- }
-}
-
-/// Compute the position of the first matching byte from the given mask. The
-/// position returned is always in the range [0, 31].
-///
-/// The mask given is expected to be the result of _mm256_movemask_epi8.
-fn forward_pos(mask: i32) -> usize {
- // We are dealing with little endian here, where the most significant byte
- // is at a higher address. That means the least significant bit that is set
- // corresponds to the position of our first matching byte. That position
- // corresponds to the number of zeros after the least significant bit.
- mask.trailing_zeros() as usize
-}
-
-/// Compute the position of the first matching byte from the given masks. The
-/// position returned is always in the range [0, 31]. Each mask corresponds to
-/// the equality comparison of a single byte.
-///
-/// The masks given are expected to be the result of _mm256_movemask_epi8,
-/// where at least one of the masks is non-zero (i.e., indicates a match).
-fn forward_pos2(mask1: i32, mask2: i32) -> usize {
- debug_assert!(mask1 != 0 || mask2 != 0);
-
- forward_pos(mask1 | mask2)
-}
-
-/// Compute the position of the first matching byte from the given masks. The
-/// position returned is always in the range [0, 31]. Each mask corresponds to
-/// the equality comparison of a single byte.
-///
-/// The masks given are expected to be the result of _mm256_movemask_epi8,
-/// where at least one of the masks is non-zero (i.e., indicates a match).
-fn forward_pos3(mask1: i32, mask2: i32, mask3: i32) -> usize {
- debug_assert!(mask1 != 0 || mask2 != 0 || mask3 != 0);
-
- forward_pos(mask1 | mask2 | mask3)
-}
-
-/// Compute the position of the last matching byte from the given mask. The
-/// position returned is always in the range [0, 31].
-///
-/// The mask given is expected to be the result of _mm256_movemask_epi8.
-fn reverse_pos(mask: i32) -> usize {
- // We are dealing with little endian here, where the most significant byte
- // is at a higher address. That means the most significant bit that is set
- // corresponds to the position of our last matching byte. The position from
- // the end of the mask is therefore the number of leading zeros in a 32
- // bit integer, and the position from the start of the mask is therefore
- // 32 - (leading zeros) - 1.
- VECTOR_SIZE - (mask as u32).leading_zeros() as usize - 1
-}
-
-/// Compute the position of the last matching byte from the given masks. The
-/// position returned is always in the range [0, 31]. Each mask corresponds to
-/// the equality comparison of a single byte.
-///
-/// The masks given are expected to be the result of _mm256_movemask_epi8,
-/// where at least one of the masks is non-zero (i.e., indicates a match).
-fn reverse_pos2(mask1: i32, mask2: i32) -> usize {
- debug_assert!(mask1 != 0 || mask2 != 0);
-
- reverse_pos(mask1 | mask2)
-}
-
-/// Compute the position of the last matching byte from the given masks. The
-/// position returned is always in the range [0, 31]. Each mask corresponds to
-/// the equality comparison of a single byte.
-///
-/// The masks given are expected to be the result of _mm256_movemask_epi8,
-/// where at least one of the masks is non-zero (i.e., indicates a match).
-fn reverse_pos3(mask1: i32, mask2: i32, mask3: i32) -> usize {
- debug_assert!(mask1 != 0 || mask2 != 0 || mask3 != 0);
-
- reverse_pos(mask1 | mask2 | mask3)
-}
-
-/// Subtract `b` from `a` and return the difference. `a` should be greater than
-/// or equal to `b`.
-fn sub(a: *const u8, b: *const u8) -> usize {
- debug_assert!(a >= b);
- (a as usize) - (b as usize)
-}
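
A vector-free illustration of how avx.rs converts a movemask result into a byte offset: the lowest set bit gives the first match and the highest set bit gives the last, for a 32-byte AVX2 vector. The mask below is hand-built rather than coming from _mm256_movemask_epi8.

fn forward_pos(mask: u32) -> usize {
    // Least significant set bit == lowest matching byte.
    mask.trailing_zeros() as usize
}

fn reverse_pos(mask: u32) -> usize {
    // Most significant set bit == highest matching byte.
    32 - mask.leading_zeros() as usize - 1
}

fn main() {
    // Pretend bytes 3 and 29 of a 32-byte chunk matched the needle.
    let mask: u32 = (1 << 3) | (1 << 29);
    assert_eq!(forward_pos(mask), 3);
    assert_eq!(reverse_pos(mask), 29);
}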
diff --git a/vendor/memchr/src/memchr/x86/mod.rs b/vendor/memchr/src/memchr/x86/mod.rs
deleted file mode 100644
index aec35dbff..000000000
--- a/vendor/memchr/src/memchr/x86/mod.rs
+++ /dev/null
@@ -1,148 +0,0 @@
-use super::fallback;
-
-// We only use AVX when we can detect at runtime whether it's available, which
-// requires std.
-#[cfg(feature = "std")]
-mod avx;
-mod sse2;
-
-/// This macro employs a gcc-like "ifunc" trick whereby upon first calling
-/// `memchr` (for example), CPU feature detection will be performed at runtime
-/// to determine the best implementation to use. After CPU feature detection
-/// is done, we replace `memchr`'s function pointer with the selection. Upon
-/// subsequent invocations, the CPU-specific routine is invoked directly, which
-/// skips the CPU feature detection and subsequent branch that's required.
-///
-/// While this typically doesn't matter for rare occurrences or when used on
-/// larger haystacks, `memchr` can be called in tight loops where the overhead
-/// of this branch can actually add up *and is measurable*. This trick was
-/// necessary to bring this implementation up to glibc's speeds for the 'tiny'
-/// benchmarks, for example.
-///
-/// At some point, I expect the Rust ecosystem will get a nice macro for doing
-/// exactly this, at which point, we can replace our hand-jammed version of it.
-///
-/// N.B. The ifunc strategy does prevent function inlining of course, but
-/// on modern CPUs, you'll probably end up with the AVX2 implementation,
-/// which probably can't be inlined anyway---unless you've compiled your
-/// entire program with AVX2 enabled. However, even then, the various memchr
-/// implementations aren't exactly small, so inlining might not help anyway!
-///
-/// # Safety
-///
-/// Callers must ensure that fnty is a function pointer type.
-#[cfg(feature = "std")]
-macro_rules! unsafe_ifunc {
- ($fnty:ty, $name:ident, $haystack:ident, $($needle:ident),+) => {{
- use std::{mem, sync::atomic::{AtomicPtr, Ordering}};
-
- type FnRaw = *mut ();
-
- static FN: AtomicPtr<()> = AtomicPtr::new(detect as FnRaw);
-
- fn detect($($needle: u8),+, haystack: &[u8]) -> Option<usize> {
- let fun =
- if cfg!(memchr_runtime_avx) && is_x86_feature_detected!("avx2") {
- avx::$name as FnRaw
- } else if cfg!(memchr_runtime_sse2) {
- sse2::$name as FnRaw
- } else {
- fallback::$name as FnRaw
- };
- FN.store(fun as FnRaw, Ordering::Relaxed);
- // SAFETY: By virtue of the caller contract, $fnty is a function
- // pointer, which is always safe to transmute with a *mut ().
- // Also, if `fun` is the AVX routine, then it is guaranteed to be
- // supported since we checked the avx2 feature.
- unsafe {
- mem::transmute::<FnRaw, $fnty>(fun)($($needle),+, haystack)
- }
- }
-
- // SAFETY: By virtue of the caller contract, $fnty is a function
- // pointer, which is always safe to transmute with a *mut (). Also, if
- // `fun` is the AVX routine, then it is guaranteed to be supported since
- // we checked the avx2 feature.
- unsafe {
- let fun = FN.load(Ordering::Relaxed);
- mem::transmute::<FnRaw, $fnty>(fun)($($needle),+, $haystack)
- }
- }}
-}
-
-/// When std isn't available to provide runtime CPU feature detection, or if
-/// runtime CPU feature detection has been explicitly disabled, then just
-/// call our optimized SSE2 routine directly. SSE2 is available on all x86_64
-/// targets, so no CPU feature detection is necessary.
-///
-/// # Safety
-///
-/// There are no safety requirements for this definition of the macro. It is
-/// safe for all inputs since it is restricted to either the fallback routine
-/// or the SSE routine, which is always safe to call on x86_64.
-#[cfg(not(feature = "std"))]
-macro_rules! unsafe_ifunc {
- ($fnty:ty, $name:ident, $haystack:ident, $($needle:ident),+) => {{
- if cfg!(memchr_runtime_sse2) {
- unsafe { sse2::$name($($needle),+, $haystack) }
- } else {
- fallback::$name($($needle),+, $haystack)
- }
- }}
-}
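
For concreteness, here is roughly what that expands to for the single-needle `memchr` case (a sketch of the expansion, with the function renamed so it does not collide with the wrapper below):

    pub fn memchr_no_std(n1: u8, haystack: &[u8]) -> Option<usize> {
        if cfg!(memchr_runtime_sse2) {
            // SAFETY: SSE2 is part of the x86_64 baseline, so the SSE2
            // routine is always safe to call on this target.
            unsafe { sse2::memchr(n1, haystack) }
        } else {
            fallback::memchr(n1, haystack)
        }
    }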
-
-#[inline(always)]
-pub fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
- unsafe_ifunc!(fn(u8, &[u8]) -> Option<usize>, memchr, haystack, n1)
-}
-
-#[inline(always)]
-pub fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
- unsafe_ifunc!(
- fn(u8, u8, &[u8]) -> Option<usize>,
- memchr2,
- haystack,
- n1,
- n2
- )
-}
-
-#[inline(always)]
-pub fn memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
- unsafe_ifunc!(
- fn(u8, u8, u8, &[u8]) -> Option<usize>,
- memchr3,
- haystack,
- n1,
- n2,
- n3
- )
-}
-
-#[inline(always)]
-pub fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
- unsafe_ifunc!(fn(u8, &[u8]) -> Option<usize>, memrchr, haystack, n1)
-}
-
-#[inline(always)]
-pub fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
- unsafe_ifunc!(
- fn(u8, u8, &[u8]) -> Option<usize>,
- memrchr2,
- haystack,
- n1,
- n2
- )
-}
-
-#[inline(always)]
-pub fn memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
- unsafe_ifunc!(
- fn(u8, u8, u8, &[u8]) -> Option<usize>,
- memrchr3,
- haystack,
- n1,
- n2,
- n3
- )
-}
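
These wrappers back the crate's public entry points, so a typical call site looks like the following (a small usage sketch):

    fn main() {
        let haystack = b"the quick brown fox";
        assert_eq!(memchr::memchr(b'k', haystack), Some(8));
        assert_eq!(memchr::memchr2(b'z', b'q', haystack), Some(4));
        assert_eq!(memchr::memchr3(b'z', b'y', b'x', haystack), Some(18));
        assert_eq!(memchr::memrchr(b'o', haystack), Some(17));
    }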
diff --git a/vendor/memchr/src/memchr/x86/sse2.rs b/vendor/memchr/src/memchr/x86/sse2.rs
deleted file mode 100644
index b7b3a9328..000000000
--- a/vendor/memchr/src/memchr/x86/sse2.rs
+++ /dev/null
@@ -1,791 +0,0 @@
-use core::{arch::x86_64::*, cmp, mem::size_of};
-
-const VECTOR_SIZE: usize = size_of::<__m128i>();
-const VECTOR_ALIGN: usize = VECTOR_SIZE - 1;
-
-// The number of bytes to loop at in one iteration of memchr/memrchr.
-const LOOP_SIZE: usize = 4 * VECTOR_SIZE;
-
-// The number of bytes to loop at in one iteration of memchr2/memrchr2 and
-// memchr3/memrchr3. There was no observable difference between 64 and 32 bytes
-// in benchmarks. memchr3 in particular only gets a very slight speed up from
-// the loop unrolling.
-const LOOP_SIZE2: usize = 2 * VECTOR_SIZE;
-
-#[target_feature(enable = "sse2")]
-pub unsafe fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
- // What follows is a fast SSE2-only algorithm to detect the position of
- // `n1` in `haystack` if it exists. From what I know, this is the "classic"
- // algorithm. I believe it can be found in places like glibc and Go's
- // standard library. It appears to be well known and is elaborated on in
- // more detail here: https://gms.tf/stdfind-and-memchr-optimizations.html
- //
- // While this routine is very long, the basic idea is actually very simple
- // and can be expressed straight-forwardly in pseudo code:
- //
- // needle = (n1 << 15) | (n1 << 14) | ... | (n1 << 1) | n1
- // // Note: shift amount in bytes
- //
- // while i <= haystack.len() - 16:
- // // A 16 byte vector. Each byte in chunk corresponds to a byte in
- // // the haystack.
- // chunk = haystack[i:i+16]
- // // Compare bytes in needle with bytes in chunk. The result is a 16
- // // byte chunk where each byte is 0xFF if the corresponding bytes
- // // in needle and chunk were equal, or 0x00 otherwise.
- // eqs = cmpeq(needle, chunk)
- // // Return a 32 bit integer where the most significant 16 bits
- // // are always 0 and the lower 16 bits correspond to whether the
- // // most significant bit in the corresponding byte in `eqs` is set.
- // // In other words, `mask as u16` has bit i set if and only if
- // // needle[i] == chunk[i].
- // mask = movemask(eqs)
- //
- // // Mask is 0 if there is no match, and non-zero otherwise.
- // if mask != 0:
- // // trailing_zeros tells us the position of the least significant
- // // bit that is set.
- // return i + trailing_zeros(mask)
- //
- // // haystack length may not be a multiple of 16, so search the rest.
- // while i < haystack.len():
- // if haystack[i] == n1:
- // return i
- //
- // // No match found.
- // return NULL
- //
- // In fact, we could loosely translate the above code to Rust line-for-line
- // and it would be a pretty fast algorithm. But, we pull out all the stops
- // to go as fast as possible:
- //
- // 1. We use aligned loads. That is, we do some finagling to make sure our
- // primary loop not only proceeds in increments of 16 bytes, but that
- //    the haystack address that we dereference is aligned to 16 bytes. 16 is
- //    a magic number here because it is the size of an SSE2 128-bit vector.
- //    (For the AVX2 algorithm, 32 is the magic number.)
- // Therefore, to get aligned loads, our pointer's address must be evenly
- // divisible by 16.
- // 2. Our primary loop proceeds 64 bytes at a time instead of 16. It's
- // kind of like loop unrolling, but we combine the equality comparisons
- // using a vector OR such that we only need to extract a single mask to
- // determine whether a match exists or not. If so, then we do some
- // book-keeping to determine the precise location but otherwise mush on.
- // 3. We use our "chunk" comparison routine in as many places as possible,
- // even if it means using unaligned loads. In particular, if haystack
- // starts with an unaligned address, then we do an unaligned load to
- // search the first 16 bytes. We then start our primary loop at the
- // smallest subsequent aligned address, which will actually overlap with
- // previously searched bytes. But we're OK with that. We do a similar
- // dance at the end of our primary loop. Finally, to avoid a
- // byte-at-a-time loop at the end, we do a final 16 byte unaligned load
- // that may overlap with a previous load. This is OK because it converts
- // a loop into a small number of very fast vector instructions.
- //
- // The primary downside of this algorithm is that it's effectively
- // completely unsafe. Therefore, we have to be super careful to avoid
- // undefined behavior:
- //
- // 1. We use raw pointers everywhere. Not only does dereferencing a pointer
- // require the pointer to be valid, but we actually can't even store the
- // address of an invalid pointer (unless it's 1 past the end of
- // haystack) without sacrificing performance.
- // 2. _mm_loadu_si128 is used when you don't care about alignment, and
- // _mm_load_si128 is used when you do care. You cannot use the latter
- // on unaligned pointers.
- // 3. We make liberal use of debug_assert! to check assumptions.
- // 4. We make a concerted effort to stick with pointers instead of indices.
- // Indices are nicer because there's less to worry about with them (see
- // above about pointer offsets), but I could not get the compiler to
- //    produce code as good as what the code below produces. In any case,
- // pointers are what we really care about here, and alignment is
- // expressed a bit more naturally with them.
- //
- // In general, most of the algorithms in this crate have a similar
- // structure to what you see below, so this comment applies fairly well to
- // all of them.
-
- let vn1 = _mm_set1_epi8(n1 as i8);
- let len = haystack.len();
- let loop_size = cmp::min(LOOP_SIZE, len);
- let start_ptr = haystack.as_ptr();
- let end_ptr = start_ptr.add(haystack.len());
- let mut ptr = start_ptr;
-
- if haystack.len() < VECTOR_SIZE {
- while ptr < end_ptr {
- if *ptr == n1 {
- return Some(sub(ptr, start_ptr));
- }
- ptr = ptr.offset(1);
- }
- return None;
- }
-
- if let Some(i) = forward_search1(start_ptr, end_ptr, ptr, vn1) {
- return Some(i);
- }
-
- ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN));
- debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr);
- while loop_size == LOOP_SIZE && ptr <= end_ptr.sub(loop_size) {
- debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
-
- let a = _mm_load_si128(ptr as *const __m128i);
- let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
- let c = _mm_load_si128(ptr.add(2 * VECTOR_SIZE) as *const __m128i);
- let d = _mm_load_si128(ptr.add(3 * VECTOR_SIZE) as *const __m128i);
- let eqa = _mm_cmpeq_epi8(vn1, a);
- let eqb = _mm_cmpeq_epi8(vn1, b);
- let eqc = _mm_cmpeq_epi8(vn1, c);
- let eqd = _mm_cmpeq_epi8(vn1, d);
- let or1 = _mm_or_si128(eqa, eqb);
- let or2 = _mm_or_si128(eqc, eqd);
- let or3 = _mm_or_si128(or1, or2);
- if _mm_movemask_epi8(or3) != 0 {
- let mut at = sub(ptr, start_ptr);
- let mask = _mm_movemask_epi8(eqa);
- if mask != 0 {
- return Some(at + forward_pos(mask));
- }
-
- at += VECTOR_SIZE;
- let mask = _mm_movemask_epi8(eqb);
- if mask != 0 {
- return Some(at + forward_pos(mask));
- }
-
- at += VECTOR_SIZE;
- let mask = _mm_movemask_epi8(eqc);
- if mask != 0 {
- return Some(at + forward_pos(mask));
- }
-
- at += VECTOR_SIZE;
- let mask = _mm_movemask_epi8(eqd);
- debug_assert!(mask != 0);
- return Some(at + forward_pos(mask));
- }
- ptr = ptr.add(loop_size);
- }
- while ptr <= end_ptr.sub(VECTOR_SIZE) {
- debug_assert!(sub(end_ptr, ptr) >= VECTOR_SIZE);
-
- if let Some(i) = forward_search1(start_ptr, end_ptr, ptr, vn1) {
- return Some(i);
- }
- ptr = ptr.add(VECTOR_SIZE);
- }
- if ptr < end_ptr {
- debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE);
- ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr));
- debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE);
-
- return forward_search1(start_ptr, end_ptr, ptr, vn1);
- }
- None
-}
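
The alignment step from point (1) above is ordinary pointer arithmetic: round the haystack start forward to the next 16-byte boundary before entering the aligned loop. As a standalone sketch:

    // Round `start` forward to the next 16-byte boundary. When `start` is
    // already aligned this still advances a full 16 bytes, which is fine here
    // because the first 16 bytes were already searched with an unaligned load.
    fn first_aligned(start: *const u8) -> *const u8 {
        const VECTOR_SIZE: usize = 16;
        const VECTOR_ALIGN: usize = VECTOR_SIZE - 1;
        let advance = VECTOR_SIZE - (start as usize & VECTOR_ALIGN);
        start.wrapping_add(advance)
    }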
-
-#[target_feature(enable = "sse2")]
-pub unsafe fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
- let vn1 = _mm_set1_epi8(n1 as i8);
- let vn2 = _mm_set1_epi8(n2 as i8);
- let len = haystack.len();
- let loop_size = cmp::min(LOOP_SIZE2, len);
- let start_ptr = haystack.as_ptr();
- let end_ptr = start_ptr.add(haystack.len());
- let mut ptr = start_ptr;
-
- if haystack.len() < VECTOR_SIZE {
- while ptr < end_ptr {
- if *ptr == n1 || *ptr == n2 {
- return Some(sub(ptr, start_ptr));
- }
- ptr = ptr.offset(1);
- }
- return None;
- }
-
- if let Some(i) = forward_search2(start_ptr, end_ptr, ptr, vn1, vn2) {
- return Some(i);
- }
-
- ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN));
- debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr);
- while loop_size == LOOP_SIZE2 && ptr <= end_ptr.sub(loop_size) {
- debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
-
- let a = _mm_load_si128(ptr as *const __m128i);
- let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
- let eqa1 = _mm_cmpeq_epi8(vn1, a);
- let eqb1 = _mm_cmpeq_epi8(vn1, b);
- let eqa2 = _mm_cmpeq_epi8(vn2, a);
- let eqb2 = _mm_cmpeq_epi8(vn2, b);
- let or1 = _mm_or_si128(eqa1, eqb1);
- let or2 = _mm_or_si128(eqa2, eqb2);
- let or3 = _mm_or_si128(or1, or2);
- if _mm_movemask_epi8(or3) != 0 {
- let mut at = sub(ptr, start_ptr);
- let mask1 = _mm_movemask_epi8(eqa1);
- let mask2 = _mm_movemask_epi8(eqa2);
- if mask1 != 0 || mask2 != 0 {
- return Some(at + forward_pos2(mask1, mask2));
- }
-
- at += VECTOR_SIZE;
- let mask1 = _mm_movemask_epi8(eqb1);
- let mask2 = _mm_movemask_epi8(eqb2);
- return Some(at + forward_pos2(mask1, mask2));
- }
- ptr = ptr.add(loop_size);
- }
- while ptr <= end_ptr.sub(VECTOR_SIZE) {
- if let Some(i) = forward_search2(start_ptr, end_ptr, ptr, vn1, vn2) {
- return Some(i);
- }
- ptr = ptr.add(VECTOR_SIZE);
- }
- if ptr < end_ptr {
- debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE);
- ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr));
- debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE);
-
- return forward_search2(start_ptr, end_ptr, ptr, vn1, vn2);
- }
- None
-}
-
-#[target_feature(enable = "sse2")]
-pub unsafe fn memchr3(
- n1: u8,
- n2: u8,
- n3: u8,
- haystack: &[u8],
-) -> Option<usize> {
- let vn1 = _mm_set1_epi8(n1 as i8);
- let vn2 = _mm_set1_epi8(n2 as i8);
- let vn3 = _mm_set1_epi8(n3 as i8);
- let len = haystack.len();
- let loop_size = cmp::min(LOOP_SIZE2, len);
- let start_ptr = haystack.as_ptr();
- let end_ptr = start_ptr.add(haystack.len());
- let mut ptr = start_ptr;
-
- if haystack.len() < VECTOR_SIZE {
- while ptr < end_ptr {
- if *ptr == n1 || *ptr == n2 || *ptr == n3 {
- return Some(sub(ptr, start_ptr));
- }
- ptr = ptr.offset(1);
- }
- return None;
- }
-
- if let Some(i) = forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) {
- return Some(i);
- }
-
- ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN));
- debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr);
- while loop_size == LOOP_SIZE2 && ptr <= end_ptr.sub(loop_size) {
- debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
-
- let a = _mm_load_si128(ptr as *const __m128i);
- let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
- let eqa1 = _mm_cmpeq_epi8(vn1, a);
- let eqb1 = _mm_cmpeq_epi8(vn1, b);
- let eqa2 = _mm_cmpeq_epi8(vn2, a);
- let eqb2 = _mm_cmpeq_epi8(vn2, b);
- let eqa3 = _mm_cmpeq_epi8(vn3, a);
- let eqb3 = _mm_cmpeq_epi8(vn3, b);
- let or1 = _mm_or_si128(eqa1, eqb1);
- let or2 = _mm_or_si128(eqa2, eqb2);
- let or3 = _mm_or_si128(eqa3, eqb3);
- let or4 = _mm_or_si128(or1, or2);
- let or5 = _mm_or_si128(or3, or4);
- if _mm_movemask_epi8(or5) != 0 {
- let mut at = sub(ptr, start_ptr);
- let mask1 = _mm_movemask_epi8(eqa1);
- let mask2 = _mm_movemask_epi8(eqa2);
- let mask3 = _mm_movemask_epi8(eqa3);
- if mask1 != 0 || mask2 != 0 || mask3 != 0 {
- return Some(at + forward_pos3(mask1, mask2, mask3));
- }
-
- at += VECTOR_SIZE;
- let mask1 = _mm_movemask_epi8(eqb1);
- let mask2 = _mm_movemask_epi8(eqb2);
- let mask3 = _mm_movemask_epi8(eqb3);
- return Some(at + forward_pos3(mask1, mask2, mask3));
- }
- ptr = ptr.add(loop_size);
- }
- while ptr <= end_ptr.sub(VECTOR_SIZE) {
- if let Some(i) =
- forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3)
- {
- return Some(i);
- }
- ptr = ptr.add(VECTOR_SIZE);
- }
- if ptr < end_ptr {
- debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE);
- ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr));
- debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE);
-
- return forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3);
- }
- None
-}
-
-#[target_feature(enable = "sse2")]
-pub unsafe fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
- let vn1 = _mm_set1_epi8(n1 as i8);
- let len = haystack.len();
- let loop_size = cmp::min(LOOP_SIZE, len);
- let start_ptr = haystack.as_ptr();
- let end_ptr = start_ptr.add(haystack.len());
- let mut ptr = end_ptr;
-
- if haystack.len() < VECTOR_SIZE {
- while ptr > start_ptr {
- ptr = ptr.offset(-1);
- if *ptr == n1 {
- return Some(sub(ptr, start_ptr));
- }
- }
- return None;
- }
-
- ptr = ptr.sub(VECTOR_SIZE);
- if let Some(i) = reverse_search1(start_ptr, end_ptr, ptr, vn1) {
- return Some(i);
- }
-
- ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8;
- debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
- while loop_size == LOOP_SIZE && ptr >= start_ptr.add(loop_size) {
- debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
-
- ptr = ptr.sub(loop_size);
- let a = _mm_load_si128(ptr as *const __m128i);
- let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
- let c = _mm_load_si128(ptr.add(2 * VECTOR_SIZE) as *const __m128i);
- let d = _mm_load_si128(ptr.add(3 * VECTOR_SIZE) as *const __m128i);
- let eqa = _mm_cmpeq_epi8(vn1, a);
- let eqb = _mm_cmpeq_epi8(vn1, b);
- let eqc = _mm_cmpeq_epi8(vn1, c);
- let eqd = _mm_cmpeq_epi8(vn1, d);
- let or1 = _mm_or_si128(eqa, eqb);
- let or2 = _mm_or_si128(eqc, eqd);
- let or3 = _mm_or_si128(or1, or2);
- if _mm_movemask_epi8(or3) != 0 {
- let mut at = sub(ptr.add(3 * VECTOR_SIZE), start_ptr);
- let mask = _mm_movemask_epi8(eqd);
- if mask != 0 {
- return Some(at + reverse_pos(mask));
- }
-
- at -= VECTOR_SIZE;
- let mask = _mm_movemask_epi8(eqc);
- if mask != 0 {
- return Some(at + reverse_pos(mask));
- }
-
- at -= VECTOR_SIZE;
- let mask = _mm_movemask_epi8(eqb);
- if mask != 0 {
- return Some(at + reverse_pos(mask));
- }
-
- at -= VECTOR_SIZE;
- let mask = _mm_movemask_epi8(eqa);
- debug_assert!(mask != 0);
- return Some(at + reverse_pos(mask));
- }
- }
- while ptr >= start_ptr.add(VECTOR_SIZE) {
- ptr = ptr.sub(VECTOR_SIZE);
- if let Some(i) = reverse_search1(start_ptr, end_ptr, ptr, vn1) {
- return Some(i);
- }
- }
- if ptr > start_ptr {
- debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE);
- return reverse_search1(start_ptr, end_ptr, start_ptr, vn1);
- }
- None
-}
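
The reverse routine rounds the other way: the end pointer is rounded down to a 16-byte boundary so the backward loop can use aligned loads. In isolation, the `end_ptr as usize & !VECTOR_ALIGN` step is just:

    // Round `end` down to the previous 16-byte boundary (a no-op if it is
    // already aligned).
    fn last_aligned(end: *const u8) -> *const u8 {
        const VECTOR_ALIGN: usize = 16 - 1;
        ((end as usize) & !VECTOR_ALIGN) as *const u8
    }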
-
-#[target_feature(enable = "sse2")]
-pub unsafe fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
- let vn1 = _mm_set1_epi8(n1 as i8);
- let vn2 = _mm_set1_epi8(n2 as i8);
- let len = haystack.len();
- let loop_size = cmp::min(LOOP_SIZE2, len);
- let start_ptr = haystack.as_ptr();
- let end_ptr = start_ptr.add(haystack.len());
- let mut ptr = end_ptr;
-
- if haystack.len() < VECTOR_SIZE {
- while ptr > start_ptr {
- ptr = ptr.offset(-1);
- if *ptr == n1 || *ptr == n2 {
- return Some(sub(ptr, start_ptr));
- }
- }
- return None;
- }
-
- ptr = ptr.sub(VECTOR_SIZE);
- if let Some(i) = reverse_search2(start_ptr, end_ptr, ptr, vn1, vn2) {
- return Some(i);
- }
-
- ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8;
- debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
- while loop_size == LOOP_SIZE2 && ptr >= start_ptr.add(loop_size) {
- debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
-
- ptr = ptr.sub(loop_size);
- let a = _mm_load_si128(ptr as *const __m128i);
- let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
- let eqa1 = _mm_cmpeq_epi8(vn1, a);
- let eqb1 = _mm_cmpeq_epi8(vn1, b);
- let eqa2 = _mm_cmpeq_epi8(vn2, a);
- let eqb2 = _mm_cmpeq_epi8(vn2, b);
- let or1 = _mm_or_si128(eqa1, eqb1);
- let or2 = _mm_or_si128(eqa2, eqb2);
- let or3 = _mm_or_si128(or1, or2);
- if _mm_movemask_epi8(or3) != 0 {
- let mut at = sub(ptr.add(VECTOR_SIZE), start_ptr);
- let mask1 = _mm_movemask_epi8(eqb1);
- let mask2 = _mm_movemask_epi8(eqb2);
- if mask1 != 0 || mask2 != 0 {
- return Some(at + reverse_pos2(mask1, mask2));
- }
-
- at -= VECTOR_SIZE;
- let mask1 = _mm_movemask_epi8(eqa1);
- let mask2 = _mm_movemask_epi8(eqa2);
- return Some(at + reverse_pos2(mask1, mask2));
- }
- }
- while ptr >= start_ptr.add(VECTOR_SIZE) {
- ptr = ptr.sub(VECTOR_SIZE);
- if let Some(i) = reverse_search2(start_ptr, end_ptr, ptr, vn1, vn2) {
- return Some(i);
- }
- }
- if ptr > start_ptr {
- debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE);
- return reverse_search2(start_ptr, end_ptr, start_ptr, vn1, vn2);
- }
- None
-}
-
-#[target_feature(enable = "sse2")]
-pub unsafe fn memrchr3(
- n1: u8,
- n2: u8,
- n3: u8,
- haystack: &[u8],
-) -> Option<usize> {
- let vn1 = _mm_set1_epi8(n1 as i8);
- let vn2 = _mm_set1_epi8(n2 as i8);
- let vn3 = _mm_set1_epi8(n3 as i8);
- let len = haystack.len();
- let loop_size = cmp::min(LOOP_SIZE2, len);
- let start_ptr = haystack.as_ptr();
- let end_ptr = start_ptr.add(haystack.len());
- let mut ptr = end_ptr;
-
- if haystack.len() < VECTOR_SIZE {
- while ptr > start_ptr {
- ptr = ptr.offset(-1);
- if *ptr == n1 || *ptr == n2 || *ptr == n3 {
- return Some(sub(ptr, start_ptr));
- }
- }
- return None;
- }
-
- ptr = ptr.sub(VECTOR_SIZE);
- if let Some(i) = reverse_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) {
- return Some(i);
- }
-
- ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8;
- debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
- while loop_size == LOOP_SIZE2 && ptr >= start_ptr.add(loop_size) {
- debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
-
- ptr = ptr.sub(loop_size);
- let a = _mm_load_si128(ptr as *const __m128i);
- let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
- let eqa1 = _mm_cmpeq_epi8(vn1, a);
- let eqb1 = _mm_cmpeq_epi8(vn1, b);
- let eqa2 = _mm_cmpeq_epi8(vn2, a);
- let eqb2 = _mm_cmpeq_epi8(vn2, b);
- let eqa3 = _mm_cmpeq_epi8(vn3, a);
- let eqb3 = _mm_cmpeq_epi8(vn3, b);
- let or1 = _mm_or_si128(eqa1, eqb1);
- let or2 = _mm_or_si128(eqa2, eqb2);
- let or3 = _mm_or_si128(eqa3, eqb3);
- let or4 = _mm_or_si128(or1, or2);
- let or5 = _mm_or_si128(or3, or4);
- if _mm_movemask_epi8(or5) != 0 {
- let mut at = sub(ptr.add(VECTOR_SIZE), start_ptr);
- let mask1 = _mm_movemask_epi8(eqb1);
- let mask2 = _mm_movemask_epi8(eqb2);
- let mask3 = _mm_movemask_epi8(eqb3);
- if mask1 != 0 || mask2 != 0 || mask3 != 0 {
- return Some(at + reverse_pos3(mask1, mask2, mask3));
- }
-
- at -= VECTOR_SIZE;
- let mask1 = _mm_movemask_epi8(eqa1);
- let mask2 = _mm_movemask_epi8(eqa2);
- let mask3 = _mm_movemask_epi8(eqa3);
- return Some(at + reverse_pos3(mask1, mask2, mask3));
- }
- }
- while ptr >= start_ptr.add(VECTOR_SIZE) {
- ptr = ptr.sub(VECTOR_SIZE);
- if let Some(i) =
- reverse_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3)
- {
- return Some(i);
- }
- }
- if ptr > start_ptr {
- debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE);
- return reverse_search3(start_ptr, end_ptr, start_ptr, vn1, vn2, vn3);
- }
- None
-}
-
-#[target_feature(enable = "sse2")]
-pub unsafe fn forward_search1(
- start_ptr: *const u8,
- end_ptr: *const u8,
- ptr: *const u8,
- vn1: __m128i,
-) -> Option<usize> {
- debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
- debug_assert!(start_ptr <= ptr);
- debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
-
- let chunk = _mm_loadu_si128(ptr as *const __m128i);
- let mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, vn1));
- if mask != 0 {
- Some(sub(ptr, start_ptr) + forward_pos(mask))
- } else {
- None
- }
-}
-
-#[target_feature(enable = "sse2")]
-unsafe fn forward_search2(
- start_ptr: *const u8,
- end_ptr: *const u8,
- ptr: *const u8,
- vn1: __m128i,
- vn2: __m128i,
-) -> Option<usize> {
- debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
- debug_assert!(start_ptr <= ptr);
- debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
-
- let chunk = _mm_loadu_si128(ptr as *const __m128i);
- let eq1 = _mm_cmpeq_epi8(chunk, vn1);
- let eq2 = _mm_cmpeq_epi8(chunk, vn2);
- if _mm_movemask_epi8(_mm_or_si128(eq1, eq2)) != 0 {
- let mask1 = _mm_movemask_epi8(eq1);
- let mask2 = _mm_movemask_epi8(eq2);
- Some(sub(ptr, start_ptr) + forward_pos2(mask1, mask2))
- } else {
- None
- }
-}
-
-#[target_feature(enable = "sse2")]
-pub unsafe fn forward_search3(
- start_ptr: *const u8,
- end_ptr: *const u8,
- ptr: *const u8,
- vn1: __m128i,
- vn2: __m128i,
- vn3: __m128i,
-) -> Option<usize> {
- debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
- debug_assert!(start_ptr <= ptr);
- debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
-
- let chunk = _mm_loadu_si128(ptr as *const __m128i);
- let eq1 = _mm_cmpeq_epi8(chunk, vn1);
- let eq2 = _mm_cmpeq_epi8(chunk, vn2);
- let eq3 = _mm_cmpeq_epi8(chunk, vn3);
- let or = _mm_or_si128(eq1, eq2);
- if _mm_movemask_epi8(_mm_or_si128(or, eq3)) != 0 {
- let mask1 = _mm_movemask_epi8(eq1);
- let mask2 = _mm_movemask_epi8(eq2);
- let mask3 = _mm_movemask_epi8(eq3);
- Some(sub(ptr, start_ptr) + forward_pos3(mask1, mask2, mask3))
- } else {
- None
- }
-}
-
-#[target_feature(enable = "sse2")]
-unsafe fn reverse_search1(
- start_ptr: *const u8,
- end_ptr: *const u8,
- ptr: *const u8,
- vn1: __m128i,
-) -> Option<usize> {
- debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
- debug_assert!(start_ptr <= ptr);
- debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
-
- let chunk = _mm_loadu_si128(ptr as *const __m128i);
- let mask = _mm_movemask_epi8(_mm_cmpeq_epi8(vn1, chunk));
- if mask != 0 {
- Some(sub(ptr, start_ptr) + reverse_pos(mask))
- } else {
- None
- }
-}
-
-#[target_feature(enable = "sse2")]
-unsafe fn reverse_search2(
- start_ptr: *const u8,
- end_ptr: *const u8,
- ptr: *const u8,
- vn1: __m128i,
- vn2: __m128i,
-) -> Option<usize> {
- debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
- debug_assert!(start_ptr <= ptr);
- debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
-
- let chunk = _mm_loadu_si128(ptr as *const __m128i);
- let eq1 = _mm_cmpeq_epi8(chunk, vn1);
- let eq2 = _mm_cmpeq_epi8(chunk, vn2);
- if _mm_movemask_epi8(_mm_or_si128(eq1, eq2)) != 0 {
- let mask1 = _mm_movemask_epi8(eq1);
- let mask2 = _mm_movemask_epi8(eq2);
- Some(sub(ptr, start_ptr) + reverse_pos2(mask1, mask2))
- } else {
- None
- }
-}
-
-#[target_feature(enable = "sse2")]
-unsafe fn reverse_search3(
- start_ptr: *const u8,
- end_ptr: *const u8,
- ptr: *const u8,
- vn1: __m128i,
- vn2: __m128i,
- vn3: __m128i,
-) -> Option<usize> {
- debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
- debug_assert!(start_ptr <= ptr);
- debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
-
- let chunk = _mm_loadu_si128(ptr as *const __m128i);
- let eq1 = _mm_cmpeq_epi8(chunk, vn1);
- let eq2 = _mm_cmpeq_epi8(chunk, vn2);
- let eq3 = _mm_cmpeq_epi8(chunk, vn3);
- let or = _mm_or_si128(eq1, eq2);
- if _mm_movemask_epi8(_mm_or_si128(or, eq3)) != 0 {
- let mask1 = _mm_movemask_epi8(eq1);
- let mask2 = _mm_movemask_epi8(eq2);
- let mask3 = _mm_movemask_epi8(eq3);
- Some(sub(ptr, start_ptr) + reverse_pos3(mask1, mask2, mask3))
- } else {
- None
- }
-}
-
-/// Compute the position of the first matching byte from the given mask. The
-/// position returned is always in the range [0, 15].
-///
-/// The mask given is expected to be the result of _mm_movemask_epi8.
-fn forward_pos(mask: i32) -> usize {
- // We are dealing with little endian here, where the most significant byte
- // is at a higher address. That means the least significant bit that is set
- // corresponds to the position of our first matching byte. That position
- // is the number of trailing zeros, i.e., the zero bits below the least
- // significant set bit.
- mask.trailing_zeros() as usize
-}
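
A worked example, assuming `forward_pos` is in scope: if bytes 3 and 9 of a chunk matched, bits 3 and 9 of the movemask are set and the first match falls out of `trailing_zeros`.

    #[test]
    fn forward_pos_example() {
        // Bits 3 and 9 set: 0b10_0000_1000 == 520, trailing_zeros == 3.
        assert_eq!(forward_pos(0b10_0000_1000), 3);
    }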
-
-/// Compute the position of the first matching byte from the given masks. The
-/// position returned is always in the range [0, 15]. Each mask corresponds to
-/// the equality comparison of a single byte.
-///
-/// The masks given are expected to be the result of _mm_movemask_epi8, where
-/// at least one of the masks is non-zero (i.e., indicates a match).
-fn forward_pos2(mask1: i32, mask2: i32) -> usize {
- debug_assert!(mask1 != 0 || mask2 != 0);
-
- forward_pos(mask1 | mask2)
-}
-
-/// Compute the position of the first matching byte from the given masks. The
-/// position returned is always in the range [0, 15]. Each mask corresponds to
-/// the equality comparison of a single byte.
-///
-/// The masks given are expected to be the result of _mm_movemask_epi8, where
-/// at least one of the masks is non-zero (i.e., indicates a match).
-fn forward_pos3(mask1: i32, mask2: i32, mask3: i32) -> usize {
- debug_assert!(mask1 != 0 || mask2 != 0 || mask3 != 0);
-
- forward_pos(mask1 | mask2 | mask3)
-}
-
-/// Compute the position of the last matching byte from the given mask. The
-/// position returned is always in the range [0, 15].
-///
-/// The mask given is expected to be the result of _mm_movemask_epi8.
-fn reverse_pos(mask: i32) -> usize {
- // We are dealing with little endian here, where the most significant byte
- // is at a higher address. That means the most significant bit that is set
- // corresponds to the position of our last matching byte. The position from
- // the end of the mask is therefore the number of leading zeros in a 16
- // bit integer, and the position from the start of the mask is therefore
- // 16 - (leading zeros) - 1.
- VECTOR_SIZE - (mask as u16).leading_zeros() as usize - 1
-}
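
The mirror-image example for `reverse_pos`: with the same two matches, the most significant set bit wins.

    #[test]
    fn reverse_pos_example() {
        // Bits 3 and 9 set: 16 - leading_zeros(520u16) - 1 == 16 - 6 - 1 == 9.
        assert_eq!(reverse_pos(0b10_0000_1000), 9);
    }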
-
-/// Compute the position of the last matching byte from the given masks. The
-/// position returned is always in the range [0, 15]. Each mask corresponds to
-/// the equality comparison of a single byte.
-///
-/// The masks given are expected to be the result of _mm_movemask_epi8, where
-/// at least one of the masks is non-zero (i.e., indicates a match).
-fn reverse_pos2(mask1: i32, mask2: i32) -> usize {
- debug_assert!(mask1 != 0 || mask2 != 0);
-
- reverse_pos(mask1 | mask2)
-}
-
-/// Compute the position of the last matching byte from the given masks. The
-/// position returned is always in the range [0, 15]. Each mask corresponds to
-/// the equality comparison of a single byte.
-///
-/// The masks given are expected to be the result of _mm_movemask_epi8, where
-/// at least one of the masks is non-zero (i.e., indicates a match).
-fn reverse_pos3(mask1: i32, mask2: i32, mask3: i32) -> usize {
- debug_assert!(mask1 != 0 || mask2 != 0 || mask3 != 0);
-
- reverse_pos(mask1 | mask2 | mask3)
-}
-
-/// Subtract `b` from `a` and return the difference. `a` should be greater than
-/// or equal to `b`.
-fn sub(a: *const u8, b: *const u8) -> usize {
- debug_assert!(a >= b);
- (a as usize) - (b as usize)
-}
diff --git a/vendor/memchr/src/memchr/x86/sse42.rs b/vendor/memchr/src/memchr/x86/sse42.rs
deleted file mode 100644
index da38e50c2..000000000
--- a/vendor/memchr/src/memchr/x86/sse42.rs
+++ /dev/null
@@ -1,72 +0,0 @@
-// This code is unused. PCMPESTRI is gratuitously slow. I imagine it might
-// start winning with a hypothetical memchr4 (or greater). This technique might
-// also be good for exposing searches over ranges of bytes, but that departs
-// from the standard memchr API, so it's not clear whether we actually want
-// that or not.
-//
-// N.B. PCMPISTRI appears to be about twice as fast as PCMPESTRI, which is kind
-// of neat. Unfortunately, UTF-8 strings can contain NUL bytes, which means
-// I don't see a way of effectively using PCMPISTRI unless there's some fast
-// way to replace zero bytes with a byte that is not a needle byte.
-
-use core::{arch::x86_64::*, mem::size_of};
-
-use x86::sse2;
-
-const VECTOR_SIZE: usize = size_of::<__m128i>();
-const CONTROL_ANY: i32 = _SIDD_UBYTE_OPS
- | _SIDD_CMP_EQUAL_ANY
- | _SIDD_POSITIVE_POLARITY
- | _SIDD_LEAST_SIGNIFICANT;
-
-#[target_feature(enable = "sse4.2")]
-pub unsafe fn memchr3(
- n1: u8,
- n2: u8,
- n3: u8,
- haystack: &[u8],
-) -> Option<usize> {
- let vn1 = _mm_set1_epi8(n1 as i8);
- let vn2 = _mm_set1_epi8(n2 as i8);
- let vn3 = _mm_set1_epi8(n3 as i8);
- let vn = _mm_setr_epi8(
- n1 as i8, n2 as i8, n3 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- );
- let len = haystack.len();
- let start_ptr = haystack.as_ptr();
- let end_ptr = haystack[haystack.len()..].as_ptr();
- let mut ptr = start_ptr;
-
- if haystack.len() < VECTOR_SIZE {
- while ptr < end_ptr {
- if *ptr == n1 || *ptr == n2 || *ptr == n3 {
- return Some(sub(ptr, start_ptr));
- }
- ptr = ptr.offset(1);
- }
- return None;
- }
- while ptr <= end_ptr.sub(VECTOR_SIZE) {
- let chunk = _mm_loadu_si128(ptr as *const __m128i);
- let res = _mm_cmpestri(vn, 3, chunk, 16, CONTROL_ANY);
- if res < 16 {
- return Some(sub(ptr, start_ptr) + res as usize);
- }
- ptr = ptr.add(VECTOR_SIZE);
- }
- if ptr < end_ptr {
- debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE);
- ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr));
- debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE);
-
- return sse2::forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3);
- }
- None
-}
-
-/// Subtract `b` from `a` and return the difference. `a` should be greater than
-/// or equal to `b`.
-fn sub(a: *const u8, b: *const u8) -> usize {
- debug_assert!(a >= b);
- (a as usize) - (b as usize)
-}