Diffstat (limited to 'third_party/rust/memchr/src')
-rw-r--r--  third_party/rust/memchr/src/c.rs              44
-rw-r--r--  third_party/rust/memchr/src/fallback.rs      330
-rw-r--r--  third_party/rust/memchr/src/iter.rs          173
-rw-r--r--  third_party/rust/memchr/src/lib.rs           451
-rw-r--r--  third_party/rust/memchr/src/naive.rs          25
-rw-r--r--  third_party/rust/memchr/src/tests/iter.rs    229
-rw-r--r--  third_party/rust/memchr/src/tests/memchr.rs  131
-rw-r--r--  third_party/rust/memchr/src/tests/miri.rs     19
-rw-r--r--  third_party/rust/memchr/src/tests/mod.rs     362
-rw-r--r--  third_party/rust/memchr/src/x86/avx.rs       703
-rw-r--r--  third_party/rust/memchr/src/x86/mod.rs       119
-rw-r--r--  third_party/rust/memchr/src/x86/sse2.rs      793
-rw-r--r--  third_party/rust/memchr/src/x86/sse42.rs      75
13 files changed, 3454 insertions, 0 deletions
diff --git a/third_party/rust/memchr/src/c.rs b/third_party/rust/memchr/src/c.rs
new file mode 100644
index 0000000000..63feca979c
--- /dev/null
+++ b/third_party/rust/memchr/src/c.rs
@@ -0,0 +1,44 @@
+// This module defines safe wrappers around memchr (POSIX) and memrchr (GNU
+// extension).
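+//
+// Both wrappers translate the raw pointer returned by libc back into an index
+// by subtracting the haystack's base address, and map a NULL result to `None`.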
+
+#![allow(dead_code)]
+
+extern crate libc;
+
+use self::libc::{c_int, c_void, size_t};
+
+pub fn memchr(needle: u8, haystack: &[u8]) -> Option<usize> {
+ let p = unsafe {
+ libc::memchr(
+ haystack.as_ptr() as *const c_void,
+ needle as c_int,
+ haystack.len() as size_t,
+ )
+ };
+ if p.is_null() {
+ None
+ } else {
+ Some(p as usize - (haystack.as_ptr() as usize))
+ }
+}
+
+// memrchr is a GNU extension. We know it's available on Linux, so start there.
+#[cfg(target_os = "linux")]
+pub fn memrchr(needle: u8, haystack: &[u8]) -> Option<usize> {
+ // GNU's memrchr() will - unlike memchr() - error if haystack is empty.
+ if haystack.is_empty() {
+ return None;
+ }
+ let p = unsafe {
+ libc::memrchr(
+ haystack.as_ptr() as *const c_void,
+ needle as c_int,
+ haystack.len() as size_t,
+ )
+ };
+ if p.is_null() {
+ None
+ } else {
+ Some(p as usize - (haystack.as_ptr() as usize))
+ }
+}
diff --git a/third_party/rust/memchr/src/fallback.rs b/third_party/rust/memchr/src/fallback.rs
new file mode 100644
index 0000000000..8bc32b27ba
--- /dev/null
+++ b/third_party/rust/memchr/src/fallback.rs
@@ -0,0 +1,330 @@
+// This module defines pure Rust platform independent implementations of all
+// the memchr routines. We do our best to make them fast. Some of them may even
+// get auto-vectorized.
+
+use core::cmp;
+use core::usize;
+
+#[cfg(target_pointer_width = "16")]
+const USIZE_BYTES: usize = 2;
+
+#[cfg(target_pointer_width = "32")]
+const USIZE_BYTES: usize = 4;
+
+#[cfg(target_pointer_width = "64")]
+const USIZE_BYTES: usize = 8;
+
+// The number of bytes to loop at in one iteration of memchr/memrchr.
+const LOOP_SIZE: usize = 2 * USIZE_BYTES;
+
+/// Return `true` if `x` contains any zero byte.
+///
+/// From *Matters Computational*, J. Arndt
+///
+/// "The idea is to subtract one from each of the bytes and then look for
+/// bytes where the borrow propagated all the way to the most significant
+/// bit."
+#[inline(always)]
+fn contains_zero_byte(x: usize) -> bool {
+ const LO_U64: u64 = 0x0101010101010101;
+ const HI_U64: u64 = 0x8080808080808080;
+
+ const LO_USIZE: usize = LO_U64 as usize;
+ const HI_USIZE: usize = HI_U64 as usize;
+
+ x.wrapping_sub(LO_USIZE) & !x & HI_USIZE != 0
+}
+
+/// Repeat the given byte into a word size number. That is, every 8 bits
+/// is equivalent to the given byte. For example, if `b` is `\x4E` or
+/// `01001110` in binary, then the returned value on a 32-bit system would be:
+/// `01001110_01001110_01001110_01001110`.
+#[inline(always)]
+fn repeat_byte(b: u8) -> usize {
+ (b as usize) * (usize::MAX / 255)
+}
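+
+// A small illustrative test (not part of the upstream crate) tying the two
+// helpers above together: XORing a word with `repeat_byte(needle)` zeroes
+// exactly the bytes equal to the needle, and `contains_zero_byte` then
+// reports whether any byte matched. Assumes a 64-bit target and a test build.
+#[cfg(all(test, target_pointer_width = "64"))]
+#[test]
+fn zero_byte_trick_example() {
+    assert_eq!(repeat_byte(0x4E), 0x4E4E_4E4E_4E4E_4E4E);
+    // A word full of 'a' (0x61) bytes with a single 'X' (0x58) byte in it.
+    let word: usize = 0x6161_6161_6158_6161;
+    // Only the 'a' bytes cancel to zero, so a search for 'a' hits...
+    assert!(contains_zero_byte(word ^ repeat_byte(b'a')));
+    // ...while a search for a byte that isn't present does not.
+    assert!(!contains_zero_byte(word ^ repeat_byte(b'z')));
+}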
+
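+// The forward routines below all follow the same shape: one unaligned word
+// read at the front, an aligned word-at-a-time loop over the middle (memchr
+// unrolls it to two words per iteration), and a byte-wise `forward_search`
+// for whatever remains, which also handles haystacks shorter than a word.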
+pub fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = repeat_byte(n1);
+ let confirm = |byte| byte == n1;
+ let loop_size = cmp::min(LOOP_SIZE, haystack.len());
+ let align = USIZE_BYTES - 1;
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ unsafe {
+ if haystack.len() < USIZE_BYTES {
+ return forward_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ let chunk = (ptr as *const usize).read_unaligned();
+ if contains_zero_byte(chunk ^ vn1) {
+ return forward_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & align));
+ debug_assert!(ptr > start_ptr);
+ debug_assert!(end_ptr.sub(USIZE_BYTES) >= start_ptr);
+ while loop_size == LOOP_SIZE && ptr <= end_ptr.sub(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
+
+ let a = *(ptr as *const usize);
+ let b = *(ptr.add(USIZE_BYTES) as *const usize);
+ let eqa = contains_zero_byte(a ^ vn1);
+ let eqb = contains_zero_byte(b ^ vn1);
+ if eqa || eqb {
+ break;
+ }
+ ptr = ptr.add(LOOP_SIZE);
+ }
+ forward_search(start_ptr, end_ptr, ptr, confirm)
+ }
+}
+
+/// Like `memchr`, but searches for two bytes instead of one.
+pub fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = repeat_byte(n1);
+ let vn2 = repeat_byte(n2);
+ let confirm = |byte| byte == n1 || byte == n2;
+ let align = USIZE_BYTES - 1;
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ unsafe {
+ if haystack.len() < USIZE_BYTES {
+ return forward_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ let chunk = (ptr as *const usize).read_unaligned();
+ let eq1 = contains_zero_byte(chunk ^ vn1);
+ let eq2 = contains_zero_byte(chunk ^ vn2);
+ if eq1 || eq2 {
+ return forward_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & align));
+ debug_assert!(ptr > start_ptr);
+ debug_assert!(end_ptr.sub(USIZE_BYTES) >= start_ptr);
+ while ptr <= end_ptr.sub(USIZE_BYTES) {
+ debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
+
+ let chunk = *(ptr as *const usize);
+ let eq1 = contains_zero_byte(chunk ^ vn1);
+ let eq2 = contains_zero_byte(chunk ^ vn2);
+ if eq1 || eq2 {
+ break;
+ }
+ ptr = ptr.add(USIZE_BYTES);
+ }
+ forward_search(start_ptr, end_ptr, ptr, confirm)
+ }
+}
+
+/// Like `memchr`, but searches for three bytes instead of one.
+pub fn memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = repeat_byte(n1);
+ let vn2 = repeat_byte(n2);
+ let vn3 = repeat_byte(n3);
+ let confirm = |byte| byte == n1 || byte == n2 || byte == n3;
+ let align = USIZE_BYTES - 1;
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ unsafe {
+ if haystack.len() < USIZE_BYTES {
+ return forward_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ let chunk = (ptr as *const usize).read_unaligned();
+ let eq1 = contains_zero_byte(chunk ^ vn1);
+ let eq2 = contains_zero_byte(chunk ^ vn2);
+ let eq3 = contains_zero_byte(chunk ^ vn3);
+ if eq1 || eq2 || eq3 {
+ return forward_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & align));
+ debug_assert!(ptr > start_ptr);
+ debug_assert!(end_ptr.sub(USIZE_BYTES) >= start_ptr);
+ while ptr <= end_ptr.sub(USIZE_BYTES) {
+ debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
+
+ let chunk = *(ptr as *const usize);
+ let eq1 = contains_zero_byte(chunk ^ vn1);
+ let eq2 = contains_zero_byte(chunk ^ vn2);
+ let eq3 = contains_zero_byte(chunk ^ vn3);
+ if eq1 || eq2 || eq3 {
+ break;
+ }
+ ptr = ptr.add(USIZE_BYTES);
+ }
+ forward_search(start_ptr, end_ptr, ptr, confirm)
+ }
+}
+
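+// The reverse routines mirror the forward ones: one unaligned word read at
+// the back, an aligned loop walking backwards, and a byte-wise
+// `reverse_search` for the remainder.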
+/// Return the last index matching the byte `x` in `text`.
+pub fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = repeat_byte(n1);
+ let confirm = |byte| byte == n1;
+ let loop_size = cmp::min(LOOP_SIZE, haystack.len());
+ let align = USIZE_BYTES - 1;
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = end_ptr;
+
+ unsafe {
+ if haystack.len() < USIZE_BYTES {
+ return reverse_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ let chunk = (ptr.sub(USIZE_BYTES) as *const usize).read_unaligned();
+ if contains_zero_byte(chunk ^ vn1) {
+ return reverse_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ ptr = (end_ptr as usize & !align) as *const u8;
+ debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
+ while loop_size == LOOP_SIZE && ptr >= start_ptr.add(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
+
+ let a = *(ptr.sub(2 * USIZE_BYTES) as *const usize);
+ let b = *(ptr.sub(1 * USIZE_BYTES) as *const usize);
+ let eqa = contains_zero_byte(a ^ vn1);
+ let eqb = contains_zero_byte(b ^ vn1);
+ if eqa || eqb {
+ break;
+ }
+ ptr = ptr.sub(loop_size);
+ }
+ reverse_search(start_ptr, end_ptr, ptr, confirm)
+ }
+}
+
+/// Like `memrchr`, but searches for two bytes instead of one.
+pub fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = repeat_byte(n1);
+ let vn2 = repeat_byte(n2);
+ let confirm = |byte| byte == n1 || byte == n2;
+ let align = USIZE_BYTES - 1;
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = end_ptr;
+
+ unsafe {
+ if haystack.len() < USIZE_BYTES {
+ return reverse_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ let chunk = (ptr.sub(USIZE_BYTES) as *const usize).read_unaligned();
+ let eq1 = contains_zero_byte(chunk ^ vn1);
+ let eq2 = contains_zero_byte(chunk ^ vn2);
+ if eq1 || eq2 {
+ return reverse_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ ptr = (end_ptr as usize & !align) as *const u8;
+ debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
+ while ptr >= start_ptr.add(USIZE_BYTES) {
+ debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
+
+ let chunk = *(ptr.sub(USIZE_BYTES) as *const usize);
+ let eq1 = contains_zero_byte(chunk ^ vn1);
+ let eq2 = contains_zero_byte(chunk ^ vn2);
+ if eq1 || eq2 {
+ break;
+ }
+ ptr = ptr.sub(USIZE_BYTES);
+ }
+ reverse_search(start_ptr, end_ptr, ptr, confirm)
+ }
+}
+
+/// Like `memrchr`, but searches for three bytes instead of one.
+pub fn memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = repeat_byte(n1);
+ let vn2 = repeat_byte(n2);
+ let vn3 = repeat_byte(n3);
+ let confirm = |byte| byte == n1 || byte == n2 || byte == n3;
+ let align = USIZE_BYTES - 1;
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = end_ptr;
+
+ unsafe {
+ if haystack.len() < USIZE_BYTES {
+ return reverse_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ let chunk = (ptr.sub(USIZE_BYTES) as *const usize).read_unaligned();
+ let eq1 = contains_zero_byte(chunk ^ vn1);
+ let eq2 = contains_zero_byte(chunk ^ vn2);
+ let eq3 = contains_zero_byte(chunk ^ vn3);
+ if eq1 || eq2 || eq3 {
+ return reverse_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ ptr = (end_ptr as usize & !align) as *const u8;
+ debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
+ while ptr >= start_ptr.add(USIZE_BYTES) {
+ debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
+
+ let chunk = *(ptr.sub(USIZE_BYTES) as *const usize);
+ let eq1 = contains_zero_byte(chunk ^ vn1);
+ let eq2 = contains_zero_byte(chunk ^ vn2);
+ let eq3 = contains_zero_byte(chunk ^ vn3);
+ if eq1 || eq2 || eq3 {
+ break;
+ }
+ ptr = ptr.sub(USIZE_BYTES);
+ }
+ reverse_search(start_ptr, end_ptr, ptr, confirm)
+ }
+}
+
+#[inline(always)]
+unsafe fn forward_search<F: Fn(u8) -> bool>(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ mut ptr: *const u8,
+ confirm: F,
+) -> Option<usize> {
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr);
+
+ while ptr < end_ptr {
+ if confirm(*ptr) {
+ return Some(sub(ptr, start_ptr));
+ }
+ ptr = ptr.offset(1);
+ }
+ None
+}
+
+#[inline(always)]
+unsafe fn reverse_search<F: Fn(u8) -> bool>(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ mut ptr: *const u8,
+ confirm: F,
+) -> Option<usize> {
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr);
+
+ while ptr > start_ptr {
+ ptr = ptr.offset(-1);
+ if confirm(*ptr) {
+ return Some(sub(ptr, start_ptr));
+ }
+ }
+ None
+}
+
+/// Subtract `b` from `a` and return the difference. `a` should be greater than
+/// or equal to `b`.
+fn sub(a: *const u8, b: *const u8) -> usize {
+ debug_assert!(a >= b);
+ (a as usize) - (b as usize)
+}
diff --git a/third_party/rust/memchr/src/iter.rs b/third_party/rust/memchr/src/iter.rs
new file mode 100644
index 0000000000..6217ae4a09
--- /dev/null
+++ b/third_party/rust/memchr/src/iter.rs
@@ -0,0 +1,173 @@
+use {memchr, memchr2, memchr3, memrchr, memrchr2, memrchr3};
+
+macro_rules! iter_next {
+ // Common code for the memchr iterators:
+ // update haystack and position and produce the index
+ //
+ // self: &mut Self where Self is the iterator
+ // search_result: Option<usize> which is the result of the corresponding
+ // memchr function.
+ //
+ // Returns Option<usize> (the next iterator element)
+ ($self_:expr, $search_result:expr) => {
+ $search_result.map(move |index| {
+ // split and take the remaining back half
+ $self_.haystack = $self_.haystack.split_at(index + 1).1;
+ let found_position = $self_.position + index;
+ $self_.position = found_position + 1;
+ found_position
+ })
+ };
+}
+
+macro_rules! iter_next_back {
+ ($self_:expr, $search_result:expr) => {
+ $search_result.map(move |index| {
+ // split and take the remaining front half
+ $self_.haystack = $self_.haystack.split_at(index).0;
+ $self_.position + index
+ })
+ };
+}
+
+/// An iterator for `memchr`.
+pub struct Memchr<'a> {
+ needle: u8,
+ // The haystack to iterate over
+ haystack: &'a [u8],
+ // The index
+ position: usize,
+}
+
+impl<'a> Memchr<'a> {
+ /// Creates a new iterator that yields all positions of needle in haystack.
+ #[inline]
+ pub fn new(needle: u8, haystack: &[u8]) -> Memchr {
+ Memchr { needle: needle, haystack: haystack, position: 0 }
+ }
+}
+
+impl<'a> Iterator for Memchr<'a> {
+ type Item = usize;
+
+ #[inline]
+ fn next(&mut self) -> Option<usize> {
+ iter_next!(self, memchr(self.needle, self.haystack))
+ }
+
+ #[inline]
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ (0, Some(self.haystack.len()))
+ }
+}
+
+impl<'a> DoubleEndedIterator for Memchr<'a> {
+ #[inline]
+ fn next_back(&mut self) -> Option<Self::Item> {
+ iter_next_back!(self, memrchr(self.needle, self.haystack))
+ }
+}
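+
+// An illustrative test (not part of the upstream crate) of the bookkeeping in
+// `iter_next!`/`iter_next_back!`: reported positions are indices into the
+// original haystack, even though the iterator re-slices it as it goes.
+// Assumes a test build with the standard test harness.
+#[cfg(test)]
+#[test]
+fn memchr_iter_positions_example() {
+    let mut it = Memchr::new(b'a', b"aXa");
+    // The front half finds index 0 and the back half finds index 2, both
+    // relative to the original three-byte haystack.
+    assert_eq!(it.next(), Some(0));
+    assert_eq!(it.next_back(), Some(2));
+    assert_eq!(it.next(), None);
+}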
+
+/// An iterator for `memchr2`.
+pub struct Memchr2<'a> {
+ needle1: u8,
+ needle2: u8,
+ // The haystack to iterate over
+ haystack: &'a [u8],
+ // The index
+ position: usize,
+}
+
+impl<'a> Memchr2<'a> {
+ /// Creates a new iterator that yields all positions of needle in haystack.
+ #[inline]
+ pub fn new(needle1: u8, needle2: u8, haystack: &[u8]) -> Memchr2 {
+ Memchr2 {
+ needle1: needle1,
+ needle2: needle2,
+ haystack: haystack,
+ position: 0,
+ }
+ }
+}
+
+impl<'a> Iterator for Memchr2<'a> {
+ type Item = usize;
+
+ #[inline]
+ fn next(&mut self) -> Option<usize> {
+ iter_next!(self, memchr2(self.needle1, self.needle2, self.haystack))
+ }
+
+ #[inline]
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ (0, Some(self.haystack.len()))
+ }
+}
+
+impl<'a> DoubleEndedIterator for Memchr2<'a> {
+ #[inline]
+ fn next_back(&mut self) -> Option<Self::Item> {
+ iter_next_back!(
+ self,
+ memrchr2(self.needle1, self.needle2, self.haystack)
+ )
+ }
+}
+
+/// An iterator for `memchr3`.
+pub struct Memchr3<'a> {
+ needle1: u8,
+ needle2: u8,
+ needle3: u8,
+ // The haystack to iterate over
+ haystack: &'a [u8],
+ // The index
+ position: usize,
+}
+
+impl<'a> Memchr3<'a> {
+    /// Creates a new iterator that yields all positions of the needles in
+    /// the haystack.
+ #[inline]
+ pub fn new(
+ needle1: u8,
+ needle2: u8,
+ needle3: u8,
+ haystack: &[u8],
+ ) -> Memchr3 {
+ Memchr3 {
+ needle1: needle1,
+ needle2: needle2,
+ needle3: needle3,
+ haystack: haystack,
+ position: 0,
+ }
+ }
+}
+
+impl<'a> Iterator for Memchr3<'a> {
+ type Item = usize;
+
+ #[inline]
+ fn next(&mut self) -> Option<usize> {
+ iter_next!(
+ self,
+ memchr3(self.needle1, self.needle2, self.needle3, self.haystack)
+ )
+ }
+
+ #[inline]
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ (0, Some(self.haystack.len()))
+ }
+}
+
+impl<'a> DoubleEndedIterator for Memchr3<'a> {
+ #[inline]
+ fn next_back(&mut self) -> Option<Self::Item> {
+ iter_next_back!(
+ self,
+ memrchr3(self.needle1, self.needle2, self.needle3, self.haystack)
+ )
+ }
+}
diff --git a/third_party/rust/memchr/src/lib.rs b/third_party/rust/memchr/src/lib.rs
new file mode 100644
index 0000000000..fed7108734
--- /dev/null
+++ b/third_party/rust/memchr/src/lib.rs
@@ -0,0 +1,451 @@
+/*!
+The `memchr` crate provides heavily optimized routines for searching bytes.
+
+The `memchr` function is traditionally provided by libc, however, the
+performance of `memchr` can vary significantly depending on the specific
+implementation of libc that is used. They can range from manually tuned
+Assembly implementations (like that found in GNU's libc) all the way to
+non-vectorized C implementations (like that found in MUSL).
+
+To smooth out the differences between implementations of libc, at least
+on `x86_64` for Rust 1.27+, this crate provides its own implementation of
+`memchr` that should perform competitively with the one found in GNU's libc.
+The implementation is in pure Rust and has no dependency on a C compiler or an
+Assembler.
+
+Additionally, GNU libc also provides an extension, `memrchr`. This crate
+provides its own implementation of `memrchr` as well, along with `memchr2`,
+`memchr3`, `memrchr2` and `memrchr3`. The difference between `memchr` and
+`memchr2` is that `memchr2` permits finding all occurrences of two bytes
+instead of one. Similarly for `memchr3`.
+*/
+
+#![cfg_attr(not(feature = "std"), no_std)]
+#![deny(missing_docs)]
+#![doc(html_root_url = "https://docs.rs/memchr/2.0.0")]
+
+// Supporting 8-bit (or others) would be fine. If you need it, please submit a
+// bug report at https://github.com/BurntSushi/rust-memchr
+#[cfg(not(any(
+ target_pointer_width = "16",
+ target_pointer_width = "32",
+ target_pointer_width = "64"
+)))]
+compile_error!("memchr currently not supported on non-16/32/64 bit platforms");
+
+#[cfg(feature = "std")]
+extern crate core;
+
+#[cfg(all(test, all(not(miri), feature = "std")))]
+#[macro_use]
+extern crate quickcheck;
+
+use core::iter::Rev;
+
+pub use iter::{Memchr, Memchr2, Memchr3};
+
+// N.B. If you're looking for the cfg knobs for libc, see build.rs.
+#[cfg(memchr_libc)]
+mod c;
+#[allow(dead_code)]
+mod fallback;
+mod iter;
+mod naive;
+#[cfg(all(test, all(not(miri), feature = "std")))]
+mod tests;
+#[cfg(all(test, any(miri, not(feature = "std"))))]
+#[path = "tests/miri.rs"]
+mod tests;
+#[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))]
+mod x86;
+
+/// An iterator over all occurrences of the needle in a haystack.
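+///
+/// # Example
+///
+/// This shows how to find every position of a byte in a byte string.
+///
+/// ```
+/// use memchr::memchr_iter;
+///
+/// let haystack = b"the quick brown fox";
+/// let positions: Vec<usize> = memchr_iter(b'o', haystack).collect();
+/// assert_eq!(positions, vec![12, 17]);
+/// ```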
+#[inline]
+pub fn memchr_iter(needle: u8, haystack: &[u8]) -> Memchr {
+ Memchr::new(needle, haystack)
+}
+
+/// An iterator over all occurrences of the needles in a haystack.
+#[inline]
+pub fn memchr2_iter(needle1: u8, needle2: u8, haystack: &[u8]) -> Memchr2 {
+ Memchr2::new(needle1, needle2, haystack)
+}
+
+/// An iterator over all occurrences of the needles in a haystack.
+#[inline]
+pub fn memchr3_iter(
+ needle1: u8,
+ needle2: u8,
+ needle3: u8,
+ haystack: &[u8],
+) -> Memchr3 {
+ Memchr3::new(needle1, needle2, needle3, haystack)
+}
+
+/// An iterator over all occurrences of the needle in a haystack, in reverse.
+#[inline]
+pub fn memrchr_iter(needle: u8, haystack: &[u8]) -> Rev<Memchr> {
+ Memchr::new(needle, haystack).rev()
+}
+
+/// An iterator over all occurrences of the needles in a haystack, in reverse.
+#[inline]
+pub fn memrchr2_iter(
+ needle1: u8,
+ needle2: u8,
+ haystack: &[u8],
+) -> Rev<Memchr2> {
+ Memchr2::new(needle1, needle2, haystack).rev()
+}
+
+/// An iterator over all occurrences of the needles in a haystack, in reverse.
+#[inline]
+pub fn memrchr3_iter(
+ needle1: u8,
+ needle2: u8,
+ needle3: u8,
+ haystack: &[u8],
+) -> Rev<Memchr3> {
+ Memchr3::new(needle1, needle2, needle3, haystack).rev()
+}
+
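+// Each routine below picks exactly one `imp` at compile time via mutually
+// exclusive cfg predicates: Miri gets the naive implementation, x86_64 with
+// runtime SIMD support gets the x86 module, the libc wrappers are used for
+// `memchr`/`memrchr` when the `memchr_libc` cfg is set (memrchr only on
+// Linux), and the pure Rust fallback covers everything else.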
+/// Search for the first occurrence of a byte in a slice.
+///
+/// This returns the index corresponding to the first occurrence of `needle` in
+/// `haystack`, or `None` if one is not found.
+///
+/// While this is operationally the same as something like
+/// `haystack.iter().position(|&b| b == needle)`, `memchr` will use a highly
+/// optimized routine that can be up to an order of magnitude faster in some
+/// cases.
+///
+/// # Example
+///
+/// This shows how to find the first position of a byte in a byte string.
+///
+/// ```
+/// use memchr::memchr;
+///
+/// let haystack = b"the quick brown fox";
+/// assert_eq!(memchr(b'k', haystack), Some(8));
+/// ```
+#[inline]
+pub fn memchr(needle: u8, haystack: &[u8]) -> Option<usize> {
+ #[cfg(miri)]
+ #[inline(always)]
+ fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
+ naive::memchr(n1, haystack)
+ }
+
+ #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))]
+ #[inline(always)]
+ fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
+ x86::memchr(n1, haystack)
+ }
+
+ #[cfg(all(
+ memchr_libc,
+ not(all(target_arch = "x86_64", memchr_runtime_simd)),
+ not(miri),
+ ))]
+ #[inline(always)]
+ fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
+ c::memchr(n1, haystack)
+ }
+
+ #[cfg(all(
+ not(memchr_libc),
+ not(all(target_arch = "x86_64", memchr_runtime_simd)),
+ not(miri),
+ ))]
+ #[inline(always)]
+ fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
+ fallback::memchr(n1, haystack)
+ }
+
+ if haystack.is_empty() {
+ None
+ } else {
+ imp(needle, haystack)
+ }
+}
+
+/// Like `memchr`, but searches for either of two bytes instead of just one.
+///
+/// This returns the index corresponding to the first occurrence of `needle1`
+/// or the first occurrence of `needle2` in `haystack` (whichever occurs
+/// earlier), or `None` if neither one is found.
+///
+/// While this is operationally the same as something like
+/// `haystack.iter().position(|&b| b == needle1 || b == needle2)`, `memchr2`
+/// will use a highly optimized routine that can be up to an order of magnitude
+/// faster in some cases.
+///
+/// # Example
+///
+/// This shows how to find the first position of either of two bytes in a byte
+/// string.
+///
+/// ```
+/// use memchr::memchr2;
+///
+/// let haystack = b"the quick brown fox";
+/// assert_eq!(memchr2(b'k', b'q', haystack), Some(4));
+/// ```
+#[inline]
+pub fn memchr2(needle1: u8, needle2: u8, haystack: &[u8]) -> Option<usize> {
+ #[cfg(miri)]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ naive::memchr2(n1, n2, haystack)
+ }
+
+ #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ x86::memchr2(n1, n2, haystack)
+ }
+
+ #[cfg(all(
+ not(all(target_arch = "x86_64", memchr_runtime_simd)),
+ not(miri),
+ ))]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ fallback::memchr2(n1, n2, haystack)
+ }
+
+ if haystack.is_empty() {
+ None
+ } else {
+ imp(needle1, needle2, haystack)
+ }
+}
+
+/// Like `memchr`, but searches for any of three bytes instead of just one.
+///
+/// This returns the index corresponding to the first occurrence of `needle1`,
+/// the first occurrence of `needle2`, or the first occurrence of `needle3` in
+/// `haystack` (whichever occurs earliest), or `None` if none are found.
+///
+/// While this is operationally the same as something like
+/// `haystack.iter().position(|&b| b == needle1 || b == needle2 ||
+/// b == needle3)`, `memchr3` will use a highly optimized routine that can be
+/// up to an order of magnitude faster in some cases.
+///
+/// # Example
+///
+/// This shows how to find the first position of any of three bytes in a byte
+/// string.
+///
+/// ```
+/// use memchr::memchr3;
+///
+/// let haystack = b"the quick brown fox";
+/// assert_eq!(memchr3(b'k', b'q', b'e', haystack), Some(2));
+/// ```
+#[inline]
+pub fn memchr3(
+ needle1: u8,
+ needle2: u8,
+ needle3: u8,
+ haystack: &[u8],
+) -> Option<usize> {
+ #[cfg(miri)]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ naive::memchr3(n1, n2, n3, haystack)
+ }
+
+ #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ x86::memchr3(n1, n2, n3, haystack)
+ }
+
+ #[cfg(all(
+ not(all(target_arch = "x86_64", memchr_runtime_simd)),
+ not(miri),
+ ))]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ fallback::memchr3(n1, n2, n3, haystack)
+ }
+
+ if haystack.is_empty() {
+ None
+ } else {
+ imp(needle1, needle2, needle3, haystack)
+ }
+}
+
+/// Search for the last occurrence of a byte in a slice.
+///
+/// This returns the index corresponding to the last occurrence of `needle` in
+/// `haystack`, or `None` if one is not found.
+///
+/// While this is operationally the same as something like
+/// `haystack.iter().rposition(|&b| b == needle)`, `memrchr` will use a highly
+/// optimized routine that can be up to an order of magnitude faster in some
+/// cases.
+///
+/// # Example
+///
+/// This shows how to find the last position of a byte in a byte string.
+///
+/// ```
+/// use memchr::memrchr;
+///
+/// let haystack = b"the quick brown fox";
+/// assert_eq!(memrchr(b'o', haystack), Some(17));
+/// ```
+#[inline]
+pub fn memrchr(needle: u8, haystack: &[u8]) -> Option<usize> {
+ #[cfg(miri)]
+ #[inline(always)]
+ fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
+ naive::memrchr(n1, haystack)
+ }
+
+ #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))]
+ #[inline(always)]
+ fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
+ x86::memrchr(n1, haystack)
+ }
+
+ #[cfg(all(
+ memchr_libc,
+ target_os = "linux",
+ not(all(target_arch = "x86_64", memchr_runtime_simd)),
+ not(miri)
+ ))]
+ #[inline(always)]
+ fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
+ c::memrchr(n1, haystack)
+ }
+
+ #[cfg(all(
+ not(all(memchr_libc, target_os = "linux")),
+ not(all(target_arch = "x86_64", memchr_runtime_simd)),
+ not(miri),
+ ))]
+ #[inline(always)]
+ fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
+ fallback::memrchr(n1, haystack)
+ }
+
+ if haystack.is_empty() {
+ None
+ } else {
+ imp(needle, haystack)
+ }
+}
+
+/// Like `memrchr`, but searches for either of two bytes instead of just one.
+///
+/// This returns the index corresponding to the last occurrence of `needle1`
+/// or the last occurrence of `needle2` in `haystack` (whichever occurs later),
+/// or `None` if neither one is found.
+///
+/// While this is operationally the same as something like
+/// `haystack.iter().rposition(|&b| b == needle1 || b == needle2)`, `memrchr2`
+/// will use a highly optimized routine that can be up to an order of magnitude
+/// faster in some cases.
+///
+/// # Example
+///
+/// This shows how to find the last position of either of two bytes in a byte
+/// string.
+///
+/// ```
+/// use memchr::memrchr2;
+///
+/// let haystack = b"the quick brown fox";
+/// assert_eq!(memrchr2(b'k', b'q', haystack), Some(8));
+/// ```
+#[inline]
+pub fn memrchr2(needle1: u8, needle2: u8, haystack: &[u8]) -> Option<usize> {
+ #[cfg(miri)]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ naive::memrchr2(n1, n2, haystack)
+ }
+
+ #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ x86::memrchr2(n1, n2, haystack)
+ }
+
+ #[cfg(all(
+ not(all(target_arch = "x86_64", memchr_runtime_simd)),
+ not(miri),
+ ))]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ fallback::memrchr2(n1, n2, haystack)
+ }
+
+ if haystack.is_empty() {
+ None
+ } else {
+ imp(needle1, needle2, haystack)
+ }
+}
+
+/// Like `memrchr`, but searches for any of three bytes instead of just one.
+///
+/// This returns the index corresponding to the last occurrence of `needle1`,
+/// the last occurrence of `needle2`, or the last occurrence of `needle3` in
+/// `haystack` (whichever occurs latest), or `None` if none are found.
+///
+/// While this is operationally the same as something like
+/// `haystack.iter().rposition(|&b| b == needle1 || b == needle2 ||
+/// b == needle3)`, `memrchr3` will use a highly optimized routine that can be
+/// up to an order of magnitude faster in some cases.
+///
+/// # Example
+///
+/// This shows how to find the last position of any of three bytes in a byte
+/// string.
+///
+/// ```
+/// use memchr::memrchr3;
+///
+/// let haystack = b"the quick brown fox";
+/// assert_eq!(memrchr3(b'k', b'q', b'e', haystack), Some(8));
+/// ```
+#[inline]
+pub fn memrchr3(
+ needle1: u8,
+ needle2: u8,
+ needle3: u8,
+ haystack: &[u8],
+) -> Option<usize> {
+ #[cfg(miri)]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ naive::memrchr3(n1, n2, n3, haystack)
+ }
+
+ #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ x86::memrchr3(n1, n2, n3, haystack)
+ }
+
+ #[cfg(all(
+ not(all(target_arch = "x86_64", memchr_runtime_simd)),
+ not(miri),
+ ))]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ fallback::memrchr3(n1, n2, n3, haystack)
+ }
+
+ if haystack.is_empty() {
+ None
+ } else {
+ imp(needle1, needle2, needle3, haystack)
+ }
+}
diff --git a/third_party/rust/memchr/src/naive.rs b/third_party/rust/memchr/src/naive.rs
new file mode 100644
index 0000000000..3f3053d481
--- /dev/null
+++ b/third_party/rust/memchr/src/naive.rs
@@ -0,0 +1,25 @@
+#![allow(dead_code)]
+
+pub fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ haystack.iter().position(|&b| b == n1)
+}
+
+pub fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ haystack.iter().position(|&b| b == n1 || b == n2)
+}
+
+pub fn memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ haystack.iter().position(|&b| b == n1 || b == n2 || b == n3)
+}
+
+pub fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ haystack.iter().rposition(|&b| b == n1)
+}
+
+pub fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ haystack.iter().rposition(|&b| b == n1 || b == n2)
+}
+
+pub fn memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ haystack.iter().rposition(|&b| b == n1 || b == n2 || b == n3)
+}
diff --git a/third_party/rust/memchr/src/tests/iter.rs b/third_party/rust/memchr/src/tests/iter.rs
new file mode 100644
index 0000000000..8f335003b9
--- /dev/null
+++ b/third_party/rust/memchr/src/tests/iter.rs
@@ -0,0 +1,229 @@
+use tests::memchr_tests;
+use {Memchr, Memchr2, Memchr3};
+
+#[test]
+fn memchr1_iter() {
+ for test in memchr_tests() {
+ test.iter_one(false, Memchr::new);
+ }
+}
+
+#[test]
+fn memchr2_iter() {
+ for test in memchr_tests() {
+ test.iter_two(false, Memchr2::new);
+ }
+}
+
+#[test]
+fn memchr3_iter() {
+ for test in memchr_tests() {
+ test.iter_three(false, Memchr3::new);
+ }
+}
+
+#[test]
+fn memrchr1_iter() {
+ for test in memchr_tests() {
+ test.iter_one(true, |n1, corpus| Memchr::new(n1, corpus).rev());
+ }
+}
+
+#[test]
+fn memrchr2_iter() {
+ for test in memchr_tests() {
+ test.iter_two(true, |n1, n2, corpus| {
+ Memchr2::new(n1, n2, corpus).rev()
+ })
+ }
+}
+
+#[test]
+fn memrchr3_iter() {
+ for test in memchr_tests() {
+ test.iter_three(true, |n1, n2, n3, corpus| {
+ Memchr3::new(n1, n2, n3, corpus).rev()
+ })
+ }
+}
+
+quickcheck! {
+ fn qc_memchr_double_ended_iter(
+ needle: u8, data: Vec<u8>, take_side: Vec<bool>
+ ) -> bool {
+ // make nonempty
+ let mut take_side = take_side;
+ if take_side.is_empty() { take_side.push(true) };
+
+ let iter = Memchr::new(needle, &data);
+ let all_found = double_ended_take(
+ iter, take_side.iter().cycle().cloned());
+
+ all_found.iter().cloned().eq(positions1(needle, &data))
+ }
+
+ fn qc_memchr2_double_ended_iter(
+ needle1: u8, needle2: u8, data: Vec<u8>, take_side: Vec<bool>
+ ) -> bool {
+ // make nonempty
+ let mut take_side = take_side;
+ if take_side.is_empty() { take_side.push(true) };
+
+ let iter = Memchr2::new(needle1, needle2, &data);
+ let all_found = double_ended_take(
+ iter, take_side.iter().cycle().cloned());
+
+ all_found.iter().cloned().eq(positions2(needle1, needle2, &data))
+ }
+
+ fn qc_memchr3_double_ended_iter(
+ needle1: u8, needle2: u8, needle3: u8,
+ data: Vec<u8>, take_side: Vec<bool>
+ ) -> bool {
+ // make nonempty
+ let mut take_side = take_side;
+ if take_side.is_empty() { take_side.push(true) };
+
+ let iter = Memchr3::new(needle1, needle2, needle3, &data);
+ let all_found = double_ended_take(
+ iter, take_side.iter().cycle().cloned());
+
+ all_found
+ .iter()
+ .cloned()
+ .eq(positions3(needle1, needle2, needle3, &data))
+ }
+
+ fn qc_memchr1_iter(data: Vec<u8>) -> bool {
+ let needle = 0;
+ let answer = positions1(needle, &data);
+ answer.eq(Memchr::new(needle, &data))
+ }
+
+ fn qc_memchr1_rev_iter(data: Vec<u8>) -> bool {
+ let needle = 0;
+ let answer = positions1(needle, &data);
+ answer.rev().eq(Memchr::new(needle, &data).rev())
+ }
+
+ fn qc_memchr2_iter(data: Vec<u8>) -> bool {
+ let needle1 = 0;
+ let needle2 = 1;
+ let answer = positions2(needle1, needle2, &data);
+ answer.eq(Memchr2::new(needle1, needle2, &data))
+ }
+
+ fn qc_memchr2_rev_iter(data: Vec<u8>) -> bool {
+ let needle1 = 0;
+ let needle2 = 1;
+ let answer = positions2(needle1, needle2, &data);
+ answer.rev().eq(Memchr2::new(needle1, needle2, &data).rev())
+ }
+
+ fn qc_memchr3_iter(data: Vec<u8>) -> bool {
+ let needle1 = 0;
+ let needle2 = 1;
+ let needle3 = 2;
+ let answer = positions3(needle1, needle2, needle3, &data);
+ answer.eq(Memchr3::new(needle1, needle2, needle3, &data))
+ }
+
+ fn qc_memchr3_rev_iter(data: Vec<u8>) -> bool {
+ let needle1 = 0;
+ let needle2 = 1;
+ let needle3 = 2;
+ let answer = positions3(needle1, needle2, needle3, &data);
+ answer.rev().eq(Memchr3::new(needle1, needle2, needle3, &data).rev())
+ }
+
+ fn qc_memchr1_iter_size_hint(data: Vec<u8>) -> bool {
+ // test that the size hint is within reasonable bounds
+ let needle = 0;
+ let mut iter = Memchr::new(needle, &data);
+ let mut real_count = data
+ .iter()
+ .filter(|&&elt| elt == needle)
+ .count();
+
+ while let Some(index) = iter.next() {
+ real_count -= 1;
+ let (lower, upper) = iter.size_hint();
+ assert!(lower <= real_count);
+ assert!(upper.unwrap() >= real_count);
+ assert!(upper.unwrap() <= data.len() - index);
+ }
+ true
+ }
+}
+
+// take items from a DEI, taking front for each true and back for each false.
+// Return a vector with the concatenation of the fronts and the reverse of the
+// backs.
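+//
+// For example (illustrative): over an iterator yielding 1, 2, 3, 4, 5 with
+// take_side = [true, false, true], we take 1 from the front, 5 from the back,
+// then 2 from the front, and return [1, 2, 5].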
+fn double_ended_take<I, J>(mut iter: I, take_side: J) -> Vec<I::Item>
+where
+ I: DoubleEndedIterator,
+ J: Iterator<Item = bool>,
+{
+ let mut found_front = Vec::new();
+ let mut found_back = Vec::new();
+
+ for take_front in take_side {
+ if take_front {
+ if let Some(pos) = iter.next() {
+ found_front.push(pos);
+ } else {
+ break;
+ }
+ } else {
+ if let Some(pos) = iter.next_back() {
+ found_back.push(pos);
+ } else {
+ break;
+ }
+ };
+ }
+
+ let mut all_found = found_front;
+ all_found.extend(found_back.into_iter().rev());
+ all_found
+}
+
+// return an iterator of the 0-based indices of haystack that match the needle
+fn positions1<'a>(
+ n1: u8,
+ haystack: &'a [u8],
+) -> Box<dyn DoubleEndedIterator<Item = usize> + 'a> {
+ let it = haystack
+ .iter()
+ .enumerate()
+ .filter(move |&(_, &b)| b == n1)
+ .map(|t| t.0);
+ Box::new(it)
+}
+
+fn positions2<'a>(
+ n1: u8,
+ n2: u8,
+ haystack: &'a [u8],
+) -> Box<dyn DoubleEndedIterator<Item = usize> + 'a> {
+ let it = haystack
+ .iter()
+ .enumerate()
+ .filter(move |&(_, &b)| b == n1 || b == n2)
+ .map(|t| t.0);
+ Box::new(it)
+}
+
+fn positions3<'a>(
+ n1: u8,
+ n2: u8,
+ n3: u8,
+ haystack: &'a [u8],
+) -> Box<dyn DoubleEndedIterator<Item = usize> + 'a> {
+ let it = haystack
+ .iter()
+ .enumerate()
+ .filter(move |&(_, &b)| b == n1 || b == n2 || b == n3)
+ .map(|t| t.0);
+ Box::new(it)
+}
diff --git a/third_party/rust/memchr/src/tests/memchr.rs b/third_party/rust/memchr/src/tests/memchr.rs
new file mode 100644
index 0000000000..87d3d14edd
--- /dev/null
+++ b/third_party/rust/memchr/src/tests/memchr.rs
@@ -0,0 +1,131 @@
+use fallback;
+use naive;
+use {memchr, memchr2, memchr3, memrchr, memrchr2, memrchr3};
+
+use tests::memchr_tests;
+
+#[test]
+fn memchr1_find() {
+ for test in memchr_tests() {
+ test.one(false, memchr);
+ }
+}
+
+#[test]
+fn memchr1_fallback_find() {
+ for test in memchr_tests() {
+ test.one(false, fallback::memchr);
+ }
+}
+
+#[test]
+fn memchr2_find() {
+ for test in memchr_tests() {
+ test.two(false, memchr2);
+ }
+}
+
+#[test]
+fn memchr2_fallback_find() {
+ for test in memchr_tests() {
+ test.two(false, fallback::memchr2);
+ }
+}
+
+#[test]
+fn memchr3_find() {
+ for test in memchr_tests() {
+ test.three(false, memchr3);
+ }
+}
+
+#[test]
+fn memchr3_fallback_find() {
+ for test in memchr_tests() {
+ test.three(false, fallback::memchr3);
+ }
+}
+
+#[test]
+fn memrchr1_find() {
+ for test in memchr_tests() {
+ test.one(true, memrchr);
+ }
+}
+
+#[test]
+fn memrchr1_fallback_find() {
+ for test in memchr_tests() {
+ test.one(true, fallback::memrchr);
+ }
+}
+
+#[test]
+fn memrchr2_find() {
+ for test in memchr_tests() {
+ test.two(true, memrchr2);
+ }
+}
+
+#[test]
+fn memrchr2_fallback_find() {
+ for test in memchr_tests() {
+ test.two(true, fallback::memrchr2);
+ }
+}
+
+#[test]
+fn memrchr3_find() {
+ for test in memchr_tests() {
+ test.three(true, memrchr3);
+ }
+}
+
+#[test]
+fn memrchr3_fallback_find() {
+ for test in memchr_tests() {
+ test.three(true, fallback::memrchr3);
+ }
+}
+
+quickcheck! {
+ fn qc_memchr1_matches_naive(n1: u8, corpus: Vec<u8>) -> bool {
+ memchr(n1, &corpus) == naive::memchr(n1, &corpus)
+ }
+}
+
+quickcheck! {
+ fn qc_memchr2_matches_naive(n1: u8, n2: u8, corpus: Vec<u8>) -> bool {
+ memchr2(n1, n2, &corpus) == naive::memchr2(n1, n2, &corpus)
+ }
+}
+
+quickcheck! {
+ fn qc_memchr3_matches_naive(
+ n1: u8, n2: u8, n3: u8,
+ corpus: Vec<u8>
+ ) -> bool {
+ memchr3(n1, n2, n3, &corpus) == naive::memchr3(n1, n2, n3, &corpus)
+ }
+}
+
+quickcheck! {
+ fn qc_memrchr1_matches_naive(n1: u8, corpus: Vec<u8>) -> bool {
+ memrchr(n1, &corpus) == naive::memrchr(n1, &corpus)
+ }
+}
+
+quickcheck! {
+ fn qc_memrchr2_matches_naive(n1: u8, n2: u8, corpus: Vec<u8>) -> bool {
+ memrchr2(n1, n2, &corpus) == naive::memrchr2(n1, n2, &corpus)
+ }
+}
+
+quickcheck! {
+ fn qc_memrchr3_matches_naive(
+ n1: u8, n2: u8, n3: u8,
+ corpus: Vec<u8>
+ ) -> bool {
+ memrchr3(n1, n2, n3, &corpus) == naive::memrchr3(n1, n2, n3, &corpus)
+ }
+}
diff --git a/third_party/rust/memchr/src/tests/miri.rs b/third_party/rust/memchr/src/tests/miri.rs
new file mode 100644
index 0000000000..879ef938ec
--- /dev/null
+++ b/third_party/rust/memchr/src/tests/miri.rs
@@ -0,0 +1,19 @@
+// Simple tests using MIRI
+
+use crate::{memchr, memchr2, memchr3, memrchr, memrchr2, memrchr3};
+
+#[test]
+fn test_with_miri() {
+ assert_eq!(memchr(b'a', b"abcda"), Some(0));
+ assert_eq!(memchr(b'z', b"abcda"), None);
+ assert_eq!(memchr2(b'a', b'z', b"abcda"), Some(0));
+ assert_eq!(memchr2(b'z', b'y', b"abcda"), None);
+ assert_eq!(memchr3(b'a', b'z', b'b', b"abcda"), Some(0));
+ assert_eq!(memchr3(b'z', b'y', b'x', b"abcda"), None);
+ assert_eq!(memrchr(b'a', b"abcda"), Some(4));
+ assert_eq!(memrchr(b'z', b"abcda"), None);
+ assert_eq!(memrchr2(b'a', b'z', b"abcda"), Some(4));
+ assert_eq!(memrchr2(b'z', b'y', b"abcda"), None);
+ assert_eq!(memrchr3(b'a', b'z', b'b', b"abcda"), Some(4));
+ assert_eq!(memrchr3(b'z', b'y', b'x', b"abcda"), None);
+}
diff --git a/third_party/rust/memchr/src/tests/mod.rs b/third_party/rust/memchr/src/tests/mod.rs
new file mode 100644
index 0000000000..82c1a248e9
--- /dev/null
+++ b/third_party/rust/memchr/src/tests/mod.rs
@@ -0,0 +1,362 @@
+use std::iter::repeat;
+
+mod iter;
+mod memchr;
+
+#[cfg(target_endian = "little")]
+#[test]
+fn byte_order() {
+ eprintln!("LITTLE ENDIAN");
+}
+
+#[cfg(target_endian = "big")]
+#[test]
+fn byte_order() {
+ eprintln!("BIG ENDIAN");
+}
+
+/// Create a sequence of tests that should be run by memchr implementations.
+fn memchr_tests() -> Vec<MemchrTest> {
+ let mut tests = Vec::new();
+ for statict in MEMCHR_TESTS {
+ assert!(!statict.corpus.contains("%"), "% is not allowed in corpora");
+ assert!(!statict.corpus.contains("#"), "# is not allowed in corpora");
+ assert!(!statict.needles.contains(&b'%'), "% is an invalid needle");
+ assert!(!statict.needles.contains(&b'#'), "# is an invalid needle");
+
+ let t = MemchrTest {
+ corpus: statict.corpus.to_string(),
+ needles: statict.needles.to_vec(),
+ positions: statict.positions.to_vec(),
+ };
+ tests.push(t.clone());
+ tests.extend(t.expand());
+ }
+ tests
+}
+
+/// A set of tests for memchr-like functions.
+///
+/// These tests mostly try to cover the short string cases. We cover the longer
+/// string cases via the benchmarks (which are tests themselves), via
+/// quickcheck tests and via automatic expansion of each test case (by
+/// increasing the corpus size). Finally, we cover different alignment cases
+/// in the tests by varying the starting point of the slice.
+const MEMCHR_TESTS: &[MemchrTestStatic] = &[
+ // one needle (applied to memchr + memchr2 + memchr3)
+ MemchrTestStatic { corpus: "a", needles: &[b'a'], positions: &[0] },
+ MemchrTestStatic { corpus: "aa", needles: &[b'a'], positions: &[0, 1] },
+ MemchrTestStatic {
+ corpus: "aaa",
+ needles: &[b'a'],
+ positions: &[0, 1, 2],
+ },
+ MemchrTestStatic { corpus: "", needles: &[b'a'], positions: &[] },
+ MemchrTestStatic { corpus: "z", needles: &[b'a'], positions: &[] },
+ MemchrTestStatic { corpus: "zz", needles: &[b'a'], positions: &[] },
+ MemchrTestStatic { corpus: "zza", needles: &[b'a'], positions: &[2] },
+ MemchrTestStatic { corpus: "zaza", needles: &[b'a'], positions: &[1, 3] },
+ MemchrTestStatic { corpus: "zzza", needles: &[b'a'], positions: &[3] },
+ MemchrTestStatic { corpus: "\x00a", needles: &[b'a'], positions: &[1] },
+ MemchrTestStatic { corpus: "\x00", needles: &[b'\x00'], positions: &[0] },
+ MemchrTestStatic {
+ corpus: "\x00\x00",
+ needles: &[b'\x00'],
+ positions: &[0, 1],
+ },
+ MemchrTestStatic {
+ corpus: "\x00a\x00",
+ needles: &[b'\x00'],
+ positions: &[0, 2],
+ },
+ MemchrTestStatic {
+ corpus: "zzzzzzzzzzzzzzzza",
+ needles: &[b'a'],
+ positions: &[16],
+ },
+ MemchrTestStatic {
+ corpus: "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzza",
+ needles: &[b'a'],
+ positions: &[32],
+ },
+ // two needles (applied to memchr2 + memchr3)
+ MemchrTestStatic {
+ corpus: "az",
+ needles: &[b'a', b'z'],
+ positions: &[0, 1],
+ },
+ MemchrTestStatic {
+ corpus: "az",
+ needles: &[b'a', b'z'],
+ positions: &[0, 1],
+ },
+ MemchrTestStatic { corpus: "az", needles: &[b'x', b'y'], positions: &[] },
+ MemchrTestStatic { corpus: "az", needles: &[b'a', b'y'], positions: &[0] },
+ MemchrTestStatic { corpus: "az", needles: &[b'x', b'z'], positions: &[1] },
+ MemchrTestStatic {
+ corpus: "yyyyaz",
+ needles: &[b'a', b'z'],
+ positions: &[4, 5],
+ },
+ MemchrTestStatic {
+ corpus: "yyyyaz",
+ needles: &[b'z', b'a'],
+ positions: &[4, 5],
+ },
+ // three needles (applied to memchr3)
+ MemchrTestStatic {
+ corpus: "xyz",
+ needles: &[b'x', b'y', b'z'],
+ positions: &[0, 1, 2],
+ },
+ MemchrTestStatic {
+ corpus: "zxy",
+ needles: &[b'x', b'y', b'z'],
+ positions: &[0, 1, 2],
+ },
+ MemchrTestStatic {
+ corpus: "zxy",
+ needles: &[b'x', b'a', b'z'],
+ positions: &[0, 1],
+ },
+ MemchrTestStatic {
+ corpus: "zxy",
+ needles: &[b't', b'a', b'z'],
+ positions: &[0],
+ },
+ MemchrTestStatic {
+ corpus: "yxz",
+ needles: &[b't', b'a', b'z'],
+ positions: &[2],
+ },
+];
+
+/// A description of a test on a memchr like function.
+#[derive(Clone, Debug)]
+struct MemchrTest {
+ /// The thing to search. We use `&str` instead of `&[u8]` because they
+ /// are nicer to write in tests, and we don't miss much since memchr
+ /// doesn't care about UTF-8.
+ ///
+ /// Corpora cannot contain either '%' or '#'. We use these bytes when
+ /// expanding test cases into many test cases, and we assume they are not
+ /// used. If they are used, `memchr_tests` will panic.
+ corpus: String,
+ /// The needles to search for. This is intended to be an "alternation" of
+ /// needles. The number of needles may cause this test to be skipped for
+ /// some memchr variants. For example, a test with 2 needles cannot be used
+ /// to test `memchr`, but can be used to test `memchr2` and `memchr3`.
+ /// However, a test with only 1 needle can be used to test all of `memchr`,
+ /// `memchr2` and `memchr3`. We achieve this by filling in the needles with
+ /// bytes that we never used in the corpus (such as '#').
+ needles: Vec<u8>,
+ /// The positions expected to match for all of the needles.
+ positions: Vec<usize>,
+}
+
+/// Like MemchrTest, but easier to define as a constant.
+#[derive(Clone, Debug)]
+struct MemchrTestStatic {
+ corpus: &'static str,
+ needles: &'static [u8],
+ positions: &'static [usize],
+}
+
+impl MemchrTest {
+ fn one<F: Fn(u8, &[u8]) -> Option<usize>>(&self, reverse: bool, f: F) {
+ let needles = match self.needles(1) {
+ None => return,
+ Some(needles) => needles,
+ };
+ // We test different alignments here. Since some implementations use
+ // AVX2, which can read 32 bytes at a time, we test at least that.
+ // Moreover, with loop unrolling, we sometimes process 64 (sse2) or 128
+ // (avx) bytes at a time, so we include that in our offsets as well.
+ //
+ // You might think this would cause most needles to not be found, but
+ // we actually expand our tests to include corpus sizes all the way up
+        // to >500 bytes, so we should exercise most branches.
+ for align in 0..130 {
+ let corpus = self.corpus(align);
+ assert_eq!(
+ self.positions(align, reverse).get(0).cloned(),
+ f(needles[0], corpus.as_bytes()),
+ "search for {:?} failed in: {:?} (len: {}, alignment: {})",
+ needles[0] as char,
+ corpus,
+ corpus.len(),
+ align
+ );
+ }
+ }
+
+ fn two<F: Fn(u8, u8, &[u8]) -> Option<usize>>(&self, reverse: bool, f: F) {
+ let needles = match self.needles(2) {
+ None => return,
+ Some(needles) => needles,
+ };
+ for align in 0..130 {
+ let corpus = self.corpus(align);
+ assert_eq!(
+ self.positions(align, reverse).get(0).cloned(),
+ f(needles[0], needles[1], corpus.as_bytes()),
+ "search for {:?}|{:?} failed in: {:?} \
+ (len: {}, alignment: {})",
+ needles[0] as char,
+ needles[1] as char,
+ corpus,
+ corpus.len(),
+ align
+ );
+ }
+ }
+
+ fn three<F: Fn(u8, u8, u8, &[u8]) -> Option<usize>>(
+ &self,
+ reverse: bool,
+ f: F,
+ ) {
+ let needles = match self.needles(3) {
+ None => return,
+ Some(needles) => needles,
+ };
+ for align in 0..130 {
+ let corpus = self.corpus(align);
+ assert_eq!(
+ self.positions(align, reverse).get(0).cloned(),
+ f(needles[0], needles[1], needles[2], corpus.as_bytes()),
+ "search for {:?}|{:?}|{:?} failed in: {:?} \
+ (len: {}, alignment: {})",
+ needles[0] as char,
+ needles[1] as char,
+ needles[2] as char,
+ corpus,
+ corpus.len(),
+ align
+ );
+ }
+ }
+
+ fn iter_one<'a, I, F>(&'a self, reverse: bool, f: F)
+ where
+ F: FnOnce(u8, &'a [u8]) -> I,
+ I: Iterator<Item = usize>,
+ {
+ if let Some(ns) = self.needles(1) {
+ self.iter(reverse, f(ns[0], self.corpus.as_bytes()));
+ }
+ }
+
+ fn iter_two<'a, I, F>(&'a self, reverse: bool, f: F)
+ where
+ F: FnOnce(u8, u8, &'a [u8]) -> I,
+ I: Iterator<Item = usize>,
+ {
+ if let Some(ns) = self.needles(2) {
+ self.iter(reverse, f(ns[0], ns[1], self.corpus.as_bytes()));
+ }
+ }
+
+ fn iter_three<'a, I, F>(&'a self, reverse: bool, f: F)
+ where
+ F: FnOnce(u8, u8, u8, &'a [u8]) -> I,
+ I: Iterator<Item = usize>,
+ {
+ if let Some(ns) = self.needles(3) {
+ self.iter(reverse, f(ns[0], ns[1], ns[2], self.corpus.as_bytes()));
+ }
+ }
+
+ /// Test that the positions yielded by the given iterator match the
+ /// positions in this test. If reverse is true, then reverse the positions
+ /// before comparing them.
+ fn iter<I: Iterator<Item = usize>>(&self, reverse: bool, it: I) {
+ assert_eq!(
+ self.positions(0, reverse),
+ it.collect::<Vec<usize>>(),
+ r"search for {:?} failed in: {:?}",
+ self.needles.iter().map(|&b| b as char).collect::<Vec<char>>(),
+ self.corpus
+ );
+ }
+
+ /// Expand this test into many variations of the same test.
+ ///
+ /// In particular, this will generate more tests with larger corpus sizes.
+ /// The expected positions are updated to maintain the integrity of the
+ /// test.
+ ///
+ /// This is important in testing a memchr implementation, because there are
+ /// often different cases depending on the length of the corpus.
+ ///
+ /// Note that we extend the corpus by adding `%` bytes, which we
+ /// don't otherwise use as a needle.
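+    ///
+    /// For example (illustrative), the "zza" test with needle `b'a'` at
+    /// position 2 is also run as "%zza" (position 3), "%%zza" (position 4),
+    /// and so on, as well as "zza%", "zza%%", etc.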
+ fn expand(&self) -> Vec<MemchrTest> {
+ let mut more = Vec::new();
+
+ // Add bytes to the start of the corpus.
+ for i in 1..515 {
+ let mut t = self.clone();
+ let mut new_corpus: String = repeat('%').take(i).collect();
+ new_corpus.push_str(&t.corpus);
+ t.corpus = new_corpus;
+ t.positions = t.positions.into_iter().map(|p| p + i).collect();
+ more.push(t);
+ }
+ // Add bytes to the end of the corpus.
+ for i in 1..515 {
+ let mut t = self.clone();
+ let padding: String = repeat('%').take(i).collect();
+ t.corpus.push_str(&padding);
+ more.push(t);
+ }
+
+ more
+ }
+
+ /// Return the corpus at the given alignment.
+ ///
+ /// If the alignment exceeds the length of the corpus, then this returns
+ /// an empty slice.
+ fn corpus(&self, align: usize) -> &str {
+ self.corpus.get(align..).unwrap_or("")
+ }
+
+    /// Return exactly `count` needles from this test. If this test has fewer
+    /// than `count` needles, then add `#` until the number of needles
+ /// matches `count`. If this test has more than `count` needles, then
+ /// return `None` (because there is no way to use this test data for a
+ /// search using fewer needles).
+ fn needles(&self, count: usize) -> Option<Vec<u8>> {
+ if self.needles.len() > count {
+ return None;
+ }
+
+ let mut needles = self.needles.to_vec();
+ for _ in needles.len()..count {
+ // we assume # is never used in tests.
+ needles.push(b'#');
+ }
+ Some(needles)
+ }
+
+ /// Return the positions in this test, reversed if `reverse` is true.
+ ///
+ /// If alignment is given, then all positions greater than or equal to that
+ /// alignment are offset by the alignment. Positions less than the
+ /// alignment are dropped.
+ fn positions(&self, align: usize, reverse: bool) -> Vec<usize> {
+ let positions = if reverse {
+ let mut positions = self.positions.to_vec();
+ positions.reverse();
+ positions
+ } else {
+ self.positions.to_vec()
+ };
+ positions
+ .into_iter()
+ .filter(|&p| p >= align)
+ .map(|p| p - align)
+ .collect()
+ }
+}
diff --git a/third_party/rust/memchr/src/x86/avx.rs b/third_party/rust/memchr/src/x86/avx.rs
new file mode 100644
index 0000000000..e3d8e8902e
--- /dev/null
+++ b/third_party/rust/memchr/src/x86/avx.rs
@@ -0,0 +1,703 @@
+use core::arch::x86_64::*;
+use core::cmp;
+use core::mem::size_of;
+
+use x86::sse2;
+
+const VECTOR_SIZE: usize = size_of::<__m256i>();
+const VECTOR_ALIGN: usize = VECTOR_SIZE - 1;
+
+// The number of bytes to loop at in one iteration of memchr/memrchr.
+const LOOP_SIZE: usize = 4 * VECTOR_SIZE;
+
+// The number of bytes to loop at in one iteration of memchr2/memrchr2 and
+// memchr3/memrchr3. There was no observable difference between 128 and 64
+// bytes in benchmarks. memchr3 in particular only gets a very slight speed up
+// from the loop unrolling.
+const LOOP_SIZE2: usize = 2 * VECTOR_SIZE;
+
+#[target_feature(enable = "avx2")]
+pub unsafe fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ // For a high level explanation for how this algorithm works, see the
+ // sse2 implementation. The avx implementation here is the same, but with
+ // 256-bit vectors instead of 128-bit vectors.
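+    //
+    // In short: check one (possibly unaligned) 32-byte vector at the front,
+    // then scan four aligned vectors per iteration, OR-ing their comparison
+    // results so a single movemask call detects a hit, then one aligned
+    // vector at a time for what's left, and finally one last unaligned
+    // vector that may overlap bytes that were already searched.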
+
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ // For small haystacks, defer to the SSE2 implementation. Codegen
+ // suggests this completely avoids touching the AVX vectors.
+ return sse2::memchr(n1, haystack);
+ }
+
+ let vn1 = _mm256_set1_epi8(n1 as i8);
+ let loop_size = cmp::min(LOOP_SIZE, haystack.len());
+ if let Some(i) = forward_search1(start_ptr, end_ptr, ptr, vn1) {
+ return Some(i);
+ }
+
+ ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN));
+ debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr);
+ while loop_size == LOOP_SIZE && ptr <= end_ptr.sub(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ let a = _mm256_load_si256(ptr as *const __m256i);
+ let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i);
+ let c = _mm256_load_si256(ptr.add(2 * VECTOR_SIZE) as *const __m256i);
+ let d = _mm256_load_si256(ptr.add(3 * VECTOR_SIZE) as *const __m256i);
+ let eqa = _mm256_cmpeq_epi8(vn1, a);
+ let eqb = _mm256_cmpeq_epi8(vn1, b);
+ let eqc = _mm256_cmpeq_epi8(vn1, c);
+ let eqd = _mm256_cmpeq_epi8(vn1, d);
+ let or1 = _mm256_or_si256(eqa, eqb);
+ let or2 = _mm256_or_si256(eqc, eqd);
+ let or3 = _mm256_or_si256(or1, or2);
+ if _mm256_movemask_epi8(or3) != 0 {
+ let mut at = sub(ptr, start_ptr);
+ let mask = _mm256_movemask_epi8(eqa);
+ if mask != 0 {
+ return Some(at + forward_pos(mask));
+ }
+
+ at += VECTOR_SIZE;
+ let mask = _mm256_movemask_epi8(eqb);
+ if mask != 0 {
+ return Some(at + forward_pos(mask));
+ }
+
+ at += VECTOR_SIZE;
+ let mask = _mm256_movemask_epi8(eqc);
+ if mask != 0 {
+ return Some(at + forward_pos(mask));
+ }
+
+ at += VECTOR_SIZE;
+ let mask = _mm256_movemask_epi8(eqd);
+ debug_assert!(mask != 0);
+ return Some(at + forward_pos(mask));
+ }
+ ptr = ptr.add(loop_size);
+ }
+ while ptr <= end_ptr.sub(VECTOR_SIZE) {
+ debug_assert!(sub(end_ptr, ptr) >= VECTOR_SIZE);
+
+ if let Some(i) = forward_search1(start_ptr, end_ptr, ptr, vn1) {
+ return Some(i);
+ }
+ ptr = ptr.add(VECTOR_SIZE);
+ }
+ if ptr < end_ptr {
+ debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE);
+ ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr));
+ debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE);
+
+ return forward_search1(start_ptr, end_ptr, ptr, vn1);
+ }
+ None
+}
+
+#[target_feature(enable = "avx2")]
+pub unsafe fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = _mm256_set1_epi8(n1 as i8);
+ let vn2 = _mm256_set1_epi8(n2 as i8);
+ let len = haystack.len();
+ let loop_size = cmp::min(LOOP_SIZE2, len);
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr < end_ptr {
+ if *ptr == n1 || *ptr == n2 {
+ return Some(sub(ptr, start_ptr));
+ }
+ ptr = ptr.offset(1);
+ }
+ return None;
+ }
+
+ if let Some(i) = forward_search2(start_ptr, end_ptr, ptr, vn1, vn2) {
+ return Some(i);
+ }
+
+ ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN));
+ debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr);
+ while loop_size == LOOP_SIZE2 && ptr <= end_ptr.sub(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ let a = _mm256_load_si256(ptr as *const __m256i);
+ let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i);
+ let eqa1 = _mm256_cmpeq_epi8(vn1, a);
+ let eqb1 = _mm256_cmpeq_epi8(vn1, b);
+ let eqa2 = _mm256_cmpeq_epi8(vn2, a);
+ let eqb2 = _mm256_cmpeq_epi8(vn2, b);
+ let or1 = _mm256_or_si256(eqa1, eqb1);
+ let or2 = _mm256_or_si256(eqa2, eqb2);
+ let or3 = _mm256_or_si256(or1, or2);
+ if _mm256_movemask_epi8(or3) != 0 {
+ let mut at = sub(ptr, start_ptr);
+ let mask1 = _mm256_movemask_epi8(eqa1);
+ let mask2 = _mm256_movemask_epi8(eqa2);
+ if mask1 != 0 || mask2 != 0 {
+ return Some(at + forward_pos2(mask1, mask2));
+ }
+
+ at += VECTOR_SIZE;
+ let mask1 = _mm256_movemask_epi8(eqb1);
+ let mask2 = _mm256_movemask_epi8(eqb2);
+ return Some(at + forward_pos2(mask1, mask2));
+ }
+ ptr = ptr.add(loop_size);
+ }
+ while ptr <= end_ptr.sub(VECTOR_SIZE) {
+ if let Some(i) = forward_search2(start_ptr, end_ptr, ptr, vn1, vn2) {
+ return Some(i);
+ }
+ ptr = ptr.add(VECTOR_SIZE);
+ }
+ if ptr < end_ptr {
+ debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE);
+ ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr));
+ debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE);
+
+ return forward_search2(start_ptr, end_ptr, ptr, vn1, vn2);
+ }
+ None
+}
+
+#[target_feature(enable = "avx2")]
+pub unsafe fn memchr3(
+ n1: u8,
+ n2: u8,
+ n3: u8,
+ haystack: &[u8],
+) -> Option<usize> {
+ let vn1 = _mm256_set1_epi8(n1 as i8);
+ let vn2 = _mm256_set1_epi8(n2 as i8);
+ let vn3 = _mm256_set1_epi8(n3 as i8);
+ let len = haystack.len();
+ let loop_size = cmp::min(LOOP_SIZE2, len);
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr < end_ptr {
+ if *ptr == n1 || *ptr == n2 || *ptr == n3 {
+ return Some(sub(ptr, start_ptr));
+ }
+ ptr = ptr.offset(1);
+ }
+ return None;
+ }
+
+ if let Some(i) = forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) {
+ return Some(i);
+ }
+
+ ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN));
+ debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr);
+ while loop_size == LOOP_SIZE2 && ptr <= end_ptr.sub(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ let a = _mm256_load_si256(ptr as *const __m256i);
+ let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i);
+ let eqa1 = _mm256_cmpeq_epi8(vn1, a);
+ let eqb1 = _mm256_cmpeq_epi8(vn1, b);
+ let eqa2 = _mm256_cmpeq_epi8(vn2, a);
+ let eqb2 = _mm256_cmpeq_epi8(vn2, b);
+ let eqa3 = _mm256_cmpeq_epi8(vn3, a);
+ let eqb3 = _mm256_cmpeq_epi8(vn3, b);
+ let or1 = _mm256_or_si256(eqa1, eqb1);
+ let or2 = _mm256_or_si256(eqa2, eqb2);
+ let or3 = _mm256_or_si256(eqa3, eqb3);
+ let or4 = _mm256_or_si256(or1, or2);
+ let or5 = _mm256_or_si256(or3, or4);
+ if _mm256_movemask_epi8(or5) != 0 {
+ let mut at = sub(ptr, start_ptr);
+ let mask1 = _mm256_movemask_epi8(eqa1);
+ let mask2 = _mm256_movemask_epi8(eqa2);
+ let mask3 = _mm256_movemask_epi8(eqa3);
+ if mask1 != 0 || mask2 != 0 || mask3 != 0 {
+ return Some(at + forward_pos3(mask1, mask2, mask3));
+ }
+
+ at += VECTOR_SIZE;
+ let mask1 = _mm256_movemask_epi8(eqb1);
+ let mask2 = _mm256_movemask_epi8(eqb2);
+ let mask3 = _mm256_movemask_epi8(eqb3);
+ return Some(at + forward_pos3(mask1, mask2, mask3));
+ }
+ ptr = ptr.add(loop_size);
+ }
+ while ptr <= end_ptr.sub(VECTOR_SIZE) {
+ if let Some(i) =
+ forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3)
+ {
+ return Some(i);
+ }
+ ptr = ptr.add(VECTOR_SIZE);
+ }
+ if ptr < end_ptr {
+ debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE);
+ ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr));
+ debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE);
+
+ return forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3);
+ }
+ None
+}
+
+#[target_feature(enable = "avx2")]
+pub unsafe fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = _mm256_set1_epi8(n1 as i8);
+ let len = haystack.len();
+ let loop_size = cmp::min(LOOP_SIZE, len);
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = end_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr > start_ptr {
+ ptr = ptr.offset(-1);
+ if *ptr == n1 {
+ return Some(sub(ptr, start_ptr));
+ }
+ }
+ return None;
+ }
+
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) = reverse_search1(start_ptr, end_ptr, ptr, vn1) {
+ return Some(i);
+ }
+
+ ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8;
+ debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
+ while loop_size == LOOP_SIZE && ptr >= start_ptr.add(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ ptr = ptr.sub(loop_size);
+ let a = _mm256_load_si256(ptr as *const __m256i);
+ let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i);
+ let c = _mm256_load_si256(ptr.add(2 * VECTOR_SIZE) as *const __m256i);
+ let d = _mm256_load_si256(ptr.add(3 * VECTOR_SIZE) as *const __m256i);
+ let eqa = _mm256_cmpeq_epi8(vn1, a);
+ let eqb = _mm256_cmpeq_epi8(vn1, b);
+ let eqc = _mm256_cmpeq_epi8(vn1, c);
+ let eqd = _mm256_cmpeq_epi8(vn1, d);
+ let or1 = _mm256_or_si256(eqa, eqb);
+ let or2 = _mm256_or_si256(eqc, eqd);
+ let or3 = _mm256_or_si256(or1, or2);
+ if _mm256_movemask_epi8(or3) != 0 {
+ let mut at = sub(ptr.add(3 * VECTOR_SIZE), start_ptr);
+ let mask = _mm256_movemask_epi8(eqd);
+ if mask != 0 {
+ return Some(at + reverse_pos(mask));
+ }
+
+ at -= VECTOR_SIZE;
+ let mask = _mm256_movemask_epi8(eqc);
+ if mask != 0 {
+ return Some(at + reverse_pos(mask));
+ }
+
+ at -= VECTOR_SIZE;
+ let mask = _mm256_movemask_epi8(eqb);
+ if mask != 0 {
+ return Some(at + reverse_pos(mask));
+ }
+
+ at -= VECTOR_SIZE;
+ let mask = _mm256_movemask_epi8(eqa);
+ debug_assert!(mask != 0);
+ return Some(at + reverse_pos(mask));
+ }
+ }
+ while ptr >= start_ptr.add(VECTOR_SIZE) {
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) = reverse_search1(start_ptr, end_ptr, ptr, vn1) {
+ return Some(i);
+ }
+ }
+ if ptr > start_ptr {
+ debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE);
+ return reverse_search1(start_ptr, end_ptr, start_ptr, vn1);
+ }
+ None
+}
+
+#[target_feature(enable = "avx2")]
+pub unsafe fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = _mm256_set1_epi8(n1 as i8);
+ let vn2 = _mm256_set1_epi8(n2 as i8);
+ let len = haystack.len();
+ let loop_size = cmp::min(LOOP_SIZE2, len);
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = end_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr > start_ptr {
+ ptr = ptr.offset(-1);
+ if *ptr == n1 || *ptr == n2 {
+ return Some(sub(ptr, start_ptr));
+ }
+ }
+ return None;
+ }
+
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) = reverse_search2(start_ptr, end_ptr, ptr, vn1, vn2) {
+ return Some(i);
+ }
+
+ ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8;
+ debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
+ while loop_size == LOOP_SIZE2 && ptr >= start_ptr.add(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ ptr = ptr.sub(loop_size);
+ let a = _mm256_load_si256(ptr as *const __m256i);
+ let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i);
+ let eqa1 = _mm256_cmpeq_epi8(vn1, a);
+ let eqb1 = _mm256_cmpeq_epi8(vn1, b);
+ let eqa2 = _mm256_cmpeq_epi8(vn2, a);
+ let eqb2 = _mm256_cmpeq_epi8(vn2, b);
+ let or1 = _mm256_or_si256(eqa1, eqb1);
+ let or2 = _mm256_or_si256(eqa2, eqb2);
+ let or3 = _mm256_or_si256(or1, or2);
+ if _mm256_movemask_epi8(or3) != 0 {
+ let mut at = sub(ptr.add(VECTOR_SIZE), start_ptr);
+ let mask1 = _mm256_movemask_epi8(eqb1);
+ let mask2 = _mm256_movemask_epi8(eqb2);
+ if mask1 != 0 || mask2 != 0 {
+ return Some(at + reverse_pos2(mask1, mask2));
+ }
+
+ at -= VECTOR_SIZE;
+ let mask1 = _mm256_movemask_epi8(eqa1);
+ let mask2 = _mm256_movemask_epi8(eqa2);
+ return Some(at + reverse_pos2(mask1, mask2));
+ }
+ }
+ while ptr >= start_ptr.add(VECTOR_SIZE) {
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) = reverse_search2(start_ptr, end_ptr, ptr, vn1, vn2) {
+ return Some(i);
+ }
+ }
+ if ptr > start_ptr {
+ debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE);
+ return reverse_search2(start_ptr, end_ptr, start_ptr, vn1, vn2);
+ }
+ None
+}
+
+#[target_feature(enable = "avx2")]
+pub unsafe fn memrchr3(
+ n1: u8,
+ n2: u8,
+ n3: u8,
+ haystack: &[u8],
+) -> Option<usize> {
+ let vn1 = _mm256_set1_epi8(n1 as i8);
+ let vn2 = _mm256_set1_epi8(n2 as i8);
+ let vn3 = _mm256_set1_epi8(n3 as i8);
+ let len = haystack.len();
+ let loop_size = cmp::min(LOOP_SIZE2, len);
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = end_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr > start_ptr {
+ ptr = ptr.offset(-1);
+ if *ptr == n1 || *ptr == n2 || *ptr == n3 {
+ return Some(sub(ptr, start_ptr));
+ }
+ }
+ return None;
+ }
+
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) = reverse_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) {
+ return Some(i);
+ }
+
+ ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8;
+ debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
+ while loop_size == LOOP_SIZE2 && ptr >= start_ptr.add(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ ptr = ptr.sub(loop_size);
+ let a = _mm256_load_si256(ptr as *const __m256i);
+ let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i);
+ let eqa1 = _mm256_cmpeq_epi8(vn1, a);
+ let eqb1 = _mm256_cmpeq_epi8(vn1, b);
+ let eqa2 = _mm256_cmpeq_epi8(vn2, a);
+ let eqb2 = _mm256_cmpeq_epi8(vn2, b);
+ let eqa3 = _mm256_cmpeq_epi8(vn3, a);
+ let eqb3 = _mm256_cmpeq_epi8(vn3, b);
+ let or1 = _mm256_or_si256(eqa1, eqb1);
+ let or2 = _mm256_or_si256(eqa2, eqb2);
+ let or3 = _mm256_or_si256(eqa3, eqb3);
+ let or4 = _mm256_or_si256(or1, or2);
+ let or5 = _mm256_or_si256(or3, or4);
+ if _mm256_movemask_epi8(or5) != 0 {
+ let mut at = sub(ptr.add(VECTOR_SIZE), start_ptr);
+ let mask1 = _mm256_movemask_epi8(eqb1);
+ let mask2 = _mm256_movemask_epi8(eqb2);
+ let mask3 = _mm256_movemask_epi8(eqb3);
+ if mask1 != 0 || mask2 != 0 || mask3 != 0 {
+ return Some(at + reverse_pos3(mask1, mask2, mask3));
+ }
+
+ at -= VECTOR_SIZE;
+ let mask1 = _mm256_movemask_epi8(eqa1);
+ let mask2 = _mm256_movemask_epi8(eqa2);
+ let mask3 = _mm256_movemask_epi8(eqa3);
+ return Some(at + reverse_pos3(mask1, mask2, mask3));
+ }
+ }
+ while ptr >= start_ptr.add(VECTOR_SIZE) {
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) =
+ reverse_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3)
+ {
+ return Some(i);
+ }
+ }
+ if ptr > start_ptr {
+ debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE);
+ return reverse_search3(start_ptr, end_ptr, start_ptr, vn1, vn2, vn3);
+ }
+ None
+}
+
+#[target_feature(enable = "avx2")]
+unsafe fn forward_search1(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m256i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm256_loadu_si256(ptr as *const __m256i);
+ let mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, vn1));
+ if mask != 0 {
+ Some(sub(ptr, start_ptr) + forward_pos(mask))
+ } else {
+ None
+ }
+}
+
+#[target_feature(enable = "avx2")]
+unsafe fn forward_search2(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m256i,
+ vn2: __m256i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm256_loadu_si256(ptr as *const __m256i);
+ let eq1 = _mm256_cmpeq_epi8(chunk, vn1);
+ let eq2 = _mm256_cmpeq_epi8(chunk, vn2);
+ if _mm256_movemask_epi8(_mm256_or_si256(eq1, eq2)) != 0 {
+ let mask1 = _mm256_movemask_epi8(eq1);
+ let mask2 = _mm256_movemask_epi8(eq2);
+ Some(sub(ptr, start_ptr) + forward_pos2(mask1, mask2))
+ } else {
+ None
+ }
+}
+
+#[target_feature(enable = "avx2")]
+unsafe fn forward_search3(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m256i,
+ vn2: __m256i,
+ vn3: __m256i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm256_loadu_si256(ptr as *const __m256i);
+ let eq1 = _mm256_cmpeq_epi8(chunk, vn1);
+ let eq2 = _mm256_cmpeq_epi8(chunk, vn2);
+ let eq3 = _mm256_cmpeq_epi8(chunk, vn3);
+ let or = _mm256_or_si256(eq1, eq2);
+ if _mm256_movemask_epi8(_mm256_or_si256(or, eq3)) != 0 {
+ let mask1 = _mm256_movemask_epi8(eq1);
+ let mask2 = _mm256_movemask_epi8(eq2);
+ let mask3 = _mm256_movemask_epi8(eq3);
+ Some(sub(ptr, start_ptr) + forward_pos3(mask1, mask2, mask3))
+ } else {
+ None
+ }
+}
+
+#[target_feature(enable = "avx2")]
+unsafe fn reverse_search1(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m256i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm256_loadu_si256(ptr as *const __m256i);
+ let mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(vn1, chunk));
+ if mask != 0 {
+ Some(sub(ptr, start_ptr) + reverse_pos(mask))
+ } else {
+ None
+ }
+}
+
+#[target_feature(enable = "avx2")]
+unsafe fn reverse_search2(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m256i,
+ vn2: __m256i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm256_loadu_si256(ptr as *const __m256i);
+ let eq1 = _mm256_cmpeq_epi8(chunk, vn1);
+ let eq2 = _mm256_cmpeq_epi8(chunk, vn2);
+ if _mm256_movemask_epi8(_mm256_or_si256(eq1, eq2)) != 0 {
+ let mask1 = _mm256_movemask_epi8(eq1);
+ let mask2 = _mm256_movemask_epi8(eq2);
+ Some(sub(ptr, start_ptr) + reverse_pos2(mask1, mask2))
+ } else {
+ None
+ }
+}
+
+#[target_feature(enable = "avx2")]
+unsafe fn reverse_search3(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m256i,
+ vn2: __m256i,
+ vn3: __m256i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm256_loadu_si256(ptr as *const __m256i);
+ let eq1 = _mm256_cmpeq_epi8(chunk, vn1);
+ let eq2 = _mm256_cmpeq_epi8(chunk, vn2);
+ let eq3 = _mm256_cmpeq_epi8(chunk, vn3);
+ let or = _mm256_or_si256(eq1, eq2);
+ if _mm256_movemask_epi8(_mm256_or_si256(or, eq3)) != 0 {
+ let mask1 = _mm256_movemask_epi8(eq1);
+ let mask2 = _mm256_movemask_epi8(eq2);
+ let mask3 = _mm256_movemask_epi8(eq3);
+ Some(sub(ptr, start_ptr) + reverse_pos3(mask1, mask2, mask3))
+ } else {
+ None
+ }
+}
+
+/// Compute the position of the first matching byte from the given mask. The
+/// position returned is always in the range [0, 31].
+///
+/// The mask given is expected to be the result of _mm256_movemask_epi8.
+fn forward_pos(mask: i32) -> usize {
+ // We are dealing with little endian here, where the most significant byte
+ // is at a higher address. That means the least significant bit that is set
+ // corresponds to the position of our first matching byte. That position
+ // corresponds to the number of zeros after the least significant bit.
+ mask.trailing_zeros() as usize
+}
+
+/// Compute the position of the first matching byte from the given masks. The
+/// position returned is always in the range [0, 31]. Each mask corresponds to
+/// the equality comparison of a single byte.
+///
+/// The masks given are expected to be the result of _mm256_movemask_epi8,
+/// where at least one of the masks is non-zero (i.e., indicates a match).
+fn forward_pos2(mask1: i32, mask2: i32) -> usize {
+ debug_assert!(mask1 != 0 || mask2 != 0);
+
+ forward_pos(mask1 | mask2)
+}
+
+/// Compute the position of the first matching byte from the given masks. The
+/// position returned is always in the range [0, 31]. Each mask corresponds to
+/// the equality comparison of a single byte.
+///
+/// The masks given are expected to be the result of _mm256_movemask_epi8,
+/// where at least one of the masks is non-zero (i.e., indicates a match).
+fn forward_pos3(mask1: i32, mask2: i32, mask3: i32) -> usize {
+ debug_assert!(mask1 != 0 || mask2 != 0 || mask3 != 0);
+
+ forward_pos(mask1 | mask2 | mask3)
+}
+
+/// Compute the position of the last matching byte from the given mask. The
+/// position returned is always in the range [0, 31].
+///
+/// The mask given is expected to be the result of _mm256_movemask_epi8.
+fn reverse_pos(mask: i32) -> usize {
+ // We are dealing with little endian here, where the most significant byte
+ // is at a higher address. That means the most significant bit that is set
+ // corresponds to the position of our last matching byte. The position from
+ // the end of the mask is therefore the number of leading zeros in a 32
+ // bit integer, and the position from the start of the mask is therefore
+ // 32 - (leading zeros) - 1.
+ VECTOR_SIZE - (mask as u32).leading_zeros() as usize - 1
+}
+
+/// Compute the position of the last matching byte from the given masks. The
+/// position returned is always in the range [0, 31]. Each mask corresponds to
+/// the equality comparison of a single byte.
+///
+/// The masks given are expected to be the result of _mm256_movemask_epi8,
+/// where at least one of the masks is non-zero (i.e., indicates a match).
+fn reverse_pos2(mask1: i32, mask2: i32) -> usize {
+ debug_assert!(mask1 != 0 || mask2 != 0);
+
+ reverse_pos(mask1 | mask2)
+}
+
+/// Compute the position of the last matching byte from the given masks. The
+/// position returned is always in the range [0, 31]. Each mask corresponds to
+/// the equality comparison of a single byte.
+///
+/// The masks given are expected to be the result of _mm256_movemask_epi8,
+/// where at least one of the masks is non-zero (i.e., indicates a match).
+fn reverse_pos3(mask1: i32, mask2: i32, mask3: i32) -> usize {
+ debug_assert!(mask1 != 0 || mask2 != 0 || mask3 != 0);
+
+ reverse_pos(mask1 | mask2 | mask3)
+}
+
+/// Subtract `b` from `a` and return the difference. `a` should be greater than
+/// or equal to `b`.
+fn sub(a: *const u8, b: *const u8) -> usize {
+ debug_assert!(a >= b);
+ (a as usize) - (b as usize)
+}
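+
+// Worked example of the mask arithmetic in `forward_pos` and `reverse_pos`
+// above. The constant below is a hand-picked movemask-style value (bits 3 and
+// 27 set, i.e., matches at byte offsets 3 and 27 of a 32 byte chunk), not the
+// result of a real `_mm256_movemask_epi8` call; the check is illustrative
+// only.
+#[cfg(test)]
+mod pos_examples {
+    use super::{forward_pos, reverse_pos};
+
+    #[test]
+    fn first_and_last_match_positions() {
+        // Bit i of the mask corresponds to byte i of a 32 byte chunk.
+        let mask: i32 = (1 << 3) | (1 << 27);
+        assert_eq!(3, forward_pos(mask));
+        assert_eq!(27, reverse_pos(mask));
+    }
+}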
diff --git a/third_party/rust/memchr/src/x86/mod.rs b/third_party/rust/memchr/src/x86/mod.rs
new file mode 100644
index 0000000000..855dc8b755
--- /dev/null
+++ b/third_party/rust/memchr/src/x86/mod.rs
@@ -0,0 +1,119 @@
+use fallback;
+
+// We only use AVX when we can detect at runtime whether it's available, which
+// requires std.
+#[cfg(feature = "std")]
+mod avx;
+mod sse2;
+
+// This macro employs a gcc-like "ifunc" trick whereby, upon first calling
+// `memchr` (for example), CPU feature detection will be performed at runtime
+// to determine the best implementation to use. After CPU feature detection
+// is done, we replace `memchr`'s function pointer with the selection. Upon
+// subsequent invocations, the CPU-specific routine is invoked directly, which
+// skips the CPU feature detection and subsequent branch that's required.
+//
+// While this typically doesn't matter for rare occurrences or when used on
+// larger haystacks, `memchr` can be called in tight loops where the overhead
+// of this branch can actually add up *and is measurable*. This trick was
+// necessary to bring this implementation up to glibc's speeds for the 'tiny'
+// benchmarks, for example.
+//
+// At some point, I expect the Rust ecosystem will get a nice macro for doing
+// exactly this, at which point, we can replace our hand-jammed version of it.
+//
+// N.B. The ifunc strategy does prevent function inlining of course, but on
+// modern CPUs, you'll probably end up with the AVX2 implementation, which
+// probably can't be inlined anyway---unless you've compiled your entire
+// program with AVX2 enabled. However, even then, the various memchr
+// implementations aren't exactly small, so inlining might not help anyway!
+#[cfg(feature = "std")]
+macro_rules! ifunc {
+ ($fnty:ty, $name:ident, $haystack:ident, $($needle:ident),+) => {{
+ use std::mem;
+ use std::sync::atomic::{AtomicPtr, Ordering};
+
+ type FnRaw = *mut ();
+
+ static FN: AtomicPtr<()> = AtomicPtr::new(detect as FnRaw);
+
+ fn detect($($needle: u8),+, haystack: &[u8]) -> Option<usize> {
+ let fun =
+ if cfg!(memchr_runtime_avx) && is_x86_feature_detected!("avx2") {
+ avx::$name as FnRaw
+ } else if cfg!(memchr_runtime_sse2) {
+ sse2::$name as FnRaw
+ } else {
+ fallback::$name as FnRaw
+ };
+ FN.store(fun as FnRaw, Ordering::Relaxed);
+ unsafe {
+ mem::transmute::<FnRaw, $fnty>(fun)($($needle),+, haystack)
+ }
+ }
+
+ unsafe {
+ let fun = FN.load(Ordering::Relaxed);
+ mem::transmute::<FnRaw, $fnty>(fun)($($needle),+, $haystack)
+ }
+ }}
+}
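+
+// The macro above is easier to follow when expanded by hand for a single
+// entry point. The function below is a hypothetical, manually expanded
+// version of `ifunc!` for `memchr` only; the names `memchr_expanded` and
+// `detect` here are made up and nothing in this crate calls this function.
+// It exists purely to illustrate the AtomicPtr-based dispatch described
+// above.
+#[cfg(feature = "std")]
+#[allow(dead_code)]
+fn memchr_expanded(n1: u8, haystack: &[u8]) -> Option<usize> {
+    use std::mem;
+    use std::sync::atomic::{AtomicPtr, Ordering};
+
+    type FnTy = fn(u8, &[u8]) -> Option<usize>;
+    type FnRaw = *mut ();
+
+    // Starts out pointing at the detection shim; after the first call, it
+    // holds a pointer to the implementation chosen for this CPU.
+    static FN: AtomicPtr<()> = AtomicPtr::new(detect as FnRaw);
+
+    fn detect(n1: u8, haystack: &[u8]) -> Option<usize> {
+        let fun =
+            if cfg!(memchr_runtime_avx) && is_x86_feature_detected!("avx2") {
+                avx::memchr as FnRaw
+            } else if cfg!(memchr_runtime_sse2) {
+                sse2::memchr as FnRaw
+            } else {
+                fallback::memchr as FnRaw
+            };
+        FN.store(fun, Ordering::Relaxed);
+        // Safety: `fun` always points at a function of type `FnTy`.
+        unsafe { mem::transmute::<FnRaw, FnTy>(fun)(n1, haystack) }
+    }
+
+    // Safety: `FN` only ever holds pointers to functions of type `FnTy`.
+    unsafe {
+        let fun = FN.load(Ordering::Relaxed);
+        mem::transmute::<FnRaw, FnTy>(fun)(n1, haystack)
+    }
+}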
+
+// When std isn't available to provide runtime CPU feature detection, or if
+// runtime CPU feature detection has been explicitly disabled, then just call
+// our optimized SSE2 routine directly. SSE2 is available on all x86_64 targets,
+// so no CPU feature detection is necessary.
+#[cfg(not(feature = "std"))]
+macro_rules! ifunc {
+ ($fnty:ty, $name:ident, $haystack:ident, $($needle:ident),+) => {{
+ if cfg!(memchr_runtime_sse2) {
+ unsafe { sse2::$name($($needle),+, $haystack) }
+ } else {
+ fallback::$name($($needle),+, $haystack)
+ }
+ }}
+}
+
+#[inline(always)]
+pub fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ ifunc!(fn(u8, &[u8]) -> Option<usize>, memchr, haystack, n1)
+}
+
+#[inline(always)]
+pub fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ ifunc!(fn(u8, u8, &[u8]) -> Option<usize>, memchr2, haystack, n1, n2)
+}
+
+#[inline(always)]
+pub fn memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ ifunc!(
+ fn(u8, u8, u8, &[u8]) -> Option<usize>,
+ memchr3,
+ haystack,
+ n1,
+ n2,
+ n3
+ )
+}
+
+#[inline(always)]
+pub fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ ifunc!(fn(u8, &[u8]) -> Option<usize>, memrchr, haystack, n1)
+}
+
+#[inline(always)]
+pub fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ ifunc!(fn(u8, u8, &[u8]) -> Option<usize>, memrchr2, haystack, n1, n2)
+}
+
+#[inline(always)]
+pub fn memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ ifunc!(
+ fn(u8, u8, u8, &[u8]) -> Option<usize>,
+ memrchr3,
+ haystack,
+ n1,
+ n2,
+ n3
+ )
+}
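+
+// Example of how the wrappers above are used. The haystack and the expected
+// indices below are made up purely for illustration.
+#[cfg(test)]
+mod usage_example {
+    #[test]
+    fn forward_and_reverse() {
+        let haystack = b"the quick brown fox";
+        assert_eq!(Some(8), super::memchr(b'k', haystack));
+        assert_eq!(Some(4), super::memchr2(b'q', b'b', haystack));
+        assert_eq!(Some(16), super::memrchr(b'f', haystack));
+        assert_eq!(None, super::memchr(b'z', haystack));
+    }
+}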
diff --git a/third_party/rust/memchr/src/x86/sse2.rs b/third_party/rust/memchr/src/x86/sse2.rs
new file mode 100644
index 0000000000..76f5a78c34
--- /dev/null
+++ b/third_party/rust/memchr/src/x86/sse2.rs
@@ -0,0 +1,793 @@
+use core::arch::x86_64::*;
+use core::cmp;
+use core::mem::size_of;
+
+const VECTOR_SIZE: usize = size_of::<__m128i>();
+const VECTOR_ALIGN: usize = VECTOR_SIZE - 1;
+
+// The number of bytes to loop at in one iteration of memchr/memrchr.
+const LOOP_SIZE: usize = 4 * VECTOR_SIZE;
+
+// The number of bytes to loop at in one iteration of memchr2/memrchr2 and
+// memchr3/memrchr3. There was no observable difference between 64 and 32 bytes
+// in benchmarks. memchr3 in particular only gets a very slight speed up from
+// the loop unrolling.
+const LOOP_SIZE2: usize = 2 * VECTOR_SIZE;
+
+#[target_feature(enable = "sse2")]
+pub unsafe fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ // What follows is a fast SSE2-only algorithm to detect the position of
+ // `n1` in `haystack` if it exists. From what I know, this is the "classic"
+ // algorithm. I believe it can be found in places like glibc and Go's
+ // standard library. It appears to be well known and is elaborated on in
+ // more detail here: https://gms.tf/stdfind-and-memchr-optimizations.html
+ //
+ // While this routine is very long, the basic idea is actually very simple
+ // and can be expressed straight-forwardly in pseudo code:
+ //
+ // needle = (n1 << 15) | (n1 << 14) | ... | (n1 << 1) | n1
+ // // Note: shift amount in bytes
+ //
+ // while i <= haystack.len() - 16:
+ // // A 16 byte vector. Each byte in chunk corresponds to a byte in
+ // // the haystack.
+ // chunk = haystack[i:i+16]
+ // // Compare bytes in needle with bytes in chunk. The result is a 16
+ // // byte chunk where each byte is 0xFF if the corresponding bytes
+ // // in needle and chunk were equal, or 0x00 otherwise.
+ // eqs = cmpeq(needle, chunk)
+ // // Return a 32 bit integer where the most significant 16 bits
+ // // are always 0 and the lower 16 bits correspond to whether the
+    //   // most significant bit in the corresponding byte in `eqs` is set.
+ // // In other words, `mask as u16` has bit i set if and only if
+ // // needle[i] == chunk[i].
+ // mask = movemask(eqs)
+ //
+ // // Mask is 0 if there is no match, and non-zero otherwise.
+ // if mask != 0:
+ // // trailing_zeros tells us the position of the least significant
+ // // bit that is set.
+ // return i + trailing_zeros(mask)
+ //
+ // // haystack length may not be a multiple of 16, so search the rest.
+ // while i < haystack.len():
+ // if haystack[i] == n1:
+ // return i
+ //
+ // // No match found.
+ // return NULL
+ //
+    // In fact, we could loosely translate the above code to Rust line-for-line
+    // and it would be a pretty fast algorithm; the illustrative `memchr_basic`
+    // sketch after this function is such a translation. But, we pull out all
+    // the stops to go as fast as possible:
+ //
+ // 1. We use aligned loads. That is, we do some finagling to make sure our
+ // primary loop not only proceeds in increments of 16 bytes, but that
+ // the address of haystack's pointer that we dereference is aligned to
+    //    16 bytes. 16 is a magic number here because it is the size of an
+    //    SSE2 128-bit vector. (For the AVX2 algorithm, 32 is the magic number.)
+ // Therefore, to get aligned loads, our pointer's address must be evenly
+ // divisible by 16.
+ // 2. Our primary loop proceeds 64 bytes at a time instead of 16. It's
+ // kind of like loop unrolling, but we combine the equality comparisons
+ // using a vector OR such that we only need to extract a single mask to
+ // determine whether a match exists or not. If so, then we do some
+ // book-keeping to determine the precise location but otherwise mush on.
+ // 3. We use our "chunk" comparison routine in as many places as possible,
+ // even if it means using unaligned loads. In particular, if haystack
+ // starts with an unaligned address, then we do an unaligned load to
+ // search the first 16 bytes. We then start our primary loop at the
+ // smallest subsequent aligned address, which will actually overlap with
+ // previously searched bytes. But we're OK with that. We do a similar
+ // dance at the end of our primary loop. Finally, to avoid a
+ // byte-at-a-time loop at the end, we do a final 16 byte unaligned load
+ // that may overlap with a previous load. This is OK because it converts
+ // a loop into a small number of very fast vector instructions.
+ //
+ // The primary downside of this algorithm is that it's effectively
+ // completely unsafe. Therefore, we have to be super careful to avoid
+ // undefined behavior:
+ //
+ // 1. We use raw pointers everywhere. Not only does dereferencing a pointer
+ // require the pointer to be valid, but we actually can't even store the
+ // address of an invalid pointer (unless it's 1 past the end of
+ // haystack) without sacrificing performance.
+ // 2. _mm_loadu_si128 is used when you don't care about alignment, and
+ // _mm_load_si128 is used when you do care. You cannot use the latter
+ // on unaligned pointers.
+ // 3. We make liberal use of debug_assert! to check assumptions.
+ // 4. We make a concerted effort to stick with pointers instead of indices.
+ // Indices are nicer because there's less to worry about with them (see
+ // above about pointer offsets), but I could not get the compiler to
+ // produce as good of code as what the below produces. In any case,
+ // pointers are what we really care about here, and alignment is
+ // expressed a bit more naturally with them.
+ //
+ // In general, most of the algorithms in this crate have a similar
+ // structure to what you see below, so this comment applies fairly well to
+ // all of them.
+
+ let vn1 = _mm_set1_epi8(n1 as i8);
+ let len = haystack.len();
+ let loop_size = cmp::min(LOOP_SIZE, len);
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr < end_ptr {
+ if *ptr == n1 {
+ return Some(sub(ptr, start_ptr));
+ }
+ ptr = ptr.offset(1);
+ }
+ return None;
+ }
+
+ if let Some(i) = forward_search1(start_ptr, end_ptr, ptr, vn1) {
+ return Some(i);
+ }
+
+ ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN));
+ debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr);
+ while loop_size == LOOP_SIZE && ptr <= end_ptr.sub(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ let a = _mm_load_si128(ptr as *const __m128i);
+ let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
+ let c = _mm_load_si128(ptr.add(2 * VECTOR_SIZE) as *const __m128i);
+ let d = _mm_load_si128(ptr.add(3 * VECTOR_SIZE) as *const __m128i);
+ let eqa = _mm_cmpeq_epi8(vn1, a);
+ let eqb = _mm_cmpeq_epi8(vn1, b);
+ let eqc = _mm_cmpeq_epi8(vn1, c);
+ let eqd = _mm_cmpeq_epi8(vn1, d);
+ let or1 = _mm_or_si128(eqa, eqb);
+ let or2 = _mm_or_si128(eqc, eqd);
+ let or3 = _mm_or_si128(or1, or2);
+ if _mm_movemask_epi8(or3) != 0 {
+ let mut at = sub(ptr, start_ptr);
+ let mask = _mm_movemask_epi8(eqa);
+ if mask != 0 {
+ return Some(at + forward_pos(mask));
+ }
+
+ at += VECTOR_SIZE;
+ let mask = _mm_movemask_epi8(eqb);
+ if mask != 0 {
+ return Some(at + forward_pos(mask));
+ }
+
+ at += VECTOR_SIZE;
+ let mask = _mm_movemask_epi8(eqc);
+ if mask != 0 {
+ return Some(at + forward_pos(mask));
+ }
+
+ at += VECTOR_SIZE;
+ let mask = _mm_movemask_epi8(eqd);
+ debug_assert!(mask != 0);
+ return Some(at + forward_pos(mask));
+ }
+ ptr = ptr.add(loop_size);
+ }
+ while ptr <= end_ptr.sub(VECTOR_SIZE) {
+ debug_assert!(sub(end_ptr, ptr) >= VECTOR_SIZE);
+
+ if let Some(i) = forward_search1(start_ptr, end_ptr, ptr, vn1) {
+ return Some(i);
+ }
+ ptr = ptr.add(VECTOR_SIZE);
+ }
+ if ptr < end_ptr {
+ debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE);
+ ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr));
+ debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE);
+
+ return forward_search1(start_ptr, end_ptr, ptr, vn1);
+ }
+ None
+}
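+
+// The "loose translation" referred to in the comment above, written out as a
+// hypothetical helper for illustration: one unaligned 16 byte load per
+// iteration, no alignment games and no unrolling. The name `memchr_basic` is
+// made up and nothing in this crate calls it; it only exists to show the core
+// loop that the optimized routine above builds on.
+#[allow(dead_code)]
+#[target_feature(enable = "sse2")]
+unsafe fn memchr_basic(n1: u8, haystack: &[u8]) -> Option<usize> {
+    let vn1 = _mm_set1_epi8(n1 as i8);
+    let start_ptr = haystack.as_ptr();
+    let end_ptr = haystack[haystack.len()..].as_ptr();
+    let mut ptr = start_ptr;
+
+    // Search one 16 byte chunk at a time using unaligned loads.
+    while sub(end_ptr, ptr) >= VECTOR_SIZE {
+        let chunk = _mm_loadu_si128(ptr as *const __m128i);
+        let mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, vn1));
+        if mask != 0 {
+            return Some(sub(ptr, start_ptr) + forward_pos(mask));
+        }
+        ptr = ptr.add(VECTOR_SIZE);
+    }
+    // The haystack length may not be a multiple of 16; finish byte-by-byte.
+    while ptr < end_ptr {
+        if *ptr == n1 {
+            return Some(sub(ptr, start_ptr));
+        }
+        ptr = ptr.offset(1);
+    }
+    None
+}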
+
+#[target_feature(enable = "sse2")]
+pub unsafe fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = _mm_set1_epi8(n1 as i8);
+ let vn2 = _mm_set1_epi8(n2 as i8);
+ let len = haystack.len();
+ let loop_size = cmp::min(LOOP_SIZE2, len);
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr < end_ptr {
+ if *ptr == n1 || *ptr == n2 {
+ return Some(sub(ptr, start_ptr));
+ }
+ ptr = ptr.offset(1);
+ }
+ return None;
+ }
+
+ if let Some(i) = forward_search2(start_ptr, end_ptr, ptr, vn1, vn2) {
+ return Some(i);
+ }
+
+ ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN));
+ debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr);
+ while loop_size == LOOP_SIZE2 && ptr <= end_ptr.sub(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ let a = _mm_load_si128(ptr as *const __m128i);
+ let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
+ let eqa1 = _mm_cmpeq_epi8(vn1, a);
+ let eqb1 = _mm_cmpeq_epi8(vn1, b);
+ let eqa2 = _mm_cmpeq_epi8(vn2, a);
+ let eqb2 = _mm_cmpeq_epi8(vn2, b);
+ let or1 = _mm_or_si128(eqa1, eqb1);
+ let or2 = _mm_or_si128(eqa2, eqb2);
+ let or3 = _mm_or_si128(or1, or2);
+ if _mm_movemask_epi8(or3) != 0 {
+ let mut at = sub(ptr, start_ptr);
+ let mask1 = _mm_movemask_epi8(eqa1);
+ let mask2 = _mm_movemask_epi8(eqa2);
+ if mask1 != 0 || mask2 != 0 {
+ return Some(at + forward_pos2(mask1, mask2));
+ }
+
+ at += VECTOR_SIZE;
+ let mask1 = _mm_movemask_epi8(eqb1);
+ let mask2 = _mm_movemask_epi8(eqb2);
+ return Some(at + forward_pos2(mask1, mask2));
+ }
+ ptr = ptr.add(loop_size);
+ }
+ while ptr <= end_ptr.sub(VECTOR_SIZE) {
+ if let Some(i) = forward_search2(start_ptr, end_ptr, ptr, vn1, vn2) {
+ return Some(i);
+ }
+ ptr = ptr.add(VECTOR_SIZE);
+ }
+ if ptr < end_ptr {
+ debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE);
+ ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr));
+ debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE);
+
+ return forward_search2(start_ptr, end_ptr, ptr, vn1, vn2);
+ }
+ None
+}
+
+#[target_feature(enable = "sse2")]
+pub unsafe fn memchr3(
+ n1: u8,
+ n2: u8,
+ n3: u8,
+ haystack: &[u8],
+) -> Option<usize> {
+ let vn1 = _mm_set1_epi8(n1 as i8);
+ let vn2 = _mm_set1_epi8(n2 as i8);
+ let vn3 = _mm_set1_epi8(n3 as i8);
+ let len = haystack.len();
+ let loop_size = cmp::min(LOOP_SIZE2, len);
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr < end_ptr {
+ if *ptr == n1 || *ptr == n2 || *ptr == n3 {
+ return Some(sub(ptr, start_ptr));
+ }
+ ptr = ptr.offset(1);
+ }
+ return None;
+ }
+
+ if let Some(i) = forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) {
+ return Some(i);
+ }
+
+ ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN));
+ debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr);
+ while loop_size == LOOP_SIZE2 && ptr <= end_ptr.sub(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ let a = _mm_load_si128(ptr as *const __m128i);
+ let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
+ let eqa1 = _mm_cmpeq_epi8(vn1, a);
+ let eqb1 = _mm_cmpeq_epi8(vn1, b);
+ let eqa2 = _mm_cmpeq_epi8(vn2, a);
+ let eqb2 = _mm_cmpeq_epi8(vn2, b);
+ let eqa3 = _mm_cmpeq_epi8(vn3, a);
+ let eqb3 = _mm_cmpeq_epi8(vn3, b);
+ let or1 = _mm_or_si128(eqa1, eqb1);
+ let or2 = _mm_or_si128(eqa2, eqb2);
+ let or3 = _mm_or_si128(eqa3, eqb3);
+ let or4 = _mm_or_si128(or1, or2);
+ let or5 = _mm_or_si128(or3, or4);
+ if _mm_movemask_epi8(or5) != 0 {
+ let mut at = sub(ptr, start_ptr);
+ let mask1 = _mm_movemask_epi8(eqa1);
+ let mask2 = _mm_movemask_epi8(eqa2);
+ let mask3 = _mm_movemask_epi8(eqa3);
+ if mask1 != 0 || mask2 != 0 || mask3 != 0 {
+ return Some(at + forward_pos3(mask1, mask2, mask3));
+ }
+
+ at += VECTOR_SIZE;
+ let mask1 = _mm_movemask_epi8(eqb1);
+ let mask2 = _mm_movemask_epi8(eqb2);
+ let mask3 = _mm_movemask_epi8(eqb3);
+ return Some(at + forward_pos3(mask1, mask2, mask3));
+ }
+ ptr = ptr.add(loop_size);
+ }
+ while ptr <= end_ptr.sub(VECTOR_SIZE) {
+ if let Some(i) =
+ forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3)
+ {
+ return Some(i);
+ }
+ ptr = ptr.add(VECTOR_SIZE);
+ }
+ if ptr < end_ptr {
+ debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE);
+ ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr));
+ debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE);
+
+ return forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3);
+ }
+ None
+}
+
+#[target_feature(enable = "sse2")]
+pub unsafe fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = _mm_set1_epi8(n1 as i8);
+ let len = haystack.len();
+ let loop_size = cmp::min(LOOP_SIZE, len);
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = end_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr > start_ptr {
+ ptr = ptr.offset(-1);
+ if *ptr == n1 {
+ return Some(sub(ptr, start_ptr));
+ }
+ }
+ return None;
+ }
+
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) = reverse_search1(start_ptr, end_ptr, ptr, vn1) {
+ return Some(i);
+ }
+
+ ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8;
+ debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
+ while loop_size == LOOP_SIZE && ptr >= start_ptr.add(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ ptr = ptr.sub(loop_size);
+ let a = _mm_load_si128(ptr as *const __m128i);
+ let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
+ let c = _mm_load_si128(ptr.add(2 * VECTOR_SIZE) as *const __m128i);
+ let d = _mm_load_si128(ptr.add(3 * VECTOR_SIZE) as *const __m128i);
+ let eqa = _mm_cmpeq_epi8(vn1, a);
+ let eqb = _mm_cmpeq_epi8(vn1, b);
+ let eqc = _mm_cmpeq_epi8(vn1, c);
+ let eqd = _mm_cmpeq_epi8(vn1, d);
+ let or1 = _mm_or_si128(eqa, eqb);
+ let or2 = _mm_or_si128(eqc, eqd);
+ let or3 = _mm_or_si128(or1, or2);
+ if _mm_movemask_epi8(or3) != 0 {
+ let mut at = sub(ptr.add(3 * VECTOR_SIZE), start_ptr);
+ let mask = _mm_movemask_epi8(eqd);
+ if mask != 0 {
+ return Some(at + reverse_pos(mask));
+ }
+
+ at -= VECTOR_SIZE;
+ let mask = _mm_movemask_epi8(eqc);
+ if mask != 0 {
+ return Some(at + reverse_pos(mask));
+ }
+
+ at -= VECTOR_SIZE;
+ let mask = _mm_movemask_epi8(eqb);
+ if mask != 0 {
+ return Some(at + reverse_pos(mask));
+ }
+
+ at -= VECTOR_SIZE;
+ let mask = _mm_movemask_epi8(eqa);
+ debug_assert!(mask != 0);
+ return Some(at + reverse_pos(mask));
+ }
+ }
+ while ptr >= start_ptr.add(VECTOR_SIZE) {
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) = reverse_search1(start_ptr, end_ptr, ptr, vn1) {
+ return Some(i);
+ }
+ }
+ if ptr > start_ptr {
+ debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE);
+ return reverse_search1(start_ptr, end_ptr, start_ptr, vn1);
+ }
+ None
+}
+
+#[target_feature(enable = "sse2")]
+pub unsafe fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = _mm_set1_epi8(n1 as i8);
+ let vn2 = _mm_set1_epi8(n2 as i8);
+ let len = haystack.len();
+ let loop_size = cmp::min(LOOP_SIZE2, len);
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = end_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr > start_ptr {
+ ptr = ptr.offset(-1);
+ if *ptr == n1 || *ptr == n2 {
+ return Some(sub(ptr, start_ptr));
+ }
+ }
+ return None;
+ }
+
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) = reverse_search2(start_ptr, end_ptr, ptr, vn1, vn2) {
+ return Some(i);
+ }
+
+ ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8;
+ debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
+ while loop_size == LOOP_SIZE2 && ptr >= start_ptr.add(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ ptr = ptr.sub(loop_size);
+ let a = _mm_load_si128(ptr as *const __m128i);
+ let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
+ let eqa1 = _mm_cmpeq_epi8(vn1, a);
+ let eqb1 = _mm_cmpeq_epi8(vn1, b);
+ let eqa2 = _mm_cmpeq_epi8(vn2, a);
+ let eqb2 = _mm_cmpeq_epi8(vn2, b);
+ let or1 = _mm_or_si128(eqa1, eqb1);
+ let or2 = _mm_or_si128(eqa2, eqb2);
+ let or3 = _mm_or_si128(or1, or2);
+ if _mm_movemask_epi8(or3) != 0 {
+ let mut at = sub(ptr.add(VECTOR_SIZE), start_ptr);
+ let mask1 = _mm_movemask_epi8(eqb1);
+ let mask2 = _mm_movemask_epi8(eqb2);
+ if mask1 != 0 || mask2 != 0 {
+ return Some(at + reverse_pos2(mask1, mask2));
+ }
+
+ at -= VECTOR_SIZE;
+ let mask1 = _mm_movemask_epi8(eqa1);
+ let mask2 = _mm_movemask_epi8(eqa2);
+ return Some(at + reverse_pos2(mask1, mask2));
+ }
+ }
+ while ptr >= start_ptr.add(VECTOR_SIZE) {
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) = reverse_search2(start_ptr, end_ptr, ptr, vn1, vn2) {
+ return Some(i);
+ }
+ }
+ if ptr > start_ptr {
+ debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE);
+ return reverse_search2(start_ptr, end_ptr, start_ptr, vn1, vn2);
+ }
+ None
+}
+
+#[target_feature(enable = "sse2")]
+pub unsafe fn memrchr3(
+ n1: u8,
+ n2: u8,
+ n3: u8,
+ haystack: &[u8],
+) -> Option<usize> {
+ let vn1 = _mm_set1_epi8(n1 as i8);
+ let vn2 = _mm_set1_epi8(n2 as i8);
+ let vn3 = _mm_set1_epi8(n3 as i8);
+ let len = haystack.len();
+ let loop_size = cmp::min(LOOP_SIZE2, len);
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = end_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr > start_ptr {
+ ptr = ptr.offset(-1);
+ if *ptr == n1 || *ptr == n2 || *ptr == n3 {
+ return Some(sub(ptr, start_ptr));
+ }
+ }
+ return None;
+ }
+
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) = reverse_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) {
+ return Some(i);
+ }
+
+ ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8;
+ debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
+ while loop_size == LOOP_SIZE2 && ptr >= start_ptr.add(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ ptr = ptr.sub(loop_size);
+ let a = _mm_load_si128(ptr as *const __m128i);
+ let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
+ let eqa1 = _mm_cmpeq_epi8(vn1, a);
+ let eqb1 = _mm_cmpeq_epi8(vn1, b);
+ let eqa2 = _mm_cmpeq_epi8(vn2, a);
+ let eqb2 = _mm_cmpeq_epi8(vn2, b);
+ let eqa3 = _mm_cmpeq_epi8(vn3, a);
+ let eqb3 = _mm_cmpeq_epi8(vn3, b);
+ let or1 = _mm_or_si128(eqa1, eqb1);
+ let or2 = _mm_or_si128(eqa2, eqb2);
+ let or3 = _mm_or_si128(eqa3, eqb3);
+ let or4 = _mm_or_si128(or1, or2);
+ let or5 = _mm_or_si128(or3, or4);
+ if _mm_movemask_epi8(or5) != 0 {
+ let mut at = sub(ptr.add(VECTOR_SIZE), start_ptr);
+ let mask1 = _mm_movemask_epi8(eqb1);
+ let mask2 = _mm_movemask_epi8(eqb2);
+ let mask3 = _mm_movemask_epi8(eqb3);
+ if mask1 != 0 || mask2 != 0 || mask3 != 0 {
+ return Some(at + reverse_pos3(mask1, mask2, mask3));
+ }
+
+ at -= VECTOR_SIZE;
+ let mask1 = _mm_movemask_epi8(eqa1);
+ let mask2 = _mm_movemask_epi8(eqa2);
+ let mask3 = _mm_movemask_epi8(eqa3);
+ return Some(at + reverse_pos3(mask1, mask2, mask3));
+ }
+ }
+ while ptr >= start_ptr.add(VECTOR_SIZE) {
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) =
+ reverse_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3)
+ {
+ return Some(i);
+ }
+ }
+ if ptr > start_ptr {
+ debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE);
+ return reverse_search3(start_ptr, end_ptr, start_ptr, vn1, vn2, vn3);
+ }
+ None
+}
+
+#[target_feature(enable = "sse2")]
+pub unsafe fn forward_search1(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m128i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm_loadu_si128(ptr as *const __m128i);
+ let mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, vn1));
+ if mask != 0 {
+ Some(sub(ptr, start_ptr) + forward_pos(mask))
+ } else {
+ None
+ }
+}
+
+#[target_feature(enable = "sse2")]
+unsafe fn forward_search2(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m128i,
+ vn2: __m128i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm_loadu_si128(ptr as *const __m128i);
+ let eq1 = _mm_cmpeq_epi8(chunk, vn1);
+ let eq2 = _mm_cmpeq_epi8(chunk, vn2);
+ if _mm_movemask_epi8(_mm_or_si128(eq1, eq2)) != 0 {
+ let mask1 = _mm_movemask_epi8(eq1);
+ let mask2 = _mm_movemask_epi8(eq2);
+ Some(sub(ptr, start_ptr) + forward_pos2(mask1, mask2))
+ } else {
+ None
+ }
+}
+
+#[target_feature(enable = "sse2")]
+pub unsafe fn forward_search3(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m128i,
+ vn2: __m128i,
+ vn3: __m128i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm_loadu_si128(ptr as *const __m128i);
+ let eq1 = _mm_cmpeq_epi8(chunk, vn1);
+ let eq2 = _mm_cmpeq_epi8(chunk, vn2);
+ let eq3 = _mm_cmpeq_epi8(chunk, vn3);
+ let or = _mm_or_si128(eq1, eq2);
+ if _mm_movemask_epi8(_mm_or_si128(or, eq3)) != 0 {
+ let mask1 = _mm_movemask_epi8(eq1);
+ let mask2 = _mm_movemask_epi8(eq2);
+ let mask3 = _mm_movemask_epi8(eq3);
+ Some(sub(ptr, start_ptr) + forward_pos3(mask1, mask2, mask3))
+ } else {
+ None
+ }
+}
+
+#[target_feature(enable = "sse2")]
+unsafe fn reverse_search1(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m128i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm_loadu_si128(ptr as *const __m128i);
+ let mask = _mm_movemask_epi8(_mm_cmpeq_epi8(vn1, chunk));
+ if mask != 0 {
+ Some(sub(ptr, start_ptr) + reverse_pos(mask))
+ } else {
+ None
+ }
+}
+
+#[target_feature(enable = "sse2")]
+unsafe fn reverse_search2(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m128i,
+ vn2: __m128i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm_loadu_si128(ptr as *const __m128i);
+ let eq1 = _mm_cmpeq_epi8(chunk, vn1);
+ let eq2 = _mm_cmpeq_epi8(chunk, vn2);
+ if _mm_movemask_epi8(_mm_or_si128(eq1, eq2)) != 0 {
+ let mask1 = _mm_movemask_epi8(eq1);
+ let mask2 = _mm_movemask_epi8(eq2);
+ Some(sub(ptr, start_ptr) + reverse_pos2(mask1, mask2))
+ } else {
+ None
+ }
+}
+
+#[target_feature(enable = "sse2")]
+unsafe fn reverse_search3(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m128i,
+ vn2: __m128i,
+ vn3: __m128i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm_loadu_si128(ptr as *const __m128i);
+ let eq1 = _mm_cmpeq_epi8(chunk, vn1);
+ let eq2 = _mm_cmpeq_epi8(chunk, vn2);
+ let eq3 = _mm_cmpeq_epi8(chunk, vn3);
+ let or = _mm_or_si128(eq1, eq2);
+ if _mm_movemask_epi8(_mm_or_si128(or, eq3)) != 0 {
+ let mask1 = _mm_movemask_epi8(eq1);
+ let mask2 = _mm_movemask_epi8(eq2);
+ let mask3 = _mm_movemask_epi8(eq3);
+ Some(sub(ptr, start_ptr) + reverse_pos3(mask1, mask2, mask3))
+ } else {
+ None
+ }
+}
+
+/// Compute the position of the first matching byte from the given mask. The
+/// position returned is always in the range [0, 15].
+///
+/// The mask given is expected to be the result of _mm_movemask_epi8.
+fn forward_pos(mask: i32) -> usize {
+ // We are dealing with little endian here, where the most significant byte
+ // is at a higher address. That means the least significant bit that is set
+ // corresponds to the position of our first matching byte. That position
+ // corresponds to the number of zeros after the least significant bit.
+ mask.trailing_zeros() as usize
+}
+
+/// Compute the position of the first matching byte from the given masks. The
+/// position returned is always in the range [0, 15]. Each mask corresponds to
+/// the equality comparison of a single byte.
+///
+/// The masks given are expected to be the result of _mm_movemask_epi8, where
+/// at least one of the masks is non-zero (i.e., indicates a match).
+fn forward_pos2(mask1: i32, mask2: i32) -> usize {
+ debug_assert!(mask1 != 0 || mask2 != 0);
+
+ forward_pos(mask1 | mask2)
+}
+
+/// Compute the position of the first matching byte from the given masks. The
+/// position returned is always in the range [0, 15]. Each mask corresponds to
+/// the equality comparison of a single byte.
+///
+/// The masks given are expected to be the result of _mm_movemask_epi8, where
+/// at least one of the masks is non-zero (i.e., indicates a match).
+fn forward_pos3(mask1: i32, mask2: i32, mask3: i32) -> usize {
+ debug_assert!(mask1 != 0 || mask2 != 0 || mask3 != 0);
+
+ forward_pos(mask1 | mask2 | mask3)
+}
+
+/// Compute the position of the last matching byte from the given mask. The
+/// position returned is always in the range [0, 15].
+///
+/// The mask given is expected to be the result of _mm_movemask_epi8.
+fn reverse_pos(mask: i32) -> usize {
+ // We are dealing with little endian here, where the most significant byte
+ // is at a higher address. That means the most significant bit that is set
+ // corresponds to the position of our last matching byte. The position from
+ // the end of the mask is therefore the number of leading zeros in a 16
+ // bit integer, and the position from the start of the mask is therefore
+ // 16 - (leading zeros) - 1.
+ VECTOR_SIZE - (mask as u16).leading_zeros() as usize - 1
+}
+
+/// Compute the position of the last matching byte from the given masks. The
+/// position returned is always in the range [0, 15]. Each mask corresponds to
+/// the equality comparison of a single byte.
+///
+/// The masks given are expected to be the result of _mm_movemask_epi8, where
+/// at least one of the masks is non-zero (i.e., indicates a match).
+fn reverse_pos2(mask1: i32, mask2: i32) -> usize {
+ debug_assert!(mask1 != 0 || mask2 != 0);
+
+ reverse_pos(mask1 | mask2)
+}
+
+/// Compute the position of the last matching byte from the given masks. The
+/// position returned is always in the range [0, 15]. Each mask corresponds to
+/// the equality comparison of a single byte.
+///
+/// The masks given are expected to be the result of _mm_movemask_epi8, where
+/// at least one of the masks is non-zero (i.e., indicates a match).
+fn reverse_pos3(mask1: i32, mask2: i32, mask3: i32) -> usize {
+ debug_assert!(mask1 != 0 || mask2 != 0 || mask3 != 0);
+
+ reverse_pos(mask1 | mask2 | mask3)
+}
+
+/// Subtract `b` from `a` and return the difference. `a` should be greater than
+/// or equal to `b`.
+fn sub(a: *const u8, b: *const u8) -> usize {
+ debug_assert!(a >= b);
+ (a as usize) - (b as usize)
+}
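+
+// Worked example of the alignment adjustment used by the forward routines
+// above: `VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN)` is the number of
+// bytes to skip so that the main loop starts on a 16 byte boundary. The
+// addresses below are made-up integers used purely to illustrate the
+// arithmetic.
+#[cfg(test)]
+mod align_example {
+    use super::{VECTOR_ALIGN, VECTOR_SIZE};
+
+    #[test]
+    fn round_up_to_vector_alignment() {
+        // An unaligned start: 0x1009 & 0xF == 9, so skip 16 - 9 = 7 bytes to
+        // land on 0x1010, the next multiple of 16.
+        let start = 0x1009usize;
+        let adjust = VECTOR_SIZE - (start & VECTOR_ALIGN);
+        assert_eq!(7, adjust);
+        assert_eq!(0, (start + adjust) % VECTOR_SIZE);
+
+        // An already aligned start skips a full 16 bytes, which is fine
+        // because the first 16 bytes were already searched with an unaligned
+        // load before the main loop begins.
+        let start = 0x2000usize;
+        assert_eq!(VECTOR_SIZE, VECTOR_SIZE - (start & VECTOR_ALIGN));
+    }
+}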
diff --git a/third_party/rust/memchr/src/x86/sse42.rs b/third_party/rust/memchr/src/x86/sse42.rs
new file mode 100644
index 0000000000..78a9b37973
--- /dev/null
+++ b/third_party/rust/memchr/src/x86/sse42.rs
@@ -0,0 +1,75 @@
+// This code is unused. PCMPESTRI is gratuitously slow. I imagine it might
+// start winning with a hypothetical memchr4 (or greater). This technique might
+// also be good for exposing searches over ranges of bytes, but that departs
+// from the standard memchr API, so it's not clear whether we actually want
+// that or not.
+//
+// N.B. PCMPISTRI appears to be about twice as fast as PCMPESTRI, which is kind
+// of neat. Unfortunately, UTF-8 strings can contain NUL bytes, which means
+// I don't see a way of effectively using PCMPISTRI unless there's some fast
+// way to replace zero bytes with a byte that is not a needle byte.
+
+use core::arch::x86_64::*;
+use core::mem::size_of;
+
+use x86::sse2;
+
+const VECTOR_SIZE: usize = size_of::<__m128i>();
+const CONTROL_ANY: i32 =
+ _SIDD_UBYTE_OPS
+ | _SIDD_CMP_EQUAL_ANY
+ | _SIDD_POSITIVE_POLARITY
+ | _SIDD_LEAST_SIGNIFICANT;
+
+#[target_feature(enable = "sse4.2")]
+pub unsafe fn memchr3(
+ n1: u8, n2: u8, n3: u8,
+ haystack: &[u8]
+) -> Option<usize> {
+ let vn1 = _mm_set1_epi8(n1 as i8);
+ let vn2 = _mm_set1_epi8(n2 as i8);
+ let vn3 = _mm_set1_epi8(n3 as i8);
+ let vn = _mm_setr_epi8(
+ n1 as i8, n2 as i8, n3 as i8, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ );
+ let len = haystack.len();
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr < end_ptr {
+ if *ptr == n1 || *ptr == n2 || *ptr == n3 {
+ return Some(sub(ptr, start_ptr));
+ }
+ ptr = ptr.offset(1);
+ }
+ return None;
+ }
+ while ptr <= end_ptr.sub(VECTOR_SIZE) {
+ let chunk = _mm_loadu_si128(ptr as *const __m128i);
+ let res = _mm_cmpestri(vn, 3, chunk, 16, CONTROL_ANY);
+ if res < 16 {
+ return Some(sub(ptr, start_ptr) + res as usize);
+ }
+ ptr = ptr.add(VECTOR_SIZE);
+ }
+ if ptr < end_ptr {
+ debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE);
+ ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr));
+ debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE);
+
+ return sse2::forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3);
+ }
+ None
+}
+
+/// Subtract `b` from `a` and return the difference. `a` should be greater than
+/// or equal to `b`.
+fn sub(a: *const u8, b: *const u8) -> usize {
+ debug_assert!(a >= b);
+ (a as usize) - (b as usize)
+}