summaryrefslogtreecommitdiffstats
path: root/third_party/rust/memchr
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-28 14:29:10 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-28 14:29:10 +0000
commit2aa4a82499d4becd2284cdb482213d541b8804dd (patch)
treeb80bf8bf13c3766139fbacc530efd0dd9d54394c /third_party/rust/memchr
parentInitial commit. (diff)
downloadfirefox-2aa4a82499d4becd2284cdb482213d541b8804dd.tar.xz
firefox-2aa4a82499d4becd2284cdb482213d541b8804dd.zip
Adding upstream version 86.0.1.upstream/86.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/memchr')
-rw-r--r--third_party/rust/memchr/.cargo-checksum.json1
-rw-r--r--third_party/rust/memchr/COPYING3
-rw-r--r--third_party/rust/memchr/Cargo.toml42
-rw-r--r--third_party/rust/memchr/LICENSE-MIT21
-rw-r--r--third_party/rust/memchr/README.md79
-rw-r--r--third_party/rust/memchr/UNLICENSE24
-rw-r--r--third_party/rust/memchr/build.rs61
-rw-r--r--third_party/rust/memchr/rustfmt.toml2
-rw-r--r--third_party/rust/memchr/src/c.rs44
-rw-r--r--third_party/rust/memchr/src/fallback.rs330
-rw-r--r--third_party/rust/memchr/src/iter.rs173
-rw-r--r--third_party/rust/memchr/src/lib.rs451
-rw-r--r--third_party/rust/memchr/src/naive.rs25
-rw-r--r--third_party/rust/memchr/src/tests/iter.rs229
-rw-r--r--third_party/rust/memchr/src/tests/memchr.rs131
-rw-r--r--third_party/rust/memchr/src/tests/miri.rs19
-rw-r--r--third_party/rust/memchr/src/tests/mod.rs362
-rw-r--r--third_party/rust/memchr/src/x86/avx.rs703
-rw-r--r--third_party/rust/memchr/src/x86/mod.rs119
-rw-r--r--third_party/rust/memchr/src/x86/sse2.rs793
-rw-r--r--third_party/rust/memchr/src/x86/sse42.rs75
21 files changed, 3687 insertions, 0 deletions
diff --git a/third_party/rust/memchr/.cargo-checksum.json b/third_party/rust/memchr/.cargo-checksum.json
new file mode 100644
index 0000000000..63cbe32f9c
--- /dev/null
+++ b/third_party/rust/memchr/.cargo-checksum.json
@@ -0,0 +1 @@
+{"files":{"COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"7f0cb3d85439207568f7bb264b9fd5c8c5f5dedf9ee428813ee5d686ca37df18","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"d2ab7c9c77235b68d1cc856ab5ef7b5115312098469edcac9d5611c5b74d3cd1","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","build.rs":"ed35d244304888581bfcbdc52596721a5bbb908bcbd56bbdfe977800ef3042e1","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/c.rs":"86fe35cbb46c8bece9927fbde20f1ca3af526defdde05ac969ad2f4bc9bb25e9","src/fallback.rs":"79519255d480a9c2667c06f9287931cfc2b85f6af6fcf92d453a11ee161dcb74","src/iter.rs":"23b1066d6b40159fe944388db7743c89422b1110ddb44667fde6d722f178ed4e","src/lib.rs":"8278d5f65db8081fa5ad3a0d17dac54d30de3d7a01b0dc4479927156e40b225c","src/naive.rs":"c7453bc99cc4e58eb37cf5a50c88688833e50a270ee1849baefddb8acc0ccd94","src/tests/iter.rs":"8d5999a2a5b8a3228c76cd82cd3ee86dfa6f7b4022405b1f06eedf7d74e4c704","src/tests/memchr.rs":"f30074eeab99a16ce5ca8a30f1890f86c43c0422523a7195cbb3ca5f3e465b67","src/tests/miri.rs":"27859a7ac1d0a9305b2d114e6dd8876f3d5e47fc46a81be96239c793ac6edb1f","src/tests/mod.rs":"2ad0c82d33b32562087254522641ea7bfa2a283130152be5e927a33f1978ebc7","src/x86/avx.rs":"b19987410e49a079f33162424c42494626c91303c41824961e478be3b537c9c9","src/x86/mod.rs":"0b13becaabc150a0099f7528226c82e288136cc7ebcdb8e96cf5ee9aae0f05b6","src/x86/sse2.rs":"7c1b8248a8cd48396cb70a862d77f9a972f1e16324d65c260f39d13af73bf638","src/x86/sse42.rs":"f671ae9dd2b518a823e499a09ce32d4957bc5ae043db90d61c027e32f688f2b2"},"package":"3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400"} \ No newline at end of file
diff --git a/third_party/rust/memchr/COPYING b/third_party/rust/memchr/COPYING
new file mode 100644
index 0000000000..bb9c20a094
--- /dev/null
+++ b/third_party/rust/memchr/COPYING
@@ -0,0 +1,3 @@
+This project is dual-licensed under the Unlicense and MIT licenses.
+
+You may use this code under the terms of either license.
diff --git a/third_party/rust/memchr/Cargo.toml b/third_party/rust/memchr/Cargo.toml
new file mode 100644
index 0000000000..6cdb3a1946
--- /dev/null
+++ b/third_party/rust/memchr/Cargo.toml
@@ -0,0 +1,42 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies
+#
+# If you believe there's an error in this file please file an
+# issue against the rust-lang/cargo repository. If you're
+# editing this file be aware that the upstream Cargo.toml
+# will likely look very different (and much more reasonable)
+
+[package]
+name = "memchr"
+version = "2.3.3"
+authors = ["Andrew Gallant <jamslam@gmail.com>", "bluss"]
+exclude = ["/ci/*", "/.travis.yml", "/Makefile", "/appveyor.yml"]
+description = "Safe interface to memchr."
+homepage = "https://github.com/BurntSushi/rust-memchr"
+documentation = "https://docs.rs/memchr/"
+readme = "README.md"
+keywords = ["memchr", "char", "scan", "strchr", "string"]
+license = "Unlicense/MIT"
+repository = "https://github.com/BurntSushi/rust-memchr"
+[profile.test]
+opt-level = 3
+
+[lib]
+name = "memchr"
+bench = false
+[dependencies.libc]
+version = "0.2.18"
+optional = true
+default-features = false
+[dev-dependencies.quickcheck]
+version = "0.9"
+default-features = false
+
+[features]
+default = ["std"]
+std = []
+use_std = ["std"]
diff --git a/third_party/rust/memchr/LICENSE-MIT b/third_party/rust/memchr/LICENSE-MIT
new file mode 100644
index 0000000000..3b0a5dc09c
--- /dev/null
+++ b/third_party/rust/memchr/LICENSE-MIT
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Andrew Gallant
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/third_party/rust/memchr/README.md b/third_party/rust/memchr/README.md
new file mode 100644
index 0000000000..f78a5a5370
--- /dev/null
+++ b/third_party/rust/memchr/README.md
@@ -0,0 +1,79 @@
+memchr
+======
+The `memchr` crate provides heavily optimized routines for searching bytes.
+
+[![Build status](https://github.com/BurntSushi/rust-memchr/workflows/ci/badge.svg)](https://github.com/BurntSushi/rust-memchr/actions)
+[![](http://meritbadge.herokuapp.com/memchr)](https://crates.io/crates/memchr)
+
+Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
+
+
+### Documentation
+
+[https://docs.rs/memchr](https://docs.rs/memchr)
+
+
+### Overview
+
+The `memchr` function is traditionally provided by libc, but its
+performance can vary significantly depending on the specific
+implementation of libc that is used. They can range from manually tuned
+Assembly implementations (like that found in GNU's libc) all the way to
+non-vectorized C implementations (like that found in MUSL).
+
+To smooth out the differences between implementations of libc, at least
+on `x86_64` for Rust 1.27+, this crate provides its own implementation of
+`memchr` that should perform competitively with the one found in GNU's libc.
+The implementation is in pure Rust and has no dependency on a C compiler or an
+Assembler.
+
+Additionally, GNU libc also provides an extension, `memrchr`. This crate
+provides its own implementation of `memrchr` as well, on top of `memchr2`,
+`memchr3`, `memrchr2` and `memrchr3`. The difference between `memchr` and
+`memchr2` is that `memchr2` permits finding all occurrences of two bytes
+instead of one. Similarly for `memchr3`.
+
+### Compiling without the standard library
+
+memchr links to the standard library by default, but you can disable the
+`std` feature if you want to use it in a `#![no_std]` crate:
+
+```toml
+[dependencies]
+memchr = { version = "2", default-features = false }
+```
+
+On x86 platforms, when the `std` feature is disabled, the SSE2
+implementation of memchr will be used in compilers that support it. When
+`std` is enabled, the AVX implementation of memchr will be used if the CPU
+is determined to support it at runtime.
+
+### Using libc
+
+`memchr` is a routine that is part of libc, although this crate does not use
+libc by default. Instead, it uses its own routines, which are either vectorized
+or generic fallback routines. In general, these should be competitive with
+what's in libc, although this has not been tested for all architectures. If
+using `memchr` from libc is desirable and a vectorized routine is not otherwise
+available in this crate, then enabling the `libc` feature will use libc's
+version of `memchr`.
+
+The rest of the functions in this crate, e.g., `memchr2` or `memrchr3`, are not
+a standard part of libc, so they will always use the implementations in this
+crate. One exception to this is `memrchr`, which is an extension commonly found
+on Linux. On Linux, `memrchr` is used in precisely the same scenario as
+`memchr`, as described above.
+
+
+### Minimum Rust version policy
+
+This crate's minimum supported `rustc` version is `1.28.0`.
+
+The current policy is that the minimum Rust version required to use this crate
+can be increased in minor version updates. For example, if `crate 1.0` requires
+Rust 1.20.0, then `crate 1.0.z` for all values of `z` will also require Rust
+1.20.0 or newer. However, `crate 1.y` for `y > 0` may require a newer minimum
+version of Rust.
+
+In general, this crate will be conservative with respect to the minimum
+supported version of Rust.
diff --git a/third_party/rust/memchr/UNLICENSE b/third_party/rust/memchr/UNLICENSE
new file mode 100644
index 0000000000..68a49daad8
--- /dev/null
+++ b/third_party/rust/memchr/UNLICENSE
@@ -0,0 +1,24 @@
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org/>
diff --git a/third_party/rust/memchr/build.rs b/third_party/rust/memchr/build.rs
new file mode 100644
index 0000000000..4ae3184d41
--- /dev/null
+++ b/third_party/rust/memchr/build.rs
@@ -0,0 +1,61 @@
+use std::env;
+
+fn main() {
+ enable_simd_optimizations();
+ enable_libc();
+}
+
+// This adds various simd cfgs if this compiler supports it.
+//
+// This can be disabled with RUSTFLAGS="--cfg memchr_disable_auto_simd", but
+// this is generally only intended for testing.
+fn enable_simd_optimizations() {
+ if is_env_set("CARGO_CFG_MEMCHR_DISABLE_AUTO_SIMD") {
+ return;
+ }
+ println!("cargo:rustc-cfg=memchr_runtime_simd");
+ println!("cargo:rustc-cfg=memchr_runtime_sse2");
+ println!("cargo:rustc-cfg=memchr_runtime_sse42");
+ println!("cargo:rustc-cfg=memchr_runtime_avx");
+}
+
+// This adds a `memchr_libc` cfg if and only if libc can be used, if no other
+// better option is available.
+//
+// This could be performed in the source code, but it's simpler to do it once
+// here and consolidate it into one cfg knob.
+//
+// Basically, we use libc only if its enabled and if we aren't targeting a
+// known bad platform. For example, wasm32 doesn't have a libc and the
+// performance of memchr on Windows is seemingly worse than the fallback
+// implementation.
+fn enable_libc() {
+ const NO_ARCH: &'static [&'static str] = &["wasm32", "windows"];
+ const NO_ENV: &'static [&'static str] = &["sgx"];
+
+ if !is_feature_set("LIBC") {
+ return;
+ }
+
+ let arch = match env::var("CARGO_CFG_TARGET_ARCH") {
+ Err(_) => return,
+ Ok(arch) => arch,
+ };
+ let env = match env::var("CARGO_CFG_TARGET_ENV") {
+ Err(_) => return,
+ Ok(env) => env,
+ };
+ if NO_ARCH.contains(&&*arch) || NO_ENV.contains(&&*env) {
+ return;
+ }
+
+ println!("cargo:rustc-cfg=memchr_libc");
+}
+
+fn is_feature_set(name: &str) -> bool {
+ is_env_set(&format!("CARGO_FEATURE_{}", name))
+}
+
+fn is_env_set(name: &str) -> bool {
+ env::var_os(name).is_some()
+}
diff --git a/third_party/rust/memchr/rustfmt.toml b/third_party/rust/memchr/rustfmt.toml
new file mode 100644
index 0000000000..aa37a218b9
--- /dev/null
+++ b/third_party/rust/memchr/rustfmt.toml
@@ -0,0 +1,2 @@
+max_width = 79
+use_small_heuristics = "max"
diff --git a/third_party/rust/memchr/src/c.rs b/third_party/rust/memchr/src/c.rs
new file mode 100644
index 0000000000..63feca979c
--- /dev/null
+++ b/third_party/rust/memchr/src/c.rs
@@ -0,0 +1,44 @@
+// This module defines safe wrappers around memchr (POSIX) and memrchr (GNU
+// extension).
+
+#![allow(dead_code)]
+
+extern crate libc;
+
+use self::libc::{c_int, c_void, size_t};
+
+pub fn memchr(needle: u8, haystack: &[u8]) -> Option<usize> {
+ let p = unsafe {
+ libc::memchr(
+ haystack.as_ptr() as *const c_void,
+ needle as c_int,
+ haystack.len() as size_t,
+ )
+ };
+ if p.is_null() {
+ None
+ } else {
+ Some(p as usize - (haystack.as_ptr() as usize))
+ }
+}
+
+// memrchr is a GNU extension. We know it's available on Linux, so start there.
+#[cfg(target_os = "linux")]
+pub fn memrchr(needle: u8, haystack: &[u8]) -> Option<usize> {
+ // GNU's memrchr() will - unlike memchr() - error if haystack is empty.
+ if haystack.is_empty() {
+ return None;
+ }
+ let p = unsafe {
+ libc::memrchr(
+ haystack.as_ptr() as *const c_void,
+ needle as c_int,
+ haystack.len() as size_t,
+ )
+ };
+ if p.is_null() {
+ None
+ } else {
+ Some(p as usize - (haystack.as_ptr() as usize))
+ }
+}
diff --git a/third_party/rust/memchr/src/fallback.rs b/third_party/rust/memchr/src/fallback.rs
new file mode 100644
index 0000000000..8bc32b27ba
--- /dev/null
+++ b/third_party/rust/memchr/src/fallback.rs
@@ -0,0 +1,330 @@
+// This module defines pure Rust platform independent implementations of all
+// the memchr routines. We do our best to make them fast. Some of them may even
+// get auto-vectorized.
+
+use core::cmp;
+use core::usize;
+
+#[cfg(target_pointer_width = "16")]
+const USIZE_BYTES: usize = 2;
+
+#[cfg(target_pointer_width = "32")]
+const USIZE_BYTES: usize = 4;
+
+#[cfg(target_pointer_width = "64")]
+const USIZE_BYTES: usize = 8;
+
+// The number of bytes to loop at in one iteration of memchr/memrchr.
+const LOOP_SIZE: usize = 2 * USIZE_BYTES;
+
+/// Return `true` if `x` contains any zero byte.
+///
+/// From *Matters Computational*, J. Arndt
+///
+/// "The idea is to subtract one from each of the bytes and then look for
+/// bytes where the borrow propagated all the way to the most significant
+/// bit."
+#[inline(always)]
+fn contains_zero_byte(x: usize) -> bool {
+ const LO_U64: u64 = 0x0101010101010101;
+ const HI_U64: u64 = 0x8080808080808080;
+
+ const LO_USIZE: usize = LO_U64 as usize;
+ const HI_USIZE: usize = HI_U64 as usize;
+
+ x.wrapping_sub(LO_USIZE) & !x & HI_USIZE != 0
+}
+
+/// Repeat the given byte into a word size number. That is, every 8 bits
+/// is equivalent to the given byte. For example, if `b` is `\x4E` or
+/// `01001110` in binary, then the returned value on a 32-bit system would be:
+/// `01001110_01001110_01001110_01001110`.
+#[inline(always)]
+fn repeat_byte(b: u8) -> usize {
+ (b as usize) * (usize::MAX / 255)
+}
+
+pub fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = repeat_byte(n1);
+ let confirm = |byte| byte == n1;
+ let loop_size = cmp::min(LOOP_SIZE, haystack.len());
+ let align = USIZE_BYTES - 1;
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ unsafe {
+ if haystack.len() < USIZE_BYTES {
+ return forward_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ let chunk = (ptr as *const usize).read_unaligned();
+ if contains_zero_byte(chunk ^ vn1) {
+ return forward_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & align));
+ debug_assert!(ptr > start_ptr);
+ debug_assert!(end_ptr.sub(USIZE_BYTES) >= start_ptr);
+ while loop_size == LOOP_SIZE && ptr <= end_ptr.sub(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
+
+ let a = *(ptr as *const usize);
+ let b = *(ptr.add(USIZE_BYTES) as *const usize);
+ let eqa = contains_zero_byte(a ^ vn1);
+ let eqb = contains_zero_byte(b ^ vn1);
+ if eqa || eqb {
+ break;
+ }
+ ptr = ptr.add(LOOP_SIZE);
+ }
+ forward_search(start_ptr, end_ptr, ptr, confirm)
+ }
+}
+
+/// Like `memchr`, but searches for two bytes instead of one.
+pub fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = repeat_byte(n1);
+ let vn2 = repeat_byte(n2);
+ let confirm = |byte| byte == n1 || byte == n2;
+ let align = USIZE_BYTES - 1;
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ unsafe {
+ if haystack.len() < USIZE_BYTES {
+ return forward_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ let chunk = (ptr as *const usize).read_unaligned();
+ let eq1 = contains_zero_byte(chunk ^ vn1);
+ let eq2 = contains_zero_byte(chunk ^ vn2);
+ if eq1 || eq2 {
+ return forward_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & align));
+ debug_assert!(ptr > start_ptr);
+ debug_assert!(end_ptr.sub(USIZE_BYTES) >= start_ptr);
+ while ptr <= end_ptr.sub(USIZE_BYTES) {
+ debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
+
+ let chunk = *(ptr as *const usize);
+ let eq1 = contains_zero_byte(chunk ^ vn1);
+ let eq2 = contains_zero_byte(chunk ^ vn2);
+ if eq1 || eq2 {
+ break;
+ }
+ ptr = ptr.add(USIZE_BYTES);
+ }
+ forward_search(start_ptr, end_ptr, ptr, confirm)
+ }
+}
+
+/// Like `memchr`, but searches for three bytes instead of one.
+pub fn memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = repeat_byte(n1);
+ let vn2 = repeat_byte(n2);
+ let vn3 = repeat_byte(n3);
+ let confirm = |byte| byte == n1 || byte == n2 || byte == n3;
+ let align = USIZE_BYTES - 1;
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ unsafe {
+ if haystack.len() < USIZE_BYTES {
+ return forward_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ let chunk = (ptr as *const usize).read_unaligned();
+ let eq1 = contains_zero_byte(chunk ^ vn1);
+ let eq2 = contains_zero_byte(chunk ^ vn2);
+ let eq3 = contains_zero_byte(chunk ^ vn3);
+ if eq1 || eq2 || eq3 {
+ return forward_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & align));
+ debug_assert!(ptr > start_ptr);
+ debug_assert!(end_ptr.sub(USIZE_BYTES) >= start_ptr);
+ while ptr <= end_ptr.sub(USIZE_BYTES) {
+ debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
+
+ let chunk = *(ptr as *const usize);
+ let eq1 = contains_zero_byte(chunk ^ vn1);
+ let eq2 = contains_zero_byte(chunk ^ vn2);
+ let eq3 = contains_zero_byte(chunk ^ vn3);
+ if eq1 || eq2 || eq3 {
+ break;
+ }
+ ptr = ptr.add(USIZE_BYTES);
+ }
+ forward_search(start_ptr, end_ptr, ptr, confirm)
+ }
+}
+
+/// Return the last index matching the byte `x` in `text`.
+pub fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = repeat_byte(n1);
+ let confirm = |byte| byte == n1;
+ let loop_size = cmp::min(LOOP_SIZE, haystack.len());
+ let align = USIZE_BYTES - 1;
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = end_ptr;
+
+ unsafe {
+ if haystack.len() < USIZE_BYTES {
+ return reverse_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ let chunk = (ptr.sub(USIZE_BYTES) as *const usize).read_unaligned();
+ if contains_zero_byte(chunk ^ vn1) {
+ return reverse_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ ptr = (end_ptr as usize & !align) as *const u8;
+ debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
+ while loop_size == LOOP_SIZE && ptr >= start_ptr.add(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
+
+ let a = *(ptr.sub(2 * USIZE_BYTES) as *const usize);
+ let b = *(ptr.sub(1 * USIZE_BYTES) as *const usize);
+ let eqa = contains_zero_byte(a ^ vn1);
+ let eqb = contains_zero_byte(b ^ vn1);
+ if eqa || eqb {
+ break;
+ }
+ ptr = ptr.sub(loop_size);
+ }
+ reverse_search(start_ptr, end_ptr, ptr, confirm)
+ }
+}
+
+/// Like `memrchr`, but searches for two bytes instead of one.
+pub fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = repeat_byte(n1);
+ let vn2 = repeat_byte(n2);
+ let confirm = |byte| byte == n1 || byte == n2;
+ let align = USIZE_BYTES - 1;
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = end_ptr;
+
+ unsafe {
+ if haystack.len() < USIZE_BYTES {
+ return reverse_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ let chunk = (ptr.sub(USIZE_BYTES) as *const usize).read_unaligned();
+ let eq1 = contains_zero_byte(chunk ^ vn1);
+ let eq2 = contains_zero_byte(chunk ^ vn2);
+ if eq1 || eq2 {
+ return reverse_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ ptr = (end_ptr as usize & !align) as *const u8;
+ debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
+ while ptr >= start_ptr.add(USIZE_BYTES) {
+ debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
+
+ let chunk = *(ptr.sub(USIZE_BYTES) as *const usize);
+ let eq1 = contains_zero_byte(chunk ^ vn1);
+ let eq2 = contains_zero_byte(chunk ^ vn2);
+ if eq1 || eq2 {
+ break;
+ }
+ ptr = ptr.sub(USIZE_BYTES);
+ }
+ reverse_search(start_ptr, end_ptr, ptr, confirm)
+ }
+}
+
+/// Like `memrchr`, but searches for three bytes instead of one.
+pub fn memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = repeat_byte(n1);
+ let vn2 = repeat_byte(n2);
+ let vn3 = repeat_byte(n3);
+ let confirm = |byte| byte == n1 || byte == n2 || byte == n3;
+ let align = USIZE_BYTES - 1;
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = end_ptr;
+
+ unsafe {
+ if haystack.len() < USIZE_BYTES {
+ return reverse_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ let chunk = (ptr.sub(USIZE_BYTES) as *const usize).read_unaligned();
+ let eq1 = contains_zero_byte(chunk ^ vn1);
+ let eq2 = contains_zero_byte(chunk ^ vn2);
+ let eq3 = contains_zero_byte(chunk ^ vn3);
+ if eq1 || eq2 || eq3 {
+ return reverse_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ ptr = (end_ptr as usize & !align) as *const u8;
+ debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
+ while ptr >= start_ptr.add(USIZE_BYTES) {
+ debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
+
+ let chunk = *(ptr.sub(USIZE_BYTES) as *const usize);
+ let eq1 = contains_zero_byte(chunk ^ vn1);
+ let eq2 = contains_zero_byte(chunk ^ vn2);
+ let eq3 = contains_zero_byte(chunk ^ vn3);
+ if eq1 || eq2 || eq3 {
+ break;
+ }
+ ptr = ptr.sub(USIZE_BYTES);
+ }
+ reverse_search(start_ptr, end_ptr, ptr, confirm)
+ }
+}
+
+#[inline(always)]
+unsafe fn forward_search<F: Fn(u8) -> bool>(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ mut ptr: *const u8,
+ confirm: F,
+) -> Option<usize> {
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr);
+
+ while ptr < end_ptr {
+ if confirm(*ptr) {
+ return Some(sub(ptr, start_ptr));
+ }
+ ptr = ptr.offset(1);
+ }
+ None
+}
+
+#[inline(always)]
+unsafe fn reverse_search<F: Fn(u8) -> bool>(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ mut ptr: *const u8,
+ confirm: F,
+) -> Option<usize> {
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr);
+
+ while ptr > start_ptr {
+ ptr = ptr.offset(-1);
+ if confirm(*ptr) {
+ return Some(sub(ptr, start_ptr));
+ }
+ }
+ None
+}
+
+/// Subtract `b` from `a` and return the difference. `a` should be greater than
+/// or equal to `b`.
+fn sub(a: *const u8, b: *const u8) -> usize {
+ debug_assert!(a >= b);
+ (a as usize) - (b as usize)
+}
diff --git a/third_party/rust/memchr/src/iter.rs b/third_party/rust/memchr/src/iter.rs
new file mode 100644
index 0000000000..6217ae4a09
--- /dev/null
+++ b/third_party/rust/memchr/src/iter.rs
@@ -0,0 +1,173 @@
+use {memchr, memchr2, memchr3, memrchr, memrchr2, memrchr3};
+
+macro_rules! iter_next {
+ // Common code for the memchr iterators:
+ // update haystack and position and produce the index
+ //
+ // self: &mut Self where Self is the iterator
+ // search_result: Option<usize> which is the result of the corresponding
+ // memchr function.
+ //
+ // Returns Option<usize> (the next iterator element)
+ ($self_:expr, $search_result:expr) => {
+ $search_result.map(move |index| {
+ // split and take the remaining back half
+ $self_.haystack = $self_.haystack.split_at(index + 1).1;
+ let found_position = $self_.position + index;
+ $self_.position = found_position + 1;
+ found_position
+ })
+ };
+}
+
+macro_rules! iter_next_back {
+ ($self_:expr, $search_result:expr) => {
+ $search_result.map(move |index| {
+ // split and take the remaining front half
+ $self_.haystack = $self_.haystack.split_at(index).0;
+ $self_.position + index
+ })
+ };
+}
+
+/// An iterator for `memchr`.
+pub struct Memchr<'a> {
+ needle: u8,
+ // The haystack to iterate over
+ haystack: &'a [u8],
+ // The index
+ position: usize,
+}
+
+impl<'a> Memchr<'a> {
+ /// Creates a new iterator that yields all positions of needle in haystack.
+ #[inline]
+ pub fn new(needle: u8, haystack: &[u8]) -> Memchr {
+ Memchr { needle: needle, haystack: haystack, position: 0 }
+ }
+}
+
+impl<'a> Iterator for Memchr<'a> {
+ type Item = usize;
+
+ #[inline]
+ fn next(&mut self) -> Option<usize> {
+ iter_next!(self, memchr(self.needle, self.haystack))
+ }
+
+ #[inline]
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ (0, Some(self.haystack.len()))
+ }
+}
+
+impl<'a> DoubleEndedIterator for Memchr<'a> {
+ #[inline]
+ fn next_back(&mut self) -> Option<Self::Item> {
+ iter_next_back!(self, memrchr(self.needle, self.haystack))
+ }
+}
+
+/// An iterator for `memchr2`.
+pub struct Memchr2<'a> {
+ needle1: u8,
+ needle2: u8,
+ // The haystack to iterate over
+ haystack: &'a [u8],
+ // The index
+ position: usize,
+}
+
+impl<'a> Memchr2<'a> {
+ /// Creates a new iterator that yields all positions of needle in haystack.
+ #[inline]
+ pub fn new(needle1: u8, needle2: u8, haystack: &[u8]) -> Memchr2 {
+ Memchr2 {
+ needle1: needle1,
+ needle2: needle2,
+ haystack: haystack,
+ position: 0,
+ }
+ }
+}
+
+impl<'a> Iterator for Memchr2<'a> {
+ type Item = usize;
+
+ #[inline]
+ fn next(&mut self) -> Option<usize> {
+ iter_next!(self, memchr2(self.needle1, self.needle2, self.haystack))
+ }
+
+ #[inline]
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ (0, Some(self.haystack.len()))
+ }
+}
+
+impl<'a> DoubleEndedIterator for Memchr2<'a> {
+ #[inline]
+ fn next_back(&mut self) -> Option<Self::Item> {
+ iter_next_back!(
+ self,
+ memrchr2(self.needle1, self.needle2, self.haystack)
+ )
+ }
+}
+
+/// An iterator for `memchr3`.
+pub struct Memchr3<'a> {
+ needle1: u8,
+ needle2: u8,
+ needle3: u8,
+ // The haystack to iterate over
+ haystack: &'a [u8],
+ // The index
+ position: usize,
+}
+
+impl<'a> Memchr3<'a> {
+ /// Create a new `Memchr3` that's initialized to zero with a haystack
+ #[inline]
+ pub fn new(
+ needle1: u8,
+ needle2: u8,
+ needle3: u8,
+ haystack: &[u8],
+ ) -> Memchr3 {
+ Memchr3 {
+ needle1: needle1,
+ needle2: needle2,
+ needle3: needle3,
+ haystack: haystack,
+ position: 0,
+ }
+ }
+}
+
+impl<'a> Iterator for Memchr3<'a> {
+ type Item = usize;
+
+ #[inline]
+ fn next(&mut self) -> Option<usize> {
+ iter_next!(
+ self,
+ memchr3(self.needle1, self.needle2, self.needle3, self.haystack)
+ )
+ }
+
+ #[inline]
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ (0, Some(self.haystack.len()))
+ }
+}
+
+impl<'a> DoubleEndedIterator for Memchr3<'a> {
+ #[inline]
+ fn next_back(&mut self) -> Option<Self::Item> {
+ iter_next_back!(
+ self,
+ memrchr3(self.needle1, self.needle2, self.needle3, self.haystack)
+ )
+ }
+}
diff --git a/third_party/rust/memchr/src/lib.rs b/third_party/rust/memchr/src/lib.rs
new file mode 100644
index 0000000000..fed7108734
--- /dev/null
+++ b/third_party/rust/memchr/src/lib.rs
@@ -0,0 +1,451 @@
+/*!
+The `memchr` crate provides heavily optimized routines for searching bytes.
+
+The `memchr` function is traditionally provided by libc, however, the
+performance of `memchr` can vary significantly depending on the specific
+implementation of libc that is used. They can range from manually tuned
+Assembly implementations (like that found in GNU's libc) all the way to
+non-vectorized C implementations (like that found in MUSL).
+
+To smooth out the differences between implementations of libc, at least
+on `x86_64` for Rust 1.27+, this crate provides its own implementation of
+`memchr` that should perform competitively with the one found in GNU's libc.
+The implementation is in pure Rust and has no dependency on a C compiler or an
+Assembler.
+
+Additionally, GNU libc also provides an extension, `memrchr`. This crate
+provides its own implementation of `memrchr` as well, on top of `memchr2`,
+`memchr3`, `memrchr2` and `memrchr3`. The difference between `memchr` and
+`memchr2` is that that `memchr2` permits finding all occurrences of two bytes
+instead of one. Similarly for `memchr3`.
+*/
+
+#![cfg_attr(not(feature = "std"), no_std)]
+#![deny(missing_docs)]
+#![doc(html_root_url = "https://docs.rs/memchr/2.0.0")]
+
+// Supporting 8-bit (or others) would be fine. If you need it, please submit a
+// bug report at https://github.com/BurntSushi/rust-memchr
+#[cfg(not(any(
+ target_pointer_width = "16",
+ target_pointer_width = "32",
+ target_pointer_width = "64"
+)))]
+compile_error!("memchr currently not supported on non-32 or non-64 bit");
+
+#[cfg(feature = "std")]
+extern crate core;
+
+#[cfg(all(test, all(not(miri), feature = "std")))]
+#[macro_use]
+extern crate quickcheck;
+
+use core::iter::Rev;
+
+pub use iter::{Memchr, Memchr2, Memchr3};
+
+// N.B. If you're looking for the cfg knobs for libc, see build.rs.
+#[cfg(memchr_libc)]
+mod c;
+#[allow(dead_code)]
+mod fallback;
+mod iter;
+mod naive;
+#[cfg(all(test, all(not(miri), feature = "std")))]
+mod tests;
+#[cfg(all(test, any(miri, not(feature = "std"))))]
+#[path = "tests/miri.rs"]
+mod tests;
+#[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))]
+mod x86;
+
+/// An iterator over all occurrences of the needle in a haystack.
+#[inline]
+pub fn memchr_iter(needle: u8, haystack: &[u8]) -> Memchr {
+ Memchr::new(needle, haystack)
+}
+
+/// An iterator over all occurrences of the needles in a haystack.
+#[inline]
+pub fn memchr2_iter(needle1: u8, needle2: u8, haystack: &[u8]) -> Memchr2 {
+ Memchr2::new(needle1, needle2, haystack)
+}
+
+/// An iterator over all occurrences of the needles in a haystack.
+#[inline]
+pub fn memchr3_iter(
+ needle1: u8,
+ needle2: u8,
+ needle3: u8,
+ haystack: &[u8],
+) -> Memchr3 {
+ Memchr3::new(needle1, needle2, needle3, haystack)
+}
+
+/// An iterator over all occurrences of the needle in a haystack, in reverse.
+#[inline]
+pub fn memrchr_iter(needle: u8, haystack: &[u8]) -> Rev<Memchr> {
+ Memchr::new(needle, haystack).rev()
+}
+
+/// An iterator over all occurrences of the needles in a haystack, in reverse.
+#[inline]
+pub fn memrchr2_iter(
+ needle1: u8,
+ needle2: u8,
+ haystack: &[u8],
+) -> Rev<Memchr2> {
+ Memchr2::new(needle1, needle2, haystack).rev()
+}
+
+/// An iterator over all occurrences of the needles in a haystack, in reverse.
+#[inline]
+pub fn memrchr3_iter(
+ needle1: u8,
+ needle2: u8,
+ needle3: u8,
+ haystack: &[u8],
+) -> Rev<Memchr3> {
+ Memchr3::new(needle1, needle2, needle3, haystack).rev()
+}
+
+/// Search for the first occurrence of a byte in a slice.
+///
+/// This returns the index corresponding to the first occurrence of `needle` in
+/// `haystack`, or `None` if one is not found.
+///
+/// While this is operationally the same as something like
+/// `haystack.iter().position(|&b| b == needle)`, `memchr` will use a highly
+/// optimized routine that can be up to an order of magnitude faster in some
+/// cases.
+///
+/// # Example
+///
+/// This shows how to find the first position of a byte in a byte string.
+///
+/// ```
+/// use memchr::memchr;
+///
+/// let haystack = b"the quick brown fox";
+/// assert_eq!(memchr(b'k', haystack), Some(8));
+/// ```
+#[inline]
+pub fn memchr(needle: u8, haystack: &[u8]) -> Option<usize> {
+ #[cfg(miri)]
+ #[inline(always)]
+ fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
+ naive::memchr(n1, haystack)
+ }
+
+ #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))]
+ #[inline(always)]
+ fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
+ x86::memchr(n1, haystack)
+ }
+
+ #[cfg(all(
+ memchr_libc,
+ not(all(target_arch = "x86_64", memchr_runtime_simd)),
+ not(miri),
+ ))]
+ #[inline(always)]
+ fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
+ c::memchr(n1, haystack)
+ }
+
+ #[cfg(all(
+ not(memchr_libc),
+ not(all(target_arch = "x86_64", memchr_runtime_simd)),
+ not(miri),
+ ))]
+ #[inline(always)]
+ fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
+ fallback::memchr(n1, haystack)
+ }
+
+ if haystack.is_empty() {
+ None
+ } else {
+ imp(needle, haystack)
+ }
+}
+
+/// Like `memchr`, but searches for either of two bytes instead of just one.
+///
+/// This returns the index corresponding to the first occurrence of `needle1`
+/// or the first occurrence of `needle2` in `haystack` (whichever occurs
+/// earlier), or `None` if neither one is found.
+///
+/// While this is operationally the same as something like
+/// `haystack.iter().position(|&b| b == needle1 || b == needle2)`, `memchr2`
+/// will use a highly optimized routine that can be up to an order of magnitude
+/// faster in some cases.
+///
+/// # Example
+///
+/// This shows how to find the first position of either of two bytes in a byte
+/// string.
+///
+/// ```
+/// use memchr::memchr2;
+///
+/// let haystack = b"the quick brown fox";
+/// assert_eq!(memchr2(b'k', b'q', haystack), Some(4));
+/// ```
+#[inline]
+pub fn memchr2(needle1: u8, needle2: u8, haystack: &[u8]) -> Option<usize> {
+ #[cfg(miri)]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ naive::memchr2(n1, n2, haystack)
+ }
+
+ #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ x86::memchr2(n1, n2, haystack)
+ }
+
+ #[cfg(all(
+ not(all(target_arch = "x86_64", memchr_runtime_simd)),
+ not(miri),
+ ))]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ fallback::memchr2(n1, n2, haystack)
+ }
+
+ if haystack.is_empty() {
+ None
+ } else {
+ imp(needle1, needle2, haystack)
+ }
+}
+
+/// Like `memchr`, but searches for any of three bytes instead of just one.
+///
+/// This returns the index corresponding to the first occurrence of `needle1`,
+/// the first occurrence of `needle2`, or the first occurrence of `needle3` in
+/// `haystack` (whichever occurs earliest), or `None` if none are found.
+///
+/// While this is operationally the same as something like
+/// `haystack.iter().position(|&b| b == needle1 || b == needle2 ||
+/// b == needle3)`, `memchr3` will use a highly optimized routine that can be
+/// up to an order of magnitude faster in some cases.
+///
+/// # Example
+///
+/// This shows how to find the first position of any of three bytes in a byte
+/// string.
+///
+/// ```
+/// use memchr::memchr3;
+///
+/// let haystack = b"the quick brown fox";
+/// assert_eq!(memchr3(b'k', b'q', b'e', haystack), Some(2));
+/// ```
+#[inline]
+pub fn memchr3(
+ needle1: u8,
+ needle2: u8,
+ needle3: u8,
+ haystack: &[u8],
+) -> Option<usize> {
+ #[cfg(miri)]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ naive::memchr3(n1, n2, n3, haystack)
+ }
+
+ #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ x86::memchr3(n1, n2, n3, haystack)
+ }
+
+ #[cfg(all(
+ not(all(target_arch = "x86_64", memchr_runtime_simd)),
+ not(miri),
+ ))]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ fallback::memchr3(n1, n2, n3, haystack)
+ }
+
+ if haystack.is_empty() {
+ None
+ } else {
+ imp(needle1, needle2, needle3, haystack)
+ }
+}
+
+/// Search for the last occurrence of a byte in a slice.
+///
+/// This returns the index corresponding to the last occurrence of `needle` in
+/// `haystack`, or `None` if one is not found.
+///
+/// While this is operationally the same as something like
+/// `haystack.iter().rposition(|&b| b == needle)`, `memrchr` will use a highly
+/// optimized routine that can be up to an order of magnitude faster in some
+/// cases.
+///
+/// # Example
+///
+/// This shows how to find the last position of a byte in a byte string.
+///
+/// ```
+/// use memchr::memrchr;
+///
+/// let haystack = b"the quick brown fox";
+/// assert_eq!(memrchr(b'o', haystack), Some(17));
+/// ```
+#[inline]
+pub fn memrchr(needle: u8, haystack: &[u8]) -> Option<usize> {
+ #[cfg(miri)]
+ #[inline(always)]
+ fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
+ naive::memrchr(n1, haystack)
+ }
+
+ #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))]
+ #[inline(always)]
+ fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
+ x86::memrchr(n1, haystack)
+ }
+
+ #[cfg(all(
+ memchr_libc,
+ target_os = "linux",
+ not(all(target_arch = "x86_64", memchr_runtime_simd)),
+ not(miri)
+ ))]
+ #[inline(always)]
+ fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
+ c::memrchr(n1, haystack)
+ }
+
+ #[cfg(all(
+ not(all(memchr_libc, target_os = "linux")),
+ not(all(target_arch = "x86_64", memchr_runtime_simd)),
+ not(miri),
+ ))]
+ #[inline(always)]
+ fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
+ fallback::memrchr(n1, haystack)
+ }
+
+ if haystack.is_empty() {
+ None
+ } else {
+ imp(needle, haystack)
+ }
+}
+
+/// Like `memrchr`, but searches for either of two bytes instead of just one.
+///
+/// This returns the index corresponding to the last occurrence of `needle1`
+/// or the last occurrence of `needle2` in `haystack` (whichever occurs later),
+/// or `None` if neither one is found.
+///
+/// While this is operationally the same as something like
+/// `haystack.iter().rposition(|&b| b == needle1 || b == needle2)`, `memrchr2`
+/// will use a highly optimized routine that can be up to an order of magnitude
+/// faster in some cases.
+///
+/// # Example
+///
+/// This shows how to find the last position of either of two bytes in a byte
+/// string.
+///
+/// ```
+/// use memchr::memrchr2;
+///
+/// let haystack = b"the quick brown fox";
+/// assert_eq!(memrchr2(b'k', b'q', haystack), Some(8));
+/// ```
+#[inline]
+pub fn memrchr2(needle1: u8, needle2: u8, haystack: &[u8]) -> Option<usize> {
+ #[cfg(miri)]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ naive::memrchr2(n1, n2, haystack)
+ }
+
+ #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ x86::memrchr2(n1, n2, haystack)
+ }
+
+ #[cfg(all(
+ not(all(target_arch = "x86_64", memchr_runtime_simd)),
+ not(miri),
+ ))]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ fallback::memrchr2(n1, n2, haystack)
+ }
+
+ if haystack.is_empty() {
+ None
+ } else {
+ imp(needle1, needle2, haystack)
+ }
+}
+
+/// Like `memrchr`, but searches for any of three bytes instead of just one.
+///
+/// This returns the index corresponding to the last occurrence of `needle1`,
+/// the last occurrence of `needle2`, or the last occurrence of `needle3` in
+/// `haystack` (whichever occurs later), or `None` if none are found.
+///
+/// While this is operationally the same as something like
+/// `haystack.iter().rposition(|&b| b == needle1 || b == needle2 ||
+/// b == needle3)`, `memrchr3` will use a highly optimized routine that can be
+/// up to an order of magnitude faster in some cases.
+///
+/// # Example
+///
+/// This shows how to find the last position of any of three bytes in a byte
+/// string.
+///
+/// ```
+/// use memchr::memrchr3;
+///
+/// let haystack = b"the quick brown fox";
+/// assert_eq!(memrchr3(b'k', b'q', b'e', haystack), Some(8));
+/// ```
+#[inline]
+pub fn memrchr3(
+ needle1: u8,
+ needle2: u8,
+ needle3: u8,
+ haystack: &[u8],
+) -> Option<usize> {
+ #[cfg(miri)]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ naive::memrchr3(n1, n2, n3, haystack)
+ }
+
+ #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ x86::memrchr3(n1, n2, n3, haystack)
+ }
+
+ #[cfg(all(
+ not(all(target_arch = "x86_64", memchr_runtime_simd)),
+ not(miri),
+ ))]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ fallback::memrchr3(n1, n2, n3, haystack)
+ }
+
+ if haystack.is_empty() {
+ None
+ } else {
+ imp(needle1, needle2, needle3, haystack)
+ }
+}
diff --git a/third_party/rust/memchr/src/naive.rs b/third_party/rust/memchr/src/naive.rs
new file mode 100644
index 0000000000..3f3053d481
--- /dev/null
+++ b/third_party/rust/memchr/src/naive.rs
@@ -0,0 +1,25 @@
+#![allow(dead_code)]
+
+pub fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ haystack.iter().position(|&b| b == n1)
+}
+
+pub fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ haystack.iter().position(|&b| b == n1 || b == n2)
+}
+
+pub fn memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ haystack.iter().position(|&b| b == n1 || b == n2 || b == n3)
+}
+
+pub fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ haystack.iter().rposition(|&b| b == n1)
+}
+
+pub fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ haystack.iter().rposition(|&b| b == n1 || b == n2)
+}
+
+pub fn memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ haystack.iter().rposition(|&b| b == n1 || b == n2 || b == n3)
+}
diff --git a/third_party/rust/memchr/src/tests/iter.rs b/third_party/rust/memchr/src/tests/iter.rs
new file mode 100644
index 0000000000..8f335003b9
--- /dev/null
+++ b/third_party/rust/memchr/src/tests/iter.rs
@@ -0,0 +1,229 @@
+use tests::memchr_tests;
+use {Memchr, Memchr2, Memchr3};
+
+#[test]
+fn memchr1_iter() {
+ for test in memchr_tests() {
+ test.iter_one(false, Memchr::new);
+ }
+}
+
+#[test]
+fn memchr2_iter() {
+ for test in memchr_tests() {
+ test.iter_two(false, Memchr2::new);
+ }
+}
+
+#[test]
+fn memchr3_iter() {
+ for test in memchr_tests() {
+ test.iter_three(false, Memchr3::new);
+ }
+}
+
+#[test]
+fn memrchr1_iter() {
+ for test in memchr_tests() {
+ test.iter_one(true, |n1, corpus| Memchr::new(n1, corpus).rev());
+ }
+}
+
+#[test]
+fn memrchr2_iter() {
+ for test in memchr_tests() {
+ test.iter_two(true, |n1, n2, corpus| {
+ Memchr2::new(n1, n2, corpus).rev()
+ })
+ }
+}
+
+#[test]
+fn memrchr3_iter() {
+ for test in memchr_tests() {
+ test.iter_three(true, |n1, n2, n3, corpus| {
+ Memchr3::new(n1, n2, n3, corpus).rev()
+ })
+ }
+}
+
+quickcheck! {
+ fn qc_memchr_double_ended_iter(
+ needle: u8, data: Vec<u8>, take_side: Vec<bool>
+ ) -> bool {
+ // make nonempty
+ let mut take_side = take_side;
+ if take_side.is_empty() { take_side.push(true) };
+
+ let iter = Memchr::new(needle, &data);
+ let all_found = double_ended_take(
+ iter, take_side.iter().cycle().cloned());
+
+ all_found.iter().cloned().eq(positions1(needle, &data))
+ }
+
+ fn qc_memchr2_double_ended_iter(
+ needle1: u8, needle2: u8, data: Vec<u8>, take_side: Vec<bool>
+ ) -> bool {
+ // make nonempty
+ let mut take_side = take_side;
+ if take_side.is_empty() { take_side.push(true) };
+
+ let iter = Memchr2::new(needle1, needle2, &data);
+ let all_found = double_ended_take(
+ iter, take_side.iter().cycle().cloned());
+
+ all_found.iter().cloned().eq(positions2(needle1, needle2, &data))
+ }
+
+ fn qc_memchr3_double_ended_iter(
+ needle1: u8, needle2: u8, needle3: u8,
+ data: Vec<u8>, take_side: Vec<bool>
+ ) -> bool {
+ // make nonempty
+ let mut take_side = take_side;
+ if take_side.is_empty() { take_side.push(true) };
+
+ let iter = Memchr3::new(needle1, needle2, needle3, &data);
+ let all_found = double_ended_take(
+ iter, take_side.iter().cycle().cloned());
+
+ all_found
+ .iter()
+ .cloned()
+ .eq(positions3(needle1, needle2, needle3, &data))
+ }
+
+ fn qc_memchr1_iter(data: Vec<u8>) -> bool {
+ let needle = 0;
+ let answer = positions1(needle, &data);
+ answer.eq(Memchr::new(needle, &data))
+ }
+
+ fn qc_memchr1_rev_iter(data: Vec<u8>) -> bool {
+ let needle = 0;
+ let answer = positions1(needle, &data);
+ answer.rev().eq(Memchr::new(needle, &data).rev())
+ }
+
+ fn qc_memchr2_iter(data: Vec<u8>) -> bool {
+ let needle1 = 0;
+ let needle2 = 1;
+ let answer = positions2(needle1, needle2, &data);
+ answer.eq(Memchr2::new(needle1, needle2, &data))
+ }
+
+ fn qc_memchr2_rev_iter(data: Vec<u8>) -> bool {
+ let needle1 = 0;
+ let needle2 = 1;
+ let answer = positions2(needle1, needle2, &data);
+ answer.rev().eq(Memchr2::new(needle1, needle2, &data).rev())
+ }
+
+ fn qc_memchr3_iter(data: Vec<u8>) -> bool {
+ let needle1 = 0;
+ let needle2 = 1;
+ let needle3 = 2;
+ let answer = positions3(needle1, needle2, needle3, &data);
+ answer.eq(Memchr3::new(needle1, needle2, needle3, &data))
+ }
+
+ fn qc_memchr3_rev_iter(data: Vec<u8>) -> bool {
+ let needle1 = 0;
+ let needle2 = 1;
+ let needle3 = 2;
+ let answer = positions3(needle1, needle2, needle3, &data);
+ answer.rev().eq(Memchr3::new(needle1, needle2, needle3, &data).rev())
+ }
+
+ fn qc_memchr1_iter_size_hint(data: Vec<u8>) -> bool {
+ // test that the size hint is within reasonable bounds
+ let needle = 0;
+ let mut iter = Memchr::new(needle, &data);
+ let mut real_count = data
+ .iter()
+ .filter(|&&elt| elt == needle)
+ .count();
+
+ while let Some(index) = iter.next() {
+ real_count -= 1;
+ let (lower, upper) = iter.size_hint();
+ assert!(lower <= real_count);
+ assert!(upper.unwrap() >= real_count);
+ assert!(upper.unwrap() <= data.len() - index);
+ }
+ true
+ }
+}
+
+// take items from a DEI, taking front for each true and back for each false.
+// Return a vector with the concatenation of the fronts and the reverse of the
+// backs.
+fn double_ended_take<I, J>(mut iter: I, take_side: J) -> Vec<I::Item>
+where
+ I: DoubleEndedIterator,
+ J: Iterator<Item = bool>,
+{
+ let mut found_front = Vec::new();
+ let mut found_back = Vec::new();
+
+ for take_front in take_side {
+ if take_front {
+ if let Some(pos) = iter.next() {
+ found_front.push(pos);
+ } else {
+ break;
+ }
+ } else {
+ if let Some(pos) = iter.next_back() {
+ found_back.push(pos);
+ } else {
+ break;
+ }
+ };
+ }
+
+ let mut all_found = found_front;
+ all_found.extend(found_back.into_iter().rev());
+ all_found
+}
+
+// return an iterator of the 0-based indices of haystack that match the needle
+fn positions1<'a>(
+ n1: u8,
+ haystack: &'a [u8],
+) -> Box<dyn DoubleEndedIterator<Item = usize> + 'a> {
+ let it = haystack
+ .iter()
+ .enumerate()
+ .filter(move |&(_, &b)| b == n1)
+ .map(|t| t.0);
+ Box::new(it)
+}
+
+fn positions2<'a>(
+ n1: u8,
+ n2: u8,
+ haystack: &'a [u8],
+) -> Box<dyn DoubleEndedIterator<Item = usize> + 'a> {
+ let it = haystack
+ .iter()
+ .enumerate()
+ .filter(move |&(_, &b)| b == n1 || b == n2)
+ .map(|t| t.0);
+ Box::new(it)
+}
+
+fn positions3<'a>(
+ n1: u8,
+ n2: u8,
+ n3: u8,
+ haystack: &'a [u8],
+) -> Box<dyn DoubleEndedIterator<Item = usize> + 'a> {
+ let it = haystack
+ .iter()
+ .enumerate()
+ .filter(move |&(_, &b)| b == n1 || b == n2 || b == n3)
+ .map(|t| t.0);
+ Box::new(it)
+}
diff --git a/third_party/rust/memchr/src/tests/memchr.rs b/third_party/rust/memchr/src/tests/memchr.rs
new file mode 100644
index 0000000000..87d3d14edd
--- /dev/null
+++ b/third_party/rust/memchr/src/tests/memchr.rs
@@ -0,0 +1,131 @@
+use fallback;
+use naive;
+use {memchr, memchr2, memchr3, memrchr, memrchr2, memrchr3};
+
+use tests::memchr_tests;
+
+#[test]
+fn memchr1_find() {
+ for test in memchr_tests() {
+ test.one(false, memchr);
+ }
+}
+
+#[test]
+fn memchr1_fallback_find() {
+ for test in memchr_tests() {
+ test.one(false, fallback::memchr);
+ }
+}
+
+#[test]
+fn memchr2_find() {
+ for test in memchr_tests() {
+ test.two(false, memchr2);
+ }
+}
+
+#[test]
+fn memchr2_fallback_find() {
+ for test in memchr_tests() {
+ test.two(false, fallback::memchr2);
+ }
+}
+
+#[test]
+fn memchr3_find() {
+ for test in memchr_tests() {
+ test.three(false, memchr3);
+ }
+}
+
+#[test]
+fn memchr3_fallback_find() {
+ for test in memchr_tests() {
+ test.three(false, fallback::memchr3);
+ }
+}
+
+#[test]
+fn memrchr1_find() {
+ for test in memchr_tests() {
+ test.one(true, memrchr);
+ }
+}
+
+#[test]
+fn memrchr1_fallback_find() {
+ for test in memchr_tests() {
+ test.one(true, fallback::memrchr);
+ }
+}
+
+#[test]
+fn memrchr2_find() {
+ for test in memchr_tests() {
+ test.two(true, memrchr2);
+ }
+}
+
+#[test]
+fn memrchr2_fallback_find() {
+ for test in memchr_tests() {
+ test.two(true, fallback::memrchr2);
+ }
+}
+
+#[test]
+fn memrchr3_find() {
+ for test in memchr_tests() {
+ test.three(true, memrchr3);
+ }
+}
+
+#[test]
+fn memrchr3_fallback_find() {
+ for test in memchr_tests() {
+ test.three(true, fallback::memrchr3);
+ }
+}
+
+quickcheck! {
+ fn qc_memchr1_matches_naive(n1: u8, corpus: Vec<u8>) -> bool {
+ memchr(n1, &corpus) == naive::memchr(n1, &corpus)
+ }
+}
+
+quickcheck! {
+ fn qc_memchr2_matches_naive(n1: u8, n2: u8, corpus: Vec<u8>) -> bool {
+ memchr2(n1, n2, &corpus) == naive::memchr2(n1, n2, &corpus)
+ }
+}
+
+quickcheck! {
+ fn qc_memchr3_matches_naive(
+ n1: u8, n2: u8, n3: u8,
+ corpus: Vec<u8>
+ ) -> bool {
+ memchr3(n1, n2, n3, &corpus) == naive::memchr3(n1, n2, n3, &corpus)
+ }
+}
+
+quickcheck! {
+ fn qc_memrchr1_matches_naive(n1: u8, corpus: Vec<u8>) -> bool {
+ memrchr(n1, &corpus) == naive::memrchr(n1, &corpus)
+ }
+}
+
+quickcheck! {
+ fn qc_memrchr2_matches_naive(n1: u8, n2: u8, corpus: Vec<u8>) -> bool {
+ memrchr2(n1, n2, &corpus) == naive::memrchr2(n1, n2, &corpus)
+ }
+}
+
+quickcheck! {
+ fn qc_memrchr3_matches_naive(
+ n1: u8, n2: u8, n3: u8,
+ corpus: Vec<u8>
+ ) -> bool {
+ memrchr3(n1, n2, n3, &corpus) == naive::memrchr3(n1, n2, n3, &corpus)
+ }
+}
diff --git a/third_party/rust/memchr/src/tests/miri.rs b/third_party/rust/memchr/src/tests/miri.rs
new file mode 100644
index 0000000000..879ef938ec
--- /dev/null
+++ b/third_party/rust/memchr/src/tests/miri.rs
@@ -0,0 +1,19 @@
+// Simple tests using MIRI
+
+use crate::{memchr, memchr2, memchr3, memrchr, memrchr2, memrchr3};
+
+#[test]
+fn test_with_miri() {
+ assert_eq!(memchr(b'a', b"abcda"), Some(0));
+ assert_eq!(memchr(b'z', b"abcda"), None);
+ assert_eq!(memchr2(b'a', b'z', b"abcda"), Some(0));
+ assert_eq!(memchr2(b'z', b'y', b"abcda"), None);
+ assert_eq!(memchr3(b'a', b'z', b'b', b"abcda"), Some(0));
+ assert_eq!(memchr3(b'z', b'y', b'x', b"abcda"), None);
+ assert_eq!(memrchr(b'a', b"abcda"), Some(4));
+ assert_eq!(memrchr(b'z', b"abcda"), None);
+ assert_eq!(memrchr2(b'a', b'z', b"abcda"), Some(4));
+ assert_eq!(memrchr2(b'z', b'y', b"abcda"), None);
+ assert_eq!(memrchr3(b'a', b'z', b'b', b"abcda"), Some(4));
+ assert_eq!(memrchr3(b'z', b'y', b'x', b"abcda"), None);
+}
diff --git a/third_party/rust/memchr/src/tests/mod.rs b/third_party/rust/memchr/src/tests/mod.rs
new file mode 100644
index 0000000000..82c1a248e9
--- /dev/null
+++ b/third_party/rust/memchr/src/tests/mod.rs
@@ -0,0 +1,362 @@
+use std::iter::repeat;
+
+mod iter;
+mod memchr;
+
+#[cfg(target_endian = "little")]
+#[test]
+fn byte_order() {
+ eprintln!("LITTLE ENDIAN");
+}
+
+#[cfg(target_endian = "big")]
+#[test]
+fn byte_order() {
+ eprintln!("BIG ENDIAN");
+}
+
+/// Create a sequence of tests that should be run by memchr implementations.
+fn memchr_tests() -> Vec<MemchrTest> {
+ let mut tests = Vec::new();
+ for statict in MEMCHR_TESTS {
+ assert!(!statict.corpus.contains("%"), "% is not allowed in corpora");
+ assert!(!statict.corpus.contains("#"), "# is not allowed in corpora");
+ assert!(!statict.needles.contains(&b'%'), "% is an invalid needle");
+ assert!(!statict.needles.contains(&b'#'), "# is an invalid needle");
+
+ let t = MemchrTest {
+ corpus: statict.corpus.to_string(),
+ needles: statict.needles.to_vec(),
+ positions: statict.positions.to_vec(),
+ };
+ tests.push(t.clone());
+ tests.extend(t.expand());
+ }
+ tests
+}
+
+/// A set of tests for memchr-like functions.
+///
+/// These tests mostly try to cover the short string cases. We cover the longer
+/// string cases via the benchmarks (which are tests themselves), via
+/// quickcheck tests and via automatic expansion of each test case (by
+/// increasing the corpus size). Finally, we cover different alignment cases
+/// in the tests by varying the starting point of the slice.
+const MEMCHR_TESTS: &[MemchrTestStatic] = &[
+ // one needle (applied to memchr + memchr2 + memchr3)
+ MemchrTestStatic { corpus: "a", needles: &[b'a'], positions: &[0] },
+ MemchrTestStatic { corpus: "aa", needles: &[b'a'], positions: &[0, 1] },
+ MemchrTestStatic {
+ corpus: "aaa",
+ needles: &[b'a'],
+ positions: &[0, 1, 2],
+ },
+ MemchrTestStatic { corpus: "", needles: &[b'a'], positions: &[] },
+ MemchrTestStatic { corpus: "z", needles: &[b'a'], positions: &[] },
+ MemchrTestStatic { corpus: "zz", needles: &[b'a'], positions: &[] },
+ MemchrTestStatic { corpus: "zza", needles: &[b'a'], positions: &[2] },
+ MemchrTestStatic { corpus: "zaza", needles: &[b'a'], positions: &[1, 3] },
+ MemchrTestStatic { corpus: "zzza", needles: &[b'a'], positions: &[3] },
+ MemchrTestStatic { corpus: "\x00a", needles: &[b'a'], positions: &[1] },
+ MemchrTestStatic { corpus: "\x00", needles: &[b'\x00'], positions: &[0] },
+ MemchrTestStatic {
+ corpus: "\x00\x00",
+ needles: &[b'\x00'],
+ positions: &[0, 1],
+ },
+ MemchrTestStatic {
+ corpus: "\x00a\x00",
+ needles: &[b'\x00'],
+ positions: &[0, 2],
+ },
+ MemchrTestStatic {
+ corpus: "zzzzzzzzzzzzzzzza",
+ needles: &[b'a'],
+ positions: &[16],
+ },
+ MemchrTestStatic {
+ corpus: "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzza",
+ needles: &[b'a'],
+ positions: &[32],
+ },
+ // two needles (applied to memchr2 + memchr3)
+ MemchrTestStatic {
+ corpus: "az",
+ needles: &[b'a', b'z'],
+ positions: &[0, 1],
+ },
+ MemchrTestStatic {
+ corpus: "az",
+ needles: &[b'a', b'z'],
+ positions: &[0, 1],
+ },
+ MemchrTestStatic { corpus: "az", needles: &[b'x', b'y'], positions: &[] },
+ MemchrTestStatic { corpus: "az", needles: &[b'a', b'y'], positions: &[0] },
+ MemchrTestStatic { corpus: "az", needles: &[b'x', b'z'], positions: &[1] },
+ MemchrTestStatic {
+ corpus: "yyyyaz",
+ needles: &[b'a', b'z'],
+ positions: &[4, 5],
+ },
+ MemchrTestStatic {
+ corpus: "yyyyaz",
+ needles: &[b'z', b'a'],
+ positions: &[4, 5],
+ },
+ // three needles (applied to memchr3)
+ MemchrTestStatic {
+ corpus: "xyz",
+ needles: &[b'x', b'y', b'z'],
+ positions: &[0, 1, 2],
+ },
+ MemchrTestStatic {
+ corpus: "zxy",
+ needles: &[b'x', b'y', b'z'],
+ positions: &[0, 1, 2],
+ },
+ MemchrTestStatic {
+ corpus: "zxy",
+ needles: &[b'x', b'a', b'z'],
+ positions: &[0, 1],
+ },
+ MemchrTestStatic {
+ corpus: "zxy",
+ needles: &[b't', b'a', b'z'],
+ positions: &[0],
+ },
+ MemchrTestStatic {
+ corpus: "yxz",
+ needles: &[b't', b'a', b'z'],
+ positions: &[2],
+ },
+];
+
+/// A description of a test on a memchr like function.
+#[derive(Clone, Debug)]
+struct MemchrTest {
+ /// The thing to search. We use `&str` instead of `&[u8]` because they
+ /// are nicer to write in tests, and we don't miss much since memchr
+ /// doesn't care about UTF-8.
+ ///
+ /// Corpora cannot contain either '%' or '#'. We use these bytes when
+ /// expanding test cases into many test cases, and we assume they are not
+ /// used. If they are used, `memchr_tests` will panic.
+ corpus: String,
+ /// The needles to search for. This is intended to be an "alternation" of
+ /// needles. The number of needles may cause this test to be skipped for
+ /// some memchr variants. For example, a test with 2 needles cannot be used
+ /// to test `memchr`, but can be used to test `memchr2` and `memchr3`.
+ /// However, a test with only 1 needle can be used to test all of `memchr`,
+ /// `memchr2` and `memchr3`. We achieve this by filling in the needles with
+ /// bytes that we never used in the corpus (such as '#').
+ needles: Vec<u8>,
+ /// The positions expected to match for all of the needles.
+ positions: Vec<usize>,
+}
+
+/// Like MemchrTest, but easier to define as a constant.
+#[derive(Clone, Debug)]
+struct MemchrTestStatic {
+ corpus: &'static str,
+ needles: &'static [u8],
+ positions: &'static [usize],
+}
+
+impl MemchrTest {
+ fn one<F: Fn(u8, &[u8]) -> Option<usize>>(&self, reverse: bool, f: F) {
+ let needles = match self.needles(1) {
+ None => return,
+ Some(needles) => needles,
+ };
+ // We test different alignments here. Since some implementations use
+ // AVX2, which can read 32 bytes at a time, we test at least that.
+ // Moreover, with loop unrolling, we sometimes process 64 (sse2) or 128
+ // (avx) bytes at a time, so we include that in our offsets as well.
+ //
+ // You might think this would cause most needles to not be found, but
+ // we actually expand our tests to include corpus sizes all the way up
+ // to >500 bytes, so we should exericse most branches.
+ for align in 0..130 {
+ let corpus = self.corpus(align);
+ assert_eq!(
+ self.positions(align, reverse).get(0).cloned(),
+ f(needles[0], corpus.as_bytes()),
+ "search for {:?} failed in: {:?} (len: {}, alignment: {})",
+ needles[0] as char,
+ corpus,
+ corpus.len(),
+ align
+ );
+ }
+ }
+
+ fn two<F: Fn(u8, u8, &[u8]) -> Option<usize>>(&self, reverse: bool, f: F) {
+ let needles = match self.needles(2) {
+ None => return,
+ Some(needles) => needles,
+ };
+ for align in 0..130 {
+ let corpus = self.corpus(align);
+ assert_eq!(
+ self.positions(align, reverse).get(0).cloned(),
+ f(needles[0], needles[1], corpus.as_bytes()),
+ "search for {:?}|{:?} failed in: {:?} \
+ (len: {}, alignment: {})",
+ needles[0] as char,
+ needles[1] as char,
+ corpus,
+ corpus.len(),
+ align
+ );
+ }
+ }
+
+ fn three<F: Fn(u8, u8, u8, &[u8]) -> Option<usize>>(
+ &self,
+ reverse: bool,
+ f: F,
+ ) {
+ let needles = match self.needles(3) {
+ None => return,
+ Some(needles) => needles,
+ };
+ for align in 0..130 {
+ let corpus = self.corpus(align);
+ assert_eq!(
+ self.positions(align, reverse).get(0).cloned(),
+ f(needles[0], needles[1], needles[2], corpus.as_bytes()),
+ "search for {:?}|{:?}|{:?} failed in: {:?} \
+ (len: {}, alignment: {})",
+ needles[0] as char,
+ needles[1] as char,
+ needles[2] as char,
+ corpus,
+ corpus.len(),
+ align
+ );
+ }
+ }
+
+ fn iter_one<'a, I, F>(&'a self, reverse: bool, f: F)
+ where
+ F: FnOnce(u8, &'a [u8]) -> I,
+ I: Iterator<Item = usize>,
+ {
+ if let Some(ns) = self.needles(1) {
+ self.iter(reverse, f(ns[0], self.corpus.as_bytes()));
+ }
+ }
+
+ fn iter_two<'a, I, F>(&'a self, reverse: bool, f: F)
+ where
+ F: FnOnce(u8, u8, &'a [u8]) -> I,
+ I: Iterator<Item = usize>,
+ {
+ if let Some(ns) = self.needles(2) {
+ self.iter(reverse, f(ns[0], ns[1], self.corpus.as_bytes()));
+ }
+ }
+
+ fn iter_three<'a, I, F>(&'a self, reverse: bool, f: F)
+ where
+ F: FnOnce(u8, u8, u8, &'a [u8]) -> I,
+ I: Iterator<Item = usize>,
+ {
+ if let Some(ns) = self.needles(3) {
+ self.iter(reverse, f(ns[0], ns[1], ns[2], self.corpus.as_bytes()));
+ }
+ }
+
+ /// Test that the positions yielded by the given iterator match the
+ /// positions in this test. If reverse is true, then reverse the positions
+ /// before comparing them.
+ fn iter<I: Iterator<Item = usize>>(&self, reverse: bool, it: I) {
+ assert_eq!(
+ self.positions(0, reverse),
+ it.collect::<Vec<usize>>(),
+ r"search for {:?} failed in: {:?}",
+ self.needles.iter().map(|&b| b as char).collect::<Vec<char>>(),
+ self.corpus
+ );
+ }
+
+ /// Expand this test into many variations of the same test.
+ ///
+ /// In particular, this will generate more tests with larger corpus sizes.
+ /// The expected positions are updated to maintain the integrity of the
+ /// test.
+ ///
+ /// This is important in testing a memchr implementation, because there are
+ /// often different cases depending on the length of the corpus.
+ ///
+ /// Note that we extend the corpus by adding `%` bytes, which we
+ /// don't otherwise use as a needle.
+ fn expand(&self) -> Vec<MemchrTest> {
+ let mut more = Vec::new();
+
+ // Add bytes to the start of the corpus.
+ for i in 1..515 {
+ let mut t = self.clone();
+ let mut new_corpus: String = repeat('%').take(i).collect();
+ new_corpus.push_str(&t.corpus);
+ t.corpus = new_corpus;
+ t.positions = t.positions.into_iter().map(|p| p + i).collect();
+ more.push(t);
+ }
+ // Add bytes to the end of the corpus.
+ for i in 1..515 {
+ let mut t = self.clone();
+ let padding: String = repeat('%').take(i).collect();
+ t.corpus.push_str(&padding);
+ more.push(t);
+ }
+
+ more
+ }
+
+ /// Return the corpus at the given alignment.
+ ///
+ /// If the alignment exceeds the length of the corpus, then this returns
+ /// an empty slice.
+ fn corpus(&self, align: usize) -> &str {
+ self.corpus.get(align..).unwrap_or("")
+ }
+
+ /// Return exactly `count` needles from this test. If this test has less
+ /// than `count` needles, then add `#` until the number of needles
+ /// matches `count`. If this test has more than `count` needles, then
+ /// return `None` (because there is no way to use this test data for a
+ /// search using fewer needles).
+ fn needles(&self, count: usize) -> Option<Vec<u8>> {
+ if self.needles.len() > count {
+ return None;
+ }
+
+ let mut needles = self.needles.to_vec();
+ for _ in needles.len()..count {
+ // we assume # is never used in tests.
+ needles.push(b'#');
+ }
+ Some(needles)
+ }
+
+ /// Return the positions in this test, reversed if `reverse` is true.
+ ///
+ /// If alignment is given, then all positions greater than or equal to that
+ /// alignment are offset by the alignment. Positions less than the
+ /// alignment are dropped.
+ fn positions(&self, align: usize, reverse: bool) -> Vec<usize> {
+ let positions = if reverse {
+ let mut positions = self.positions.to_vec();
+ positions.reverse();
+ positions
+ } else {
+ self.positions.to_vec()
+ };
+ positions
+ .into_iter()
+ .filter(|&p| p >= align)
+ .map(|p| p - align)
+ .collect()
+ }
+}
diff --git a/third_party/rust/memchr/src/x86/avx.rs b/third_party/rust/memchr/src/x86/avx.rs
new file mode 100644
index 0000000000..e3d8e8902e
--- /dev/null
+++ b/third_party/rust/memchr/src/x86/avx.rs
@@ -0,0 +1,703 @@
+use core::arch::x86_64::*;
+use core::cmp;
+use core::mem::size_of;
+
+use x86::sse2;
+
+const VECTOR_SIZE: usize = size_of::<__m256i>();
+const VECTOR_ALIGN: usize = VECTOR_SIZE - 1;
+
+// The number of bytes to loop at in one iteration of memchr/memrchr.
+const LOOP_SIZE: usize = 4 * VECTOR_SIZE;
+
+// The number of bytes to loop at in one iteration of memchr2/memrchr2 and
+// memchr3/memrchr3. There was no observable difference between 128 and 64
+// bytes in benchmarks. memchr3 in particular only gets a very slight speed up
+// from the loop unrolling.
+const LOOP_SIZE2: usize = 2 * VECTOR_SIZE;
+
+#[target_feature(enable = "avx2")]
+pub unsafe fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ // For a high level explanation for how this algorithm works, see the
+ // sse2 implementation. The avx implementation here is the same, but with
+ // 256-bit vectors instead of 128-bit vectors.
+
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ // For small haystacks, defer to the SSE2 implementation. Codegen
+ // suggests this completely avoids touching the AVX vectors.
+ return sse2::memchr(n1, haystack);
+ }
+
+ let vn1 = _mm256_set1_epi8(n1 as i8);
+ let loop_size = cmp::min(LOOP_SIZE, haystack.len());
+ if let Some(i) = forward_search1(start_ptr, end_ptr, ptr, vn1) {
+ return Some(i);
+ }
+
+ ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN));
+ debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr);
+ while loop_size == LOOP_SIZE && ptr <= end_ptr.sub(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ let a = _mm256_load_si256(ptr as *const __m256i);
+ let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i);
+ let c = _mm256_load_si256(ptr.add(2 * VECTOR_SIZE) as *const __m256i);
+ let d = _mm256_load_si256(ptr.add(3 * VECTOR_SIZE) as *const __m256i);
+ let eqa = _mm256_cmpeq_epi8(vn1, a);
+ let eqb = _mm256_cmpeq_epi8(vn1, b);
+ let eqc = _mm256_cmpeq_epi8(vn1, c);
+ let eqd = _mm256_cmpeq_epi8(vn1, d);
+ let or1 = _mm256_or_si256(eqa, eqb);
+ let or2 = _mm256_or_si256(eqc, eqd);
+ let or3 = _mm256_or_si256(or1, or2);
+ if _mm256_movemask_epi8(or3) != 0 {
+ let mut at = sub(ptr, start_ptr);
+ let mask = _mm256_movemask_epi8(eqa);
+ if mask != 0 {
+ return Some(at + forward_pos(mask));
+ }
+
+ at += VECTOR_SIZE;
+ let mask = _mm256_movemask_epi8(eqb);
+ if mask != 0 {
+ return Some(at + forward_pos(mask));
+ }
+
+ at += VECTOR_SIZE;
+ let mask = _mm256_movemask_epi8(eqc);
+ if mask != 0 {
+ return Some(at + forward_pos(mask));
+ }
+
+ at += VECTOR_SIZE;
+ let mask = _mm256_movemask_epi8(eqd);
+ debug_assert!(mask != 0);
+ return Some(at + forward_pos(mask));
+ }
+ ptr = ptr.add(loop_size);
+ }
+ while ptr <= end_ptr.sub(VECTOR_SIZE) {
+ debug_assert!(sub(end_ptr, ptr) >= VECTOR_SIZE);
+
+ if let Some(i) = forward_search1(start_ptr, end_ptr, ptr, vn1) {
+ return Some(i);
+ }
+ ptr = ptr.add(VECTOR_SIZE);
+ }
+ if ptr < end_ptr {
+ debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE);
+ ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr));
+ debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE);
+
+ return forward_search1(start_ptr, end_ptr, ptr, vn1);
+ }
+ None
+}
+
+#[target_feature(enable = "avx2")]
+pub unsafe fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = _mm256_set1_epi8(n1 as i8);
+ let vn2 = _mm256_set1_epi8(n2 as i8);
+ let len = haystack.len();
+ let loop_size = cmp::min(LOOP_SIZE2, len);
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr < end_ptr {
+ if *ptr == n1 || *ptr == n2 {
+ return Some(sub(ptr, start_ptr));
+ }
+ ptr = ptr.offset(1);
+ }
+ return None;
+ }
+
+ if let Some(i) = forward_search2(start_ptr, end_ptr, ptr, vn1, vn2) {
+ return Some(i);
+ }
+
+ ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN));
+ debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr);
+ while loop_size == LOOP_SIZE2 && ptr <= end_ptr.sub(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ let a = _mm256_load_si256(ptr as *const __m256i);
+ let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i);
+ let eqa1 = _mm256_cmpeq_epi8(vn1, a);
+ let eqb1 = _mm256_cmpeq_epi8(vn1, b);
+ let eqa2 = _mm256_cmpeq_epi8(vn2, a);
+ let eqb2 = _mm256_cmpeq_epi8(vn2, b);
+ let or1 = _mm256_or_si256(eqa1, eqb1);
+ let or2 = _mm256_or_si256(eqa2, eqb2);
+ let or3 = _mm256_or_si256(or1, or2);
+ if _mm256_movemask_epi8(or3) != 0 {
+ let mut at = sub(ptr, start_ptr);
+ let mask1 = _mm256_movemask_epi8(eqa1);
+ let mask2 = _mm256_movemask_epi8(eqa2);
+ if mask1 != 0 || mask2 != 0 {
+ return Some(at + forward_pos2(mask1, mask2));
+ }
+
+ at += VECTOR_SIZE;
+ let mask1 = _mm256_movemask_epi8(eqb1);
+ let mask2 = _mm256_movemask_epi8(eqb2);
+ return Some(at + forward_pos2(mask1, mask2));
+ }
+ ptr = ptr.add(loop_size);
+ }
+ while ptr <= end_ptr.sub(VECTOR_SIZE) {
+ if let Some(i) = forward_search2(start_ptr, end_ptr, ptr, vn1, vn2) {
+ return Some(i);
+ }
+ ptr = ptr.add(VECTOR_SIZE);
+ }
+ if ptr < end_ptr {
+ debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE);
+ ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr));
+ debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE);
+
+ return forward_search2(start_ptr, end_ptr, ptr, vn1, vn2);
+ }
+ None
+}
+
+#[target_feature(enable = "avx2")]
+pub unsafe fn memchr3(
+ n1: u8,
+ n2: u8,
+ n3: u8,
+ haystack: &[u8],
+) -> Option<usize> {
+ let vn1 = _mm256_set1_epi8(n1 as i8);
+ let vn2 = _mm256_set1_epi8(n2 as i8);
+ let vn3 = _mm256_set1_epi8(n3 as i8);
+ let len = haystack.len();
+ let loop_size = cmp::min(LOOP_SIZE2, len);
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr < end_ptr {
+ if *ptr == n1 || *ptr == n2 || *ptr == n3 {
+ return Some(sub(ptr, start_ptr));
+ }
+ ptr = ptr.offset(1);
+ }
+ return None;
+ }
+
+ if let Some(i) = forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) {
+ return Some(i);
+ }
+
+ ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN));
+ debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr);
+ while loop_size == LOOP_SIZE2 && ptr <= end_ptr.sub(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ let a = _mm256_load_si256(ptr as *const __m256i);
+ let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i);
+ let eqa1 = _mm256_cmpeq_epi8(vn1, a);
+ let eqb1 = _mm256_cmpeq_epi8(vn1, b);
+ let eqa2 = _mm256_cmpeq_epi8(vn2, a);
+ let eqb2 = _mm256_cmpeq_epi8(vn2, b);
+ let eqa3 = _mm256_cmpeq_epi8(vn3, a);
+ let eqb3 = _mm256_cmpeq_epi8(vn3, b);
+ let or1 = _mm256_or_si256(eqa1, eqb1);
+ let or2 = _mm256_or_si256(eqa2, eqb2);
+ let or3 = _mm256_or_si256(eqa3, eqb3);
+ let or4 = _mm256_or_si256(or1, or2);
+ let or5 = _mm256_or_si256(or3, or4);
+ if _mm256_movemask_epi8(or5) != 0 {
+ let mut at = sub(ptr, start_ptr);
+ let mask1 = _mm256_movemask_epi8(eqa1);
+ let mask2 = _mm256_movemask_epi8(eqa2);
+ let mask3 = _mm256_movemask_epi8(eqa3);
+ if mask1 != 0 || mask2 != 0 || mask3 != 0 {
+ return Some(at + forward_pos3(mask1, mask2, mask3));
+ }
+
+ at += VECTOR_SIZE;
+ let mask1 = _mm256_movemask_epi8(eqb1);
+ let mask2 = _mm256_movemask_epi8(eqb2);
+ let mask3 = _mm256_movemask_epi8(eqb3);
+ return Some(at + forward_pos3(mask1, mask2, mask3));
+ }
+ ptr = ptr.add(loop_size);
+ }
+ while ptr <= end_ptr.sub(VECTOR_SIZE) {
+ if let Some(i) =
+ forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3)
+ {
+ return Some(i);
+ }
+ ptr = ptr.add(VECTOR_SIZE);
+ }
+ if ptr < end_ptr {
+ debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE);
+ ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr));
+ debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE);
+
+ return forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3);
+ }
+ None
+}
+
+#[target_feature(enable = "avx2")]
+pub unsafe fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = _mm256_set1_epi8(n1 as i8);
+ let len = haystack.len();
+ let loop_size = cmp::min(LOOP_SIZE, len);
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = end_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr > start_ptr {
+ ptr = ptr.offset(-1);
+ if *ptr == n1 {
+ return Some(sub(ptr, start_ptr));
+ }
+ }
+ return None;
+ }
+
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) = reverse_search1(start_ptr, end_ptr, ptr, vn1) {
+ return Some(i);
+ }
+
+ ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8;
+ debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
+ while loop_size == LOOP_SIZE && ptr >= start_ptr.add(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ ptr = ptr.sub(loop_size);
+ let a = _mm256_load_si256(ptr as *const __m256i);
+ let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i);
+ let c = _mm256_load_si256(ptr.add(2 * VECTOR_SIZE) as *const __m256i);
+ let d = _mm256_load_si256(ptr.add(3 * VECTOR_SIZE) as *const __m256i);
+ let eqa = _mm256_cmpeq_epi8(vn1, a);
+ let eqb = _mm256_cmpeq_epi8(vn1, b);
+ let eqc = _mm256_cmpeq_epi8(vn1, c);
+ let eqd = _mm256_cmpeq_epi8(vn1, d);
+ let or1 = _mm256_or_si256(eqa, eqb);
+ let or2 = _mm256_or_si256(eqc, eqd);
+ let or3 = _mm256_or_si256(or1, or2);
+ if _mm256_movemask_epi8(or3) != 0 {
+ let mut at = sub(ptr.add(3 * VECTOR_SIZE), start_ptr);
+ let mask = _mm256_movemask_epi8(eqd);
+ if mask != 0 {
+ return Some(at + reverse_pos(mask));
+ }
+
+ at -= VECTOR_SIZE;
+ let mask = _mm256_movemask_epi8(eqc);
+ if mask != 0 {
+ return Some(at + reverse_pos(mask));
+ }
+
+ at -= VECTOR_SIZE;
+ let mask = _mm256_movemask_epi8(eqb);
+ if mask != 0 {
+ return Some(at + reverse_pos(mask));
+ }
+
+ at -= VECTOR_SIZE;
+ let mask = _mm256_movemask_epi8(eqa);
+ debug_assert!(mask != 0);
+ return Some(at + reverse_pos(mask));
+ }
+ }
+ while ptr >= start_ptr.add(VECTOR_SIZE) {
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) = reverse_search1(start_ptr, end_ptr, ptr, vn1) {
+ return Some(i);
+ }
+ }
+ if ptr > start_ptr {
+ debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE);
+ return reverse_search1(start_ptr, end_ptr, start_ptr, vn1);
+ }
+ None
+}
+
+#[target_feature(enable = "avx2")]
+pub unsafe fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = _mm256_set1_epi8(n1 as i8);
+ let vn2 = _mm256_set1_epi8(n2 as i8);
+ let len = haystack.len();
+ let loop_size = cmp::min(LOOP_SIZE2, len);
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = end_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr > start_ptr {
+ ptr = ptr.offset(-1);
+ if *ptr == n1 || *ptr == n2 {
+ return Some(sub(ptr, start_ptr));
+ }
+ }
+ return None;
+ }
+
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) = reverse_search2(start_ptr, end_ptr, ptr, vn1, vn2) {
+ return Some(i);
+ }
+
+ ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8;
+ debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
+ while loop_size == LOOP_SIZE2 && ptr >= start_ptr.add(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ ptr = ptr.sub(loop_size);
+ let a = _mm256_load_si256(ptr as *const __m256i);
+ let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i);
+ let eqa1 = _mm256_cmpeq_epi8(vn1, a);
+ let eqb1 = _mm256_cmpeq_epi8(vn1, b);
+ let eqa2 = _mm256_cmpeq_epi8(vn2, a);
+ let eqb2 = _mm256_cmpeq_epi8(vn2, b);
+ let or1 = _mm256_or_si256(eqa1, eqb1);
+ let or2 = _mm256_or_si256(eqa2, eqb2);
+ let or3 = _mm256_or_si256(or1, or2);
+ if _mm256_movemask_epi8(or3) != 0 {
+ let mut at = sub(ptr.add(VECTOR_SIZE), start_ptr);
+ let mask1 = _mm256_movemask_epi8(eqb1);
+ let mask2 = _mm256_movemask_epi8(eqb2);
+ if mask1 != 0 || mask2 != 0 {
+ return Some(at + reverse_pos2(mask1, mask2));
+ }
+
+ at -= VECTOR_SIZE;
+ let mask1 = _mm256_movemask_epi8(eqa1);
+ let mask2 = _mm256_movemask_epi8(eqa2);
+ return Some(at + reverse_pos2(mask1, mask2));
+ }
+ }
+ while ptr >= start_ptr.add(VECTOR_SIZE) {
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) = reverse_search2(start_ptr, end_ptr, ptr, vn1, vn2) {
+ return Some(i);
+ }
+ }
+ if ptr > start_ptr {
+ debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE);
+ return reverse_search2(start_ptr, end_ptr, start_ptr, vn1, vn2);
+ }
+ None
+}
+
+#[target_feature(enable = "avx2")]
+pub unsafe fn memrchr3(
+ n1: u8,
+ n2: u8,
+ n3: u8,
+ haystack: &[u8],
+) -> Option<usize> {
+ let vn1 = _mm256_set1_epi8(n1 as i8);
+ let vn2 = _mm256_set1_epi8(n2 as i8);
+ let vn3 = _mm256_set1_epi8(n3 as i8);
+ let len = haystack.len();
+ let loop_size = cmp::min(LOOP_SIZE2, len);
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = end_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr > start_ptr {
+ ptr = ptr.offset(-1);
+ if *ptr == n1 || *ptr == n2 || *ptr == n3 {
+ return Some(sub(ptr, start_ptr));
+ }
+ }
+ return None;
+ }
+
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) = reverse_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) {
+ return Some(i);
+ }
+
+ ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8;
+ debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
+ while loop_size == LOOP_SIZE2 && ptr >= start_ptr.add(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ ptr = ptr.sub(loop_size);
+ let a = _mm256_load_si256(ptr as *const __m256i);
+ let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i);
+ let eqa1 = _mm256_cmpeq_epi8(vn1, a);
+ let eqb1 = _mm256_cmpeq_epi8(vn1, b);
+ let eqa2 = _mm256_cmpeq_epi8(vn2, a);
+ let eqb2 = _mm256_cmpeq_epi8(vn2, b);
+ let eqa3 = _mm256_cmpeq_epi8(vn3, a);
+ let eqb3 = _mm256_cmpeq_epi8(vn3, b);
+ let or1 = _mm256_or_si256(eqa1, eqb1);
+ let or2 = _mm256_or_si256(eqa2, eqb2);
+ let or3 = _mm256_or_si256(eqa3, eqb3);
+ let or4 = _mm256_or_si256(or1, or2);
+ let or5 = _mm256_or_si256(or3, or4);
+ if _mm256_movemask_epi8(or5) != 0 {
+ let mut at = sub(ptr.add(VECTOR_SIZE), start_ptr);
+ let mask1 = _mm256_movemask_epi8(eqb1);
+ let mask2 = _mm256_movemask_epi8(eqb2);
+ let mask3 = _mm256_movemask_epi8(eqb3);
+ if mask1 != 0 || mask2 != 0 || mask3 != 0 {
+ return Some(at + reverse_pos3(mask1, mask2, mask3));
+ }
+
+ at -= VECTOR_SIZE;
+ let mask1 = _mm256_movemask_epi8(eqa1);
+ let mask2 = _mm256_movemask_epi8(eqa2);
+ let mask3 = _mm256_movemask_epi8(eqa3);
+ return Some(at + reverse_pos3(mask1, mask2, mask3));
+ }
+ }
+ while ptr >= start_ptr.add(VECTOR_SIZE) {
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) =
+ reverse_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3)
+ {
+ return Some(i);
+ }
+ }
+ if ptr > start_ptr {
+ debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE);
+ return reverse_search3(start_ptr, end_ptr, start_ptr, vn1, vn2, vn3);
+ }
+ None
+}
+
+#[target_feature(enable = "avx2")]
+unsafe fn forward_search1(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m256i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm256_loadu_si256(ptr as *const __m256i);
+ let mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, vn1));
+ if mask != 0 {
+ Some(sub(ptr, start_ptr) + forward_pos(mask))
+ } else {
+ None
+ }
+}
+
+#[target_feature(enable = "avx2")]
+unsafe fn forward_search2(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m256i,
+ vn2: __m256i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm256_loadu_si256(ptr as *const __m256i);
+ let eq1 = _mm256_cmpeq_epi8(chunk, vn1);
+ let eq2 = _mm256_cmpeq_epi8(chunk, vn2);
+ if _mm256_movemask_epi8(_mm256_or_si256(eq1, eq2)) != 0 {
+ let mask1 = _mm256_movemask_epi8(eq1);
+ let mask2 = _mm256_movemask_epi8(eq2);
+ Some(sub(ptr, start_ptr) + forward_pos2(mask1, mask2))
+ } else {
+ None
+ }
+}
+
+#[target_feature(enable = "avx2")]
+unsafe fn forward_search3(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m256i,
+ vn2: __m256i,
+ vn3: __m256i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm256_loadu_si256(ptr as *const __m256i);
+ let eq1 = _mm256_cmpeq_epi8(chunk, vn1);
+ let eq2 = _mm256_cmpeq_epi8(chunk, vn2);
+ let eq3 = _mm256_cmpeq_epi8(chunk, vn3);
+ let or = _mm256_or_si256(eq1, eq2);
+ if _mm256_movemask_epi8(_mm256_or_si256(or, eq3)) != 0 {
+ let mask1 = _mm256_movemask_epi8(eq1);
+ let mask2 = _mm256_movemask_epi8(eq2);
+ let mask3 = _mm256_movemask_epi8(eq3);
+ Some(sub(ptr, start_ptr) + forward_pos3(mask1, mask2, mask3))
+ } else {
+ None
+ }
+}
+
+#[target_feature(enable = "avx2")]
+unsafe fn reverse_search1(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m256i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm256_loadu_si256(ptr as *const __m256i);
+ let mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(vn1, chunk));
+ if mask != 0 {
+ Some(sub(ptr, start_ptr) + reverse_pos(mask))
+ } else {
+ None
+ }
+}
+
+#[target_feature(enable = "avx2")]
+unsafe fn reverse_search2(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m256i,
+ vn2: __m256i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm256_loadu_si256(ptr as *const __m256i);
+ let eq1 = _mm256_cmpeq_epi8(chunk, vn1);
+ let eq2 = _mm256_cmpeq_epi8(chunk, vn2);
+ if _mm256_movemask_epi8(_mm256_or_si256(eq1, eq2)) != 0 {
+ let mask1 = _mm256_movemask_epi8(eq1);
+ let mask2 = _mm256_movemask_epi8(eq2);
+ Some(sub(ptr, start_ptr) + reverse_pos2(mask1, mask2))
+ } else {
+ None
+ }
+}
+
+#[target_feature(enable = "avx2")]
+unsafe fn reverse_search3(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m256i,
+ vn2: __m256i,
+ vn3: __m256i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm256_loadu_si256(ptr as *const __m256i);
+ let eq1 = _mm256_cmpeq_epi8(chunk, vn1);
+ let eq2 = _mm256_cmpeq_epi8(chunk, vn2);
+ let eq3 = _mm256_cmpeq_epi8(chunk, vn3);
+ let or = _mm256_or_si256(eq1, eq2);
+ if _mm256_movemask_epi8(_mm256_or_si256(or, eq3)) != 0 {
+ let mask1 = _mm256_movemask_epi8(eq1);
+ let mask2 = _mm256_movemask_epi8(eq2);
+ let mask3 = _mm256_movemask_epi8(eq3);
+ Some(sub(ptr, start_ptr) + reverse_pos3(mask1, mask2, mask3))
+ } else {
+ None
+ }
+}
+
+/// Compute the position of the first matching byte from the given mask. The
+/// position returned is always in the range [0, 31].
+///
+/// The mask given is expected to be the result of _mm256_movemask_epi8.
+fn forward_pos(mask: i32) -> usize {
+ // We are dealing with little endian here, where the most significant byte
+ // is at a higher address. That means the least significant bit that is set
+ // corresponds to the position of our first matching byte. That position
+ // corresponds to the number of zeros after the least significant bit.
+ mask.trailing_zeros() as usize
+}
+
+/// Compute the position of the first matching byte from the given masks. The
+/// position returned is always in the range [0, 31]. Each mask corresponds to
+/// the equality comparison of a single byte.
+///
+/// The masks given are expected to be the result of _mm256_movemask_epi8,
+/// where at least one of the masks is non-zero (i.e., indicates a match).
+fn forward_pos2(mask1: i32, mask2: i32) -> usize {
+ debug_assert!(mask1 != 0 || mask2 != 0);
+
+ forward_pos(mask1 | mask2)
+}
+
+/// Compute the position of the first matching byte from the given masks. The
+/// position returned is always in the range [0, 31]. Each mask corresponds to
+/// the equality comparison of a single byte.
+///
+/// The masks given are expected to be the result of _mm256_movemask_epi8,
+/// where at least one of the masks is non-zero (i.e., indicates a match).
+fn forward_pos3(mask1: i32, mask2: i32, mask3: i32) -> usize {
+ debug_assert!(mask1 != 0 || mask2 != 0 || mask3 != 0);
+
+ forward_pos(mask1 | mask2 | mask3)
+}
+
+/// Compute the position of the last matching byte from the given mask. The
+/// position returned is always in the range [0, 31].
+///
+/// The mask given is expected to be the result of _mm256_movemask_epi8.
+fn reverse_pos(mask: i32) -> usize {
+ // We are dealing with little endian here, where the most significant byte
+ // is at a higher address. That means the most significant bit that is set
+ // corresponds to the position of our last matching byte. The position from
+ // the end of the mask is therefore the number of leading zeros in a 32
+ // bit integer, and the position from the start of the mask is therefore
+ // 32 - (leading zeros) - 1.
+ VECTOR_SIZE - (mask as u32).leading_zeros() as usize - 1
+}
+
+/// Compute the position of the last matching byte from the given masks. The
+/// position returned is always in the range [0, 31]. Each mask corresponds to
+/// the equality comparison of a single byte.
+///
+/// The masks given are expected to be the result of _mm256_movemask_epi8,
+/// where at least one of the masks is non-zero (i.e., indicates a match).
+fn reverse_pos2(mask1: i32, mask2: i32) -> usize {
+ debug_assert!(mask1 != 0 || mask2 != 0);
+
+ reverse_pos(mask1 | mask2)
+}
+
+/// Compute the position of the last matching byte from the given masks. The
+/// position returned is always in the range [0, 31]. Each mask corresponds to
+/// the equality comparison of a single byte.
+///
+/// The masks given are expected to be the result of _mm256_movemask_epi8,
+/// where at least one of the masks is non-zero (i.e., indicates a match).
+fn reverse_pos3(mask1: i32, mask2: i32, mask3: i32) -> usize {
+ debug_assert!(mask1 != 0 || mask2 != 0 || mask3 != 0);
+
+ reverse_pos(mask1 | mask2 | mask3)
+}
+
+/// Subtract `b` from `a` and return the difference. `a` should be greater than
+/// or equal to `b`.
+fn sub(a: *const u8, b: *const u8) -> usize {
+ debug_assert!(a >= b);
+ (a as usize) - (b as usize)
+}
diff --git a/third_party/rust/memchr/src/x86/mod.rs b/third_party/rust/memchr/src/x86/mod.rs
new file mode 100644
index 0000000000..855dc8b755
--- /dev/null
+++ b/third_party/rust/memchr/src/x86/mod.rs
@@ -0,0 +1,119 @@
+use fallback;
+
+// We only use AVX when we can detect at runtime whether it's available, which
+// requires std.
+#[cfg(feature = "std")]
+mod avx;
+mod sse2;
+
+// This macro employs a gcc-like "ifunc" trick where by upon first calling
+// `memchr` (for example), CPU feature detection will be performed at runtime
+// to determine the best implementation to use. After CPU feature detection
+// is done, we replace `memchr`'s function pointer with the selection. Upon
+// subsequent invocations, the CPU-specific routine is invoked directly, which
+// skips the CPU feature detection and subsequent branch that's required.
+//
+// While this typically doesn't matter for rare occurrences or when used on
+// larger haystacks, `memchr` can be called in tight loops where the overhead
+// of this branch can actually add up *and is measurable*. This trick was
+// necessary to bring this implementation up to glibc's speeds for the 'tiny'
+// benchmarks, for example.
+//
+// At some point, I expect the Rust ecosystem will get a nice macro for doing
+// exactly this, at which point, we can replace our hand-jammed version of it.
+//
+// N.B. The ifunc strategy does prevent function inlining of course, but on
+// modern CPUs, you'll probably end up with the AVX2 implementation, which
+// probably can't be inlined anyway---unless you've compiled your entire
+// program with AVX2 enabled. However, even then, the various memchr
+// implementations aren't exactly small, so inlining might not help anyway!
+#[cfg(feature = "std")]
+macro_rules! ifunc {
+ ($fnty:ty, $name:ident, $haystack:ident, $($needle:ident),+) => {{
+ use std::mem;
+ use std::sync::atomic::{AtomicPtr, Ordering};
+
+ type FnRaw = *mut ();
+
+ static FN: AtomicPtr<()> = AtomicPtr::new(detect as FnRaw);
+
+ fn detect($($needle: u8),+, haystack: &[u8]) -> Option<usize> {
+ let fun =
+ if cfg!(memchr_runtime_avx) && is_x86_feature_detected!("avx2") {
+ avx::$name as FnRaw
+ } else if cfg!(memchr_runtime_sse2) {
+ sse2::$name as FnRaw
+ } else {
+ fallback::$name as FnRaw
+ };
+ FN.store(fun as FnRaw, Ordering::Relaxed);
+ unsafe {
+ mem::transmute::<FnRaw, $fnty>(fun)($($needle),+, haystack)
+ }
+ }
+
+ unsafe {
+ let fun = FN.load(Ordering::Relaxed);
+ mem::transmute::<FnRaw, $fnty>(fun)($($needle),+, $haystack)
+ }
+ }}
+}
+
+// When std isn't available to provide runtime CPU feature detection, or if
+// runtime CPU feature detection has been explicitly disabled, then just call
+// our optimized SSE2 routine directly. SSE2 is avalbale on all x86_64 targets,
+// so no CPU feature detection is necessary.
+#[cfg(not(feature = "std"))]
+macro_rules! ifunc {
+ ($fnty:ty, $name:ident, $haystack:ident, $($needle:ident),+) => {{
+ if cfg!(memchr_runtime_sse2) {
+ unsafe { sse2::$name($($needle),+, $haystack) }
+ } else {
+ fallback::$name($($needle),+, $haystack)
+ }
+ }}
+}
+
+#[inline(always)]
+pub fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ ifunc!(fn(u8, &[u8]) -> Option<usize>, memchr, haystack, n1)
+}
+
+#[inline(always)]
+pub fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ ifunc!(fn(u8, u8, &[u8]) -> Option<usize>, memchr2, haystack, n1, n2)
+}
+
+#[inline(always)]
+pub fn memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ ifunc!(
+ fn(u8, u8, u8, &[u8]) -> Option<usize>,
+ memchr3,
+ haystack,
+ n1,
+ n2,
+ n3
+ )
+}
+
+#[inline(always)]
+pub fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ ifunc!(fn(u8, &[u8]) -> Option<usize>, memrchr, haystack, n1)
+}
+
+#[inline(always)]
+pub fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ ifunc!(fn(u8, u8, &[u8]) -> Option<usize>, memrchr2, haystack, n1, n2)
+}
+
+#[inline(always)]
+pub fn memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ ifunc!(
+ fn(u8, u8, u8, &[u8]) -> Option<usize>,
+ memrchr3,
+ haystack,
+ n1,
+ n2,
+ n3
+ )
+}
diff --git a/third_party/rust/memchr/src/x86/sse2.rs b/third_party/rust/memchr/src/x86/sse2.rs
new file mode 100644
index 0000000000..76f5a78c34
--- /dev/null
+++ b/third_party/rust/memchr/src/x86/sse2.rs
@@ -0,0 +1,793 @@
+use core::arch::x86_64::*;
+use core::cmp;
+use core::mem::size_of;
+
+const VECTOR_SIZE: usize = size_of::<__m128i>();
+const VECTOR_ALIGN: usize = VECTOR_SIZE - 1;
+
+// The number of bytes to loop at in one iteration of memchr/memrchr.
+const LOOP_SIZE: usize = 4 * VECTOR_SIZE;
+
+// The number of bytes to loop at in one iteration of memchr2/memrchr2 and
+// memchr3/memrchr3. There was no observable difference between 64 and 32 bytes
+// in benchmarks. memchr3 in particular only gets a very slight speed up from
+// the loop unrolling.
+const LOOP_SIZE2: usize = 2 * VECTOR_SIZE;
+
+#[target_feature(enable = "sse2")]
+pub unsafe fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ // What follows is a fast SSE2-only algorithm to detect the position of
+ // `n1` in `haystack` if it exists. From what I know, this is the "classic"
+ // algorithm. I believe it can be found in places like glibc and Go's
+ // standard library. It appears to be well known and is elaborated on in
+ // more detail here: https://gms.tf/stdfind-and-memchr-optimizations.html
+ //
+ // While this routine is very long, the basic idea is actually very simple
+ // and can be expressed straight-forwardly in pseudo code:
+ //
+ // needle = (n1 << 15) | (n1 << 14) | ... | (n1 << 1) | n1
+ // // Note: shift amount in bytes
+ //
+ // while i <= haystack.len() - 16:
+ // // A 16 byte vector. Each byte in chunk corresponds to a byte in
+ // // the haystack.
+ // chunk = haystack[i:i+16]
+ // // Compare bytes in needle with bytes in chunk. The result is a 16
+ // // byte chunk where each byte is 0xFF if the corresponding bytes
+ // // in needle and chunk were equal, or 0x00 otherwise.
+ // eqs = cmpeq(needle, chunk)
+ // // Return a 32 bit integer where the most significant 16 bits
+ // // are always 0 and the lower 16 bits correspond to whether the
+ // // most significant bit in the correspond byte in `eqs` is set.
+ // // In other words, `mask as u16` has bit i set if and only if
+ // // needle[i] == chunk[i].
+ // mask = movemask(eqs)
+ //
+ // // Mask is 0 if there is no match, and non-zero otherwise.
+ // if mask != 0:
+ // // trailing_zeros tells us the position of the least significant
+ // // bit that is set.
+ // return i + trailing_zeros(mask)
+ //
+ // // haystack length may not be a multiple of 16, so search the rest.
+ // while i < haystack.len():
+ // if haystack[i] == n1:
+ // return i
+ //
+ // // No match found.
+ // return NULL
+ //
+ // In fact, we could loosely translate the above code to Rust line-for-line
+ // and it would be a pretty fast algorithm. But, we pull out all the stops
+ // to go as fast as possible:
+ //
+ // 1. We use aligned loads. That is, we do some finagling to make sure our
+ // primary loop not only proceeds in increments of 16 bytes, but that
+ // the address of haystack's pointer that we dereference is aligned to
+ // 16 bytes. 16 is a magic number here because it is the size of SSE2
+ // 128-bit vector. (For the AVX2 algorithm, 32 is the magic number.)
+ // Therefore, to get aligned loads, our pointer's address must be evenly
+ // divisible by 16.
+ // 2. Our primary loop proceeds 64 bytes at a time instead of 16. It's
+ // kind of like loop unrolling, but we combine the equality comparisons
+ // using a vector OR such that we only need to extract a single mask to
+ // determine whether a match exists or not. If so, then we do some
+ // book-keeping to determine the precise location but otherwise mush on.
+ // 3. We use our "chunk" comparison routine in as many places as possible,
+ // even if it means using unaligned loads. In particular, if haystack
+ // starts with an unaligned address, then we do an unaligned load to
+ // search the first 16 bytes. We then start our primary loop at the
+ // smallest subsequent aligned address, which will actually overlap with
+ // previously searched bytes. But we're OK with that. We do a similar
+ // dance at the end of our primary loop. Finally, to avoid a
+ // byte-at-a-time loop at the end, we do a final 16 byte unaligned load
+ // that may overlap with a previous load. This is OK because it converts
+ // a loop into a small number of very fast vector instructions.
+ //
+ // The primary downside of this algorithm is that it's effectively
+ // completely unsafe. Therefore, we have to be super careful to avoid
+ // undefined behavior:
+ //
+ // 1. We use raw pointers everywhere. Not only does dereferencing a pointer
+ // require the pointer to be valid, but we actually can't even store the
+ // address of an invalid pointer (unless it's 1 past the end of
+ // haystack) without sacrificing performance.
+ // 2. _mm_loadu_si128 is used when you don't care about alignment, and
+ // _mm_load_si128 is used when you do care. You cannot use the latter
+ // on unaligned pointers.
+ // 3. We make liberal use of debug_assert! to check assumptions.
+ // 4. We make a concerted effort to stick with pointers instead of indices.
+ // Indices are nicer because there's less to worry about with them (see
+ // above about pointer offsets), but I could not get the compiler to
+ // produce as good of code as what the below produces. In any case,
+ // pointers are what we really care about here, and alignment is
+ // expressed a bit more naturally with them.
+ //
+ // In general, most of the algorithms in this crate have a similar
+ // structure to what you see below, so this comment applies fairly well to
+ // all of them.
+
+ let vn1 = _mm_set1_epi8(n1 as i8);
+ let len = haystack.len();
+ let loop_size = cmp::min(LOOP_SIZE, len);
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr < end_ptr {
+ if *ptr == n1 {
+ return Some(sub(ptr, start_ptr));
+ }
+ ptr = ptr.offset(1);
+ }
+ return None;
+ }
+
+ if let Some(i) = forward_search1(start_ptr, end_ptr, ptr, vn1) {
+ return Some(i);
+ }
+
+ ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN));
+ debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr);
+ while loop_size == LOOP_SIZE && ptr <= end_ptr.sub(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ let a = _mm_load_si128(ptr as *const __m128i);
+ let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
+ let c = _mm_load_si128(ptr.add(2 * VECTOR_SIZE) as *const __m128i);
+ let d = _mm_load_si128(ptr.add(3 * VECTOR_SIZE) as *const __m128i);
+ let eqa = _mm_cmpeq_epi8(vn1, a);
+ let eqb = _mm_cmpeq_epi8(vn1, b);
+ let eqc = _mm_cmpeq_epi8(vn1, c);
+ let eqd = _mm_cmpeq_epi8(vn1, d);
+ let or1 = _mm_or_si128(eqa, eqb);
+ let or2 = _mm_or_si128(eqc, eqd);
+ let or3 = _mm_or_si128(or1, or2);
+ if _mm_movemask_epi8(or3) != 0 {
+ let mut at = sub(ptr, start_ptr);
+ let mask = _mm_movemask_epi8(eqa);
+ if mask != 0 {
+ return Some(at + forward_pos(mask));
+ }
+
+ at += VECTOR_SIZE;
+ let mask = _mm_movemask_epi8(eqb);
+ if mask != 0 {
+ return Some(at + forward_pos(mask));
+ }
+
+ at += VECTOR_SIZE;
+ let mask = _mm_movemask_epi8(eqc);
+ if mask != 0 {
+ return Some(at + forward_pos(mask));
+ }
+
+ at += VECTOR_SIZE;
+ let mask = _mm_movemask_epi8(eqd);
+ debug_assert!(mask != 0);
+ return Some(at + forward_pos(mask));
+ }
+ ptr = ptr.add(loop_size);
+ }
+ while ptr <= end_ptr.sub(VECTOR_SIZE) {
+ debug_assert!(sub(end_ptr, ptr) >= VECTOR_SIZE);
+
+ if let Some(i) = forward_search1(start_ptr, end_ptr, ptr, vn1) {
+ return Some(i);
+ }
+ ptr = ptr.add(VECTOR_SIZE);
+ }
+ if ptr < end_ptr {
+ debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE);
+ ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr));
+ debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE);
+
+ return forward_search1(start_ptr, end_ptr, ptr, vn1);
+ }
+ None
+}
+
+#[target_feature(enable = "sse2")]
+pub unsafe fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = _mm_set1_epi8(n1 as i8);
+ let vn2 = _mm_set1_epi8(n2 as i8);
+ let len = haystack.len();
+ let loop_size = cmp::min(LOOP_SIZE2, len);
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr < end_ptr {
+ if *ptr == n1 || *ptr == n2 {
+ return Some(sub(ptr, start_ptr));
+ }
+ ptr = ptr.offset(1);
+ }
+ return None;
+ }
+
+ if let Some(i) = forward_search2(start_ptr, end_ptr, ptr, vn1, vn2) {
+ return Some(i);
+ }
+
+ ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN));
+ debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr);
+ while loop_size == LOOP_SIZE2 && ptr <= end_ptr.sub(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ let a = _mm_load_si128(ptr as *const __m128i);
+ let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
+ let eqa1 = _mm_cmpeq_epi8(vn1, a);
+ let eqb1 = _mm_cmpeq_epi8(vn1, b);
+ let eqa2 = _mm_cmpeq_epi8(vn2, a);
+ let eqb2 = _mm_cmpeq_epi8(vn2, b);
+ let or1 = _mm_or_si128(eqa1, eqb1);
+ let or2 = _mm_or_si128(eqa2, eqb2);
+ let or3 = _mm_or_si128(or1, or2);
+ if _mm_movemask_epi8(or3) != 0 {
+ let mut at = sub(ptr, start_ptr);
+ let mask1 = _mm_movemask_epi8(eqa1);
+ let mask2 = _mm_movemask_epi8(eqa2);
+ if mask1 != 0 || mask2 != 0 {
+ return Some(at + forward_pos2(mask1, mask2));
+ }
+
+ at += VECTOR_SIZE;
+ let mask1 = _mm_movemask_epi8(eqb1);
+ let mask2 = _mm_movemask_epi8(eqb2);
+ return Some(at + forward_pos2(mask1, mask2));
+ }
+ ptr = ptr.add(loop_size);
+ }
+ while ptr <= end_ptr.sub(VECTOR_SIZE) {
+ if let Some(i) = forward_search2(start_ptr, end_ptr, ptr, vn1, vn2) {
+ return Some(i);
+ }
+ ptr = ptr.add(VECTOR_SIZE);
+ }
+ if ptr < end_ptr {
+ debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE);
+ ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr));
+ debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE);
+
+ return forward_search2(start_ptr, end_ptr, ptr, vn1, vn2);
+ }
+ None
+}
+
+#[target_feature(enable = "sse2")]
+pub unsafe fn memchr3(
+ n1: u8,
+ n2: u8,
+ n3: u8,
+ haystack: &[u8],
+) -> Option<usize> {
+ let vn1 = _mm_set1_epi8(n1 as i8);
+ let vn2 = _mm_set1_epi8(n2 as i8);
+ let vn3 = _mm_set1_epi8(n3 as i8);
+ let len = haystack.len();
+ let loop_size = cmp::min(LOOP_SIZE2, len);
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr < end_ptr {
+ if *ptr == n1 || *ptr == n2 || *ptr == n3 {
+ return Some(sub(ptr, start_ptr));
+ }
+ ptr = ptr.offset(1);
+ }
+ return None;
+ }
+
+ if let Some(i) = forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) {
+ return Some(i);
+ }
+
+ ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN));
+ debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr);
+ while loop_size == LOOP_SIZE2 && ptr <= end_ptr.sub(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ let a = _mm_load_si128(ptr as *const __m128i);
+ let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
+ let eqa1 = _mm_cmpeq_epi8(vn1, a);
+ let eqb1 = _mm_cmpeq_epi8(vn1, b);
+ let eqa2 = _mm_cmpeq_epi8(vn2, a);
+ let eqb2 = _mm_cmpeq_epi8(vn2, b);
+ let eqa3 = _mm_cmpeq_epi8(vn3, a);
+ let eqb3 = _mm_cmpeq_epi8(vn3, b);
+ let or1 = _mm_or_si128(eqa1, eqb1);
+ let or2 = _mm_or_si128(eqa2, eqb2);
+ let or3 = _mm_or_si128(eqa3, eqb3);
+ let or4 = _mm_or_si128(or1, or2);
+ let or5 = _mm_or_si128(or3, or4);
+ if _mm_movemask_epi8(or5) != 0 {
+ let mut at = sub(ptr, start_ptr);
+ let mask1 = _mm_movemask_epi8(eqa1);
+ let mask2 = _mm_movemask_epi8(eqa2);
+ let mask3 = _mm_movemask_epi8(eqa3);
+ if mask1 != 0 || mask2 != 0 || mask3 != 0 {
+ return Some(at + forward_pos3(mask1, mask2, mask3));
+ }
+
+ at += VECTOR_SIZE;
+ let mask1 = _mm_movemask_epi8(eqb1);
+ let mask2 = _mm_movemask_epi8(eqb2);
+ let mask3 = _mm_movemask_epi8(eqb3);
+ return Some(at + forward_pos3(mask1, mask2, mask3));
+ }
+ ptr = ptr.add(loop_size);
+ }
+ while ptr <= end_ptr.sub(VECTOR_SIZE) {
+ if let Some(i) =
+ forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3)
+ {
+ return Some(i);
+ }
+ ptr = ptr.add(VECTOR_SIZE);
+ }
+ if ptr < end_ptr {
+ debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE);
+ ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr));
+ debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE);
+
+ return forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3);
+ }
+ None
+}
+
+#[target_feature(enable = "sse2")]
+pub unsafe fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = _mm_set1_epi8(n1 as i8);
+ let len = haystack.len();
+ let loop_size = cmp::min(LOOP_SIZE, len);
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = end_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr > start_ptr {
+ ptr = ptr.offset(-1);
+ if *ptr == n1 {
+ return Some(sub(ptr, start_ptr));
+ }
+ }
+ return None;
+ }
+
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) = reverse_search1(start_ptr, end_ptr, ptr, vn1) {
+ return Some(i);
+ }
+
+ ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8;
+ debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
+ while loop_size == LOOP_SIZE && ptr >= start_ptr.add(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ ptr = ptr.sub(loop_size);
+ let a = _mm_load_si128(ptr as *const __m128i);
+ let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
+ let c = _mm_load_si128(ptr.add(2 * VECTOR_SIZE) as *const __m128i);
+ let d = _mm_load_si128(ptr.add(3 * VECTOR_SIZE) as *const __m128i);
+ let eqa = _mm_cmpeq_epi8(vn1, a);
+ let eqb = _mm_cmpeq_epi8(vn1, b);
+ let eqc = _mm_cmpeq_epi8(vn1, c);
+ let eqd = _mm_cmpeq_epi8(vn1, d);
+ let or1 = _mm_or_si128(eqa, eqb);
+ let or2 = _mm_or_si128(eqc, eqd);
+ let or3 = _mm_or_si128(or1, or2);
+ if _mm_movemask_epi8(or3) != 0 {
+ let mut at = sub(ptr.add(3 * VECTOR_SIZE), start_ptr);
+ let mask = _mm_movemask_epi8(eqd);
+ if mask != 0 {
+ return Some(at + reverse_pos(mask));
+ }
+
+ at -= VECTOR_SIZE;
+ let mask = _mm_movemask_epi8(eqc);
+ if mask != 0 {
+ return Some(at + reverse_pos(mask));
+ }
+
+ at -= VECTOR_SIZE;
+ let mask = _mm_movemask_epi8(eqb);
+ if mask != 0 {
+ return Some(at + reverse_pos(mask));
+ }
+
+ at -= VECTOR_SIZE;
+ let mask = _mm_movemask_epi8(eqa);
+ debug_assert!(mask != 0);
+ return Some(at + reverse_pos(mask));
+ }
+ }
+ while ptr >= start_ptr.add(VECTOR_SIZE) {
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) = reverse_search1(start_ptr, end_ptr, ptr, vn1) {
+ return Some(i);
+ }
+ }
+ if ptr > start_ptr {
+ debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE);
+ return reverse_search1(start_ptr, end_ptr, start_ptr, vn1);
+ }
+ None
+}
+
+#[target_feature(enable = "sse2")]
+pub unsafe fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = _mm_set1_epi8(n1 as i8);
+ let vn2 = _mm_set1_epi8(n2 as i8);
+ let len = haystack.len();
+ let loop_size = cmp::min(LOOP_SIZE2, len);
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = end_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr > start_ptr {
+ ptr = ptr.offset(-1);
+ if *ptr == n1 || *ptr == n2 {
+ return Some(sub(ptr, start_ptr));
+ }
+ }
+ return None;
+ }
+
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) = reverse_search2(start_ptr, end_ptr, ptr, vn1, vn2) {
+ return Some(i);
+ }
+
+ ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8;
+ debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
+ while loop_size == LOOP_SIZE2 && ptr >= start_ptr.add(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ ptr = ptr.sub(loop_size);
+ let a = _mm_load_si128(ptr as *const __m128i);
+ let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
+ let eqa1 = _mm_cmpeq_epi8(vn1, a);
+ let eqb1 = _mm_cmpeq_epi8(vn1, b);
+ let eqa2 = _mm_cmpeq_epi8(vn2, a);
+ let eqb2 = _mm_cmpeq_epi8(vn2, b);
+ let or1 = _mm_or_si128(eqa1, eqb1);
+ let or2 = _mm_or_si128(eqa2, eqb2);
+ let or3 = _mm_or_si128(or1, or2);
+ if _mm_movemask_epi8(or3) != 0 {
+ let mut at = sub(ptr.add(VECTOR_SIZE), start_ptr);
+ let mask1 = _mm_movemask_epi8(eqb1);
+ let mask2 = _mm_movemask_epi8(eqb2);
+ if mask1 != 0 || mask2 != 0 {
+ return Some(at + reverse_pos2(mask1, mask2));
+ }
+
+ at -= VECTOR_SIZE;
+ let mask1 = _mm_movemask_epi8(eqa1);
+ let mask2 = _mm_movemask_epi8(eqa2);
+ return Some(at + reverse_pos2(mask1, mask2));
+ }
+ }
+ while ptr >= start_ptr.add(VECTOR_SIZE) {
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) = reverse_search2(start_ptr, end_ptr, ptr, vn1, vn2) {
+ return Some(i);
+ }
+ }
+ if ptr > start_ptr {
+ debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE);
+ return reverse_search2(start_ptr, end_ptr, start_ptr, vn1, vn2);
+ }
+ None
+}
+
+#[target_feature(enable = "sse2")]
+pub unsafe fn memrchr3(
+ n1: u8,
+ n2: u8,
+ n3: u8,
+ haystack: &[u8],
+) -> Option<usize> {
+ let vn1 = _mm_set1_epi8(n1 as i8);
+ let vn2 = _mm_set1_epi8(n2 as i8);
+ let vn3 = _mm_set1_epi8(n3 as i8);
+ let len = haystack.len();
+ let loop_size = cmp::min(LOOP_SIZE2, len);
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = end_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr > start_ptr {
+ ptr = ptr.offset(-1);
+ if *ptr == n1 || *ptr == n2 || *ptr == n3 {
+ return Some(sub(ptr, start_ptr));
+ }
+ }
+ return None;
+ }
+
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) = reverse_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) {
+ return Some(i);
+ }
+
+ ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8;
+ debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
+ while loop_size == LOOP_SIZE2 && ptr >= start_ptr.add(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ ptr = ptr.sub(loop_size);
+ let a = _mm_load_si128(ptr as *const __m128i);
+ let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
+ let eqa1 = _mm_cmpeq_epi8(vn1, a);
+ let eqb1 = _mm_cmpeq_epi8(vn1, b);
+ let eqa2 = _mm_cmpeq_epi8(vn2, a);
+ let eqb2 = _mm_cmpeq_epi8(vn2, b);
+ let eqa3 = _mm_cmpeq_epi8(vn3, a);
+ let eqb3 = _mm_cmpeq_epi8(vn3, b);
+ let or1 = _mm_or_si128(eqa1, eqb1);
+ let or2 = _mm_or_si128(eqa2, eqb2);
+ let or3 = _mm_or_si128(eqa3, eqb3);
+ let or4 = _mm_or_si128(or1, or2);
+ let or5 = _mm_or_si128(or3, or4);
+ if _mm_movemask_epi8(or5) != 0 {
+ let mut at = sub(ptr.add(VECTOR_SIZE), start_ptr);
+ let mask1 = _mm_movemask_epi8(eqb1);
+ let mask2 = _mm_movemask_epi8(eqb2);
+ let mask3 = _mm_movemask_epi8(eqb3);
+ if mask1 != 0 || mask2 != 0 || mask3 != 0 {
+ return Some(at + reverse_pos3(mask1, mask2, mask3));
+ }
+
+ at -= VECTOR_SIZE;
+ let mask1 = _mm_movemask_epi8(eqa1);
+ let mask2 = _mm_movemask_epi8(eqa2);
+ let mask3 = _mm_movemask_epi8(eqa3);
+ return Some(at + reverse_pos3(mask1, mask2, mask3));
+ }
+ }
+ while ptr >= start_ptr.add(VECTOR_SIZE) {
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) =
+ reverse_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3)
+ {
+ return Some(i);
+ }
+ }
+ if ptr > start_ptr {
+ debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE);
+ return reverse_search3(start_ptr, end_ptr, start_ptr, vn1, vn2, vn3);
+ }
+ None
+}
+
+#[target_feature(enable = "sse2")]
+pub unsafe fn forward_search1(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m128i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm_loadu_si128(ptr as *const __m128i);
+ let mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, vn1));
+ if mask != 0 {
+ Some(sub(ptr, start_ptr) + forward_pos(mask))
+ } else {
+ None
+ }
+}
+
+#[target_feature(enable = "sse2")]
+unsafe fn forward_search2(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m128i,
+ vn2: __m128i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm_loadu_si128(ptr as *const __m128i);
+ let eq1 = _mm_cmpeq_epi8(chunk, vn1);
+ let eq2 = _mm_cmpeq_epi8(chunk, vn2);
+ if _mm_movemask_epi8(_mm_or_si128(eq1, eq2)) != 0 {
+ let mask1 = _mm_movemask_epi8(eq1);
+ let mask2 = _mm_movemask_epi8(eq2);
+ Some(sub(ptr, start_ptr) + forward_pos2(mask1, mask2))
+ } else {
+ None
+ }
+}
+
+#[target_feature(enable = "sse2")]
+pub unsafe fn forward_search3(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m128i,
+ vn2: __m128i,
+ vn3: __m128i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm_loadu_si128(ptr as *const __m128i);
+ let eq1 = _mm_cmpeq_epi8(chunk, vn1);
+ let eq2 = _mm_cmpeq_epi8(chunk, vn2);
+ let eq3 = _mm_cmpeq_epi8(chunk, vn3);
+ let or = _mm_or_si128(eq1, eq2);
+ if _mm_movemask_epi8(_mm_or_si128(or, eq3)) != 0 {
+ let mask1 = _mm_movemask_epi8(eq1);
+ let mask2 = _mm_movemask_epi8(eq2);
+ let mask3 = _mm_movemask_epi8(eq3);
+ Some(sub(ptr, start_ptr) + forward_pos3(mask1, mask2, mask3))
+ } else {
+ None
+ }
+}
+
+#[target_feature(enable = "sse2")]
+unsafe fn reverse_search1(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m128i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm_loadu_si128(ptr as *const __m128i);
+ let mask = _mm_movemask_epi8(_mm_cmpeq_epi8(vn1, chunk));
+ if mask != 0 {
+ Some(sub(ptr, start_ptr) + reverse_pos(mask))
+ } else {
+ None
+ }
+}
+
+#[target_feature(enable = "sse2")]
+unsafe fn reverse_search2(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m128i,
+ vn2: __m128i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm_loadu_si128(ptr as *const __m128i);
+ let eq1 = _mm_cmpeq_epi8(chunk, vn1);
+ let eq2 = _mm_cmpeq_epi8(chunk, vn2);
+ if _mm_movemask_epi8(_mm_or_si128(eq1, eq2)) != 0 {
+ let mask1 = _mm_movemask_epi8(eq1);
+ let mask2 = _mm_movemask_epi8(eq2);
+ Some(sub(ptr, start_ptr) + reverse_pos2(mask1, mask2))
+ } else {
+ None
+ }
+}
+
+#[target_feature(enable = "sse2")]
+unsafe fn reverse_search3(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m128i,
+ vn2: __m128i,
+ vn3: __m128i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm_loadu_si128(ptr as *const __m128i);
+ let eq1 = _mm_cmpeq_epi8(chunk, vn1);
+ let eq2 = _mm_cmpeq_epi8(chunk, vn2);
+ let eq3 = _mm_cmpeq_epi8(chunk, vn3);
+ let or = _mm_or_si128(eq1, eq2);
+ if _mm_movemask_epi8(_mm_or_si128(or, eq3)) != 0 {
+ let mask1 = _mm_movemask_epi8(eq1);
+ let mask2 = _mm_movemask_epi8(eq2);
+ let mask3 = _mm_movemask_epi8(eq3);
+ Some(sub(ptr, start_ptr) + reverse_pos3(mask1, mask2, mask3))
+ } else {
+ None
+ }
+}
+
+/// Compute the position of the first matching byte from the given mask. The
+/// position returned is always in the range [0, 15].
+///
+/// The mask given is expected to be the result of _mm_movemask_epi8.
+fn forward_pos(mask: i32) -> usize {
+ // We are dealing with little endian here, where the most significant byte
+ // is at a higher address. That means the least significant bit that is set
+ // corresponds to the position of our first matching byte. That position
+ // corresponds to the number of zeros after the least significant bit.
+ mask.trailing_zeros() as usize
+}
+
+/// Compute the position of the first matching byte from the given masks. The
+/// position returned is always in the range [0, 15]. Each mask corresponds to
+/// the equality comparison of a single byte.
+///
+/// The masks given are expected to be the result of _mm_movemask_epi8, where
+/// at least one of the masks is non-zero (i.e., indicates a match).
+fn forward_pos2(mask1: i32, mask2: i32) -> usize {
+ debug_assert!(mask1 != 0 || mask2 != 0);
+
+ forward_pos(mask1 | mask2)
+}
+
+/// Compute the position of the first matching byte from the given masks. The
+/// position returned is always in the range [0, 15]. Each mask corresponds to
+/// the equality comparison of a single byte.
+///
+/// The masks given are expected to be the result of _mm_movemask_epi8, where
+/// at least one of the masks is non-zero (i.e., indicates a match).
+fn forward_pos3(mask1: i32, mask2: i32, mask3: i32) -> usize {
+ debug_assert!(mask1 != 0 || mask2 != 0 || mask3 != 0);
+
+ forward_pos(mask1 | mask2 | mask3)
+}
+
+/// Compute the position of the last matching byte from the given mask. The
+/// position returned is always in the range [0, 15].
+///
+/// The mask given is expected to be the result of _mm_movemask_epi8.
+fn reverse_pos(mask: i32) -> usize {
+ // We are dealing with little endian here, where the most significant byte
+ // is at a higher address. That means the most significant bit that is set
+ // corresponds to the position of our last matching byte. The position from
+ // the end of the mask is therefore the number of leading zeros in a 16
+ // bit integer, and the position from the start of the mask is therefore
+ // 16 - (leading zeros) - 1.
+ VECTOR_SIZE - (mask as u16).leading_zeros() as usize - 1
+}
+
+/// Compute the position of the last matching byte from the given masks. The
+/// position returned is always in the range [0, 15]. Each mask corresponds to
+/// the equality comparison of a single byte.
+///
+/// The masks given are expected to be the result of _mm_movemask_epi8, where
+/// at least one of the masks is non-zero (i.e., indicates a match).
+fn reverse_pos2(mask1: i32, mask2: i32) -> usize {
+ debug_assert!(mask1 != 0 || mask2 != 0);
+
+ reverse_pos(mask1 | mask2)
+}
+
+/// Compute the position of the last matching byte from the given masks. The
+/// position returned is always in the range [0, 15]. Each mask corresponds to
+/// the equality comparison of a single byte.
+///
+/// The masks given are expected to be the result of _mm_movemask_epi8, where
+/// at least one of the masks is non-zero (i.e., indicates a match).
+fn reverse_pos3(mask1: i32, mask2: i32, mask3: i32) -> usize {
+ debug_assert!(mask1 != 0 || mask2 != 0 || mask3 != 0);
+
+ reverse_pos(mask1 | mask2 | mask3)
+}
+
+/// Subtract `b` from `a` and return the difference. `a` should be greater than
+/// or equal to `b`.
+fn sub(a: *const u8, b: *const u8) -> usize {
+ debug_assert!(a >= b);
+ (a as usize) - (b as usize)
+}
diff --git a/third_party/rust/memchr/src/x86/sse42.rs b/third_party/rust/memchr/src/x86/sse42.rs
new file mode 100644
index 0000000000..78a9b37973
--- /dev/null
+++ b/third_party/rust/memchr/src/x86/sse42.rs
@@ -0,0 +1,75 @@
+// This code is unused. PCMPESTRI is gratuitously slow. I imagine it might
+// start winning with a hypothetical memchr4 (or greater). This technique might
+// also be good for exposing searches over ranges of bytes, but that departs
+// from the standard memchr API, so it's not clear whether we actually want
+// that or not.
+//
+// N.B. PCMPISTRI appears to be about twice as fast as PCMPESTRI, which is kind
+// of neat. Unfortunately, UTF-8 strings can contain NUL bytes, which means
+// I don't see a way of effectively using PCMPISTRI unless there's some fast
+// way to replace zero bytes with a byte that is not not a needle byte.
+
+use core::arch::x86_64::*;
+use core::mem::size_of;
+
+use x86::sse2;
+
+const VECTOR_SIZE: usize = size_of::<__m128i>();
+const CONTROL_ANY: i32 =
+ _SIDD_UBYTE_OPS
+ | _SIDD_CMP_EQUAL_ANY
+ | _SIDD_POSITIVE_POLARITY
+ | _SIDD_LEAST_SIGNIFICANT;
+
+#[target_feature(enable = "sse4.2")]
+pub unsafe fn memchr3(
+ n1: u8, n2: u8, n3: u8,
+ haystack: &[u8]
+) -> Option<usize> {
+ let vn1 = _mm_set1_epi8(n1 as i8);
+ let vn2 = _mm_set1_epi8(n2 as i8);
+ let vn3 = _mm_set1_epi8(n3 as i8);
+ let vn = _mm_setr_epi8(
+ n1 as i8, n2 as i8, n3 as i8, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ );
+ let len = haystack.len();
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr < end_ptr {
+ if *ptr == n1 || *ptr == n2 || *ptr == n3 {
+ return Some(sub(ptr, start_ptr));
+ }
+ ptr = ptr.offset(1);
+ }
+ return None;
+ }
+ while ptr <= end_ptr.sub(VECTOR_SIZE) {
+ let chunk = _mm_loadu_si128(ptr as *const __m128i);
+ let res = _mm_cmpestri(vn, 3, chunk, 16, CONTROL_ANY);
+ if res < 16 {
+ return Some(sub(ptr, start_ptr) + res as usize);
+ }
+ ptr = ptr.add(VECTOR_SIZE);
+ }
+ if ptr < end_ptr {
+ debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE);
+ ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr));
+ debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE);
+
+ return sse2::forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3);
+ }
+ None
+}
+
+/// Subtract `b` from `a` and return the difference. `a` should be greater than
+/// or equal to `b`.
+fn sub(a: *const u8, b: *const u8) -> usize {
+ debug_assert!(a >= b);
+ (a as usize) - (b as usize)
+}