Diffstat (limited to 'third_party/rust/memchr/src')
-rw-r--r--  third_party/rust/memchr/src/c.rs              44
-rw-r--r--  third_party/rust/memchr/src/fallback.rs      330
-rw-r--r--  third_party/rust/memchr/src/iter.rs          173
-rw-r--r--  third_party/rust/memchr/src/lib.rs           451
-rw-r--r--  third_party/rust/memchr/src/naive.rs          25
-rw-r--r--  third_party/rust/memchr/src/tests/iter.rs    229
-rw-r--r--  third_party/rust/memchr/src/tests/memchr.rs  131
-rw-r--r--  third_party/rust/memchr/src/tests/miri.rs     19
-rw-r--r--  third_party/rust/memchr/src/tests/mod.rs     362
-rw-r--r--  third_party/rust/memchr/src/x86/avx.rs       703
-rw-r--r--  third_party/rust/memchr/src/x86/mod.rs       119
-rw-r--r--  third_party/rust/memchr/src/x86/sse2.rs      793
-rw-r--r--  third_party/rust/memchr/src/x86/sse42.rs      75
13 files changed, 3454 insertions, 0 deletions
diff --git a/third_party/rust/memchr/src/c.rs b/third_party/rust/memchr/src/c.rs
new file mode 100644
index 0000000000..63feca979c
--- /dev/null
+++ b/third_party/rust/memchr/src/c.rs
@@ -0,0 +1,44 @@
+// This module defines safe wrappers around memchr (POSIX) and memrchr (GNU
+// extension).
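+//
+// Both wrappers translate the raw pointer returned by libc back into an index
+// by subtracting the haystack's base address, and map a NULL result to `None`.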
+
+#![allow(dead_code)]
+
+extern crate libc;
+
+use self::libc::{c_int, c_void, size_t};
+
+pub fn memchr(needle: u8, haystack: &[u8]) -> Option<usize> {
+ let p = unsafe {
+ libc::memchr(
+ haystack.as_ptr() as *const c_void,
+ needle as c_int,
+ haystack.len() as size_t,
+ )
+ };
+ if p.is_null() {
+ None
+ } else {
+ Some(p as usize - (haystack.as_ptr() as usize))
+ }
+}
+
+// memrchr is a GNU extension. We know it's available on Linux, so start there.
+#[cfg(target_os = "linux")]
+pub fn memrchr(needle: u8, haystack: &[u8]) -> Option<usize> {
+ // GNU's memrchr() will - unlike memchr() - error if haystack is empty.
+ if haystack.is_empty() {
+ return None;
+ }
+ let p = unsafe {
+ libc::memrchr(
+ haystack.as_ptr() as *const c_void,
+ needle as c_int,
+ haystack.len() as size_t,
+ )
+ };
+ if p.is_null() {
+ None
+ } else {
+ Some(p as usize - (haystack.as_ptr() as usize))
+ }
+}
diff --git a/third_party/rust/memchr/src/fallback.rs b/third_party/rust/memchr/src/fallback.rs
new file mode 100644
index 0000000000..8bc32b27ba
--- /dev/null
+++ b/third_party/rust/memchr/src/fallback.rs
@@ -0,0 +1,330 @@
+// This module defines pure Rust platform independent implementations of all
+// the memchr routines. We do our best to make them fast. Some of them may even
+// get auto-vectorized.
+
+use core::cmp;
+use core::usize;
+
+#[cfg(target_pointer_width = "16")]
+const USIZE_BYTES: usize = 2;
+
+#[cfg(target_pointer_width = "32")]
+const USIZE_BYTES: usize = 4;
+
+#[cfg(target_pointer_width = "64")]
+const USIZE_BYTES: usize = 8;
+
+// The number of bytes to loop at in one iteration of memchr/memrchr.
+const LOOP_SIZE: usize = 2 * USIZE_BYTES;
+
+/// Return `true` if `x` contains any zero byte.
+///
+/// From *Matters Computational*, J. Arndt
+///
+/// "The idea is to subtract one from each of the bytes and then look for
+/// bytes where the borrow propagated all the way to the most significant
+/// bit."
+#[inline(always)]
+fn contains_zero_byte(x: usize) -> bool {
+ const LO_U64: u64 = 0x0101010101010101;
+ const HI_U64: u64 = 0x8080808080808080;
+
+ const LO_USIZE: usize = LO_U64 as usize;
+ const HI_USIZE: usize = HI_U64 as usize;
+
+ x.wrapping_sub(LO_USIZE) & !x & HI_USIZE != 0
+}
+
+/// Repeat the given byte into a word size number. That is, every 8 bits
+/// is equivalent to the given byte. For example, if `b` is `\x4E` or
+/// `01001110` in binary, then the returned value on a 32-bit system would be:
+/// `01001110_01001110_01001110_01001110`.
+#[inline(always)]
+fn repeat_byte(b: u8) -> usize {
+ (b as usize) * (usize::MAX / 255)
+}
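+
+// A small illustrative test (not part of the upstream crate) tying the two
+// helpers above together: XORing a word with `repeat_byte(needle)` zeroes
+// exactly the bytes equal to the needle, and `contains_zero_byte` then
+// reports whether any byte matched. Assumes a 64-bit target and a test build.
+#[cfg(all(test, target_pointer_width = "64"))]
+#[test]
+fn zero_byte_trick_example() {
+    assert_eq!(repeat_byte(0x4E), 0x4E4E_4E4E_4E4E_4E4E);
+    // A word full of 'a' (0x61) bytes with a single 'X' (0x58) byte in it.
+    let word: usize = 0x6161_6161_6158_6161;
+    // Only the 'a' bytes cancel to zero, so a search for 'a' hits...
+    assert!(contains_zero_byte(word ^ repeat_byte(b'a')));
+    // ...while a search for a byte that isn't present does not.
+    assert!(!contains_zero_byte(word ^ repeat_byte(b'z')));
+}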
+
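+// The forward routines below all follow the same shape: one unaligned word
+// read at the front, an aligned word-at-a-time loop over the middle (memchr
+// unrolls it to two words per iteration), and a byte-wise `forward_search`
+// for whatever remains, which also handles haystacks shorter than a word.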
+pub fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = repeat_byte(n1);
+ let confirm = |byte| byte == n1;
+ let loop_size = cmp::min(LOOP_SIZE, haystack.len());
+ let align = USIZE_BYTES - 1;
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ unsafe {
+ if haystack.len() < USIZE_BYTES {
+ return forward_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ let chunk = (ptr as *const usize).read_unaligned();
+ if contains_zero_byte(chunk ^ vn1) {
+ return forward_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & align));
+ debug_assert!(ptr > start_ptr);
+ debug_assert!(end_ptr.sub(USIZE_BYTES) >= start_ptr);
+ while loop_size == LOOP_SIZE && ptr <= end_ptr.sub(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
+
+ let a = *(ptr as *const usize);
+ let b = *(ptr.add(USIZE_BYTES) as *const usize);
+ let eqa = contains_zero_byte(a ^ vn1);
+ let eqb = contains_zero_byte(b ^ vn1);
+ if eqa || eqb {
+ break;
+ }
+ ptr = ptr.add(LOOP_SIZE);
+ }
+ forward_search(start_ptr, end_ptr, ptr, confirm)
+ }
+}
+
+/// Like `memchr`, but searches for two bytes instead of one.
+pub fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = repeat_byte(n1);
+ let vn2 = repeat_byte(n2);
+ let confirm = |byte| byte == n1 || byte == n2;
+ let align = USIZE_BYTES - 1;
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ unsafe {
+ if haystack.len() < USIZE_BYTES {
+ return forward_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ let chunk = (ptr as *const usize).read_unaligned();
+ let eq1 = contains_zero_byte(chunk ^ vn1);
+ let eq2 = contains_zero_byte(chunk ^ vn2);
+ if eq1 || eq2 {
+ return forward_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & align));
+ debug_assert!(ptr > start_ptr);
+ debug_assert!(end_ptr.sub(USIZE_BYTES) >= start_ptr);
+ while ptr <= end_ptr.sub(USIZE_BYTES) {
+ debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
+
+ let chunk = *(ptr as *const usize);
+ let eq1 = contains_zero_byte(chunk ^ vn1);
+ let eq2 = contains_zero_byte(chunk ^ vn2);
+ if eq1 || eq2 {
+ break;
+ }
+ ptr = ptr.add(USIZE_BYTES);
+ }
+ forward_search(start_ptr, end_ptr, ptr, confirm)
+ }
+}
+
+/// Like `memchr`, but searches for three bytes instead of one.
+pub fn memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = repeat_byte(n1);
+ let vn2 = repeat_byte(n2);
+ let vn3 = repeat_byte(n3);
+ let confirm = |byte| byte == n1 || byte == n2 || byte == n3;
+ let align = USIZE_BYTES - 1;
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ unsafe {
+ if haystack.len() < USIZE_BYTES {
+ return forward_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ let chunk = (ptr as *const usize).read_unaligned();
+ let eq1 = contains_zero_byte(chunk ^ vn1);
+ let eq2 = contains_zero_byte(chunk ^ vn2);
+ let eq3 = contains_zero_byte(chunk ^ vn3);
+ if eq1 || eq2 || eq3 {
+ return forward_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & align));
+ debug_assert!(ptr > start_ptr);
+ debug_assert!(end_ptr.sub(USIZE_BYTES) >= start_ptr);
+ while ptr <= end_ptr.sub(USIZE_BYTES) {
+ debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
+
+ let chunk = *(ptr as *const usize);
+ let eq1 = contains_zero_byte(chunk ^ vn1);
+ let eq2 = contains_zero_byte(chunk ^ vn2);
+ let eq3 = contains_zero_byte(chunk ^ vn3);
+ if eq1 || eq2 || eq3 {
+ break;
+ }
+ ptr = ptr.add(USIZE_BYTES);
+ }
+ forward_search(start_ptr, end_ptr, ptr, confirm)
+ }
+}
+
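+// The reverse routines mirror the forward ones: one unaligned word read at
+// the back, an aligned loop walking backwards, and a byte-wise
+// `reverse_search` for the remainder.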
+/// Return the last index matching the byte `x` in `text`.
+pub fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = repeat_byte(n1);
+ let confirm = |byte| byte == n1;
+ let loop_size = cmp::min(LOOP_SIZE, haystack.len());
+ let align = USIZE_BYTES - 1;
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = end_ptr;
+
+ unsafe {
+ if haystack.len() < USIZE_BYTES {
+ return reverse_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ let chunk = (ptr.sub(USIZE_BYTES) as *const usize).read_unaligned();
+ if contains_zero_byte(chunk ^ vn1) {
+ return reverse_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ ptr = (end_ptr as usize & !align) as *const u8;
+ debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
+ while loop_size == LOOP_SIZE && ptr >= start_ptr.add(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
+
+ let a = *(ptr.sub(2 * USIZE_BYTES) as *const usize);
+ let b = *(ptr.sub(1 * USIZE_BYTES) as *const usize);
+ let eqa = contains_zero_byte(a ^ vn1);
+ let eqb = contains_zero_byte(b ^ vn1);
+ if eqa || eqb {
+ break;
+ }
+ ptr = ptr.sub(loop_size);
+ }
+ reverse_search(start_ptr, end_ptr, ptr, confirm)
+ }
+}
+
+/// Like `memrchr`, but searches for two bytes instead of one.
+pub fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = repeat_byte(n1);
+ let vn2 = repeat_byte(n2);
+ let confirm = |byte| byte == n1 || byte == n2;
+ let align = USIZE_BYTES - 1;
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = end_ptr;
+
+ unsafe {
+ if haystack.len() < USIZE_BYTES {
+ return reverse_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ let chunk = (ptr.sub(USIZE_BYTES) as *const usize).read_unaligned();
+ let eq1 = contains_zero_byte(chunk ^ vn1);
+ let eq2 = contains_zero_byte(chunk ^ vn2);
+ if eq1 || eq2 {
+ return reverse_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ ptr = (end_ptr as usize & !align) as *const u8;
+ debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
+ while ptr >= start_ptr.add(USIZE_BYTES) {
+ debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
+
+ let chunk = *(ptr.sub(USIZE_BYTES) as *const usize);
+ let eq1 = contains_zero_byte(chunk ^ vn1);
+ let eq2 = contains_zero_byte(chunk ^ vn2);
+ if eq1 || eq2 {
+ break;
+ }
+ ptr = ptr.sub(USIZE_BYTES);
+ }
+ reverse_search(start_ptr, end_ptr, ptr, confirm)
+ }
+}
+
+/// Like `memrchr`, but searches for three bytes instead of one.
+pub fn memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = repeat_byte(n1);
+ let vn2 = repeat_byte(n2);
+ let vn3 = repeat_byte(n3);
+ let confirm = |byte| byte == n1 || byte == n2 || byte == n3;
+ let align = USIZE_BYTES - 1;
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = end_ptr;
+
+ unsafe {
+ if haystack.len() < USIZE_BYTES {
+ return reverse_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ let chunk = (ptr.sub(USIZE_BYTES) as *const usize).read_unaligned();
+ let eq1 = contains_zero_byte(chunk ^ vn1);
+ let eq2 = contains_zero_byte(chunk ^ vn2);
+ let eq3 = contains_zero_byte(chunk ^ vn3);
+ if eq1 || eq2 || eq3 {
+ return reverse_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ ptr = (end_ptr as usize & !align) as *const u8;
+ debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
+ while ptr >= start_ptr.add(USIZE_BYTES) {
+ debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
+
+ let chunk = *(ptr.sub(USIZE_BYTES) as *const usize);
+ let eq1 = contains_zero_byte(chunk ^ vn1);
+ let eq2 = contains_zero_byte(chunk ^ vn2);
+ let eq3 = contains_zero_byte(chunk ^ vn3);
+ if eq1 || eq2 || eq3 {
+ break;
+ }
+ ptr = ptr.sub(USIZE_BYTES);
+ }
+ reverse_search(start_ptr, end_ptr, ptr, confirm)
+ }
+}
+
+#[inline(always)]
+unsafe fn forward_search<F: Fn(u8) -> bool>(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ mut ptr: *const u8,
+ confirm: F,
+) -> Option<usize> {
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr);
+
+ while ptr < end_ptr {
+ if confirm(*ptr) {
+ return Some(sub(ptr, start_ptr));
+ }
+ ptr = ptr.offset(1);
+ }
+ None
+}
+
+#[inline(always)]
+unsafe fn reverse_search<F: Fn(u8) -> bool>(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ mut ptr: *const u8,
+ confirm: F,
+) -> Option<usize> {
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr);
+
+ while ptr > start_ptr {
+ ptr = ptr.offset(-1);
+ if confirm(*ptr) {
+ return Some(sub(ptr, start_ptr));
+ }
+ }
+ None
+}
+
+/// Subtract `b` from `a` and return the difference. `a` should be greater than
+/// or equal to `b`.
+fn sub(a: *const u8, b: *const u8) -> usize {
+ debug_assert!(a >= b);
+ (a as usize) - (b as usize)
+}
diff --git a/third_party/rust/memchr/src/iter.rs b/third_party/rust/memchr/src/iter.rs
new file mode 100644
index 0000000000..6217ae4a09
--- /dev/null
+++ b/third_party/rust/memchr/src/iter.rs
@@ -0,0 +1,173 @@
+use {memchr, memchr2, memchr3, memrchr, memrchr2, memrchr3};
+
+macro_rules! iter_next {
+ // Common code for the memchr iterators:
+ // update haystack and position and produce the index
+ //
+ // self: &mut Self where Self is the iterator
+ // search_result: Option<usize> which is the result of the corresponding
+ // memchr function.
+ //
+ // Returns Option<usize> (the next iterator element)
+ ($self_:expr, $search_result:expr) => {
+ $search_result.map(move |index| {
+ // split and take the remaining back half
+ $self_.haystack = $self_.haystack.split_at(index + 1).1;
+ let found_position = $self_.position + index;
+ $self_.position = found_position + 1;
+ found_position
+ })
+ };
+}
+
+macro_rules! iter_next_back {
+ ($self_:expr, $search_result:expr) => {
+ $search_result.map(move |index| {
+ // split and take the remaining front half
+ $self_.haystack = $self_.haystack.split_at(index).0;
+ $self_.position + index
+ })
+ };
+}
+
+/// An iterator for `memchr`.
+pub struct Memchr<'a> {
+ needle: u8,
+ // The haystack to iterate over
+ haystack: &'a [u8],
+ // The index
+ position: usize,
+}
+
+impl<'a> Memchr<'a> {
+ /// Creates a new iterator that yields all positions of needle in haystack.
+ #[inline]
+ pub fn new(needle: u8, haystack: &[u8]) -> Memchr {
+ Memchr { needle: needle, haystack: haystack, position: 0 }
+ }
+}
+
+impl<'a> Iterator for Memchr<'a> {
+ type Item = usize;
+
+ #[inline]
+ fn next(&mut self) -> Option<usize> {
+ iter_next!(self, memchr(self.needle, self.haystack))
+ }
+
+ #[inline]
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ (0, Some(self.haystack.len()))
+ }
+}
+
+impl<'a> DoubleEndedIterator for Memchr<'a> {
+ #[inline]
+ fn next_back(&mut self) -> Option<Self::Item> {
+ iter_next_back!(self, memrchr(self.needle, self.haystack))
+ }
+}
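+
+// An illustrative test (not part of the upstream crate) of the bookkeeping in
+// `iter_next!`/`iter_next_back!`: reported positions are indices into the
+// original haystack, even though the iterator re-slices it as it goes.
+// Assumes a test build with the standard test harness.
+#[cfg(test)]
+#[test]
+fn memchr_iter_positions_example() {
+    let mut it = Memchr::new(b'a', b"aXa");
+    // The front half finds index 0 and the back half finds index 2, both
+    // relative to the original three-byte haystack.
+    assert_eq!(it.next(), Some(0));
+    assert_eq!(it.next_back(), Some(2));
+    assert_eq!(it.next(), None);
+}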
+
+/// An iterator for `memchr2`.
+pub struct Memchr2<'a> {
+ needle1: u8,
+ needle2: u8,
+ // The haystack to iterate over
+ haystack: &'a [u8],
+ // The index
+ position: usize,
+}
+
+impl<'a> Memchr2<'a> {
+ /// Creates a new iterator that yields all positions of needle in haystack.
+ #[inline]
+ pub fn new(needle1: u8, needle2: u8, haystack: &[u8]) -> Memchr2 {
+ Memchr2 {
+ needle1: needle1,
+ needle2: needle2,
+ haystack: haystack,
+ position: 0,
+ }
+ }
+}
+
+impl<'a> Iterator for Memchr2<'a> {
+ type Item = usize;
+
+ #[inline]
+ fn next(&mut self) -> Option<usize> {
+ iter_next!(self, memchr2(self.needle1, self.needle2, self.haystack))
+ }
+
+ #[inline]
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ (0, Some(self.haystack.len()))
+ }
+}
+
+impl<'a> DoubleEndedIterator for Memchr2<'a> {
+ #[inline]
+ fn next_back(&mut self) -> Option<Self::Item> {
+ iter_next_back!(
+ self,
+ memrchr2(self.needle1, self.needle2, self.haystack)
+ )
+ }
+}
+
+/// An iterator for `memchr3`.
+pub struct Memchr3<'a> {
+ needle1: u8,
+ needle2: u8,
+ needle3: u8,
+ // The haystack to iterate over
+ haystack: &'a [u8],
+ // The index
+ position: usize,
+}
+
+impl<'a> Memchr3<'a> {
+    /// Creates a new iterator that yields all positions of the needles in
+    /// the haystack.
+ #[inline]
+ pub fn new(
+ needle1: u8,
+ needle2: u8,
+ needle3: u8,
+ haystack: &[u8],
+ ) -> Memchr3 {
+ Memchr3 {
+ needle1: needle1,
+ needle2: needle2,
+ needle3: needle3,
+ haystack: haystack,
+ position: 0,
+ }
+ }
+}
+
+impl<'a> Iterator for Memchr3<'a> {
+ type Item = usize;
+
+ #[inline]
+ fn next(&mut self) -> Option<usize> {
+ iter_next!(
+ self,
+ memchr3(self.needle1, self.needle2, self.needle3, self.haystack)
+ )
+ }
+
+ #[inline]
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ (0, Some(self.haystack.len()))
+ }
+}
+
+impl<'a> DoubleEndedIterator for Memchr3<'a> {
+ #[inline]
+ fn next_back(&mut self) -> Option<Self::Item> {
+ iter_next_back!(
+ self,
+ memrchr3(self.needle1, self.needle2, self.needle3, self.haystack)
+ )
+ }
+}
diff --git a/third_party/rust/memchr/src/lib.rs b/third_party/rust/memchr/src/lib.rs
new file mode 100644
index 0000000000..fed7108734
--- /dev/null
+++ b/third_party/rust/memchr/src/lib.rs
@@ -0,0 +1,451 @@
+/*!
+The `memchr` crate provides heavily optimized routines for searching bytes.
+
+The `memchr` function is traditionally provided by libc, however, the
+performance of `memchr` can vary significantly depending on the specific
+implementation of libc that is used. They can range from manually tuned
+Assembly implementations (like that found in GNU's libc) all the way to
+non-vectorized C implementations (like that found in MUSL).
+
+To smooth out the differences between implementations of libc, at least
+on `x86_64` for Rust 1.27+, this crate provides its own implementation of
+`memchr` that should perform competitively with the one found in GNU's libc.
+The implementation is in pure Rust and has no dependency on a C compiler or an
+Assembler.
+
+Additionally, GNU libc also provides an extension, `memrchr`. This crate
+provides its own implementation of `memrchr` as well, along with `memchr2`,
+`memchr3`, `memrchr2` and `memrchr3`. The difference between `memchr` and
+`memchr2` is that `memchr2` permits finding all occurrences of two bytes
+instead of one. Similarly for `memchr3`.
+*/
+
+#![cfg_attr(not(feature = "std"), no_std)]
+#![deny(missing_docs)]
+#![doc(html_root_url = "https://docs.rs/memchr/2.0.0")]
+
+// Supporting 8-bit (or others) would be fine. If you need it, please submit a
+// bug report at https://github.com/BurntSushi/rust-memchr
+#[cfg(not(any(
+ target_pointer_width = "16",
+ target_pointer_width = "32",
+ target_pointer_width = "64"
+)))]
+compile_error!("memchr currently not supported on non-16/32/64 bit platforms");
+
+#[cfg(feature = "std")]
+extern crate core;
+
+#[cfg(all(test, all(not(miri), feature = "std")))]
+#[macro_use]
+extern crate quickcheck;
+
+use core::iter::Rev;
+
+pub use iter::{Memchr, Memchr2, Memchr3};
+
+// N.B. If you're looking for the cfg knobs for libc, see build.rs.
+#[cfg(memchr_libc)]
+mod c;
+#[allow(dead_code)]
+mod fallback;
+mod iter;
+mod naive;
+#[cfg(all(test, all(not(miri), feature = "std")))]
+mod tests;
+#[cfg(all(test, any(miri, not(feature = "std"))))]
+#[path = "tests/miri.rs"]
+mod tests;
+#[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))]
+mod x86;
+
+/// An iterator over all occurrences of the needle in a haystack.
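+///
+/// # Example
+///
+/// This shows how to find every position of a byte in a byte string.
+///
+/// ```
+/// use memchr::memchr_iter;
+///
+/// let haystack = b"the quick brown fox";
+/// let positions: Vec<usize> = memchr_iter(b'o', haystack).collect();
+/// assert_eq!(positions, vec![12, 17]);
+/// ```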
+#[inline]
+pub fn memchr_iter(needle: u8, haystack: &[u8]) -> Memchr {
+ Memchr::new(needle, haystack)
+}
+
+/// An iterator over all occurrences of the needles in a haystack.
+#[inline]
+pub fn memchr2_iter(needle1: u8, needle2: u8, haystack: &[u8]) -> Memchr2 {
+ Memchr2::new(needle1, needle2, haystack)
+}
+
+/// An iterator over all occurrences of the needles in a haystack.
+#[inline]
+pub fn memchr3_iter(
+ needle1: u8,
+ needle2: u8,
+ needle3: u8,
+ haystack: &[u8],
+) -> Memchr3 {
+ Memchr3::new(needle1, needle2, needle3, haystack)
+}
+
+/// An iterator over all occurrences of the needle in a haystack, in reverse.
+#[inline]
+pub fn memrchr_iter(needle: u8, haystack: &[u8]) -> Rev<Memchr> {
+ Memchr::new(needle, haystack).rev()
+}
+
+/// An iterator over all occurrences of the needles in a haystack, in reverse.
+#[inline]
+pub fn memrchr2_iter(
+ needle1: u8,
+ needle2: u8,
+ haystack: &[u8],
+) -> Rev<Memchr2> {
+ Memchr2::new(needle1, needle2, haystack).rev()
+}
+
+/// An iterator over all occurrences of the needles in a haystack, in reverse.
+#[inline]
+pub fn memrchr3_iter(
+ needle1: u8,
+ needle2: u8,
+ needle3: u8,
+ haystack: &[u8],
+) -> Rev<Memchr3> {
+ Memchr3::new(needle1, needle2, needle3, haystack).rev()
+}
+
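+// Each routine below picks exactly one `imp` at compile time via mutually
+// exclusive cfg predicates: Miri gets the naive implementation, x86_64 with
+// runtime SIMD support gets the x86 module, the libc wrappers are used for
+// `memchr`/`memrchr` when the `memchr_libc` cfg is set (memrchr only on
+// Linux), and the pure Rust fallback covers everything else.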
+/// Search for the first occurrence of a byte in a slice.
+///
+/// This returns the index corresponding to the first occurrence of `needle` in
+/// `haystack`, or `None` if one is not found.
+///
+/// While this is operationally the same as something like
+/// `haystack.iter().position(|&b| b == needle)`, `memchr` will use a highly
+/// optimized routine that can be up to an order of magnitude faster in some
+/// cases.
+///
+/// # Example
+///
+/// This shows how to find the first position of a byte in a byte string.
+///
+/// ```
+/// use memchr::memchr;
+///
+/// let haystack = b"the quick brown fox";
+/// assert_eq!(memchr(b'k', haystack), Some(8));
+/// ```
+#[inline]
+pub fn memchr(needle: u8, haystack: &[u8]) -> Option<usize> {
+ #[cfg(miri)]
+ #[inline(always)]
+ fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
+ naive::memchr(n1, haystack)
+ }
+
+ #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))]
+ #[inline(always)]
+ fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
+ x86::memchr(n1, haystack)
+ }
+
+ #[cfg(all(
+ memchr_libc,
+ not(all(target_arch = "x86_64", memchr_runtime_simd)),
+ not(miri),
+ ))]
+ #[inline(always)]
+ fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
+ c::memchr(n1, haystack)
+ }
+
+ #[cfg(all(
+ not(memchr_libc),
+ not(all(target_arch = "x86_64", memchr_runtime_simd)),
+ not(miri),
+ ))]
+ #[inline(always)]
+ fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
+ fallback::memchr(n1, haystack)
+ }
+
+ if haystack.is_empty() {
+ None
+ } else {
+ imp(needle, haystack)
+ }
+}
+
+/// Like `memchr`, but searches for either of two bytes instead of just one.
+///
+/// This returns the index corresponding to the first occurrence of `needle1`
+/// or the first occurrence of `needle2` in `haystack` (whichever occurs
+/// earlier), or `None` if neither one is found.
+///
+/// While this is operationally the same as something like
+/// `haystack.iter().position(|&b| b == needle1 || b == needle2)`, `memchr2`
+/// will use a highly optimized routine that can be up to an order of magnitude
+/// faster in some cases.
+///
+/// # Example
+///
+/// This shows how to find the first position of either of two bytes in a byte
+/// string.
+///
+/// ```
+/// use memchr::memchr2;
+///
+/// let haystack = b"the quick brown fox";
+/// assert_eq!(memchr2(b'k', b'q', haystack), Some(4));
+/// ```
+#[inline]
+pub fn memchr2(needle1: u8, needle2: u8, haystack: &[u8]) -> Option<usize> {
+ #[cfg(miri)]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ naive::memchr2(n1, n2, haystack)
+ }
+
+ #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ x86::memchr2(n1, n2, haystack)
+ }
+
+ #[cfg(all(
+ not(all(target_arch = "x86_64", memchr_runtime_simd)),
+ not(miri),
+ ))]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ fallback::memchr2(n1, n2, haystack)
+ }
+
+ if haystack.is_empty() {
+ None
+ } else {
+ imp(needle1, needle2, haystack)
+ }
+}
+
+/// Like `memchr`, but searches for any of three bytes instead of just one.
+///
+/// This returns the index corresponding to the first occurrence of `needle1`,
+/// the first occurrence of `needle2`, or the first occurrence of `needle3` in
+/// `haystack` (whichever occurs earliest), or `None` if none are found.
+///
+/// While this is operationally the same as something like
+/// `haystack.iter().position(|&b| b == needle1 || b == needle2 ||
+/// b == needle3)`, `memchr3` will use a highly optimized routine that can be
+/// up to an order of magnitude faster in some cases.
+///
+/// # Example
+///
+/// This shows how to find the first position of any of three bytes in a byte
+/// string.
+///
+/// ```
+/// use memchr::memchr3;
+///
+/// let haystack = b"the quick brown fox";
+/// assert_eq!(memchr3(b'k', b'q', b'e', haystack), Some(2));
+/// ```
+#[inline]
+pub fn memchr3(
+ needle1: u8,
+ needle2: u8,
+ needle3: u8,
+ haystack: &[u8],
+) -> Option<usize> {
+ #[cfg(miri)]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ naive::memchr3(n1, n2, n3, haystack)
+ }
+
+ #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ x86::memchr3(n1, n2, n3, haystack)
+ }
+
+ #[cfg(all(
+ not(all(target_arch = "x86_64", memchr_runtime_simd)),
+ not(miri),
+ ))]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ fallback::memchr3(n1, n2, n3, haystack)
+ }
+
+ if haystack.is_empty() {
+ None
+ } else {
+ imp(needle1, needle2, needle3, haystack)
+ }
+}
+
+/// Search for the last occurrence of a byte in a slice.
+///
+/// This returns the index corresponding to the last occurrence of `needle` in
+/// `haystack`, or `None` if one is not found.
+///
+/// While this is operationally the same as something like
+/// `haystack.iter().rposition(|&b| b == needle)`, `memrchr` will use a highly
+/// optimized routine that can be up to an order of magnitude faster in some
+/// cases.
+///
+/// # Example
+///
+/// This shows how to find the last position of a byte in a byte string.
+///
+/// ```
+/// use memchr::memrchr;
+///
+/// let haystack = b"the quick brown fox";
+/// assert_eq!(memrchr(b'o', haystack), Some(17));
+/// ```
+#[inline]
+pub fn memrchr(needle: u8, haystack: &[u8]) -> Option<usize> {
+ #[cfg(miri)]
+ #[inline(always)]
+ fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
+ naive::memrchr(n1, haystack)
+ }
+
+ #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))]
+ #[inline(always)]
+ fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
+ x86::memrchr(n1, haystack)
+ }
+
+ #[cfg(all(
+ memchr_libc,
+ target_os = "linux",
+ not(all(target_arch = "x86_64", memchr_runtime_simd)),
+ not(miri)
+ ))]
+ #[inline(always)]
+ fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
+ c::memrchr(n1, haystack)
+ }
+
+ #[cfg(all(
+ not(all(memchr_libc, target_os = "linux")),
+ not(all(target_arch = "x86_64", memchr_runtime_simd)),
+ not(miri),
+ ))]
+ #[inline(always)]
+ fn imp(n1: u8, haystack: &[u8]) -> Option<usize> {
+ fallback::memrchr(n1, haystack)
+ }
+
+ if haystack.is_empty() {
+ None
+ } else {
+ imp(needle, haystack)
+ }
+}
+
+/// Like `memrchr`, but searches for either of two bytes instead of just one.
+///
+/// This returns the index corresponding to the last occurrence of `needle1`
+/// or the last occurrence of `needle2` in `haystack` (whichever occurs later),
+/// or `None` if neither one is found.
+///
+/// While this is operationally the same as something like
+/// `haystack.iter().rposition(|&b| b == needle1 || b == needle2)`, `memrchr2`
+/// will use a highly optimized routine that can be up to an order of magnitude
+/// faster in some cases.
+///
+/// # Example
+///
+/// This shows how to find the last position of either of two bytes in a byte
+/// string.
+///
+/// ```
+/// use memchr::memrchr2;
+///
+/// let haystack = b"the quick brown fox";
+/// assert_eq!(memrchr2(b'k', b'q', haystack), Some(8));
+/// ```
+#[inline]
+pub fn memrchr2(needle1: u8, needle2: u8, haystack: &[u8]) -> Option<usize> {
+ #[cfg(miri)]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ naive::memrchr2(n1, n2, haystack)
+ }
+
+ #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ x86::memrchr2(n1, n2, haystack)
+ }
+
+ #[cfg(all(
+ not(all(target_arch = "x86_64", memchr_runtime_simd)),
+ not(miri),
+ ))]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ fallback::memrchr2(n1, n2, haystack)
+ }
+
+ if haystack.is_empty() {
+ None
+ } else {
+ imp(needle1, needle2, haystack)
+ }
+}
+
+/// Like `memrchr`, but searches for any of three bytes instead of just one.
+///
+/// This returns the index corresponding to the last occurrence of `needle1`,
+/// the last occurrence of `needle2`, or the last occurrence of `needle3` in
+/// `haystack` (whichever occurs latest), or `None` if none are found.
+///
+/// While this is operationally the same as something like
+/// `haystack.iter().rposition(|&b| b == needle1 || b == needle2 ||
+/// b == needle3)`, `memrchr3` will use a highly optimized routine that can be
+/// up to an order of magnitude faster in some cases.
+///
+/// # Example
+///
+/// This shows how to find the last position of any of three bytes in a byte
+/// string.
+///
+/// ```
+/// use memchr::memrchr3;
+///
+/// let haystack = b"the quick brown fox";
+/// assert_eq!(memrchr3(b'k', b'q', b'e', haystack), Some(8));
+/// ```
+#[inline]
+pub fn memrchr3(
+ needle1: u8,
+ needle2: u8,
+ needle3: u8,
+ haystack: &[u8],
+) -> Option<usize> {
+ #[cfg(miri)]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ naive::memrchr3(n1, n2, n3, haystack)
+ }
+
+ #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ x86::memrchr3(n1, n2, n3, haystack)
+ }
+
+ #[cfg(all(
+ not(all(target_arch = "x86_64", memchr_runtime_simd)),
+ not(miri),
+ ))]
+ #[inline(always)]
+ fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ fallback::memrchr3(n1, n2, n3, haystack)
+ }
+
+ if haystack.is_empty() {
+ None
+ } else {
+ imp(needle1, needle2, needle3, haystack)
+ }
+}
diff --git a/third_party/rust/memchr/src/naive.rs b/third_party/rust/memchr/src/naive.rs
new file mode 100644
index 0000000000..3f3053d481
--- /dev/null
+++ b/third_party/rust/memchr/src/naive.rs
@@ -0,0 +1,25 @@
+#![allow(dead_code)]
+
+pub fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ haystack.iter().position(|&b| b == n1)
+}
+
+pub fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ haystack.iter().position(|&b| b == n1 || b == n2)
+}
+
+pub fn memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ haystack.iter().position(|&b| b == n1 || b == n2 || b == n3)
+}
+
+pub fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ haystack.iter().rposition(|&b| b == n1)
+}
+
+pub fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ haystack.iter().rposition(|&b| b == n1 || b == n2)
+}
+
+pub fn memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ haystack.iter().rposition(|&b| b == n1 || b == n2 || b == n3)
+}
diff --git a/third_party/rust/memchr/src/tests/iter.rs b/third_party/rust/memchr/src/tests/iter.rs
new file mode 100644
index 0000000000..8f335003b9
--- /dev/null
+++ b/third_party/rust/memchr/src/tests/iter.rs
@@ -0,0 +1,229 @@
+use tests::memchr_tests;
+use {Memchr, Memchr2, Memchr3};
+
+#[test]
+fn memchr1_iter() {
+ for test in memchr_tests() {
+ test.iter_one(false, Memchr::new);
+ }
+}
+
+#[test]
+fn memchr2_iter() {
+ for test in memchr_tests() {
+ test.iter_two(false, Memchr2::new);
+ }
+}
+
+#[test]
+fn memchr3_iter() {
+ for test in memchr_tests() {
+ test.iter_three(false, Memchr3::new);
+ }
+}
+
+#[test]
+fn memrchr1_iter() {
+ for test in memchr_tests() {
+ test.iter_one(true, |n1, corpus| Memchr::new(n1, corpus).rev());
+ }
+}
+
+#[test]
+fn memrchr2_iter() {
+ for test in memchr_tests() {
+ test.iter_two(true, |n1, n2, corpus| {
+ Memchr2::new(n1, n2, corpus).rev()
+ })
+ }
+}
+
+#[test]
+fn memrchr3_iter() {
+ for test in memchr_tests() {
+ test.iter_three(true, |n1, n2, n3, corpus| {
+ Memchr3::new(n1, n2, n3, corpus).rev()
+ })
+ }
+}
+
+quickcheck! {
+ fn qc_memchr_double_ended_iter(
+ needle: u8, data: Vec<u8>, take_side: Vec<bool>
+ ) -> bool {
+ // make nonempty
+ let mut take_side = take_side;
+ if take_side.is_empty() { take_side.push(true) };
+
+ let iter = Memchr::new(needle, &data);
+ let all_found = double_ended_take(
+ iter, take_side.iter().cycle().cloned());
+
+ all_found.iter().cloned().eq(positions1(needle, &data))
+ }
+
+ fn qc_memchr2_double_ended_iter(
+ needle1: u8, needle2: u8, data: Vec<u8>, take_side: Vec<bool>
+ ) -> bool {
+ // make nonempty
+ let mut take_side = take_side;
+ if take_side.is_empty() { take_side.push(true) };
+
+ let iter = Memchr2::new(needle1, needle2, &data);
+ let all_found = double_ended_take(
+ iter, take_side.iter().cycle().cloned());
+
+ all_found.iter().cloned().eq(positions2(needle1, needle2, &data))
+ }
+
+ fn qc_memchr3_double_ended_iter(
+ needle1: u8, needle2: u8, needle3: u8,
+ data: Vec<u8>, take_side: Vec<bool>
+ ) -> bool {
+ // make nonempty
+ let mut take_side = take_side;
+ if take_side.is_empty() { take_side.push(true) };
+
+ let iter = Memchr3::new(needle1, needle2, needle3, &data);
+ let all_found = double_ended_take(
+ iter, take_side.iter().cycle().cloned());
+
+ all_found
+ .iter()
+ .cloned()
+ .eq(positions3(needle1, needle2, needle3, &data))
+ }
+
+ fn qc_memchr1_iter(data: Vec<u8>) -> bool {
+ let needle = 0;
+ let answer = positions1(needle, &data);
+ answer.eq(Memchr::new(needle, &data))
+ }
+
+ fn qc_memchr1_rev_iter(data: Vec<u8>) -> bool {
+ let needle = 0;
+ let answer = positions1(needle, &data);
+ answer.rev().eq(Memchr::new(needle, &data).rev())
+ }
+
+ fn qc_memchr2_iter(data: Vec<u8>) -> bool {
+ let needle1 = 0;
+ let needle2 = 1;
+ let answer = positions2(needle1, needle2, &data);
+ answer.eq(Memchr2::new(needle1, needle2, &data))
+ }
+
+ fn qc_memchr2_rev_iter(data: Vec<u8>) -> bool {
+ let needle1 = 0;
+ let needle2 = 1;
+ let answer = positions2(needle1, needle2, &data);
+ answer.rev().eq(Memchr2::new(needle1, needle2, &data).rev())
+ }
+
+ fn qc_memchr3_iter(data: Vec<u8>) -> bool {
+ let needle1 = 0;
+ let needle2 = 1;
+ let needle3 = 2;
+ let answer = positions3(needle1, needle2, needle3, &data);
+ answer.eq(Memchr3::new(needle1, needle2, needle3, &data))
+ }
+
+ fn qc_memchr3_rev_iter(data: Vec<u8>) -> bool {
+ let needle1 = 0;
+ let needle2 = 1;
+ let needle3 = 2;
+ let answer = positions3(needle1, needle2, needle3, &data);
+ answer.rev().eq(Memchr3::new(needle1, needle2, needle3, &data).rev())
+ }
+
+ fn qc_memchr1_iter_size_hint(data: Vec<u8>) -> bool {
+ // test that the size hint is within reasonable bounds
+ let needle = 0;
+ let mut iter = Memchr::new(needle, &data);
+ let mut real_count = data
+ .iter()
+ .filter(|&&elt| elt == needle)
+ .count();
+
+ while let Some(index) = iter.next() {
+ real_count -= 1;
+ let (lower, upper) = iter.size_hint();
+ assert!(lower <= real_count);
+ assert!(upper.unwrap() >= real_count);
+ assert!(upper.unwrap() <= data.len() - index);
+ }
+ true
+ }
+}
+
+// take items from a DEI, taking front for each true and back for each false.
+// Return a vector with the concatenation of the fronts and the reverse of the
+// backs.
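+//
+// For example (illustrative): over an iterator yielding 1, 2, 3, 4, 5 with
+// take_side = [true, false, true], we take 1 from the front, 5 from the back,
+// then 2 from the front, and return [1, 2, 5].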
+fn double_ended_take<I, J>(mut iter: I, take_side: J) -> Vec<I::Item>
+where
+ I: DoubleEndedIterator,
+ J: Iterator<Item = bool>,
+{
+ let mut found_front = Vec::new();
+ let mut found_back = Vec::new();
+
+ for take_front in take_side {
+ if take_front {
+ if let Some(pos) = iter.next() {
+ found_front.push(pos);
+ } else {
+ break;
+ }
+ } else {
+ if let Some(pos) = iter.next_back() {
+ found_back.push(pos);
+ } else {
+ break;
+ }
+ };
+ }
+
+ let mut all_found = found_front;
+ all_found.extend(found_back.into_iter().rev());
+ all_found
+}
+
+// return an iterator of the 0-based indices of haystack that match the needle
+fn positions1<'a>(
+ n1: u8,
+ haystack: &'a [u8],
+) -> Box<dyn DoubleEndedIterator<Item = usize> + 'a> {
+ let it = haystack
+ .iter()
+ .enumerate()
+ .filter(move |&(_, &b)| b == n1)
+ .map(|t| t.0);
+ Box::new(it)
+}
+
+fn positions2<'a>(
+ n1: u8,
+ n2: u8,
+ haystack: &'a [u8],
+) -> Box<dyn DoubleEndedIterator<Item = usize> + 'a> {
+ let it = haystack
+ .iter()
+ .enumerate()
+ .filter(move |&(_, &b)| b == n1 || b == n2)
+ .map(|t| t.0);
+ Box::new(it)
+}
+
+fn positions3<'a>(
+ n1: u8,
+ n2: u8,
+ n3: u8,
+ haystack: &'a [u8],
+) -> Box<dyn DoubleEndedIterator<Item = usize> + 'a> {
+ let it = haystack
+ .iter()
+ .enumerate()
+ .filter(move |&(_, &b)| b == n1 || b == n2 || b == n3)
+ .map(|t| t.0);
+ Box::new(it)
+}
diff --git a/third_party/rust/memchr/src/tests/memchr.rs b/third_party/rust/memchr/src/tests/memchr.rs
new file mode 100644
index 0000000000..87d3d14edd
--- /dev/null
+++ b/third_party/rust/memchr/src/tests/memchr.rs
@@ -0,0 +1,131 @@
+use fallback;
+use naive;
+use {memchr, memchr2, memchr3, memrchr, memrchr2, memrchr3};
+
+use tests::memchr_tests;
+
+#[test]
+fn memchr1_find() {
+ for test in memchr_tests() {
+ test.one(false, memchr);
+ }
+}
+
+#[test]
+fn memchr1_fallback_find() {
+ for test in memchr_tests() {
+ test.one(false, fallback::memchr);
+ }
+}
+
+#[test]
+fn memchr2_find() {
+ for test in memchr_tests() {
+ test.two(false, memchr2);
+ }
+}
+
+#[test]
+fn memchr2_fallback_find() {
+ for test in memchr_tests() {
+ test.two(false, fallback::memchr2);
+ }
+}
+
+#[test]
+fn memchr3_find() {
+ for test in memchr_tests() {
+ test.three(false, memchr3);
+ }
+}
+
+#[test]
+fn memchr3_fallback_find() {
+ for test in memchr_tests() {
+ test.three(false, fallback::memchr3);
+ }
+}
+
+#[test]
+fn memrchr1_find() {
+ for test in memchr_tests() {
+ test.one(true, memrchr);
+ }
+}
+
+#[test]
+fn memrchr1_fallback_find() {
+ for test in memchr_tests() {
+ test.one(true, fallback::memrchr);
+ }
+}
+
+#[test]
+fn memrchr2_find() {
+ for test in memchr_tests() {
+ test.two(true, memrchr2);
+ }
+}
+
+#[test]
+fn memrchr2_fallback_find() {
+ for test in memchr_tests() {
+ test.two(true, fallback::memrchr2);
+ }
+}
+
+#[test]
+fn memrchr3_find() {
+ for test in memchr_tests() {
+ test.three(true, memrchr3);
+ }
+}
+
+#[test]
+fn memrchr3_fallback_find() {
+ for test in memchr_tests() {
+ test.three(true, fallback::memrchr3);
+ }
+}
+
+quickcheck! {
+ fn qc_memchr1_matches_naive(n1: u8, corpus: Vec<u8>) -> bool {
+ memchr(n1, &corpus) == naive::memchr(n1, &corpus)
+ }
+}
+
+quickcheck! {
+ fn qc_memchr2_matches_naive(n1: u8, n2: u8, corpus: Vec<u8>) -> bool {
+ memchr2(n1, n2, &corpus) == naive::memchr2(n1, n2, &corpus)
+ }
+}
+
+quickcheck! {
+ fn qc_memchr3_matches_naive(
+ n1: u8, n2: u8, n3: u8,
+ corpus: Vec<u8>
+ ) -> bool {
+ memchr3(n1, n2, n3, &corpus) == naive::memchr3(n1, n2, n3, &corpus)
+ }
+}
+
+quickcheck! {
+ fn qc_memrchr1_matches_naive(n1: u8, corpus: Vec<u8>) -> bool {
+ memrchr(n1, &corpus) == naive::memrchr(n1, &corpus)
+ }
+}
+
+quickcheck! {
+ fn qc_memrchr2_matches_naive(n1: u8, n2: u8, corpus: Vec<u8>) -> bool {
+ memrchr2(n1, n2, &corpus) == naive::memrchr2(n1, n2, &corpus)
+ }
+}
+
+quickcheck! {
+ fn qc_memrchr3_matches_naive(
+ n1: u8, n2: u8, n3: u8,
+ corpus: Vec<u8>
+ ) -> bool {
+ memrchr3(n1, n2, n3, &corpus) == naive::memrchr3(n1, n2, n3, &corpus)
+ }
+}
diff --git a/third_party/rust/memchr/src/tests/miri.rs b/third_party/rust/memchr/src/tests/miri.rs
new file mode 100644
index 0000000000..879ef938ec
--- /dev/null
+++ b/third_party/rust/memchr/src/tests/miri.rs
@@ -0,0 +1,19 @@
+// Simple tests using MIRI
+
+use crate::{memchr, memchr2, memchr3, memrchr, memrchr2, memrchr3};
+
+#[test]
+fn test_with_miri() {
+ assert_eq!(memchr(b'a', b"abcda"), Some(0));
+ assert_eq!(memchr(b'z', b"abcda"), None);
+ assert_eq!(memchr2(b'a', b'z', b"abcda"), Some(0));
+ assert_eq!(memchr2(b'z', b'y', b"abcda"), None);
+ assert_eq!(memchr3(b'a', b'z', b'b', b"abcda"), Some(0));
+ assert_eq!(memchr3(b'z', b'y', b'x', b"abcda"), None);
+ assert_eq!(memrchr(b'a', b"abcda"), Some(4));
+ assert_eq!(memrchr(b'z', b"abcda"), None);
+ assert_eq!(memrchr2(b'a', b'z', b"abcda"), Some(4));
+ assert_eq!(memrchr2(b'z', b'y', b"abcda"), None);
+ assert_eq!(memrchr3(b'a', b'z', b'b', b"abcda"), Some(4));
+ assert_eq!(memrchr3(b'z', b'y', b'x', b"abcda"), None);
+}
diff --git a/third_party/rust/memchr/src/tests/mod.rs b/third_party/rust/memchr/src/tests/mod.rs
new file mode 100644
index 0000000000..82c1a248e9
--- /dev/null
+++ b/third_party/rust/memchr/src/tests/mod.rs
@@ -0,0 +1,362 @@
+use std::iter::repeat;
+
+mod iter;
+mod memchr;
+
+#[cfg(target_endian = "little")]
+#[test]
+fn byte_order() {
+ eprintln!("LITTLE ENDIAN");
+}
+
+#[cfg(target_endian = "big")]
+#[test]
+fn byte_order() {
+ eprintln!("BIG ENDIAN");
+}
+
+/// Create a sequence of tests that should be run by memchr implementations.
+fn memchr_tests() -> Vec<MemchrTest> {
+ let mut tests = Vec::new();
+ for statict in MEMCHR_TESTS {
+ assert!(!statict.corpus.contains("%"), "% is not allowed in corpora");
+ assert!(!statict.corpus.contains("#"), "# is not allowed in corpora");
+ assert!(!statict.needles.contains(&b'%'), "% is an invalid needle");
+ assert!(!statict.needles.contains(&b'#'), "# is an invalid needle");
+
+ let t = MemchrTest {
+ corpus: statict.corpus.to_string(),
+ needles: statict.needles.to_vec(),
+ positions: statict.positions.to_vec(),
+ };
+ tests.push(t.clone());
+ tests.extend(t.expand());
+ }
+ tests
+}
+
+/// A set of tests for memchr-like functions.
+///
+/// These tests mostly try to cover the short string cases. We cover the longer
+/// string cases via the benchmarks (which are tests themselves), via
+/// quickcheck tests and via automatic expansion of each test case (by
+/// increasing the corpus size). Finally, we cover different alignment cases
+/// in the tests by varying the starting point of the slice.
+const MEMCHR_TESTS: &[MemchrTestStatic] = &[
+ // one needle (applied to memchr + memchr2 + memchr3)
+ MemchrTestStatic { corpus: "a", needles: &[b'a'], positions: &[0] },
+ MemchrTestStatic { corpus: "aa", needles: &[b'a'], positions: &[0, 1] },
+ MemchrTestStatic {
+ corpus: "aaa",
+ needles: &[b'a'],
+ positions: &[0, 1, 2],
+ },
+ MemchrTestStatic { corpus: "", needles: &[b'a'], positions: &[] },
+ MemchrTestStatic { corpus: "z", needles: &[b'a'], positions: &[] },
+ MemchrTestStatic { corpus: "zz", needles: &[b'a'], positions: &[] },
+ MemchrTestStatic { corpus: "zza", needles: &[b'a'], positions: &[2] },
+ MemchrTestStatic { corpus: "zaza", needles: &[b'a'], positions: &[1, 3] },
+ MemchrTestStatic { corpus: "zzza", needles: &[b'a'], positions: &[3] },
+ MemchrTestStatic { corpus: "\x00a", needles: &[b'a'], positions: &[1] },
+ MemchrTestStatic { corpus: "\x00", needles: &[b'\x00'], positions: &[0] },
+ MemchrTestStatic {
+ corpus: "\x00\x00",
+ needles: &[b'\x00'],
+ positions: &[0, 1],
+ },
+ MemchrTestStatic {
+ corpus: "\x00a\x00",
+ needles: &[b'\x00'],
+ positions: &[0, 2],
+ },
+ MemchrTestStatic {
+ corpus: "zzzzzzzzzzzzzzzza",
+ needles: &[b'a'],
+ positions: &[16],
+ },
+ MemchrTestStatic {
+ corpus: "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzza",
+ needles: &[b'a'],
+ positions: &[32],
+ },
+ // two needles (applied to memchr2 + memchr3)
+ MemchrTestStatic {
+ corpus: "az",
+ needles: &[b'a', b'z'],
+ positions: &[0, 1],
+ },
+ MemchrTestStatic {
+ corpus: "az",
+ needles: &[b'a', b'z'],
+ positions: &[0, 1],
+ },
+ MemchrTestStatic { corpus: "az", needles: &[b'x', b'y'], positions: &[] },
+ MemchrTestStatic { corpus: "az", needles: &[b'a', b'y'], positions: &[0] },
+ MemchrTestStatic { corpus: "az", needles: &[b'x', b'z'], positions: &[1] },
+ MemchrTestStatic {
+ corpus: "yyyyaz",
+ needles: &[b'a', b'z'],
+ positions: &[4, 5],
+ },
+ MemchrTestStatic {
+ corpus: "yyyyaz",
+ needles: &[b'z', b'a'],
+ positions: &[4, 5],
+ },
+ // three needles (applied to memchr3)
+ MemchrTestStatic {
+ corpus: "xyz",
+ needles: &[b'x', b'y', b'z'],
+ positions: &[0, 1, 2],
+ },
+ MemchrTestStatic {
+ corpus: "zxy",
+ needles: &[b'x', b'y', b'z'],
+ positions: &[0, 1, 2],
+ },
+ MemchrTestStatic {
+ corpus: "zxy",
+ needles: &[b'x', b'a', b'z'],
+ positions: &[0, 1],
+ },
+ MemchrTestStatic {
+ corpus: "zxy",
+ needles: &[b't', b'a', b'z'],
+ positions: &[0],
+ },
+ MemchrTestStatic {
+ corpus: "yxz",
+ needles: &[b't', b'a', b'z'],
+ positions: &[2],
+ },
+];
+
+/// A description of a test on a memchr like function.
+#[derive(Clone, Debug)]
+struct MemchrTest {
+ /// The thing to search. We use `&str` instead of `&[u8]` because they
+ /// are nicer to write in tests, and we don't miss much since memchr
+ /// doesn't care about UTF-8.
+ ///
+ /// Corpora cannot contain either '%' or '#'. We use these bytes when
+ /// expanding test cases into many test cases, and we assume they are not
+ /// used. If they are used, `memchr_tests` will panic.
+ corpus: String,
+ /// The needles to search for. This is intended to be an "alternation" of
+ /// needles. The number of needles may cause this test to be skipped for
+ /// some memchr variants. For example, a test with 2 needles cannot be used
+ /// to test `memchr`, but can be used to test `memchr2` and `memchr3`.
+ /// However, a test with only 1 needle can be used to test all of `memchr`,
+ /// `memchr2` and `memchr3`. We achieve this by filling in the needles with
+ /// bytes that we never used in the corpus (such as '#').
+ needles: Vec<u8>,
+ /// The positions expected to match for all of the needles.
+ positions: Vec<usize>,
+}
+
+/// Like MemchrTest, but easier to define as a constant.
+#[derive(Clone, Debug)]
+struct MemchrTestStatic {
+ corpus: &'static str,
+ needles: &'static [u8],
+ positions: &'static [usize],
+}
+
+impl MemchrTest {
+ fn one<F: Fn(u8, &[u8]) -> Option<usize>>(&self, reverse: bool, f: F) {
+ let needles = match self.needles(1) {
+ None => return,
+ Some(needles) => needles,
+ };
+ // We test different alignments here. Since some implementations use
+ // AVX2, which can read 32 bytes at a time, we test at least that.
+ // Moreover, with loop unrolling, we sometimes process 64 (sse2) or 128
+ // (avx) bytes at a time, so we include that in our offsets as well.
+ //
+ // You might think this would cause most needles to not be found, but
+ // we actually expand our tests to include corpus sizes all the way up
+        // to >500 bytes, so we should exercise most branches.
+ for align in 0..130 {
+ let corpus = self.corpus(align);
+ assert_eq!(
+ self.positions(align, reverse).get(0).cloned(),
+ f(needles[0], corpus.as_bytes()),
+ "search for {:?} failed in: {:?} (len: {}, alignment: {})",
+ needles[0] as char,
+ corpus,
+ corpus.len(),
+ align
+ );
+ }
+ }
+
+ fn two<F: Fn(u8, u8, &[u8]) -> Option<usize>>(&self, reverse: bool, f: F) {
+ let needles = match self.needles(2) {
+ None => return,
+ Some(needles) => needles,
+ };
+ for align in 0..130 {
+ let corpus = self.corpus(align);
+ assert_eq!(
+ self.positions(align, reverse).get(0).cloned(),
+ f(needles[0], needles[1], corpus.as_bytes()),
+ "search for {:?}|{:?} failed in: {:?} \
+ (len: {}, alignment: {})",
+ needles[0] as char,
+ needles[1] as char,
+ corpus,
+ corpus.len(),
+ align
+ );
+ }
+ }
+
+ fn three<F: Fn(u8, u8, u8, &[u8]) -> Option<usize>>(
+ &self,
+ reverse: bool,
+ f: F,
+ ) {
+ let needles = match self.needles(3) {
+ None => return,
+ Some(needles) => needles,
+ };
+ for align in 0..130 {
+ let corpus = self.corpus(align);
+ assert_eq!(
+ self.positions(align, reverse).get(0).cloned(),
+ f(needles[0], needles[1], needles[2], corpus.as_bytes()),
+ "search for {:?}|{:?}|{:?} failed in: {:?} \
+ (len: {}, alignment: {})",
+ needles[0] as char,
+ needles[1] as char,
+ needles[2] as char,
+ corpus,
+ corpus.len(),
+ align
+ );
+ }
+ }
+
+ fn iter_one<'a, I, F>(&'a self, reverse: bool, f: F)
+ where
+ F: FnOnce(u8, &'a [u8]) -> I,
+ I: Iterator<Item = usize>,
+ {
+ if let Some(ns) = self.needles(1) {
+ self.iter(reverse, f(ns[0], self.corpus.as_bytes()));
+ }
+ }
+
+ fn iter_two<'a, I, F>(&'a self, reverse: bool, f: F)
+ where
+ F: FnOnce(u8, u8, &'a [u8]) -> I,
+ I: Iterator<Item = usize>,
+ {
+ if let Some(ns) = self.needles(2) {
+ self.iter(reverse, f(ns[0], ns[1], self.corpus.as_bytes()));
+ }
+ }
+
+ fn iter_three<'a, I, F>(&'a self, reverse: bool, f: F)
+ where
+ F: FnOnce(u8, u8, u8, &'a [u8]) -> I,
+ I: Iterator<Item = usize>,
+ {
+ if let Some(ns) = self.needles(3) {
+ self.iter(reverse, f(ns[0], ns[1], ns[2], self.corpus.as_bytes()));
+ }
+ }
+
+ /// Test that the positions yielded by the given iterator match the
+ /// positions in this test. If reverse is true, then reverse the positions
+ /// before comparing them.
+ fn iter<I: Iterator<Item = usize>>(&self, reverse: bool, it: I) {
+ assert_eq!(
+ self.positions(0, reverse),
+ it.collect::<Vec<usize>>(),
+ r"search for {:?} failed in: {:?}",
+ self.needles.iter().map(|&b| b as char).collect::<Vec<char>>(),
+ self.corpus
+ );
+ }
+
+ /// Expand this test into many variations of the same test.
+ ///
+ /// In particular, this will generate more tests with larger corpus sizes.
+ /// The expected positions are updated to maintain the integrity of the
+ /// test.
+ ///
+ /// This is important in testing a memchr implementation, because there are
+ /// often different cases depending on the length of the corpus.
+ ///
+ /// Note that we extend the corpus by adding `%` bytes, which we
+ /// don't otherwise use as a needle.
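+    ///
+    /// For example (illustrative), the "zza" test with needle `b'a'` at
+    /// position 2 is also run as "%zza" (position 3), "%%zza" (position 4),
+    /// and so on, as well as "zza%", "zza%%", etc.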
+ fn expand(&self) -> Vec<MemchrTest> {
+ let mut more = Vec::new();
+
+ // Add bytes to the start of the corpus.
+ for i in 1..515 {
+ let mut t = self.clone();
+ let mut new_corpus: String = repeat('%').take(i).collect();
+ new_corpus.push_str(&t.corpus);
+ t.corpus = new_corpus;
+ t.positions = t.positions.into_iter().map(|p| p + i).collect();
+ more.push(t);
+ }
+ // Add bytes to the end of the corpus.
+ for i in 1..515 {
+ let mut t = self.clone();
+ let padding: String = repeat('%').take(i).collect();
+ t.corpus.push_str(&padding);
+ more.push(t);
+ }
+
+ more
+ }
+
+ /// Return the corpus at the given alignment.
+ ///
+ /// If the alignment exceeds the length of the corpus, then this returns
+ /// an empty slice.
+ fn corpus(&self, align: usize) -> &str {
+ self.corpus.get(align..).unwrap_or("")
+ }
+
+    /// Return exactly `count` needles from this test. If this test has fewer
+    /// than `count` needles, then add `#` until the number of needles
+ /// matches `count`. If this test has more than `count` needles, then
+ /// return `None` (because there is no way to use this test data for a
+ /// search using fewer needles).
+ fn needles(&self, count: usize) -> Option<Vec<u8>> {
+ if self.needles.len() > count {
+ return None;
+ }
+
+ let mut needles = self.needles.to_vec();
+ for _ in needles.len()..count {
+ // we assume # is never used in tests.
+ needles.push(b'#');
+ }
+ Some(needles)
+ }
+
+ /// Return the positions in this test, reversed if `reverse` is true.
+ ///
+ /// If alignment is given, then all positions greater than or equal to that
+ /// alignment are offset by the alignment. Positions less than the
+ /// alignment are dropped.
+ fn positions(&self, align: usize, reverse: bool) -> Vec<usize> {
+ let positions = if reverse {
+ let mut positions = self.positions.to_vec();
+ positions.reverse();
+ positions
+ } else {
+ self.positions.to_vec()
+ };
+ positions
+ .into_iter()
+ .filter(|&p| p >= align)
+ .map(|p| p - align)
+ .collect()
+ }
+}
diff --git a/third_party/rust/memchr/src/x86/avx.rs b/third_party/rust/memchr/src/x86/avx.rs
new file mode 100644
index 0000000000..e3d8e8902e
--- /dev/null
+++ b/third_party/rust/memchr/src/x86/avx.rs
@@ -0,0 +1,703 @@
+use core::arch::x86_64::*;
+use core::cmp;
+use core::mem::size_of;
+
+use x86::sse2;
+
+const VECTOR_SIZE: usize = size_of::<__m256i>();
+const VECTOR_ALIGN: usize = VECTOR_SIZE - 1;
+
+// The number of bytes to loop at in one iteration of memchr/memrchr.
+const LOOP_SIZE: usize = 4 * VECTOR_SIZE;
+
+// The number of bytes to loop at in one iteration of memchr2/memrchr2 and
+// memchr3/memrchr3. There was no observable difference between 128 and 64
+// bytes in benchmarks. memchr3 in particular only gets a very slight speed up
+// from the loop unrolling.
+const LOOP_SIZE2: usize = 2 * VECTOR_SIZE;
+
+#[target_feature(enable = "avx2")]
+pub unsafe fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ // For a high level explanation for how this algorithm works, see the
+ // sse2 implementation. The avx implementation here is the same, but with
+ // 256-bit vectors instead of 128-bit vectors.
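+    //
+    // In short: check one (possibly unaligned) 32-byte vector at the front,
+    // then scan four aligned vectors per iteration, OR-ing their comparison
+    // results so a single movemask call detects a hit, then one aligned
+    // vector at a time for what's left, and finally one last unaligned
+    // vector that may overlap bytes that were already searched.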
+
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ // For small haystacks, defer to the SSE2 implementation. Codegen
+ // suggests this completely avoids touching the AVX vectors.
+ return sse2::memchr(n1, haystack);
+ }
+
+ let vn1 = _mm256_set1_epi8(n1 as i8);
+ let loop_size = cmp::min(LOOP_SIZE, haystack.len());
+ if let Some(i) = forward_search1(start_ptr, end_ptr, ptr, vn1) {
+ return Some(i);
+ }
+
+ ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN));
+ debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr);
+ while loop_size == LOOP_SIZE && ptr <= end_ptr.sub(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ let a = _mm256_load_si256(ptr as *const __m256i);
+ let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i);
+ let c = _mm256_load_si256(ptr.add(2 * VECTOR_SIZE) as *const __m256i);
+ let d = _mm256_load_si256(ptr.add(3 * VECTOR_SIZE) as *const __m256i);
+ let eqa = _mm256_cmpeq_epi8(vn1, a);
+ let eqb = _mm256_cmpeq_epi8(vn1, b);
+ let eqc = _mm256_cmpeq_epi8(vn1, c);
+ let eqd = _mm256_cmpeq_epi8(vn1, d);
+ let or1 = _mm256_or_si256(eqa, eqb);
+ let or2 = _mm256_or_si256(eqc, eqd);
+ let or3 = _mm256_or_si256(or1, or2);
+ if _mm256_movemask_epi8(or3) != 0 {
+ let mut at = sub(ptr, start_ptr);
+ let mask = _mm256_movemask_epi8(eqa);
+ if mask != 0 {
+ return Some(at + forward_pos(mask));
+ }
+
+ at += VECTOR_SIZE;
+ let mask = _mm256_movemask_epi8(eqb);
+ if mask != 0 {
+ return Some(at + forward_pos(mask));
+ }
+
+ at += VECTOR_SIZE;
+ let mask = _mm256_movemask_epi8(eqc);
+ if mask != 0 {
+ return Some(at + forward_pos(mask));
+ }
+
+ at += VECTOR_SIZE;
+ let mask = _mm256_movemask_epi8(eqd);
+ debug_assert!(mask != 0);
+ return Some(at + forward_pos(mask));
+ }
+ ptr = ptr.add(loop_size);
+ }
+ while ptr <= end_ptr.sub(VECTOR_SIZE) {
+ debug_assert!(sub(end_ptr, ptr) >= VECTOR_SIZE);
+
+ if let Some(i) = forward_search1(start_ptr, end_ptr, ptr, vn1) {
+ return Some(i);
+ }
+ ptr = ptr.add(VECTOR_SIZE);
+ }
+ if ptr < end_ptr {
+ debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE);
+ ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr));
+ debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE);
+
+ return forward_search1(start_ptr, end_ptr, ptr, vn1);
+ }
+ None
+}
+
+#[target_feature(enable = "avx2")]
+pub unsafe fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = _mm256_set1_epi8(n1 as i8);
+ let vn2 = _mm256_set1_epi8(n2 as i8);
+ let len = haystack.len();
+ let loop_size = cmp::min(LOOP_SIZE2, len);
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr < end_ptr {
+ if *ptr == n1 || *ptr == n2 {
+ return Some(sub(ptr, start_ptr));
+ }
+ ptr = ptr.offset(1);
+ }
+ return None;
+ }
+
+ if let Some(i) = forward_search2(start_ptr, end_ptr, ptr, vn1, vn2) {
+ return Some(i);
+ }
+
+ ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN));
+ debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr);
+ while loop_size == LOOP_SIZE2 && ptr <= end_ptr.sub(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ let a = _mm256_load_si256(ptr as *const __m256i);
+ let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i);
+ let eqa1 = _mm256_cmpeq_epi8(vn1, a);
+ let eqb1 = _mm256_cmpeq_epi8(vn1, b);
+ let eqa2 = _mm256_cmpeq_epi8(vn2, a);
+ let eqb2 = _mm256_cmpeq_epi8(vn2, b);
+ let or1 = _mm256_or_si256(eqa1, eqb1);
+ let or2 = _mm256_or_si256(eqa2, eqb2);
+ let or3 = _mm256_or_si256(or1, or2);
+ if _mm256_movemask_epi8(or3) != 0 {
+ let mut at = sub(ptr, start_ptr);
+ let mask1 = _mm256_movemask_epi8(eqa1);
+ let mask2 = _mm256_movemask_epi8(eqa2);
+ if mask1 != 0 || mask2 != 0 {
+ return Some(at + forward_pos2(mask1, mask2));
+ }
+
+ at += VECTOR_SIZE;
+ let mask1 = _mm256_movemask_epi8(eqb1);
+ let mask2 = _mm256_movemask_epi8(eqb2);
+ return Some(at + forward_pos2(mask1, mask2));
+ }
+ ptr = ptr.add(loop_size);
+ }
+ while ptr <= end_ptr.sub(VECTOR_SIZE) {
+ if let Some(i) = forward_search2(start_ptr, end_ptr, ptr, vn1, vn2) {
+ return Some(i);
+ }
+ ptr = ptr.add(VECTOR_SIZE);
+ }
+ if ptr < end_ptr {
+ debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE);
+ ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr));
+ debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE);
+
+ return forward_search2(start_ptr, end_ptr, ptr, vn1, vn2);
+ }
+ None
+}
+
+#[target_feature(enable = "avx2")]
+pub unsafe fn memchr3(
+ n1: u8,
+ n2: u8,
+ n3: u8,
+ haystack: &[u8],
+) -> Option<usize> {
+ let vn1 = _mm256_set1_epi8(n1 as i8);
+ let vn2 = _mm256_set1_epi8(n2 as i8);
+ let vn3 = _mm256_set1_epi8(n3 as i8);
+ let len = haystack.len();
+ let loop_size = cmp::min(LOOP_SIZE2, len);
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr < end_ptr {
+ if *ptr == n1 || *ptr == n2 || *ptr == n3 {
+ return Some(sub(ptr, start_ptr));
+ }
+ ptr = ptr.offset(1);
+ }
+ return None;
+ }
+
+ if let Some(i) = forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) {
+ return Some(i);
+ }
+
+ ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN));
+ debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr);
+ while loop_size == LOOP_SIZE2 && ptr <= end_ptr.sub(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ let a = _mm256_load_si256(ptr as *const __m256i);
+ let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i);
+ let eqa1 = _mm256_cmpeq_epi8(vn1, a);
+ let eqb1 = _mm256_cmpeq_epi8(vn1, b);
+ let eqa2 = _mm256_cmpeq_epi8(vn2, a);
+ let eqb2 = _mm256_cmpeq_epi8(vn2, b);
+ let eqa3 = _mm256_cmpeq_epi8(vn3, a);
+ let eqb3 = _mm256_cmpeq_epi8(vn3, b);
+ let or1 = _mm256_or_si256(eqa1, eqb1);
+ let or2 = _mm256_or_si256(eqa2, eqb2);
+ let or3 = _mm256_or_si256(eqa3, eqb3);
+ let or4 = _mm256_or_si256(or1, or2);
+ let or5 = _mm256_or_si256(or3, or4);
+ if _mm256_movemask_epi8(or5) != 0 {
+ let mut at = sub(ptr, start_ptr);
+ let mask1 = _mm256_movemask_epi8(eqa1);
+ let mask2 = _mm256_movemask_epi8(eqa2);
+ let mask3 = _mm256_movemask_epi8(eqa3);
+ if mask1 != 0 || mask2 != 0 || mask3 != 0 {
+ return Some(at + forward_pos3(mask1, mask2, mask3));
+ }
+
+ at += VECTOR_SIZE;
+ let mask1 = _mm256_movemask_epi8(eqb1);
+ let mask2 = _mm256_movemask_epi8(eqb2);
+ let mask3 = _mm256_movemask_epi8(eqb3);
+ return Some(at + forward_pos3(mask1, mask2, mask3));
+ }
+ ptr = ptr.add(loop_size);
+ }
+ while ptr <= end_ptr.sub(VECTOR_SIZE) {
+ if let Some(i) =
+ forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3)
+ {
+ return Some(i);
+ }
+ ptr = ptr.add(VECTOR_SIZE);
+ }
+ if ptr < end_ptr {
+ debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE);
+ ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr));
+ debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE);
+
+ return forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3);
+ }
+ None
+}
+
+#[target_feature(enable = "avx2")]
+pub unsafe fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = _mm256_set1_epi8(n1 as i8);
+ let len = haystack.len();
+ let loop_size = cmp::min(LOOP_SIZE, len);
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = end_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr > start_ptr {
+ ptr = ptr.offset(-1);
+ if *ptr == n1 {
+ return Some(sub(ptr, start_ptr));
+ }
+ }
+ return None;
+ }
+
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) = reverse_search1(start_ptr, end_ptr, ptr, vn1) {
+ return Some(i);
+ }
+
+ ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8;
+ debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
+ while loop_size == LOOP_SIZE && ptr >= start_ptr.add(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ ptr = ptr.sub(loop_size);
+ let a = _mm256_load_si256(ptr as *const __m256i);
+ let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i);
+ let c = _mm256_load_si256(ptr.add(2 * VECTOR_SIZE) as *const __m256i);
+ let d = _mm256_load_si256(ptr.add(3 * VECTOR_SIZE) as *const __m256i);
+ let eqa = _mm256_cmpeq_epi8(vn1, a);
+ let eqb = _mm256_cmpeq_epi8(vn1, b);
+ let eqc = _mm256_cmpeq_epi8(vn1, c);
+ let eqd = _mm256_cmpeq_epi8(vn1, d);
+ let or1 = _mm256_or_si256(eqa, eqb);
+ let or2 = _mm256_or_si256(eqc, eqd);
+ let or3 = _mm256_or_si256(or1, or2);
+ if _mm256_movemask_epi8(or3) != 0 {
+ let mut at = sub(ptr.add(3 * VECTOR_SIZE), start_ptr);
+ let mask = _mm256_movemask_epi8(eqd);
+ if mask != 0 {
+ return Some(at + reverse_pos(mask));
+ }
+
+ at -= VECTOR_SIZE;
+ let mask = _mm256_movemask_epi8(eqc);
+ if mask != 0 {
+ return Some(at + reverse_pos(mask));
+ }
+
+ at -= VECTOR_SIZE;
+ let mask = _mm256_movemask_epi8(eqb);
+ if mask != 0 {
+ return Some(at + reverse_pos(mask));
+ }
+
+ at -= VECTOR_SIZE;
+ let mask = _mm256_movemask_epi8(eqa);
+ debug_assert!(mask != 0);
+ return Some(at + reverse_pos(mask));
+ }
+ }
+ while ptr >= start_ptr.add(VECTOR_SIZE) {
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) = reverse_search1(start_ptr, end_ptr, ptr, vn1) {
+ return Some(i);
+ }
+ }
+ if ptr > start_ptr {
+ debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE);
+ return reverse_search1(start_ptr, end_ptr, start_ptr, vn1);
+ }
+ None
+}
+
+#[target_feature(enable = "avx2")]
+pub unsafe fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = _mm256_set1_epi8(n1 as i8);
+ let vn2 = _mm256_set1_epi8(n2 as i8);
+ let len = haystack.len();
+ let loop_size = cmp::min(LOOP_SIZE2, len);
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = end_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr > start_ptr {
+ ptr = ptr.offset(-1);
+ if *ptr == n1 || *ptr == n2 {
+ return Some(sub(ptr, start_ptr));
+ }
+ }
+ return None;
+ }
+
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) = reverse_search2(start_ptr, end_ptr, ptr, vn1, vn2) {
+ return Some(i);
+ }
+
+ ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8;
+ debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
+ while loop_size == LOOP_SIZE2 && ptr >= start_ptr.add(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ ptr = ptr.sub(loop_size);
+ let a = _mm256_load_si256(ptr as *const __m256i);
+ let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i);
+ let eqa1 = _mm256_cmpeq_epi8(vn1, a);
+ let eqb1 = _mm256_cmpeq_epi8(vn1, b);
+ let eqa2 = _mm256_cmpeq_epi8(vn2, a);
+ let eqb2 = _mm256_cmpeq_epi8(vn2, b);
+ let or1 = _mm256_or_si256(eqa1, eqb1);
+ let or2 = _mm256_or_si256(eqa2, eqb2);
+ let or3 = _mm256_or_si256(or1, or2);
+ if _mm256_movemask_epi8(or3) != 0 {
+ let mut at = sub(ptr.add(VECTOR_SIZE), start_ptr);
+ let mask1 = _mm256_movemask_epi8(eqb1);
+ let mask2 = _mm256_movemask_epi8(eqb2);
+ if mask1 != 0 || mask2 != 0 {
+ return Some(at + reverse_pos2(mask1, mask2));
+ }
+
+ at -= VECTOR_SIZE;
+ let mask1 = _mm256_movemask_epi8(eqa1);
+ let mask2 = _mm256_movemask_epi8(eqa2);
+ return Some(at + reverse_pos2(mask1, mask2));
+ }
+ }
+ while ptr >= start_ptr.add(VECTOR_SIZE) {
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) = reverse_search2(start_ptr, end_ptr, ptr, vn1, vn2) {
+ return Some(i);
+ }
+ }
+ if ptr > start_ptr {
+ debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE);
+ return reverse_search2(start_ptr, end_ptr, start_ptr, vn1, vn2);
+ }
+ None
+}
+
+#[target_feature(enable = "avx2")]
+pub unsafe fn memrchr3(
+ n1: u8,
+ n2: u8,
+ n3: u8,
+ haystack: &[u8],
+) -> Option<usize> {
+ let vn1 = _mm256_set1_epi8(n1 as i8);
+ let vn2 = _mm256_set1_epi8(n2 as i8);
+ let vn3 = _mm256_set1_epi8(n3 as i8);
+ let len = haystack.len();
+ let loop_size = cmp::min(LOOP_SIZE2, len);
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = end_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr > start_ptr {
+ ptr = ptr.offset(-1);
+ if *ptr == n1 || *ptr == n2 || *ptr == n3 {
+ return Some(sub(ptr, start_ptr));
+ }
+ }
+ return None;
+ }
+
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) = reverse_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) {
+ return Some(i);
+ }
+
+ ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8;
+ debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
+ while loop_size == LOOP_SIZE2 && ptr >= start_ptr.add(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ ptr = ptr.sub(loop_size);
+ let a = _mm256_load_si256(ptr as *const __m256i);
+ let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i);
+ let eqa1 = _mm256_cmpeq_epi8(vn1, a);
+ let eqb1 = _mm256_cmpeq_epi8(vn1, b);
+ let eqa2 = _mm256_cmpeq_epi8(vn2, a);
+ let eqb2 = _mm256_cmpeq_epi8(vn2, b);
+ let eqa3 = _mm256_cmpeq_epi8(vn3, a);
+ let eqb3 = _mm256_cmpeq_epi8(vn3, b);
+ let or1 = _mm256_or_si256(eqa1, eqb1);
+ let or2 = _mm256_or_si256(eqa2, eqb2);
+ let or3 = _mm256_or_si256(eqa3, eqb3);
+ let or4 = _mm256_or_si256(or1, or2);
+ let or5 = _mm256_or_si256(or3, or4);
+ if _mm256_movemask_epi8(or5) != 0 {
+ let mut at = sub(ptr.add(VECTOR_SIZE), start_ptr);
+ let mask1 = _mm256_movemask_epi8(eqb1);
+ let mask2 = _mm256_movemask_epi8(eqb2);
+ let mask3 = _mm256_movemask_epi8(eqb3);
+ if mask1 != 0 || mask2 != 0 || mask3 != 0 {
+ return Some(at + reverse_pos3(mask1, mask2, mask3));
+ }
+
+ at -= VECTOR_SIZE;
+ let mask1 = _mm256_movemask_epi8(eqa1);
+ let mask2 = _mm256_movemask_epi8(eqa2);
+ let mask3 = _mm256_movemask_epi8(eqa3);
+ return Some(at + reverse_pos3(mask1, mask2, mask3));
+ }
+ }
+ while ptr >= start_ptr.add(VECTOR_SIZE) {
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) =
+ reverse_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3)
+ {
+ return Some(i);
+ }
+ }
+ if ptr > start_ptr {
+ debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE);
+ return reverse_search3(start_ptr, end_ptr, start_ptr, vn1, vn2, vn3);
+ }
+ None
+}
+
+#[target_feature(enable = "avx2")]
+unsafe fn forward_search1(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m256i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm256_loadu_si256(ptr as *const __m256i);
+ let mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, vn1));
+ if mask != 0 {
+ Some(sub(ptr, start_ptr) + forward_pos(mask))
+ } else {
+ None
+ }
+}
+
+#[target_feature(enable = "avx2")]
+unsafe fn forward_search2(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m256i,
+ vn2: __m256i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm256_loadu_si256(ptr as *const __m256i);
+ let eq1 = _mm256_cmpeq_epi8(chunk, vn1);
+ let eq2 = _mm256_cmpeq_epi8(chunk, vn2);
+ if _mm256_movemask_epi8(_mm256_or_si256(eq1, eq2)) != 0 {
+ let mask1 = _mm256_movemask_epi8(eq1);
+ let mask2 = _mm256_movemask_epi8(eq2);
+ Some(sub(ptr, start_ptr) + forward_pos2(mask1, mask2))
+ } else {
+ None
+ }
+}
+
+#[target_feature(enable = "avx2")]
+unsafe fn forward_search3(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m256i,
+ vn2: __m256i,
+ vn3: __m256i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm256_loadu_si256(ptr as *const __m256i);
+ let eq1 = _mm256_cmpeq_epi8(chunk, vn1);
+ let eq2 = _mm256_cmpeq_epi8(chunk, vn2);
+ let eq3 = _mm256_cmpeq_epi8(chunk, vn3);
+ let or = _mm256_or_si256(eq1, eq2);
+ if _mm256_movemask_epi8(_mm256_or_si256(or, eq3)) != 0 {
+ let mask1 = _mm256_movemask_epi8(eq1);
+ let mask2 = _mm256_movemask_epi8(eq2);
+ let mask3 = _mm256_movemask_epi8(eq3);
+ Some(sub(ptr, start_ptr) + forward_pos3(mask1, mask2, mask3))
+ } else {
+ None
+ }
+}
+
+#[target_feature(enable = "avx2")]
+unsafe fn reverse_search1(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m256i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm256_loadu_si256(ptr as *const __m256i);
+ let mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(vn1, chunk));
+ if mask != 0 {
+ Some(sub(ptr, start_ptr) + reverse_pos(mask))
+ } else {
+ None
+ }
+}
+
+#[target_feature(enable = "avx2")]
+unsafe fn reverse_search2(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m256i,
+ vn2: __m256i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm256_loadu_si256(ptr as *const __m256i);
+ let eq1 = _mm256_cmpeq_epi8(chunk, vn1);
+ let eq2 = _mm256_cmpeq_epi8(chunk, vn2);
+ if _mm256_movemask_epi8(_mm256_or_si256(eq1, eq2)) != 0 {
+ let mask1 = _mm256_movemask_epi8(eq1);
+ let mask2 = _mm256_movemask_epi8(eq2);
+ Some(sub(ptr, start_ptr) + reverse_pos2(mask1, mask2))
+ } else {
+ None
+ }
+}
+
+#[target_feature(enable = "avx2")]
+unsafe fn reverse_search3(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m256i,
+ vn2: __m256i,
+ vn3: __m256i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm256_loadu_si256(ptr as *const __m256i);
+ let eq1 = _mm256_cmpeq_epi8(chunk, vn1);
+ let eq2 = _mm256_cmpeq_epi8(chunk, vn2);
+ let eq3 = _mm256_cmpeq_epi8(chunk, vn3);
+ let or = _mm256_or_si256(eq1, eq2);
+ if _mm256_movemask_epi8(_mm256_or_si256(or, eq3)) != 0 {
+ let mask1 = _mm256_movemask_epi8(eq1);
+ let mask2 = _mm256_movemask_epi8(eq2);
+ let mask3 = _mm256_movemask_epi8(eq3);
+ Some(sub(ptr, start_ptr) + reverse_pos3(mask1, mask2, mask3))
+ } else {
+ None
+ }
+}
+
+/// Compute the position of the first matching byte from the given mask. The
+/// position returned is always in the range [0, 31].
+///
+/// The mask given is expected to be the result of _mm256_movemask_epi8.
+fn forward_pos(mask: i32) -> usize {
+ // We are dealing with little endian here, where the most significant byte
+ // is at a higher address. That means the least significant bit that is set
+ // corresponds to the position of our first matching byte. That position
+ // corresponds to the number of zeros after the least significant bit.
+ mask.trailing_zeros() as usize
+}
+
+/// Compute the position of the first matching byte from the given masks. The
+/// position returned is always in the range [0, 31]. Each mask corresponds to
+/// the equality comparison of a single byte.
+///
+/// The masks given are expected to be the result of _mm256_movemask_epi8,
+/// where at least one of the masks is non-zero (i.e., indicates a match).
+fn forward_pos2(mask1: i32, mask2: i32) -> usize {
+ debug_assert!(mask1 != 0 || mask2 != 0);
+
+ forward_pos(mask1 | mask2)
+}
+
+/// Compute the position of the first matching byte from the given masks. The
+/// position returned is always in the range [0, 31]. Each mask corresponds to
+/// the equality comparison of a single byte.
+///
+/// The masks given are expected to be the result of _mm256_movemask_epi8,
+/// where at least one of the masks is non-zero (i.e., indicates a match).
+fn forward_pos3(mask1: i32, mask2: i32, mask3: i32) -> usize {
+ debug_assert!(mask1 != 0 || mask2 != 0 || mask3 != 0);
+
+ forward_pos(mask1 | mask2 | mask3)
+}
+
+/// Compute the position of the last matching byte from the given mask. The
+/// position returned is always in the range [0, 31].
+///
+/// The mask given is expected to be the result of _mm256_movemask_epi8.
+fn reverse_pos(mask: i32) -> usize {
+ // We are dealing with little endian here, where the most significant byte
+ // is at a higher address. That means the most significant bit that is set
+ // corresponds to the position of our last matching byte. The position from
+ // the end of the mask is therefore the number of leading zeros in a 32
+ // bit integer, and the position from the start of the mask is therefore
+ // 32 - (leading zeros) - 1.
+ VECTOR_SIZE - (mask as u32).leading_zeros() as usize - 1
+}
+
+/// Compute the position of the last matching byte from the given masks. The
+/// position returned is always in the range [0, 31]. Each mask corresponds to
+/// the equality comparison of a single byte.
+///
+/// The masks given are expected to be the result of _mm256_movemask_epi8,
+/// where at least one of the masks is non-zero (i.e., indicates a match).
+fn reverse_pos2(mask1: i32, mask2: i32) -> usize {
+ debug_assert!(mask1 != 0 || mask2 != 0);
+
+ reverse_pos(mask1 | mask2)
+}
+
+/// Compute the position of the last matching byte from the given masks. The
+/// position returned is always in the range [0, 31]. Each mask corresponds to
+/// the equality comparison of a single byte.
+///
+/// The masks given are expected to be the result of _mm256_movemask_epi8,
+/// where at least one of the masks is non-zero (i.e., indicates a match).
+fn reverse_pos3(mask1: i32, mask2: i32, mask3: i32) -> usize {
+ debug_assert!(mask1 != 0 || mask2 != 0 || mask3 != 0);
+
+ reverse_pos(mask1 | mask2 | mask3)
+}
+
+/// Subtract `b` from `a` and return the difference. `a` should be greater than
+/// or equal to `b`.
+fn sub(a: *const u8, b: *const u8) -> usize {
+ debug_assert!(a >= b);
+ (a as usize) - (b as usize)
+}
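+
+// Worked example of the mask arithmetic in `forward_pos` and `reverse_pos`
+// above. The constant below is a hand-picked movemask-style value (bits 3 and
+// 27 set, i.e., matches at byte offsets 3 and 27 of a 32 byte chunk), not the
+// result of a real `_mm256_movemask_epi8` call; the check is illustrative
+// only.
+#[cfg(test)]
+mod pos_examples {
+    use super::{forward_pos, reverse_pos};
+
+    #[test]
+    fn first_and_last_match_positions() {
+        // Bit i of the mask corresponds to byte i of a 32 byte chunk.
+        let mask: i32 = (1 << 3) | (1 << 27);
+        assert_eq!(3, forward_pos(mask));
+        assert_eq!(27, reverse_pos(mask));
+    }
+}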
diff --git a/third_party/rust/memchr/src/x86/mod.rs b/third_party/rust/memchr/src/x86/mod.rs
new file mode 100644
index 0000000000..855dc8b755
--- /dev/null
+++ b/third_party/rust/memchr/src/x86/mod.rs
@@ -0,0 +1,119 @@
+use fallback;
+
+// We only use AVX when we can detect at runtime whether it's available, which
+// requires std.
+#[cfg(feature = "std")]
+mod avx;
+mod sse2;
+
+// This macro employs a gcc-like "ifunc" trick whereby, upon first calling
+// `memchr` (for example), CPU feature detection will be performed at runtime
+// to determine the best implementation to use. After CPU feature detection
+// is done, we replace `memchr`'s function pointer with the selection. Upon
+// subsequent invocations, the CPU-specific routine is invoked directly, which
+// skips the CPU feature detection and subsequent branch that's required.
+//
+// While this typically doesn't matter for rare occurrences or when used on
+// larger haystacks, `memchr` can be called in tight loops where the overhead
+// of this branch can actually add up *and is measurable*. This trick was
+// necessary to bring this implementation up to glibc's speeds for the 'tiny'
+// benchmarks, for example.
+//
+// At some point, I expect the Rust ecosystem will get a nice macro for doing
+// exactly this, at which point, we can replace our hand-jammed version of it.
+//
+// N.B. The ifunc strategy does prevent function inlining of course, but on
+// modern CPUs, you'll probably end up with the AVX2 implementation, which
+// probably can't be inlined anyway---unless you've compiled your entire
+// program with AVX2 enabled. However, even then, the various memchr
+// implementations aren't exactly small, so inlining might not help anyway!
+#[cfg(feature = "std")]
+macro_rules! ifunc {
+ ($fnty:ty, $name:ident, $haystack:ident, $($needle:ident),+) => {{
+ use std::mem;
+ use std::sync::atomic::{AtomicPtr, Ordering};
+
+ type FnRaw = *mut ();
+
+ static FN: AtomicPtr<()> = AtomicPtr::new(detect as FnRaw);
+
+ fn detect($($needle: u8),+, haystack: &[u8]) -> Option<usize> {
+ let fun =
+ if cfg!(memchr_runtime_avx) && is_x86_feature_detected!("avx2") {
+ avx::$name as FnRaw
+ } else if cfg!(memchr_runtime_sse2) {
+ sse2::$name as FnRaw
+ } else {
+ fallback::$name as FnRaw
+ };
+ FN.store(fun as FnRaw, Ordering::Relaxed);
+ unsafe {
+ mem::transmute::<FnRaw, $fnty>(fun)($($needle),+, haystack)
+ }
+ }
+
+ unsafe {
+ let fun = FN.load(Ordering::Relaxed);
+ mem::transmute::<FnRaw, $fnty>(fun)($($needle),+, $haystack)
+ }
+ }}
+}
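+
+// The macro above is easier to follow when expanded by hand for a single
+// entry point. The function below is a hypothetical, manually expanded
+// version of `ifunc!` for `memchr` only; the names `memchr_expanded` and
+// `detect` here are made up and nothing in this crate calls this function.
+// It exists purely to illustrate the AtomicPtr-based dispatch described
+// above.
+#[cfg(feature = "std")]
+#[allow(dead_code)]
+fn memchr_expanded(n1: u8, haystack: &[u8]) -> Option<usize> {
+    use std::mem;
+    use std::sync::atomic::{AtomicPtr, Ordering};
+
+    type FnTy = fn(u8, &[u8]) -> Option<usize>;
+    type FnRaw = *mut ();
+
+    // Starts out pointing at the detection shim; after the first call, it
+    // holds a pointer to the implementation chosen for this CPU.
+    static FN: AtomicPtr<()> = AtomicPtr::new(detect as FnRaw);
+
+    fn detect(n1: u8, haystack: &[u8]) -> Option<usize> {
+        let fun =
+            if cfg!(memchr_runtime_avx) && is_x86_feature_detected!("avx2") {
+                avx::memchr as FnRaw
+            } else if cfg!(memchr_runtime_sse2) {
+                sse2::memchr as FnRaw
+            } else {
+                fallback::memchr as FnRaw
+            };
+        FN.store(fun, Ordering::Relaxed);
+        // Safety: `fun` always points at a function of type `FnTy`.
+        unsafe { mem::transmute::<FnRaw, FnTy>(fun)(n1, haystack) }
+    }
+
+    // Safety: `FN` only ever holds pointers to functions of type `FnTy`.
+    unsafe {
+        let fun = FN.load(Ordering::Relaxed);
+        mem::transmute::<FnRaw, FnTy>(fun)(n1, haystack)
+    }
+}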
+
+// When std isn't available to provide runtime CPU feature detection, or if
+// runtime CPU feature detection has been explicitly disabled, then just call
+// our optimized SSE2 routine directly. SSE2 is available on all x86_64 targets,
+// so no CPU feature detection is necessary.
+#[cfg(not(feature = "std"))]
+macro_rules! ifunc {
+ ($fnty:ty, $name:ident, $haystack:ident, $($needle:ident),+) => {{
+ if cfg!(memchr_runtime_sse2) {
+ unsafe { sse2::$name($($needle),+, $haystack) }
+ } else {
+ fallback::$name($($needle),+, $haystack)
+ }
+ }}
+}
+
+#[inline(always)]
+pub fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ ifunc!(fn(u8, &[u8]) -> Option<usize>, memchr, haystack, n1)
+}
+
+#[inline(always)]
+pub fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ ifunc!(fn(u8, u8, &[u8]) -> Option<usize>, memchr2, haystack, n1, n2)
+}
+
+#[inline(always)]
+pub fn memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ ifunc!(
+ fn(u8, u8, u8, &[u8]) -> Option<usize>,
+ memchr3,
+ haystack,
+ n1,
+ n2,
+ n3
+ )
+}
+
+#[inline(always)]
+pub fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ ifunc!(fn(u8, &[u8]) -> Option<usize>, memrchr, haystack, n1)
+}
+
+#[inline(always)]
+pub fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ ifunc!(fn(u8, u8, &[u8]) -> Option<usize>, memrchr2, haystack, n1, n2)
+}
+
+#[inline(always)]
+pub fn memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+ ifunc!(
+ fn(u8, u8, u8, &[u8]) -> Option<usize>,
+ memrchr3,
+ haystack,
+ n1,
+ n2,
+ n3
+ )
+}
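+
+// Example of how the wrappers above are used. The haystack and the expected
+// indices below are made up purely for illustration.
+#[cfg(test)]
+mod usage_example {
+    #[test]
+    fn forward_and_reverse() {
+        let haystack = b"the quick brown fox";
+        assert_eq!(Some(8), super::memchr(b'k', haystack));
+        assert_eq!(Some(4), super::memchr2(b'q', b'b', haystack));
+        assert_eq!(Some(16), super::memrchr(b'f', haystack));
+        assert_eq!(None, super::memchr(b'z', haystack));
+    }
+}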
diff --git a/third_party/rust/memchr/src/x86/sse2.rs b/third_party/rust/memchr/src/x86/sse2.rs
new file mode 100644
index 0000000000..76f5a78c34
--- /dev/null
+++ b/third_party/rust/memchr/src/x86/sse2.rs
@@ -0,0 +1,793 @@
+use core::arch::x86_64::*;
+use core::cmp;
+use core::mem::size_of;
+
+const VECTOR_SIZE: usize = size_of::<__m128i>();
+const VECTOR_ALIGN: usize = VECTOR_SIZE - 1;
+
+// The number of bytes to loop at in one iteration of memchr/memrchr.
+const LOOP_SIZE: usize = 4 * VECTOR_SIZE;
+
+// The number of bytes to loop at in one iteration of memchr2/memrchr2 and
+// memchr3/memrchr3. There was no observable difference between 64 and 32 bytes
+// in benchmarks. memchr3 in particular only gets a very slight speed up from
+// the loop unrolling.
+const LOOP_SIZE2: usize = 2 * VECTOR_SIZE;
+
+#[target_feature(enable = "sse2")]
+pub unsafe fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ // What follows is a fast SSE2-only algorithm to detect the position of
+ // `n1` in `haystack` if it exists. From what I know, this is the "classic"
+ // algorithm. I believe it can be found in places like glibc and Go's
+ // standard library. It appears to be well known and is elaborated on in
+ // more detail here: https://gms.tf/stdfind-and-memchr-optimizations.html
+ //
+ // While this routine is very long, the basic idea is actually very simple
+ // and can be expressed straight-forwardly in pseudo code:
+ //
+ // needle = (n1 << 15) | (n1 << 14) | ... | (n1 << 1) | n1
+ // // Note: shift amount in bytes
+ //
+ // while i <= haystack.len() - 16:
+ // // A 16 byte vector. Each byte in chunk corresponds to a byte in
+ // // the haystack.
+ // chunk = haystack[i:i+16]
+ // // Compare bytes in needle with bytes in chunk. The result is a 16
+ // // byte chunk where each byte is 0xFF if the corresponding bytes
+ // // in needle and chunk were equal, or 0x00 otherwise.
+ // eqs = cmpeq(needle, chunk)
+ // // Return a 32 bit integer where the most significant 16 bits
+ // // are always 0 and the lower 16 bits correspond to whether the
+    //   // most significant bit in the corresponding byte in `eqs` is set.
+ // // In other words, `mask as u16` has bit i set if and only if
+ // // needle[i] == chunk[i].
+ // mask = movemask(eqs)
+ //
+ // // Mask is 0 if there is no match, and non-zero otherwise.
+ // if mask != 0:
+ // // trailing_zeros tells us the position of the least significant
+ // // bit that is set.
+ // return i + trailing_zeros(mask)
+ //
+ // // haystack length may not be a multiple of 16, so search the rest.
+ // while i < haystack.len():
+ // if haystack[i] == n1:
+ // return i
+ //
+ // // No match found.
+ // return NULL
+ //
+    // In fact, we could loosely translate the above code to Rust line-for-line
+    // and it would be a pretty fast algorithm; the illustrative `memchr_basic`
+    // sketch after this function is such a translation. But, we pull out all
+    // the stops to go as fast as possible:
+ //
+ // 1. We use aligned loads. That is, we do some finagling to make sure our
+ // primary loop not only proceeds in increments of 16 bytes, but that
+ // the address of haystack's pointer that we dereference is aligned to
+    //    16 bytes. 16 is a magic number here because it is the size of an
+    //    SSE2 128-bit vector. (For the AVX2 algorithm, 32 is the magic number.)
+ // Therefore, to get aligned loads, our pointer's address must be evenly
+ // divisible by 16.
+ // 2. Our primary loop proceeds 64 bytes at a time instead of 16. It's
+ // kind of like loop unrolling, but we combine the equality comparisons
+ // using a vector OR such that we only need to extract a single mask to
+ // determine whether a match exists or not. If so, then we do some
+ // book-keeping to determine the precise location but otherwise mush on.
+ // 3. We use our "chunk" comparison routine in as many places as possible,
+ // even if it means using unaligned loads. In particular, if haystack
+ // starts with an unaligned address, then we do an unaligned load to
+ // search the first 16 bytes. We then start our primary loop at the
+ // smallest subsequent aligned address, which will actually overlap with
+ // previously searched bytes. But we're OK with that. We do a similar
+ // dance at the end of our primary loop. Finally, to avoid a
+ // byte-at-a-time loop at the end, we do a final 16 byte unaligned load
+ // that may overlap with a previous load. This is OK because it converts
+ // a loop into a small number of very fast vector instructions.
+ //
+ // The primary downside of this algorithm is that it's effectively
+ // completely unsafe. Therefore, we have to be super careful to avoid
+ // undefined behavior:
+ //
+ // 1. We use raw pointers everywhere. Not only does dereferencing a pointer
+ // require the pointer to be valid, but we actually can't even store the
+ // address of an invalid pointer (unless it's 1 past the end of
+ // haystack) without sacrificing performance.
+ // 2. _mm_loadu_si128 is used when you don't care about alignment, and
+ // _mm_load_si128 is used when you do care. You cannot use the latter
+ // on unaligned pointers.
+ // 3. We make liberal use of debug_assert! to check assumptions.
+ // 4. We make a concerted effort to stick with pointers instead of indices.
+ // Indices are nicer because there's less to worry about with them (see
+ // above about pointer offsets), but I could not get the compiler to
+ // produce as good of code as what the below produces. In any case,
+ // pointers are what we really care about here, and alignment is
+ // expressed a bit more naturally with them.
+ //
+ // In general, most of the algorithms in this crate have a similar
+ // structure to what you see below, so this comment applies fairly well to
+ // all of them.
+
+ let vn1 = _mm_set1_epi8(n1 as i8);
+ let len = haystack.len();
+ let loop_size = cmp::min(LOOP_SIZE, len);
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr < end_ptr {
+ if *ptr == n1 {
+ return Some(sub(ptr, start_ptr));
+ }
+ ptr = ptr.offset(1);
+ }
+ return None;
+ }
+
+ if let Some(i) = forward_search1(start_ptr, end_ptr, ptr, vn1) {
+ return Some(i);
+ }
+
+ ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN));
+ debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr);
+ while loop_size == LOOP_SIZE && ptr <= end_ptr.sub(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ let a = _mm_load_si128(ptr as *const __m128i);
+ let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
+ let c = _mm_load_si128(ptr.add(2 * VECTOR_SIZE) as *const __m128i);
+ let d = _mm_load_si128(ptr.add(3 * VECTOR_SIZE) as *const __m128i);
+ let eqa = _mm_cmpeq_epi8(vn1, a);
+ let eqb = _mm_cmpeq_epi8(vn1, b);
+ let eqc = _mm_cmpeq_epi8(vn1, c);
+ let eqd = _mm_cmpeq_epi8(vn1, d);
+ let or1 = _mm_or_si128(eqa, eqb);
+ let or2 = _mm_or_si128(eqc, eqd);
+ let or3 = _mm_or_si128(or1, or2);
+ if _mm_movemask_epi8(or3) != 0 {
+ let mut at = sub(ptr, start_ptr);
+ let mask = _mm_movemask_epi8(eqa);
+ if mask != 0 {
+ return Some(at + forward_pos(mask));
+ }
+
+ at += VECTOR_SIZE;
+ let mask = _mm_movemask_epi8(eqb);
+ if mask != 0 {
+ return Some(at + forward_pos(mask));
+ }
+
+ at += VECTOR_SIZE;
+ let mask = _mm_movemask_epi8(eqc);
+ if mask != 0 {
+ return Some(at + forward_pos(mask));
+ }
+
+ at += VECTOR_SIZE;
+ let mask = _mm_movemask_epi8(eqd);
+ debug_assert!(mask != 0);
+ return Some(at + forward_pos(mask));
+ }
+ ptr = ptr.add(loop_size);
+ }
+ while ptr <= end_ptr.sub(VECTOR_SIZE) {
+ debug_assert!(sub(end_ptr, ptr) >= VECTOR_SIZE);
+
+ if let Some(i) = forward_search1(start_ptr, end_ptr, ptr, vn1) {
+ return Some(i);
+ }
+ ptr = ptr.add(VECTOR_SIZE);
+ }
+ if ptr < end_ptr {
+ debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE);
+ ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr));
+ debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE);
+
+ return forward_search1(start_ptr, end_ptr, ptr, vn1);
+ }
+ None
+}
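+
+// The "loose translation" referred to in the comment above, written out as a
+// hypothetical helper for illustration: one unaligned 16 byte load per
+// iteration, no alignment games and no unrolling. The name `memchr_basic` is
+// made up and nothing in this crate calls it; it only exists to show the core
+// loop that the optimized routine above builds on.
+#[allow(dead_code)]
+#[target_feature(enable = "sse2")]
+unsafe fn memchr_basic(n1: u8, haystack: &[u8]) -> Option<usize> {
+    let vn1 = _mm_set1_epi8(n1 as i8);
+    let start_ptr = haystack.as_ptr();
+    let end_ptr = haystack[haystack.len()..].as_ptr();
+    let mut ptr = start_ptr;
+
+    // Search one 16 byte chunk at a time using unaligned loads.
+    while sub(end_ptr, ptr) >= VECTOR_SIZE {
+        let chunk = _mm_loadu_si128(ptr as *const __m128i);
+        let mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, vn1));
+        if mask != 0 {
+            return Some(sub(ptr, start_ptr) + forward_pos(mask));
+        }
+        ptr = ptr.add(VECTOR_SIZE);
+    }
+    // The haystack length may not be a multiple of 16; finish byte-by-byte.
+    while ptr < end_ptr {
+        if *ptr == n1 {
+            return Some(sub(ptr, start_ptr));
+        }
+        ptr = ptr.offset(1);
+    }
+    None
+}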
+
+#[target_feature(enable = "sse2")]
+pub unsafe fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = _mm_set1_epi8(n1 as i8);
+ let vn2 = _mm_set1_epi8(n2 as i8);
+ let len = haystack.len();
+ let loop_size = cmp::min(LOOP_SIZE2, len);
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr < end_ptr {
+ if *ptr == n1 || *ptr == n2 {
+ return Some(sub(ptr, start_ptr));
+ }
+ ptr = ptr.offset(1);
+ }
+ return None;
+ }
+
+ if let Some(i) = forward_search2(start_ptr, end_ptr, ptr, vn1, vn2) {
+ return Some(i);
+ }
+
+ ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN));
+ debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr);
+ while loop_size == LOOP_SIZE2 && ptr <= end_ptr.sub(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ let a = _mm_load_si128(ptr as *const __m128i);
+ let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
+ let eqa1 = _mm_cmpeq_epi8(vn1, a);
+ let eqb1 = _mm_cmpeq_epi8(vn1, b);
+ let eqa2 = _mm_cmpeq_epi8(vn2, a);
+ let eqb2 = _mm_cmpeq_epi8(vn2, b);
+ let or1 = _mm_or_si128(eqa1, eqb1);
+ let or2 = _mm_or_si128(eqa2, eqb2);
+ let or3 = _mm_or_si128(or1, or2);
+ if _mm_movemask_epi8(or3) != 0 {
+ let mut at = sub(ptr, start_ptr);
+ let mask1 = _mm_movemask_epi8(eqa1);
+ let mask2 = _mm_movemask_epi8(eqa2);
+ if mask1 != 0 || mask2 != 0 {
+ return Some(at + forward_pos2(mask1, mask2));
+ }
+
+ at += VECTOR_SIZE;
+ let mask1 = _mm_movemask_epi8(eqb1);
+ let mask2 = _mm_movemask_epi8(eqb2);
+ return Some(at + forward_pos2(mask1, mask2));
+ }
+ ptr = ptr.add(loop_size);
+ }
+ while ptr <= end_ptr.sub(VECTOR_SIZE) {
+ if let Some(i) = forward_search2(start_ptr, end_ptr, ptr, vn1, vn2) {
+ return Some(i);
+ }
+ ptr = ptr.add(VECTOR_SIZE);
+ }
+ if ptr < end_ptr {
+ debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE);
+ ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr));
+ debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE);
+
+ return forward_search2(start_ptr, end_ptr, ptr, vn1, vn2);
+ }
+ None
+}
+
+#[target_feature(enable = "sse2")]
+pub unsafe fn memchr3(
+ n1: u8,
+ n2: u8,
+ n3: u8,
+ haystack: &[u8],
+) -> Option<usize> {
+ let vn1 = _mm_set1_epi8(n1 as i8);
+ let vn2 = _mm_set1_epi8(n2 as i8);
+ let vn3 = _mm_set1_epi8(n3 as i8);
+ let len = haystack.len();
+ let loop_size = cmp::min(LOOP_SIZE2, len);
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr < end_ptr {
+ if *ptr == n1 || *ptr == n2 || *ptr == n3 {
+ return Some(sub(ptr, start_ptr));
+ }
+ ptr = ptr.offset(1);
+ }
+ return None;
+ }
+
+ if let Some(i) = forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) {
+ return Some(i);
+ }
+
+ ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN));
+ debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr);
+ while loop_size == LOOP_SIZE2 && ptr <= end_ptr.sub(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ let a = _mm_load_si128(ptr as *const __m128i);
+ let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
+ let eqa1 = _mm_cmpeq_epi8(vn1, a);
+ let eqb1 = _mm_cmpeq_epi8(vn1, b);
+ let eqa2 = _mm_cmpeq_epi8(vn2, a);
+ let eqb2 = _mm_cmpeq_epi8(vn2, b);
+ let eqa3 = _mm_cmpeq_epi8(vn3, a);
+ let eqb3 = _mm_cmpeq_epi8(vn3, b);
+ let or1 = _mm_or_si128(eqa1, eqb1);
+ let or2 = _mm_or_si128(eqa2, eqb2);
+ let or3 = _mm_or_si128(eqa3, eqb3);
+ let or4 = _mm_or_si128(or1, or2);
+ let or5 = _mm_or_si128(or3, or4);
+ if _mm_movemask_epi8(or5) != 0 {
+ let mut at = sub(ptr, start_ptr);
+ let mask1 = _mm_movemask_epi8(eqa1);
+ let mask2 = _mm_movemask_epi8(eqa2);
+ let mask3 = _mm_movemask_epi8(eqa3);
+ if mask1 != 0 || mask2 != 0 || mask3 != 0 {
+ return Some(at + forward_pos3(mask1, mask2, mask3));
+ }
+
+ at += VECTOR_SIZE;
+ let mask1 = _mm_movemask_epi8(eqb1);
+ let mask2 = _mm_movemask_epi8(eqb2);
+ let mask3 = _mm_movemask_epi8(eqb3);
+ return Some(at + forward_pos3(mask1, mask2, mask3));
+ }
+ ptr = ptr.add(loop_size);
+ }
+ while ptr <= end_ptr.sub(VECTOR_SIZE) {
+ if let Some(i) =
+ forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3)
+ {
+ return Some(i);
+ }
+ ptr = ptr.add(VECTOR_SIZE);
+ }
+ if ptr < end_ptr {
+ debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE);
+ ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr));
+ debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE);
+
+ return forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3);
+ }
+ None
+}
+
+#[target_feature(enable = "sse2")]
+pub unsafe fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = _mm_set1_epi8(n1 as i8);
+ let len = haystack.len();
+ let loop_size = cmp::min(LOOP_SIZE, len);
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = end_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr > start_ptr {
+ ptr = ptr.offset(-1);
+ if *ptr == n1 {
+ return Some(sub(ptr, start_ptr));
+ }
+ }
+ return None;
+ }
+
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) = reverse_search1(start_ptr, end_ptr, ptr, vn1) {
+ return Some(i);
+ }
+
+ ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8;
+ debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
+ while loop_size == LOOP_SIZE && ptr >= start_ptr.add(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ ptr = ptr.sub(loop_size);
+ let a = _mm_load_si128(ptr as *const __m128i);
+ let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
+ let c = _mm_load_si128(ptr.add(2 * VECTOR_SIZE) as *const __m128i);
+ let d = _mm_load_si128(ptr.add(3 * VECTOR_SIZE) as *const __m128i);
+ let eqa = _mm_cmpeq_epi8(vn1, a);
+ let eqb = _mm_cmpeq_epi8(vn1, b);
+ let eqc = _mm_cmpeq_epi8(vn1, c);
+ let eqd = _mm_cmpeq_epi8(vn1, d);
+ let or1 = _mm_or_si128(eqa, eqb);
+ let or2 = _mm_or_si128(eqc, eqd);
+ let or3 = _mm_or_si128(or1, or2);
+ if _mm_movemask_epi8(or3) != 0 {
+ let mut at = sub(ptr.add(3 * VECTOR_SIZE), start_ptr);
+ let mask = _mm_movemask_epi8(eqd);
+ if mask != 0 {
+ return Some(at + reverse_pos(mask));
+ }
+
+ at -= VECTOR_SIZE;
+ let mask = _mm_movemask_epi8(eqc);
+ if mask != 0 {
+ return Some(at + reverse_pos(mask));
+ }
+
+ at -= VECTOR_SIZE;
+ let mask = _mm_movemask_epi8(eqb);
+ if mask != 0 {
+ return Some(at + reverse_pos(mask));
+ }
+
+ at -= VECTOR_SIZE;
+ let mask = _mm_movemask_epi8(eqa);
+ debug_assert!(mask != 0);
+ return Some(at + reverse_pos(mask));
+ }
+ }
+ while ptr >= start_ptr.add(VECTOR_SIZE) {
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) = reverse_search1(start_ptr, end_ptr, ptr, vn1) {
+ return Some(i);
+ }
+ }
+ if ptr > start_ptr {
+ debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE);
+ return reverse_search1(start_ptr, end_ptr, start_ptr, vn1);
+ }
+ None
+}
+
+#[target_feature(enable = "sse2")]
+pub unsafe fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = _mm_set1_epi8(n1 as i8);
+ let vn2 = _mm_set1_epi8(n2 as i8);
+ let len = haystack.len();
+ let loop_size = cmp::min(LOOP_SIZE2, len);
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = end_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr > start_ptr {
+ ptr = ptr.offset(-1);
+ if *ptr == n1 || *ptr == n2 {
+ return Some(sub(ptr, start_ptr));
+ }
+ }
+ return None;
+ }
+
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) = reverse_search2(start_ptr, end_ptr, ptr, vn1, vn2) {
+ return Some(i);
+ }
+
+ ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8;
+ debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
+ while loop_size == LOOP_SIZE2 && ptr >= start_ptr.add(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ ptr = ptr.sub(loop_size);
+ let a = _mm_load_si128(ptr as *const __m128i);
+ let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
+ let eqa1 = _mm_cmpeq_epi8(vn1, a);
+ let eqb1 = _mm_cmpeq_epi8(vn1, b);
+ let eqa2 = _mm_cmpeq_epi8(vn2, a);
+ let eqb2 = _mm_cmpeq_epi8(vn2, b);
+ let or1 = _mm_or_si128(eqa1, eqb1);
+ let or2 = _mm_or_si128(eqa2, eqb2);
+ let or3 = _mm_or_si128(or1, or2);
+ if _mm_movemask_epi8(or3) != 0 {
+ let mut at = sub(ptr.add(VECTOR_SIZE), start_ptr);
+ let mask1 = _mm_movemask_epi8(eqb1);
+ let mask2 = _mm_movemask_epi8(eqb2);
+ if mask1 != 0 || mask2 != 0 {
+ return Some(at + reverse_pos2(mask1, mask2));
+ }
+
+ at -= VECTOR_SIZE;
+ let mask1 = _mm_movemask_epi8(eqa1);
+ let mask2 = _mm_movemask_epi8(eqa2);
+ return Some(at + reverse_pos2(mask1, mask2));
+ }
+ }
+ while ptr >= start_ptr.add(VECTOR_SIZE) {
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) = reverse_search2(start_ptr, end_ptr, ptr, vn1, vn2) {
+ return Some(i);
+ }
+ }
+ if ptr > start_ptr {
+ debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE);
+ return reverse_search2(start_ptr, end_ptr, start_ptr, vn1, vn2);
+ }
+ None
+}
+
+#[target_feature(enable = "sse2")]
+pub unsafe fn memrchr3(
+ n1: u8,
+ n2: u8,
+ n3: u8,
+ haystack: &[u8],
+) -> Option<usize> {
+ let vn1 = _mm_set1_epi8(n1 as i8);
+ let vn2 = _mm_set1_epi8(n2 as i8);
+ let vn3 = _mm_set1_epi8(n3 as i8);
+ let len = haystack.len();
+ let loop_size = cmp::min(LOOP_SIZE2, len);
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = end_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr > start_ptr {
+ ptr = ptr.offset(-1);
+ if *ptr == n1 || *ptr == n2 || *ptr == n3 {
+ return Some(sub(ptr, start_ptr));
+ }
+ }
+ return None;
+ }
+
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) = reverse_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) {
+ return Some(i);
+ }
+
+ ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8;
+ debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
+ while loop_size == LOOP_SIZE2 && ptr >= start_ptr.add(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ ptr = ptr.sub(loop_size);
+ let a = _mm_load_si128(ptr as *const __m128i);
+ let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
+ let eqa1 = _mm_cmpeq_epi8(vn1, a);
+ let eqb1 = _mm_cmpeq_epi8(vn1, b);
+ let eqa2 = _mm_cmpeq_epi8(vn2, a);
+ let eqb2 = _mm_cmpeq_epi8(vn2, b);
+ let eqa3 = _mm_cmpeq_epi8(vn3, a);
+ let eqb3 = _mm_cmpeq_epi8(vn3, b);
+ let or1 = _mm_or_si128(eqa1, eqb1);
+ let or2 = _mm_or_si128(eqa2, eqb2);
+ let or3 = _mm_or_si128(eqa3, eqb3);
+ let or4 = _mm_or_si128(or1, or2);
+ let or5 = _mm_or_si128(or3, or4);
+ if _mm_movemask_epi8(or5) != 0 {
+ let mut at = sub(ptr.add(VECTOR_SIZE), start_ptr);
+ let mask1 = _mm_movemask_epi8(eqb1);
+ let mask2 = _mm_movemask_epi8(eqb2);
+ let mask3 = _mm_movemask_epi8(eqb3);
+ if mask1 != 0 || mask2 != 0 || mask3 != 0 {
+ return Some(at + reverse_pos3(mask1, mask2, mask3));
+ }
+
+ at -= VECTOR_SIZE;
+ let mask1 = _mm_movemask_epi8(eqa1);
+ let mask2 = _mm_movemask_epi8(eqa2);
+ let mask3 = _mm_movemask_epi8(eqa3);
+ return Some(at + reverse_pos3(mask1, mask2, mask3));
+ }
+ }
+ while ptr >= start_ptr.add(VECTOR_SIZE) {
+ ptr = ptr.sub(VECTOR_SIZE);
+ if let Some(i) =
+ reverse_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3)
+ {
+ return Some(i);
+ }
+ }
+ if ptr > start_ptr {
+ debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE);
+ return reverse_search3(start_ptr, end_ptr, start_ptr, vn1, vn2, vn3);
+ }
+ None
+}
+
+#[target_feature(enable = "sse2")]
+pub unsafe fn forward_search1(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m128i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm_loadu_si128(ptr as *const __m128i);
+ let mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, vn1));
+ if mask != 0 {
+ Some(sub(ptr, start_ptr) + forward_pos(mask))
+ } else {
+ None
+ }
+}
+
+#[target_feature(enable = "sse2")]
+unsafe fn forward_search2(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m128i,
+ vn2: __m128i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm_loadu_si128(ptr as *const __m128i);
+ let eq1 = _mm_cmpeq_epi8(chunk, vn1);
+ let eq2 = _mm_cmpeq_epi8(chunk, vn2);
+ if _mm_movemask_epi8(_mm_or_si128(eq1, eq2)) != 0 {
+ let mask1 = _mm_movemask_epi8(eq1);
+ let mask2 = _mm_movemask_epi8(eq2);
+ Some(sub(ptr, start_ptr) + forward_pos2(mask1, mask2))
+ } else {
+ None
+ }
+}
+
+#[target_feature(enable = "sse2")]
+pub unsafe fn forward_search3(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m128i,
+ vn2: __m128i,
+ vn3: __m128i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm_loadu_si128(ptr as *const __m128i);
+ let eq1 = _mm_cmpeq_epi8(chunk, vn1);
+ let eq2 = _mm_cmpeq_epi8(chunk, vn2);
+ let eq3 = _mm_cmpeq_epi8(chunk, vn3);
+ let or = _mm_or_si128(eq1, eq2);
+ if _mm_movemask_epi8(_mm_or_si128(or, eq3)) != 0 {
+ let mask1 = _mm_movemask_epi8(eq1);
+ let mask2 = _mm_movemask_epi8(eq2);
+ let mask3 = _mm_movemask_epi8(eq3);
+ Some(sub(ptr, start_ptr) + forward_pos3(mask1, mask2, mask3))
+ } else {
+ None
+ }
+}
+
+#[target_feature(enable = "sse2")]
+unsafe fn reverse_search1(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m128i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm_loadu_si128(ptr as *const __m128i);
+ let mask = _mm_movemask_epi8(_mm_cmpeq_epi8(vn1, chunk));
+ if mask != 0 {
+ Some(sub(ptr, start_ptr) + reverse_pos(mask))
+ } else {
+ None
+ }
+}
+
+#[target_feature(enable = "sse2")]
+unsafe fn reverse_search2(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m128i,
+ vn2: __m128i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm_loadu_si128(ptr as *const __m128i);
+ let eq1 = _mm_cmpeq_epi8(chunk, vn1);
+ let eq2 = _mm_cmpeq_epi8(chunk, vn2);
+ if _mm_movemask_epi8(_mm_or_si128(eq1, eq2)) != 0 {
+ let mask1 = _mm_movemask_epi8(eq1);
+ let mask2 = _mm_movemask_epi8(eq2);
+ Some(sub(ptr, start_ptr) + reverse_pos2(mask1, mask2))
+ } else {
+ None
+ }
+}
+
+#[target_feature(enable = "sse2")]
+unsafe fn reverse_search3(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ ptr: *const u8,
+ vn1: __m128i,
+ vn2: __m128i,
+ vn3: __m128i,
+) -> Option<usize> {
+ debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE);
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ let chunk = _mm_loadu_si128(ptr as *const __m128i);
+ let eq1 = _mm_cmpeq_epi8(chunk, vn1);
+ let eq2 = _mm_cmpeq_epi8(chunk, vn2);
+ let eq3 = _mm_cmpeq_epi8(chunk, vn3);
+ let or = _mm_or_si128(eq1, eq2);
+ if _mm_movemask_epi8(_mm_or_si128(or, eq3)) != 0 {
+ let mask1 = _mm_movemask_epi8(eq1);
+ let mask2 = _mm_movemask_epi8(eq2);
+ let mask3 = _mm_movemask_epi8(eq3);
+ Some(sub(ptr, start_ptr) + reverse_pos3(mask1, mask2, mask3))
+ } else {
+ None
+ }
+}
+
+/// Compute the position of the first matching byte from the given mask. The
+/// position returned is always in the range [0, 15].
+///
+/// The mask given is expected to be the result of _mm_movemask_epi8.
+fn forward_pos(mask: i32) -> usize {
+ // We are dealing with little endian here, where the most significant byte
+ // is at a higher address. That means the least significant bit that is set
+ // corresponds to the position of our first matching byte. That position
+ // corresponds to the number of zeros after the least significant bit.
+ mask.trailing_zeros() as usize
+}
+
+/// Compute the position of the first matching byte from the given masks. The
+/// position returned is always in the range [0, 15]. Each mask corresponds to
+/// the equality comparison of a single byte.
+///
+/// The masks given are expected to be the result of _mm_movemask_epi8, where
+/// at least one of the masks is non-zero (i.e., indicates a match).
+fn forward_pos2(mask1: i32, mask2: i32) -> usize {
+ debug_assert!(mask1 != 0 || mask2 != 0);
+
+ forward_pos(mask1 | mask2)
+}
+
+/// Compute the position of the first matching byte from the given masks. The
+/// position returned is always in the range [0, 15]. Each mask corresponds to
+/// the equality comparison of a single byte.
+///
+/// The masks given are expected to be the result of _mm_movemask_epi8, where
+/// at least one of the masks is non-zero (i.e., indicates a match).
+fn forward_pos3(mask1: i32, mask2: i32, mask3: i32) -> usize {
+ debug_assert!(mask1 != 0 || mask2 != 0 || mask3 != 0);
+
+ forward_pos(mask1 | mask2 | mask3)
+}
+
+/// Compute the position of the last matching byte from the given mask. The
+/// position returned is always in the range [0, 15].
+///
+/// The mask given is expected to be the result of _mm_movemask_epi8.
+fn reverse_pos(mask: i32) -> usize {
+ // We are dealing with little endian here, where the most significant byte
+ // is at a higher address. That means the most significant bit that is set
+ // corresponds to the position of our last matching byte. The position from
+ // the end of the mask is therefore the number of leading zeros in a 16
+ // bit integer, and the position from the start of the mask is therefore
+ // 16 - (leading zeros) - 1.
+ VECTOR_SIZE - (mask as u16).leading_zeros() as usize - 1
+}
+
+/// Compute the position of the last matching byte from the given masks. The
+/// position returned is always in the range [0, 15]. Each mask corresponds to
+/// the equality comparison of a single byte.
+///
+/// The masks given are expected to be the result of _mm_movemask_epi8, where
+/// at least one of the masks is non-zero (i.e., indicates a match).
+fn reverse_pos2(mask1: i32, mask2: i32) -> usize {
+ debug_assert!(mask1 != 0 || mask2 != 0);
+
+ reverse_pos(mask1 | mask2)
+}
+
+/// Compute the position of the last matching byte from the given masks. The
+/// position returned is always in the range [0, 15]. Each mask corresponds to
+/// the equality comparison of a single byte.
+///
+/// The masks given are expected to be the result of _mm_movemask_epi8, where
+/// at least one of the masks is non-zero (i.e., indicates a match).
+fn reverse_pos3(mask1: i32, mask2: i32, mask3: i32) -> usize {
+ debug_assert!(mask1 != 0 || mask2 != 0 || mask3 != 0);
+
+ reverse_pos(mask1 | mask2 | mask3)
+}
+
+/// Subtract `b` from `a` and return the difference. `a` should be greater than
+/// or equal to `b`.
+fn sub(a: *const u8, b: *const u8) -> usize {
+ debug_assert!(a >= b);
+ (a as usize) - (b as usize)
+}
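+
+// Worked example of the alignment adjustment used by the forward routines
+// above: `VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN)` is the number of
+// bytes to skip so that the main loop starts on a 16 byte boundary. The
+// addresses below are made-up integers used purely to illustrate the
+// arithmetic.
+#[cfg(test)]
+mod align_example {
+    use super::{VECTOR_ALIGN, VECTOR_SIZE};
+
+    #[test]
+    fn round_up_to_vector_alignment() {
+        // An unaligned start: 0x1009 & 0xF == 9, so skip 16 - 9 = 7 bytes to
+        // land on 0x1010, the next multiple of 16.
+        let start = 0x1009usize;
+        let adjust = VECTOR_SIZE - (start & VECTOR_ALIGN);
+        assert_eq!(7, adjust);
+        assert_eq!(0, (start + adjust) % VECTOR_SIZE);
+
+        // An already aligned start skips a full 16 bytes, which is fine
+        // because the first 16 bytes were already searched with an unaligned
+        // load before the main loop begins.
+        let start = 0x2000usize;
+        assert_eq!(VECTOR_SIZE, VECTOR_SIZE - (start & VECTOR_ALIGN));
+    }
+}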
diff --git a/third_party/rust/memchr/src/x86/sse42.rs b/third_party/rust/memchr/src/x86/sse42.rs
new file mode 100644
index 0000000000..78a9b37973
--- /dev/null
+++ b/third_party/rust/memchr/src/x86/sse42.rs
@@ -0,0 +1,75 @@
+// This code is unused. PCMPESTRI is gratuitously slow. I imagine it might
+// start winning with a hypothetical memchr4 (or greater). This technique might
+// also be good for exposing searches over ranges of bytes, but that departs
+// from the standard memchr API, so it's not clear whether we actually want
+// that or not.
+//
+// N.B. PCMPISTRI appears to be about twice as fast as PCMPESTRI, which is kind
+// of neat. Unfortunately, UTF-8 strings can contain NUL bytes, which means
+// I don't see a way of effectively using PCMPISTRI unless there's some fast
+// way to replace zero bytes with a byte that is not a needle byte.
+
+use core::arch::x86_64::*;
+use core::mem::size_of;
+
+use x86::sse2;
+
+const VECTOR_SIZE: usize = size_of::<__m128i>();
+const CONTROL_ANY: i32 =
+ _SIDD_UBYTE_OPS
+ | _SIDD_CMP_EQUAL_ANY
+ | _SIDD_POSITIVE_POLARITY
+ | _SIDD_LEAST_SIGNIFICANT;
+
+#[target_feature(enable = "sse4.2")]
+pub unsafe fn memchr3(
+ n1: u8, n2: u8, n3: u8,
+ haystack: &[u8]
+) -> Option<usize> {
+ let vn1 = _mm_set1_epi8(n1 as i8);
+ let vn2 = _mm_set1_epi8(n2 as i8);
+ let vn3 = _mm_set1_epi8(n3 as i8);
+ let vn = _mm_setr_epi8(
+ n1 as i8, n2 as i8, n3 as i8, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ );
+ let len = haystack.len();
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ if haystack.len() < VECTOR_SIZE {
+ while ptr < end_ptr {
+ if *ptr == n1 || *ptr == n2 || *ptr == n3 {
+ return Some(sub(ptr, start_ptr));
+ }
+ ptr = ptr.offset(1);
+ }
+ return None;
+ }
+ while ptr <= end_ptr.sub(VECTOR_SIZE) {
+ let chunk = _mm_loadu_si128(ptr as *const __m128i);
+ let res = _mm_cmpestri(vn, 3, chunk, 16, CONTROL_ANY);
+ if res < 16 {
+ return Some(sub(ptr, start_ptr) + res as usize);
+ }
+ ptr = ptr.add(VECTOR_SIZE);
+ }
+ if ptr < end_ptr {
+ debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE);
+ ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr));
+ debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE);
+
+ return sse2::forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3);
+ }
+ None
+}
+
+/// Subtract `b` from `a` and return the difference. `a` should be greater than
+/// or equal to `b`.
+fn sub(a: *const u8, b: *const u8) -> usize {
+ debug_assert!(a >= b);
+ (a as usize) - (b as usize)
+}