Adding upstream version 1.64.0+dfsg1.upstream/1.64.0+dfsg1

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-17 12:02:58 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-17 12:02:58 +0000
commit: 698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch)
tree: 173a775858bd501c378080a10dca74132f05bc50 /vendor/packed_simd_2/src
parent: Initial commit. (diff)
download: rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz
rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip
142 files changed, 14544 insertions, 0 deletions
diff --git a/vendor/packed_simd_2/src/api.rs b/vendor/packed_simd_2/src/api.rs
new file mode 100644
index 000000000..953685925
--- /dev/null
+++ b/vendor/packed_simd_2/src/api.rs
@@ -0,0 +1,309 @@
+//! Implements the `Simd<[T; N]>` APIs.
+
+#[macro_use]
+mod bitmask;
+crate mod cast;
+#[macro_use]
+mod cmp;
+#[macro_use]
+mod default;
+#[macro_use]
+mod fmt;
+#[macro_use]
+mod from;
+#[macro_use]
+mod hash;
+#[macro_use]
+mod math;
+#[macro_use]
+mod minimal;
+#[macro_use]
+mod ops;
+#[macro_use]
+mod ptr;
+#[macro_use]
+mod reductions;
+#[macro_use]
+mod select;
+#[macro_use]
+mod shuffle;
+#[macro_use]
+mod shuffle1_dyn;
+#[macro_use]
+mod slice;
+#[macro_use]
+mod swap_bytes;
+#[macro_use]
+mod bit_manip;
+
+#[cfg(feature = "into_bits")]
+crate mod into_bits;
+
+macro_rules! impl_i {
+    ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident
+     | $ielem_ty:ident, $ibitmask_ty:ident | $test_tt:tt | $($elem_ids:ident),*
+     | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => {
+        impl_minimal_iuf!([$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
+                          | $($elem_ids),* | $(#[$doc])*);
+        impl_ops_vector_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_ops_scalar_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_ops_vector_bitwise!(
+            [$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0)
+        );
+        impl_ops_scalar_bitwise!(
+            [$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0)
+        );
+        impl_ops_vector_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_ops_scalar_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_ops_vector_rotates!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_ops_vector_neg!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_ops_vector_int_min_max!(
+            [$elem_ty; $elem_n]: $tuple_id | $test_tt
+        );
+        impl_reduction_integer_arithmetic!(
+            [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
+        );
+        impl_reduction_min_max!(
+            [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
+        );
+        impl_reduction_bitwise!(
+            [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
+            | (|x|{ x as $elem_ty }) | (!(0 as $elem_ty), 0)
+        );
+        impl_fmt_debug!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_fmt_lower_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_fmt_upper_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_fmt_octal!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_fmt_binary!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_from_array!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (1, 1));
+        impl_from_vectors!(
+            [$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),*
+        );
+        impl_default!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_hash!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_slice_from_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_slice_write_to_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_swap_bytes!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_bit_manip!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_cmp_partial_eq!(
+            [$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1)
+        );
+        impl_cmp_eq!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1));
+        impl_cmp_vertical!(
+            [$elem_ty; $elem_n]: $tuple_id, $mask_ty, false, (1, 0) | $test_tt
+        );
+        impl_cmp_partial_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_cmp_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1));
+        impl_bitmask!($tuple_id | $ibitmask_ty | (-1, 0) | $test_tt);
+
+        test_select!($elem_ty, $mask_ty, $tuple_id, (1, 2) | $test_tt);
+        test_cmp_partial_ord_int!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        test_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+    }
+}
+
+macro_rules! impl_u {
+    ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident
+     | $ielem_ty:ident, $ibitmask_ty:ident | $test_tt:tt | $($elem_ids:ident),*
+     | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => {
+        impl_minimal_iuf!([$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
+                          | $($elem_ids),* | $(#[$doc])*);
+        impl_ops_vector_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_ops_scalar_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_ops_vector_bitwise!(
+            [$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0)
+        );
+        impl_ops_scalar_bitwise!(
+            [$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0)
+        );
+        impl_ops_vector_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_ops_scalar_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_ops_vector_rotates!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_ops_vector_int_min_max!(
+            [$elem_ty; $elem_n]: $tuple_id | $test_tt
+        );
+        impl_reduction_integer_arithmetic!(
+            [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
+        );
+        impl_reduction_min_max!(
+            [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
+        );
+        impl_reduction_bitwise!(
+            [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
+            | (|x|{ x as $elem_ty }) | (!(0 as $elem_ty), 0)
+        );
+        impl_fmt_debug!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_fmt_lower_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_fmt_upper_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_fmt_octal!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_fmt_binary!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_from_array!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (1, 1));
+        impl_from_vectors!(
+            [$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),*
+        );
+        impl_default!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_hash!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_slice_from_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_slice_write_to_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_swap_bytes!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_bit_manip!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_cmp_partial_eq!(
+            [$elem_ty; $elem_n]: $tuple_id | $test_tt | (1, 0)
+        );
+        impl_cmp_eq!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1));
+        impl_cmp_vertical!(
+            [$elem_ty; $elem_n]: $tuple_id, $mask_ty, false, (1, 0) | $test_tt
+        );
+        impl_cmp_partial_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_cmp_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1));
+        impl_bitmask!($tuple_id | $ibitmask_ty | ($ielem_ty::max_value(), 0) |
+                      $test_tt);
+
+        test_select!($elem_ty, $mask_ty, $tuple_id, (1, 2) | $test_tt);
+        test_cmp_partial_ord_int!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        test_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+    }
+}
+
+macro_rules! impl_f {
+    ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident
+     | $ielem_ty:ident | $test_tt:tt | $($elem_ids:ident),*
+     | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => {
+        impl_minimal_iuf!([$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
+                          | $($elem_ids),* | $(#[$doc])*);
+        impl_ops_vector_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_ops_scalar_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_ops_vector_neg!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_ops_vector_float_min_max!(
+            [$elem_ty; $elem_n]: $tuple_id | $test_tt
+        );
+        impl_reduction_float_arithmetic!(
+            [$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_reduction_min_max!(
+            [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
+        );
+        impl_fmt_debug!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_from_array!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (1., 1.));
+        impl_from_vectors!(
+            [$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),*
+        );
+        impl_default!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_cmp_partial_eq!(
+            [$elem_ty; $elem_n]: $tuple_id | $test_tt | (1., 0.)
+        );
+        impl_slice_from_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_slice_write_to_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+
+        impl_float_consts!([$elem_ty; $elem_n]: $tuple_id);
+        impl_float_category!([$elem_ty; $elem_n]: $tuple_id, $mask_ty);
+
+        // floating-point math
+        impl_math_float_abs!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_math_float_cos!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_math_float_exp!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_math_float_ln!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_math_float_mul_add!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_math_float_mul_adde!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_math_float_powf!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_math_float_recpre!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_math_float_rsqrte!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_math_float_sin!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_math_float_sqrt!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_math_float_sqrte!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_math_float_tanh!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_cmp_vertical!(
+            [$elem_ty; $elem_n]: $tuple_id, $mask_ty, false, (1., 0.)
+                | $test_tt
+        );
+
+        test_select!($elem_ty, $mask_ty, $tuple_id, (1., 2.) | $test_tt);
+        test_reduction_float_min_max!(
+            [$elem_ty; $elem_n]: $tuple_id | $test_tt
+        );
+        test_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+    }
+}
+
+macro_rules! impl_m {
+    ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident
+     | $ielem_ty:ident, $ibitmask_ty:ident
+     | $test_tt:tt | $($elem_ids:ident),* | From: $($from_vec_ty:ident),*
+     | $(#[$doc:meta])*) => {
+        impl_minimal_mask!(
+            [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
+            | $($elem_ids),* | $(#[$doc])*
+        );
+        impl_ops_vector_mask_bitwise!(
+            [$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false)
+        );
+        impl_ops_scalar_mask_bitwise!(
+            [$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false)
+        );
+        impl_reduction_bitwise!(
+            [bool; $elem_n]: $tuple_id | $ielem_ty | $test_tt
+                | (|x|{ x != 0 }) | (true, false)
+        );
+        impl_reduction_mask!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_fmt_debug!([bool; $elem_n]: $tuple_id | $test_tt);
+        impl_from_array!(
+            [$elem_ty; $elem_n]: $tuple_id | $test_tt
+            | (crate::$elem_ty::new(true), true)
+        );
+        impl_from_vectors!(
+            [$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),*
+        );
+        impl_default!([bool; $elem_n]: $tuple_id | $test_tt);
+        impl_cmp_partial_eq!(
+            [$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false)
+        );
+        impl_cmp_eq!(
+            [$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false)
+        );
+        impl_cmp_vertical!(
+            [$elem_ty; $elem_n]: $tuple_id, $tuple_id, true, (true, false)
+            | $test_tt
+        );
+        impl_select!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_cmp_partial_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_cmp_ord!(
+            [$elem_ty; $elem_n]: $tuple_id | $test_tt | (false, true)
+        );
+        impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_bitmask!($tuple_id | $ibitmask_ty | (true, false) | $test_tt);
+
+        test_cmp_partial_ord_mask!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        test_shuffle1_dyn_mask!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+    }
+}
+
+macro_rules! impl_const_p {
+    ([$elem_ty:ty; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident,
+     $usize_ty:ident, $isize_ty:ident
+     | $test_tt:tt | $($elem_ids:ident),*
+     | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => {
+        impl_minimal_p!(
+            [$elem_ty; $elem_n]: $tuple_id, $mask_ty, $usize_ty, $isize_ty
+                | ref_ | $test_tt | $($elem_ids),*
+                | (1 as $elem_ty, 0 as $elem_ty) | $(#[$doc])*
+        );
+        impl_ptr_read!([$elem_ty; $elem_n]: $tuple_id, $mask_ty | $test_tt);
+    }
+}
+
+macro_rules! impl_mut_p {
+    ([$elem_ty:ty; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident,
+     $usize_ty:ident, $isize_ty:ident
+     | $test_tt:tt | $($elem_ids:ident),*
+     | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => {
+        impl_minimal_p!(
+            [$elem_ty; $elem_n]: $tuple_id, $mask_ty, $usize_ty, $isize_ty
+                | ref_mut_ | $test_tt | $($elem_ids),*
+                | (1 as $elem_ty, 0 as $elem_ty) | $(#[$doc])*
+        );
+        impl_ptr_read!([$elem_ty; $elem_n]: $tuple_id, $mask_ty | $test_tt);
+        impl_ptr_write!([$elem_ty; $elem_n]: $tuple_id, $mask_ty | $test_tt);
+    }
+}
diff --git a/vendor/packed_simd_2/src/api/bit_manip.rs b/vendor/packed_simd_2/src/api/bit_manip.rs
new file mode 100644
index 000000000..6d8865706
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/bit_manip.rs
@@ -0,0 +1,129 @@
+//! Bit manipulations.
+
+macro_rules! impl_bit_manip {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl $id {
+            /// Returns the number of ones in the binary representation of
+            /// the lanes of `self`.
+            #[inline]
+            pub fn count_ones(self) -> Self {
+                super::codegen::bit_manip::BitManip::ctpop(self)
+            }
+
+            /// Returns the number of zeros in the binary representation of
+            /// the lanes of `self`.
+            #[inline]
+            pub fn count_zeros(self) -> Self {
+                super::codegen::bit_manip::BitManip::ctpop(!self)
+            }
+
+            /// Returns the number of leading zeros in the binary
+            /// representation of the lanes of `self`.
+            #[inline]
+            pub fn leading_zeros(self) -> Self {
+                super::codegen::bit_manip::BitManip::ctlz(self)
+            }
+
+            /// Returns the number of trailing zeros in the binary
+            /// representation of the lanes of `self`.
+            #[inline]
+            pub fn trailing_zeros(self) -> Self {
+                super::codegen::bit_manip::BitManip::cttz(self)
+            }
+        }
+
+        test_if! {
+            $test_tt:
+            paste::item_with_macros! {
+                #[allow(overflowing_literals)]
+                pub mod [<$id _bit_manip>] {
+                    use super::*;
+
+                    const LANE_WIDTH: usize = mem::size_of::<$elem_ty>() * 8;
+
+                    macro_rules! test_func {
+                        ($x:expr, $func:ident) => {{
+                            let mut actual = $x;
+                            for i in 0..$id::lanes() {
+                                actual = actual.replace(
+                                    i,
+                                    $x.extract(i).$func() as $elem_ty
+                                );
+                            }
+                            let expected = $x.$func();
+                            assert_eq!(actual, expected);
+                        }};
+                    }
+
+                    const BYTES: [u8; 64] = [
+                        0, 1, 2, 3, 4, 5, 6, 7,
+                        8, 9, 10, 11, 12, 13, 14, 15,
+                        16, 17, 18, 19, 20, 21, 22, 23,
+                        24, 25, 26, 27, 28, 29, 30, 31,
+                        32, 33, 34, 35, 36, 37, 38, 39,
+                        40, 41, 42, 43, 44, 45, 46, 47,
+                        48, 49, 50, 51, 52, 53, 54, 55,
+                        56, 57, 58, 59, 60, 61, 62, 63,
+                    ];
+
+                    fn load_bytes() -> $id {
+                        // Copy the const into a local so `elems` cannot dangle.
+                        let mut bytes = BYTES;
+                        // SAFETY: assumes `$id` spans at most 64 bytes (TODO confirm).
+                        let elems: &mut [$elem_ty] = unsafe {
+                            let ptr = bytes.as_mut_ptr() as *mut $elem_ty;
+                            slice::from_raw_parts_mut(ptr, $id::lanes())
+                        };
+                        $id::from_slice_unaligned(elems)
+                    }
+
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn count_ones() {
+                        test_func!($id::splat(0), count_ones);
+                        test_func!($id::splat(!0), count_ones);
+                        test_func!(load_bytes(), count_ones);
+                    }
+
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn count_zeros() {
+                        test_func!($id::splat(0), count_zeros);
+                        test_func!($id::splat(!0), count_zeros);
+                        test_func!(load_bytes(), count_zeros);
+                    }
+
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn leading_zeros() {
+                        test_func!($id::splat(0), leading_zeros);
+                        test_func!($id::splat(1), leading_zeros);
+                        // some implementations use `pshufb` which has unique
+                        // behavior when the 8th bit is set.
+                        test_func!($id::splat(0b1000_0010), leading_zeros);
+                        test_func!($id::splat(!0), leading_zeros);
+                        test_func!(
+                            $id::splat(1 << (LANE_WIDTH - 1)),
+                            leading_zeros
+                        );
+                        test_func!(load_bytes(), leading_zeros);
+                    }
+
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn trailing_zeros() {
+                        test_func!($id::splat(0), trailing_zeros);
+                        test_func!($id::splat(1), trailing_zeros);
+                        test_func!($id::splat(0b1000_0010), trailing_zeros);
+                        test_func!($id::splat(!0), trailing_zeros);
+                        test_func!(
+                            $id::splat(1 << (LANE_WIDTH - 1)),
+                            trailing_zeros
+                        );
+                        test_func!(load_bytes(), trailing_zeros);
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/bitmask.rs b/vendor/packed_simd_2/src/api/bitmask.rs
new file mode 100644
index 000000000..a06ff0fab
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/bitmask.rs
@@ -0,0 +1,82 @@
+//! Bitmask API
+
+macro_rules! impl_bitmask {
+    ($id:ident | $ibitmask_ty:ident | ($set:expr, $clear:expr)
+     | $test_tt:tt) => {
+        impl $id {
+            /// Creates a bitmask with the MSB of each vector lane.
+            ///
+            /// If the vector has less than 8 lanes, the bits that do not
+            /// correspond to any vector lanes are cleared.
+            #[inline]
+            pub fn bitmask(self) -> $ibitmask_ty {
+                unsafe { codegen::llvm::simd_bitmask(self.0) }
+            }
+        }
+
+        test_if! {
+            $test_tt:
+            paste::item! {
+                #[cfg(not(any(
+                    // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/210
+                    all(target_arch = "mips", target_endian = "big"),
+                    all(target_arch = "mips64", target_endian = "big"),
+                    target_arch = "sparc64",
+                    target_arch = "s390x",
+                )))]
+                pub mod [<$id _bitmask>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn bitmask() {
+                        // clear all lanes
+                        let vec = $id::splat($clear as _);
+                        let bitmask: $ibitmask_ty = 0;
+                        assert_eq!(vec.bitmask(), bitmask);
+
+                        // set even lanes
+                        let mut vec = $id::splat($clear as _);
+                        for i in 0..$id::lanes() {
+                            if i % 2 == 0 {
+                                vec = vec.replace(i, $set as _);
+                            }
+                        }
+                        // create bitmask with even lanes set:
+                        let mut bitmask: $ibitmask_ty = 0;
+                        for i in 0..$id::lanes() {
+                            if i % 2 == 0 {
+                                bitmask |= 1 << i;
+                            }
+                        }
+                        assert_eq!(vec.bitmask(), bitmask);
+
+
+                        // set odd lanes
+                        let mut vec = $id::splat($clear as _);
+                        for i in 0..$id::lanes() {
+                            if i % 2 != 0 {
+                                vec = vec.replace(i, $set as _);
+                            }
+                        }
+                        // create bitmask with odd lanes set:
+                        let mut bitmask: $ibitmask_ty = 0;
+                        for i in 0..$id::lanes() {
+                            if i % 2 != 0 {
+                                bitmask |= 1 << i;
+                            }
+                        }
+                        assert_eq!(vec.bitmask(), bitmask);
+
+                        // set all lanes
+                        let vec = $id::splat($set as _);
+                        let mut bitmask: $ibitmask_ty = 0;
+                        for i in 0..$id::lanes() {
+                            bitmask |= 1 << i;
+                        }
+                        assert_eq!(vec.bitmask(), bitmask);
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/cast.rs b/vendor/packed_simd_2/src/api/cast.rs
new file mode 100644
index 000000000..f1c32ca1a
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/cast.rs
@@ -0,0 +1,108 @@
+//! Implementation of `FromCast` and `Cast`.
+#![allow(clippy::module_name_repetitions)]
+
+/// Numeric cast from `T` to `Self`.
+///
+/// > Note: This is a temporary workaround until the conversion traits
+/// > specified in [RFC2484] are implemented.
+///
+/// Numeric cast between vectors with the same number of lanes, such that:
+///
+/// * casting integer vectors whose lane types have the same size (e.g. `i32xN`
+///   -> `u32xN`) is a **no-op**,
+///
+/// * casting from a larger integer to a smaller integer (e.g. `u32xN` ->
+///   `u8xN`) will **truncate**,
+///
+/// * casting from a smaller integer to a larger integer (e.g. `u8xN` ->
+///   `u32xN`) will:
+///    * **zero-extend** if the source is unsigned, or
+///    * **sign-extend** if the source is signed,
+///
+/// * casting from a float to an integer will **round the float towards zero**,
+///
+/// * casting from an integer to float will produce the floating point
+///   representation of the integer, **rounding to nearest, ties to even**,
+///
+/// * casting from an `f32` to an `f64` is perfect and lossless,
+///
+/// * casting from an `f64` to an `f32` **rounds to nearest, ties to even**.
+///
+/// [RFC2484]: https://github.com/rust-lang/rfcs/pull/2484
+pub trait FromCast<T>: crate::marker::Sized {
+    /// Numeric cast from `T` to `Self`.
+    fn from_cast(_: T) -> Self;
+}
+
+/// Numeric cast from `Self` to `T`.
+///
+/// > Note: This is a temporary workaround until the conversion traits
+/// > specified in [RFC2484] are implemented.
+///
+/// Numeric cast between vectors with the same number of lanes, such that:
+///
+/// * casting integer vectors whose lane types have the same size (e.g. `i32xN`
+///   -> `u32xN`) is a **no-op**,
+///
+/// * casting from a larger integer to a smaller integer (e.g. `u32xN` ->
+///   `u8xN`) will **truncate**,
+///
+/// * casting from a smaller integer to a larger integer (e.g. `u8xN` ->
+///   `u32xN`) will:
+///    * **zero-extend** if the source is unsigned, or
+///    * **sign-extend** if the source is signed,
+///
+/// * casting from a float to an integer will **round the float towards zero**,
+///
+/// * casting from an integer to float will produce the floating point
+///   representation of the integer, **rounding to nearest, ties to even**,
+///
+/// * casting from an `f32` to an `f64` is perfect and lossless,
+///
+/// * casting from an `f64` to an `f32` **rounds to nearest, ties to even**.
+///
+/// [RFC2484]: https://github.com/rust-lang/rfcs/pull/2484
+pub trait Cast<T>: crate::marker::Sized {
+    /// Numeric cast from `self` to `T`.
+    fn cast(self) -> T;
+}
+
+/// `FromCast` implies `Cast`.
+impl<T, U> Cast<U> for T
+where
+    U: FromCast<T>,
+{
+    #[inline]
+    fn cast(self) -> U {
+        U::from_cast(self)
+    }
+}
+
+/// `FromCast` and `Cast` are reflexive
+impl<T> FromCast<T> for T {
+    #[inline]
+    fn from_cast(t: Self) -> Self {
+        t
+    }
+}
+
+#[macro_use]
+mod macros;
+
+mod v16;
+pub use self::v16::*;
+
+mod v32;
+pub use self::v32::*;
+
+mod v64;
+pub use self::v64::*;
+
+mod v128;
+pub use self::v128::*;
+
+mod v256;
+pub use self::v256::*;
+
+mod v512;
+pub use self::v512::*;
diff --git a/vendor/packed_simd_2/src/api/cast/macros.rs b/vendor/packed_simd_2/src/api/cast/macros.rs
new file mode 100644
index 000000000..3bb29f0b8
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/cast/macros.rs
@@ -0,0 +1,82 @@
+//! Macros implementing `FromCast`
+
+macro_rules! impl_from_cast_ {
+    ($id:ident[$test_tt:tt]: $from_ty:ident) => {
+        impl crate::api::cast::FromCast<$from_ty> for $id {
+            #[inline]
+            fn from_cast(x: $from_ty) -> Self {
+                use crate::llvm::simd_cast;
+                debug_assert_eq!($from_ty::lanes(), $id::lanes());
+                Simd(unsafe { simd_cast(x.0) })
+            }
+        }
+
+        test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _from_cast_ $from_ty>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn test() {
+                        assert_eq!($id::lanes(), $from_ty::lanes());
+                    }
+                }
+            }
+        }
+    };
+}
+
+macro_rules! impl_from_cast {
+    ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => {
+        $(
+            impl_from_cast_!($id[$test_tt]: $from_ty);
+        )*
+    }
+}
+
+macro_rules! impl_from_cast_mask_ {
+    ($id:ident[$test_tt:tt]: $from_ty:ident) => {
+        impl crate::api::cast::FromCast<$from_ty> for $id {
+            #[inline]
+            fn from_cast(x: $from_ty) -> Self {
+                debug_assert_eq!($from_ty::lanes(), $id::lanes());
+                x.ne($from_ty::default())
+                    .select($id::splat(true), $id::splat(false))
+            }
+        }
+
+        test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _from_cast_ $from_ty>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn test() {
+                        assert_eq!($id::lanes(), $from_ty::lanes());
+
+                        let x = $from_ty::default();
+                        let m: $id = x.cast();
+                        assert!(m.none());
+                    }
+                }
+            }
+        }
+    };
+}
+
+macro_rules! impl_from_cast_mask {
+    ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => {
+        $(
+            impl_from_cast_mask_!($id[$test_tt]: $from_ty);
+        )*
+    }
+}
+
+#[allow(unused)]
+macro_rules! impl_into_cast {
+    ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => {
+        $(
+            impl_from_cast_!($from_ty[$test_tt]: $id);
+        )*
+    }
+}
diff --git a/vendor/packed_simd_2/src/api/cast/v128.rs b/vendor/packed_simd_2/src/api/cast/v128.rs
new file mode 100644
index 000000000..ab47ddc00
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/cast/v128.rs
@@ -0,0 +1,79 @@
+//! `FromCast` and `IntoCast` implementations for portable 128-bit wide vectors
+#[rustfmt::skip]
+
+use crate::*;
+
+// Each invocation reads `macro!(Target[test_v128]: Source, ...)`. In the
+// multi-lane groups below, mask (`m*`) targets use `impl_from_cast_mask!`.
+//
+// 16-lane targets: i8x16, u8x16, m8x16.
+impl_from_cast!(
+    i8x16[test_v128]: u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16
+);
+impl_from_cast!(
+    u8x16[test_v128]: i8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16
+);
+impl_from_cast_mask!(
+    m8x16[test_v128]: i8x16, u8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16
+);
+
+// 8-lane targets: i16x8, u16x8, m16x8.
+impl_from_cast!(
+    i16x8[test_v128]: i8x8, u8x8, m8x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8,
+    i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8
+);
+impl_from_cast!(
+    u16x8[test_v128]: i8x8, u8x8, m8x8, i16x8, m16x8, i32x8, u32x8, f32x8, m32x8,
+    i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8
+);
+impl_from_cast_mask!(
+    m16x8[test_v128]: i8x8, u8x8, m8x8, i16x8, u16x8, i32x8, u32x8, f32x8, m32x8,
+    i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8
+);
+
+// 4-lane targets: i32x4, u32x4, f32x4, m32x4.
+impl_from_cast!(
+    i32x4[test_v128]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, u32x4, f32x4, m32x4,
+    i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4
+);
+impl_from_cast!(
+    u32x4[test_v128]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, f32x4, m32x4,
+    i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4
+);
+impl_from_cast!(
+    f32x4[test_v128]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, m32x4,
+    i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4
+);
+impl_from_cast_mask!(
+    m32x4[test_v128]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4,
+    i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4
+);
+
+// 2-lane targets: i64x2, u64x2, f64x2, m64x2.
+impl_from_cast!(
+    i64x2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2,
+    u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2
+);
+impl_from_cast!(
+    u64x2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2,
+    i64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2
+);
+impl_from_cast!(
+    f64x2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2,
+    i64x2, u64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2
+);
+impl_from_cast_mask!(
+    m64x2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2,
+    i64x2, u64x2, f64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2
+);
+
+// 2-lane pointer-sized targets: isizex2, usizex2, msizex2.
+impl_from_cast!(
+    isizex2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2,
+    i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, usizex2, msizex2
+);
+impl_from_cast!(
+    usizex2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2,
+    i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, msizex2
+);
+impl_from_cast_mask!(
+    msizex2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2,
+    i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2
+);
+
+// FIXME[test_v128]: 128-bit single element vectors into_cast impls
+// Single-lane 128-bit targets. The mask target `m128x1` is built with
+// `impl_from_cast_mask!`, matching how the other mask (`m*`) targets are
+// generated: lanes are compared against the source's default value and mapped
+// to `true`/`false` rather than passed through the plain cast macro.
+impl_from_cast!(i128x1[test_v128]: u128x1, m128x1);
+impl_from_cast!(u128x1[test_v128]: i128x1, m128x1);
+impl_from_cast_mask!(m128x1[test_v128]: i128x1, u128x1);
diff --git a/vendor/packed_simd_2/src/api/cast/v16.rs b/vendor/packed_simd_2/src/api/cast/v16.rs
new file mode 100644
index 000000000..cf974bb08
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/cast/v16.rs
@@ -0,0 +1,17 @@
+//! `FromCast` and `IntoCast` implementations for portable 16-bit wide vectors
+#[rustfmt::skip]
+
+use crate::*;
+
+// Each invocation reads `macro!(Target[test_v16]: Source, ...)`; the mask
+// target `m8x2` goes through `impl_from_cast_mask!`.
+//
+// 2-lane targets: i8x2, u8x2, m8x2.
+impl_from_cast!(
+    i8x2[test_v16]: u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2,
+    i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2
+);
+impl_from_cast!(
+    u8x2[test_v16]: i8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2,
+    i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2
+);
+impl_from_cast_mask!(
+    m8x2[test_v16]: i8x2, u8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2,
+    i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2
+);
diff --git a/vendor/packed_simd_2/src/api/cast/v256.rs b/vendor/packed_simd_2/src/api/cast/v256.rs
new file mode 100644
index 000000000..9389dcb4c
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/cast/v256.rs
@@ -0,0 +1,81 @@
+//! `FromCast` and `IntoCast` implementations for portable 256-bit wide vectors
+#[rustfmt::skip]
+
+use crate::*;
+
+// Each invocation reads `macro!(Target[test_v256]: Source, ...)`; mask (`m*`)
+// targets go through `impl_from_cast_mask!`.
+//
+// 32-lane targets: i8x32, u8x32, m8x32.
+impl_from_cast!(i8x32[test_v256]: u8x32, m8x32, i16x32, u16x32, m16x32);
+impl_from_cast!(u8x32[test_v256]: i8x32, m8x32, i16x32, u16x32, m16x32);
+impl_from_cast_mask!(m8x32[test_v256]: i8x32, u8x32, i16x32, u16x32, m16x32);
+
+// 16-lane targets: i16x16, u16x16, m16x16.
+impl_from_cast!(
+    i16x16[test_v256]: i8x16, u8x16, m8x16, u16x16, m16x16,
+    i32x16, u32x16, f32x16, m32x16
+);
+impl_from_cast!(
+    u16x16[test_v256]: i8x16, u8x16, m8x16, i16x16, m16x16,
+    i32x16, u32x16, f32x16, m32x16
+);
+impl_from_cast_mask!(
+    m16x16[test_v256]: i8x16, u8x16, m8x16, i16x16, u16x16,
+    i32x16, u32x16, f32x16, m32x16
+);
+
+// 8-lane targets: i32x8, u32x8, f32x8, m32x8.
+impl_from_cast!(
+    i32x8[test_v256]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, u32x8, f32x8, m32x8,
+    i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8
+);
+impl_from_cast!(
+    u32x8[test_v256]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, f32x8, m32x8,
+    i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8
+);
+impl_from_cast!(
+    f32x8[test_v256]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, m32x8,
+    i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8
+);
+impl_from_cast_mask!(
+    m32x8[test_v256]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8,
+    i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8
+);
+
+// 4-lane targets: i64x4, u64x4, f64x4, m64x4.
+impl_from_cast!(
+    i64x4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4,
+    u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4
+);
+impl_from_cast!(
+    u64x4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4,
+    i64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4
+);
+impl_from_cast!(
+    f64x4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4,
+    i64x4, u64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4
+);
+impl_from_cast_mask!(
+    m64x4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4,
+    i64x4, u64x4, f64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4
+);
+
+// 2-lane targets: i128x2, u128x2, m128x2.
+impl_from_cast!(
+    i128x2[test_v256]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2,
+    i64x2, u64x2, f64x2, m64x2, u128x2, m128x2, isizex2, usizex2, msizex2
+);
+impl_from_cast!(
+    u128x2[test_v256]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2,
+    i64x2, u64x2, f64x2, m64x2, i128x2, m128x2, isizex2, usizex2, msizex2
+);
+impl_from_cast_mask!(
+    m128x2[test_v256]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2,
+    i64x2, u64x2, m64x2, f64x2, i128x2, u128x2, isizex2, usizex2, msizex2
+);
+
+// 4-lane pointer-sized targets: isizex4, usizex4, msizex4.
+impl_from_cast!(
+    isizex4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4,
+    i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, usizex4, msizex4
+);
+impl_from_cast!(
+    usizex4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4,
+    i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, msizex4
+);
+impl_from_cast_mask!(
+    msizex4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4,
+    i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4
+);
diff --git a/vendor/packed_simd_2/src/api/cast/v32.rs b/vendor/packed_simd_2/src/api/cast/v32.rs
new file mode 100644
index 000000000..2b254ba0c
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/cast/v32.rs
@@ -0,0 +1,30 @@
+//! `FromCast` and `IntoCast` implementations for portable 32-bit wide vectors
+#[rustfmt::skip]
+
+use crate::*;
+
+// Each invocation reads `macro!(Target[test_v32]: Source, ...)`; mask (`m*`)
+// targets go through `impl_from_cast_mask!`.
+//
+// 4-lane targets: i8x4, u8x4, m8x4.
+impl_from_cast!(
+    i8x4[test_v32]: u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4,
+    i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4
+);
+impl_from_cast!(
+    u8x4[test_v32]: i8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4,
+    i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4
+);
+impl_from_cast_mask!(
+    m8x4[test_v32]: i8x4, u8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4,
+    i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4
+);
+
+// 2-lane targets: i16x2, u16x2, m16x2.
+impl_from_cast!(
+    i16x2[test_v32]: i8x2, u8x2, m8x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2,
+    i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2
+);
+impl_from_cast!(
+    u16x2[test_v32]: i8x2, u8x2, m8x2, i16x2, m16x2, i32x2, u32x2, f32x2, m32x2,
+    i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2
+);
+impl_from_cast_mask!(
+    m16x2[test_v32]: i8x2, u8x2, m8x2, i16x2, u16x2, i32x2, u32x2, f32x2, m32x2,
+    i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2
+);
diff --git a/vendor/packed_simd_2/src/api/cast/v512.rs b/vendor/packed_simd_2/src/api/cast/v512.rs
new file mode 100644
index 000000000..5a10ab066
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/cast/v512.rs
@@ -0,0 +1,68 @@
+//! `FromCast` and `IntoCast` implementations for portable 512-bit wide vectors
+#[rustfmt::skip]
+
+use crate::*;
+
+// Each invocation reads `macro!(Target[test_v512]: Source, ...)`; mask (`m*`)
+// targets go through `impl_from_cast_mask!`.
+//
+// 64-lane targets: i8x64, u8x64, m8x64.
+impl_from_cast!(i8x64[test_v512]: u8x64, m8x64);
+impl_from_cast!(u8x64[test_v512]: i8x64, m8x64);
+impl_from_cast_mask!(m8x64[test_v512]: i8x64, u8x64);
+
+// 32-lane targets: i16x32, u16x32, m16x32.
+impl_from_cast!(i16x32[test_v512]: i8x32, u8x32, m8x32, u16x32, m16x32);
+impl_from_cast!(u16x32[test_v512]: i8x32, u8x32, m8x32, i16x32, m16x32);
+impl_from_cast_mask!(m16x32[test_v512]: i8x32, u8x32, m8x32, i16x32, u16x32);
+
+// 16-lane targets: i32x16, u32x16, f32x16, m32x16.
+impl_from_cast!(
+    i32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, u32x16, f32x16, m32x16
+);
+impl_from_cast!(
+    u32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, f32x16, m32x16
+);
+impl_from_cast!(
+    f32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, m32x16
+);
+impl_from_cast_mask!(
+    m32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16
+);
+
+// 8-lane targets: i64x8, u64x8, f64x8, m64x8.
+impl_from_cast!(
+    i64x8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8,
+    u64x8, f64x8, m64x8, isizex8, usizex8, msizex8
+);
+impl_from_cast!(
+    u64x8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8,
+    i64x8, f64x8, m64x8, isizex8, usizex8, msizex8
+);
+impl_from_cast!(
+    f64x8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8,
+    i64x8, u64x8, m64x8, isizex8, usizex8, msizex8
+);
+impl_from_cast_mask!(
+    m64x8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8,
+    i64x8, u64x8, f64x8, isizex8, usizex8, msizex8
+);
+
+// 4-lane targets: i128x4, u128x4, m128x4.
+impl_from_cast!(
+    i128x4[test_v512]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4,
+    i64x4, u64x4, f64x4, m64x4, u128x4, m128x4, isizex4, usizex4, msizex4
+);
+impl_from_cast!(
+    u128x4[test_v512]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4,
+    i64x4, u64x4, f64x4, m64x4, i128x4, m128x4, isizex4, usizex4, msizex4
+);
+impl_from_cast_mask!(
+    m128x4[test_v512]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4,
+    i64x4, u64x4, m64x4, f64x4, i128x4, u128x4, isizex4, usizex4, msizex4
+);
+
+// 8-lane pointer-sized targets: isizex8, usizex8, msizex8.
+impl_from_cast!(
+    isizex8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8,
+    i64x8, u64x8, f64x8, m64x8, usizex8, msizex8
+);
+impl_from_cast!(
+    usizex8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8,
+    i64x8, u64x8, f64x8, m64x8, isizex8, msizex8
+);
+impl_from_cast_mask!(
+    msizex8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8,
+    i64x8, u64x8, f64x8, m64x8, isizex8, usizex8
+);
diff --git a/vendor/packed_simd_2/src/api/cast/v64.rs b/vendor/packed_simd_2/src/api/cast/v64.rs
new file mode 100644
index 000000000..192a4638a
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/cast/v64.rs
@@ -0,0 +1,47 @@
+//! `FromCast` and `IntoCast` implementations for portable 64-bit wide vectors
+#[rustfmt::skip]
+
+use crate::*;
+
+// Each invocation reads `macro!(Target[test_v64]: Source, ...)`; mask (`m*`)
+// targets go through `impl_from_cast_mask!`.
+//
+// 8-lane targets: i8x8, u8x8, m8x8.
+impl_from_cast!(
+    i8x8[test_v64]: u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8,
+    i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8
+);
+impl_from_cast!(
+    u8x8[test_v64]: i8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8,
+    i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8
+);
+impl_from_cast_mask!(
+    m8x8[test_v64]: i8x8, u8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8,
+    i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8
+);
+
+// 4-lane targets: i16x4, u16x4, m16x4.
+impl_from_cast!(
+    i16x4[test_v64]: i8x4, u8x4, m8x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4,
+    i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4
+);
+impl_from_cast!(
+    u16x4[test_v64]: i8x4, u8x4, m8x4, i16x4, m16x4, i32x4, u32x4, f32x4, m32x4,
+    i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4
+);
+impl_from_cast_mask!(
+    m16x4[test_v64]: i8x4, u8x4, m8x4, i16x4, u16x4, i32x4, u32x4, f32x4, m32x4,
+    i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4
+);
+
+// 2-lane targets: i32x2, u32x2, f32x2, m32x2.
+impl_from_cast!(
+    i32x2[test_v64]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, u32x2, f32x2, m32x2,
+    i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2
+);
+impl_from_cast!(
+    u32x2[test_v64]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, f32x2, m32x2,
+    i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2
+);
+impl_from_cast!(
+    f32x2[test_v64]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, m32x2,
+    i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2
+);
+impl_from_cast_mask!(
+    m32x2[test_v64]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2,
+    i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2
+);
diff --git a/vendor/packed_simd_2/src/api/cmp.rs b/vendor/packed_simd_2/src/api/cmp.rs
new file mode 100644
index 000000000..6d5301ddd
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/cmp.rs
@@ -0,0 +1,16 @@
+//! Implement cmp traits for vector types
+
+#[macro_use]
+mod partial_eq;
+
+#[macro_use]
+mod eq;
+
+#[macro_use]
+mod partial_ord;
+
+#[macro_use]
+mod ord;
+
+#[macro_use]
+mod vertical;
diff --git a/vendor/packed_simd_2/src/api/cmp/eq.rs b/vendor/packed_simd_2/src/api/cmp/eq.rs
new file mode 100644
index 000000000..3c55d0dce
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/cmp/eq.rs
@@ -0,0 +1,27 @@
+//! Implements `Eq` for vector types.
+
+/// Marks both `$id` and `LexicographicallyOrdered<$id>` as `Eq`.
+///
+/// The generated test is a compile-time check only: it passes a `$id` value
+/// to a function bounded on `Eq`. `$elem_ty`, `$elem_count` and `$true` are
+/// accepted by the pattern but not used in the expansion.
+macro_rules! impl_cmp_eq {
+    (
+        [$elem_ty:ident; $elem_count:expr]:
+        $id:ident | $test_tt:tt |
+        ($true:expr, $false:expr)
+    ) => {
+        impl crate::cmp::Eq for LexicographicallyOrdered<$id> {}
+        impl crate::cmp::Eq for $id {}
+
+        test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _cmp_eq>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn eq() {
+                        // Only compiles when the argument type implements `Eq`.
+                        fn requires_eq<T: crate::cmp::Eq>(_: T) {}
+                        requires_eq($id::splat($false));
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/cmp/ord.rs b/vendor/packed_simd_2/src/api/cmp/ord.rs
new file mode 100644
index 000000000..e54ba3bfd
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/cmp/ord.rs
@@ -0,0 +1,43 @@
+//! Implements `Ord` for vector types.
+
+/// Adds `$id::lex_ord` and implements `Ord` for
+/// `LexicographicallyOrdered<$id>`.
+///
+/// `cmp` delegates to `partial_cmp` and treats a `None` result as
+/// unreachable. The generated test checks, at compile time, that the wrappers
+/// returned by `partial_lex_ord` and `lex_ord` satisfy an `Ord` bound.
+/// `$elem_ty`, `$elem_count` and `$true` are accepted but unused.
+macro_rules! impl_cmp_ord {
+    (
+        [$elem_ty:ident; $elem_count:expr]:
+        $id:ident | $test_tt:tt |
+        ($true:expr, $false:expr)
+    ) => {
+        impl $id {
+            /// Wraps a copy of this vector in a type that implements `Ord`.
+            #[inline]
+            pub fn lex_ord(&self) -> LexicographicallyOrdered<$id> {
+                LexicographicallyOrdered(*self)
+            }
+        }
+
+        impl crate::cmp::Ord for LexicographicallyOrdered<$id> {
+            #[inline]
+            fn cmp(&self, other: &Self) -> crate::cmp::Ordering {
+                // SAFETY (assumption, TODO confirm at the call sites): this
+                // macro is presumably only applied to vector types whose
+                // `partial_cmp` never yields `None`; reaching the closure
+                // would be undefined behaviour.
+                self.partial_cmp(other).unwrap_or_else(|| unsafe {
+                    crate::hint::unreachable_unchecked()
+                })
+            }
+        }
+
+        test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _cmp_ord>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn eq() {
+                        // Only compiles when the argument type implements `Ord`.
+                        fn requires_ord<T: crate::cmp::Ord>(_: T) {}
+                        let v = $id::splat($false);
+                        requires_ord(v.partial_lex_ord());
+                        requires_ord(v.lex_ord());
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/cmp/partial_eq.rs b/vendor/packed_simd_2/src/api/cmp/partial_eq.rs
new file mode 100644
index 000000000..1712a0de5
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/cmp/partial_eq.rs
@@ -0,0 +1,67 @@
+//! Implements `PartialEq` for vector types.
+
+/// Implements `PartialEq` for `$id` and for `LexicographicallyOrdered<$id>`.
+///
+/// Two vectors are equal when the lane-wise `eq` mask is `all()` true and
+/// unequal when the lane-wise `ne` mask has `any()` lane set. The wrapper
+/// impl compares the wrapped vectors.
+///
+/// The generated test compares `splat($false)` with `splat($true)` and, for
+/// vectors with more than one lane, a vector that is `$true` only in lane 0
+/// with `splat($true)`.
+macro_rules! impl_cmp_partial_eq {
+    (
+        [$elem_ty:ident; $elem_count:expr]:
+        $id:ident | $test_tt:tt |
+        ($true:expr, $false:expr)
+    ) => {
+        // FIXME: https://github.com/rust-lang-nursery/rust-clippy/issues/2892
+        #[allow(clippy::partialeq_ne_impl)]
+        impl crate::cmp::PartialEq<$id> for $id {
+            #[inline]
+            fn eq(&self, other: &Self) -> bool {
+                let lanes_equal = $id::eq(*self, *other);
+                lanes_equal.all()
+            }
+            #[inline]
+            fn ne(&self, other: &Self) -> bool {
+                let lanes_differ = $id::ne(*self, *other);
+                lanes_differ.any()
+            }
+        }
+
+        // FIXME: https://github.com/rust-lang-nursery/rust-clippy/issues/2892
+        #[allow(clippy::partialeq_ne_impl)]
+        impl crate::cmp::PartialEq<LexicographicallyOrdered<$id>>
+            for LexicographicallyOrdered<$id>
+        {
+            #[inline]
+            fn eq(&self, other: &Self) -> bool {
+                PartialEq::eq(&self.0, &other.0)
+            }
+            #[inline]
+            fn ne(&self, other: &Self) -> bool {
+                PartialEq::ne(&self.0, &other.0)
+            }
+        }
+
+        test_if! {
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _cmp_PartialEq>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn partial_eq() {
+                        // `a` and `b` must differ; `a` must equal itself.
+                        let check_distinct = |a: $id, b: $id| {
+                            assert!(a != b);
+                            assert!(!(a == b));
+                            assert!(a == a);
+                            assert!(!(a != a));
+                        };
+
+                        check_distinct($id::splat($false), $id::splat($true));
+
+                        if $id::lanes() > 1 {
+                            // Vectors that agree in lane 0 only.
+                            check_distinct(
+                                $id::splat($false).replace(0, $true),
+                                $id::splat($true),
+                            );
+                        }
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/cmp/partial_ord.rs b/vendor/packed_simd_2/src/api/cmp/partial_ord.rs
new file mode 100644
index 000000000..a2292918b
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/cmp/partial_ord.rs
@@ -0,0 +1,234 @@
+//! Implements `PartialOrd` for vector types.
+//!
+//! This implements a lexicographical order.
+
+/// Adds `$id::partial_lex_ord` and implements a lexicographical `PartialOrd`
+/// for `LexicographicallyOrdered<$id>`.
+///
+/// Lanes are inspected from index 0 upwards; the first lane whose lane-wise
+/// `eq` is false decides `lt`/`gt`. `partial_cmp` returns `None` when the
+/// operands are neither equal, less, nor greater. `$elem_ty`, `$elem_count`
+/// and `$test_tt` are accepted but unused.
+macro_rules! impl_cmp_partial_ord {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl $id {
+            /// Wraps a copy of this vector in a type that implements
+            /// `PartialOrd`.
+            #[inline]
+            pub fn partial_lex_ord(&self) -> LexicographicallyOrdered<$id> {
+                LexicographicallyOrdered(*self)
+            }
+        }
+
+        impl crate::cmp::PartialOrd<LexicographicallyOrdered<$id>>
+            for LexicographicallyOrdered<$id>
+        {
+            #[inline]
+            fn partial_cmp(
+                &self, other: &Self,
+            ) -> Option<crate::cmp::Ordering> {
+                use crate::cmp::Ordering;
+                if PartialEq::eq(self, other) {
+                    return Some(Ordering::Equal);
+                }
+                if PartialOrd::lt(self, other) {
+                    return Some(Ordering::Less);
+                }
+                if PartialOrd::gt(self, other) {
+                    return Some(Ordering::Greater);
+                }
+                None
+            }
+            #[inline]
+            fn lt(&self, other: &Self) -> bool {
+                let m_lt = self.0.lt(other.0);
+                let m_eq = self.0.eq(other.0);
+                // The first differing lane decides; with no differing lane
+                // the result is `false`.
+                (0..$id::lanes())
+                    .find(|&lane| !m_eq.extract(lane))
+                    .map_or(false, |lane| m_lt.extract(lane))
+            }
+            #[inline]
+            fn le(&self, other: &Self) -> bool {
+                let is_less = self.lt(other);
+                let is_equal = PartialEq::eq(self, other);
+                is_less | is_equal
+            }
+            #[inline]
+            fn gt(&self, other: &Self) -> bool {
+                let m_gt = self.0.gt(other.0);
+                let m_eq = self.0.eq(other.0);
+                // The first differing lane decides; with no differing lane
+                // the result is `false`.
+                (0..$id::lanes())
+                    .find(|&lane| !m_eq.extract(lane))
+                    .map_or(false, |lane| m_gt.extract(lane))
+            }
+            #[inline]
+            fn ge(&self, other: &Self) -> bool {
+                let is_greater = self.gt(other);
+                let is_equal = PartialEq::eq(self, other);
+                is_greater | is_equal
+            }
+        }
+    };
+}
+
+/// Generates the `partial_lex_ord` test for the integer vector `$id`.
+///
+/// Every scenario builds a pair `(lo, hi)` and checks through `test_cmp` that
+/// `lo < hi`, `hi > lo`, and that each operand compares `Equal` to itself.
+/// `$elem_count` is accepted but unused.
+macro_rules! test_cmp_partial_ord_int {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _cmp_PartialOrd>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn partial_lex_ord() {
+                        use crate::testing::utils::{test_cmp};
+                        use crate::cmp::Ordering;
+
+                        // Runs the four comparisons for a pair with lo < hi.
+                        let assert_less = |lo: $id, hi: $id| {
+                            test_cmp(lo.partial_lex_ord(), hi.partial_lex_ord(),
+                                     Some(Ordering::Less));
+                            test_cmp(hi.partial_lex_ord(), lo.partial_lex_ord(),
+                                     Some(Ordering::Greater));
+                            test_cmp(lo.partial_lex_ord(), lo.partial_lex_ord(),
+                                     Some(Ordering::Equal));
+                            test_cmp(hi.partial_lex_ord(), hi.partial_lex_ord(),
+                                     Some(Ordering::Equal));
+                        };
+
+                        // constant values: all zeros vs all ones
+                        assert_less($id::splat(0), $id::splat(1));
+
+                        // ascending  = [0, 1, 2, ..., lanes - 1]
+                        // descending = [lanes, lanes - 1, ..., 1]
+                        let mut ascending = $id::splat(0);
+                        let mut descending = $id::splat(0);
+                        for i in 0..$id::lanes() {
+                            ascending = ascending.replace(i, i as $elem_ty);
+                            descending = descending.replace(
+                                i, ($id::lanes() - i) as $elem_ty
+                            );
+                        }
+                        assert_less(ascending, descending);
+
+                        // `ascending` vs a copy whose last lane is one larger
+                        let last = $id::lanes() - 1;
+                        let bumped_last = ascending.replace(
+                            last,
+                            ascending.extract(last) + 1 as $elem_ty
+                        );
+                        assert_less(ascending, bumped_last);
+
+                        if $id::lanes() > 2 {
+                            // sparse = [0, 1, 0, 0, ...] vs `ascending`
+                            let sparse =
+                                $id::splat(0).replace(1, 1 as $elem_ty);
+                            assert_less(sparse, ascending);
+
+                            // `sparse` vs a copy whose lane 2 holds
+                            // sparse[lanes - 1] + 1, i.e. [0, 1, 1, 0, ...]
+                            let sparse_bumped = sparse.replace(
+                                2,
+                                sparse.extract($id::lanes() - 1) + 1 as $elem_ty
+                            );
+                            assert_less(sparse, sparse_bumped);
+                        }
+                    }
+                }
+            }
+        }
+    };
+}
+
+/// Generates the `partial_lex_ord` test for the mask vector `$id`.
+///
+/// Every scenario builds a pair `(lo, hi)` and checks through `test_cmp` that
+/// `lo < hi`, `hi > lo`, and that each operand compares `Equal` to itself.
+/// `$elem_ty` and `$elem_count` are accepted but unused.
+macro_rules! test_cmp_partial_ord_mask {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _cmp_PartialOrd>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn partial_lex_ord() {
+                        use crate::testing::utils::{test_cmp};
+                        use crate::cmp::Ordering;
+
+                        // Runs the four comparisons for a pair with lo < hi.
+                        let assert_less = |lo: $id, hi: $id| {
+                            test_cmp(lo.partial_lex_ord(), hi.partial_lex_ord(),
+                                     Some(Ordering::Less));
+                            test_cmp(hi.partial_lex_ord(), lo.partial_lex_ord(),
+                                     Some(Ordering::Greater));
+                            test_cmp(lo.partial_lex_ord(), lo.partial_lex_ord(),
+                                     Some(Ordering::Equal));
+                            test_cmp(hi.partial_lex_ord(), hi.partial_lex_ord(),
+                                     Some(Ordering::Equal));
+                        };
+
+                        let last = $id::lanes() - 1;
+
+                        // constant values: all false vs all true
+                        assert_less($id::splat(false), $id::splat(true));
+
+                        // all false vs all false except a true last lane
+                        assert_less(
+                            $id::splat(false),
+                            $id::splat(false).replace(last, true),
+                        );
+
+                        // all true except a false last lane vs all true
+                        assert_less(
+                            $id::splat(true).replace(last, false),
+                            $id::splat(true),
+                        );
+
+                        if $id::lanes() > 2 {
+                            // lo = [false, true, false, false, ...]
+                            // hi = [false, true, true, true, ...]
+                            assert_less(
+                                $id::splat(false).replace(1, true),
+                                $id::splat(true).replace(0, false),
+                            );
+                        }
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/cmp/vertical.rs b/vendor/packed_simd_2/src/api/cmp/vertical.rs
new file mode 100644
index 000000000..ea4a0d1a3
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/cmp/vertical.rs
@@ -0,0 +1,114 @@
+//! Vertical (lane-wise) vector comparisons returning vector masks.
+
+/// Generates the lane-wise comparison methods `eq`, `ne`, `lt`, `le`, `gt`
+/// and `ge` on the vector type `$id`, each returning a `$mask_ty`.
+///
+/// * `$is_mask`: when true, the ordering intrinsics are swapped (`lt` calls
+///   `simd_gt`, `le` calls `simd_ge`, and vice versa).
+///   NOTE(review): presumably because mask lanes store `true` as all-ones,
+///   which orders below `false` as a signed integer - confirm against the
+///   mask type definitions.
+/// * `($true, $false)`: lane values used by the generated test, which
+///   expects `$false < $true` in every lane.
+/// * `$test_tt`: passed to `test_if!` together with the generated test
+///   module.
+macro_rules! impl_cmp_vertical {
+    (
+        [$elem_ty:ident; $elem_count:expr]:
+        $id:ident,
+        $mask_ty:ident,
+        $is_mask:expr,($true:expr, $false:expr) | $test_tt:tt
+    ) => {
+        impl $id {
+            /// Lane-wise equality comparison.
+            #[inline]
+            pub fn eq(self, other: Self) -> $mask_ty {
+                use crate::llvm::simd_eq;
+                Simd(unsafe { simd_eq(self.0, other.0) })
+            }
+
+            /// Lane-wise inequality comparison.
+            #[inline]
+            pub fn ne(self, other: Self) -> $mask_ty {
+                use crate::llvm::simd_ne;
+                Simd(unsafe { simd_ne(self.0, other.0) })
+            }
+
+            /// Lane-wise less-than comparison.
+            #[inline]
+            pub fn lt(self, other: Self) -> $mask_ty {
+                use crate::llvm::{simd_gt, simd_lt};
+                // Masks use the opposite intrinsic; see the macro docs.
+                if $is_mask {
+                    Simd(unsafe { simd_gt(self.0, other.0) })
+                } else {
+                    Simd(unsafe { simd_lt(self.0, other.0) })
+                }
+            }
+
+            /// Lane-wise less-than-or-equals comparison.
+            #[inline]
+            pub fn le(self, other: Self) -> $mask_ty {
+                use crate::llvm::{simd_ge, simd_le};
+                // Masks use the opposite intrinsic; see the macro docs.
+                if $is_mask {
+                    Simd(unsafe { simd_ge(self.0, other.0) })
+                } else {
+                    Simd(unsafe { simd_le(self.0, other.0) })
+                }
+            }
+
+            /// Lane-wise greater-than comparison.
+            #[inline]
+            pub fn gt(self, other: Self) -> $mask_ty {
+                use crate::llvm::{simd_gt, simd_lt};
+                // Masks use the opposite intrinsic; see the macro docs.
+                if $is_mask {
+                    Simd(unsafe { simd_lt(self.0, other.0) })
+                } else {
+                    Simd(unsafe { simd_gt(self.0, other.0) })
+                }
+            }
+
+            /// Lane-wise greater-than-or-equals comparison.
+            #[inline]
+            pub fn ge(self, other: Self) -> $mask_ty {
+                use crate::llvm::{simd_ge, simd_le};
+                // Masks use the opposite intrinsic; see the macro docs.
+                if $is_mask {
+                    Simd(unsafe { simd_le(self.0, other.0) })
+                } else {
+                    Simd(unsafe { simd_ge(self.0, other.0) })
+                }
+            }
+        }
+        test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _cmp_vertical>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn cmp() {
+                        // Uniform case: every lane of `a` is `$false` and
+                        // every lane of `b` is `$true`.
+                        let a = $id::splat($false);
+                        let b = $id::splat($true);
+
+                        // `a < b` and `a <= b` must hold in all lanes.
+                        let r = a.lt(b);
+                        let e = $mask_ty::splat(true);
+                        assert!(r == e);
+                        let r = a.le(b);
+                        assert!(r == e);
+
+                        // `a > b`, `a >= b` and `a == b` must hold in none.
+                        let e = $mask_ty::splat(false);
+                        let r = a.gt(b);
+                        assert!(r == e);
+                        let r = a.ge(b);
+                        assert!(r == e);
+                        let r = a.eq(b);
+                        assert!(r == e);
+
+                        // Mixed case: even lanes keep `a < b`, odd lanes
+                        // reverse it, so `lt` must produce an alternating
+                        // mask.
+                        let mut a = a;
+                        let mut b = b;
+                        let mut e = e;
+                        for i in 0..$id::lanes() {
+                            if i % 2 == 0 {
+                                a = a.replace(i, $false);
+                                b = b.replace(i, $true);
+                                e = e.replace(i, true);
+                            } else {
+                                a = a.replace(i, $true);
+                                b = b.replace(i, $false);
+                                e = e.replace(i, false);
+                            }
+                        }
+                        let r = a.lt(b);
+                        assert!(r == e);
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/default.rs b/vendor/packed_simd_2/src/api/default.rs
new file mode 100644
index 000000000..7af55ea77
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/default.rs
@@ -0,0 +1,30 @@
+//! Implements `Default` for vector types.
+
+/// Implements `Default` for the vector type `$id`: every lane is set to the
+/// default value of `$elem_ty`. Also emits a test checking each lane.
+macro_rules! impl_default {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl Default for $id {
+            #[inline]
+            fn default() -> Self {
+                let lane = $elem_ty::default();
+                Self::splat(lane)
+            }
+        }
+
+        test_if!{
+            $test_tt:
+            paste::item! {
+                // Lanes are compared exactly against the element default,
+                // so exact float comparison is intended here.
+                #[allow(clippy::float_cmp)]
+                pub mod [<$id _default>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn default() {
+                        let v = $id::default();
+                        for lane in 0..$id::lanes() {
+                            let expected = $elem_ty::default();
+                            assert_eq!(v.extract(lane), expected);
+                        }
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/fmt.rs b/vendor/packed_simd_2/src/api/fmt.rs
new file mode 100644
index 000000000..f3f55c401
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/fmt.rs
@@ -0,0 +1,12 @@
+//! Implements formatting APIs
+
+#[macro_use]
+mod debug;
+#[macro_use]
+mod lower_hex;
+#[macro_use]
+mod upper_hex;
+#[macro_use]
+mod octal;
+#[macro_use]
+mod binary;
diff --git a/vendor/packed_simd_2/src/api/fmt/binary.rs b/vendor/packed_simd_2/src/api/fmt/binary.rs
new file mode 100644
index 000000000..b60769082
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/fmt/binary.rs
@@ -0,0 +1,56 @@
+//! Implement `Binary` formatting
+
+/// Implements `Binary` formatting for `$id`.
+///
+/// The output has the shape `$id(lane0, lane1, ...)`; every lane is
+/// formatted with the formatter that was handed to the vector.
+macro_rules! impl_fmt_binary {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl crate::fmt::Binary for $id {
+            #[allow(clippy::missing_inline_in_public_items)]
+            fn fmt(
+                &self, f: &mut crate::fmt::Formatter<'_>,
+            ) -> crate::fmt::Result {
+                write!(f, "{}(", stringify!($id))?;
+                for lane in 0..$elem_count {
+                    // A separator precedes every lane but the first.
+                    if lane != 0 {
+                        write!(f, ", ")?;
+                    }
+                    self.extract(lane).fmt(f)?;
+                }
+                write!(f, ")")
+            }
+        }
+        test_if! {
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _fmt_binary>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn binary() {
+                        use arrayvec::{ArrayString,ArrayVec};
+                        use crate::fmt::Write;
+                        type TinyString = ArrayString<[u8; 512]>;
+
+                        let v = $id::splat($elem_ty::default());
+                        let mut rendered = TinyString::new();
+                        write!(&mut rendered, "{:#b}", v).unwrap();
+
+                        // The output must be wrapped as `$id(...)`.
+                        let mut prefix = TinyString::new();
+                        write!(&mut prefix, "{}(", stringify!($id)).unwrap();
+                        assert!(rendered.starts_with(prefix.as_str()));
+                        assert!(rendered.ends_with(")"));
+
+                        // Drop the wrapper and split into per-lane strings.
+                        let inner = rendered
+                            .replace(prefix.as_str(), "")
+                            .replace(")", "");
+                        let lanes: ArrayVec<[TinyString; 64]> = inner
+                            .split(",")
+                            .map(|lane| TinyString::from(lane.trim()).unwrap())
+                            .collect();
+                        assert_eq!(lanes.len(), $id::lanes());
+
+                        // Each lane must match the scalar formatting.
+                        for (index, actual) in lanes.into_iter().enumerate() {
+                            let mut expected = TinyString::new();
+                            write!(&mut expected, "{:#b}", v.extract(index))
+                                .unwrap();
+                            assert_eq!(actual, expected);
+                        }
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/fmt/debug.rs b/vendor/packed_simd_2/src/api/fmt/debug.rs
new file mode 100644
index 000000000..ad0b8a59a
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/fmt/debug.rs
@@ -0,0 +1,62 @@
+//! Implement debug formatting
+
+/// Generates the test module for the `Debug` implementation of `$id`.
+///
+/// The test formats a default vector with `{:?}`, checks that the output is
+/// wrapped as `$id(...)`, and compares every comma-separated entry with the
+/// `{:?}` output of the corresponding extracted lane.
+macro_rules! impl_fmt_debug_tests {
+    ([$elem_ty:ty; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        test_if! {
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _fmt_debug>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn debug() {
+                        use arrayvec::{ArrayString,ArrayVec};
+                        type TinyString = ArrayString<[u8; 512]>;
+
+                        use crate::fmt::Write;
+                        let v = $id::default();
+                        let mut s = TinyString::new();
+                        write!(&mut s, "{:?}", v).unwrap();
+
+                        // Expected wrapper: type name, `(`, lanes, `)`.
+                        let mut beg = TinyString::new();
+                        write!(&mut beg, "{}(", stringify!($id)).unwrap();
+                        assert!(s.starts_with(beg.as_str()));
+                        assert!(s.ends_with(")"));
+                        // Strip the wrapper and split into one trimmed
+                        // string per lane.
+                        let s: ArrayVec<[TinyString; 64]>
+                            = s.replace(beg.as_str(), "")
+                            .replace(")", "").split(",")
+                            .map(|v| TinyString::from(v.trim()).unwrap())
+                            .collect();
+                        assert_eq!(s.len(), $id::lanes());
+                        // Every entry must equal the lane formatted alone.
+                        for (index, ss) in s.into_iter().enumerate() {
+                            let mut e = TinyString::new();
+                            write!(&mut e, "{:?}", v.extract(index)).unwrap();
+                            assert_eq!(ss, e);
+                        }
+                    }
+                }
+            }
+        }
+    };
+}
+
+/// Implements `Debug` for `$id`, printing `$id(lane0, lane1, ...)`, and
+/// emits the matching test through `impl_fmt_debug_tests!`.
+macro_rules! impl_fmt_debug {
+    ([$elem_ty:ty; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl crate::fmt::Debug for $id {
+            #[allow(clippy::missing_inline_in_public_items)]
+            fn fmt(
+                &self, f: &mut crate::fmt::Formatter<'_>,
+            ) -> crate::fmt::Result {
+                write!(f, "{}(", stringify!($id))?;
+                for lane in 0..$elem_count {
+                    // A separator precedes every lane but the first.
+                    if lane != 0 {
+                        write!(f, ", ")?;
+                    }
+                    self.extract(lane).fmt(f)?;
+                }
+                write!(f, ")")
+            }
+        }
+        impl_fmt_debug_tests!([$elem_ty; $elem_count]: $id | $test_tt);
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/fmt/lower_hex.rs b/vendor/packed_simd_2/src/api/fmt/lower_hex.rs
new file mode 100644
index 000000000..5a7aa14b5
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/fmt/lower_hex.rs
@@ -0,0 +1,56 @@
+//! Implement `LowerHex` formatting
+
+/// Implements `LowerHex` formatting for `$id`.
+///
+/// The output has the shape `$id(lane0, lane1, ...)`; every lane is
+/// formatted with the formatter that was handed to the vector.
+macro_rules! impl_fmt_lower_hex {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl crate::fmt::LowerHex for $id {
+            #[allow(clippy::missing_inline_in_public_items)]
+            fn fmt(
+                &self, f: &mut crate::fmt::Formatter<'_>,
+            ) -> crate::fmt::Result {
+                write!(f, "{}(", stringify!($id))?;
+                for lane in 0..$elem_count {
+                    // A separator precedes every lane but the first.
+                    if lane != 0 {
+                        write!(f, ", ")?;
+                    }
+                    self.extract(lane).fmt(f)?;
+                }
+                write!(f, ")")
+            }
+        }
+        test_if! {
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _fmt_lower_hex>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn lower_hex() {
+                        use arrayvec::{ArrayString,ArrayVec};
+                        use crate::fmt::Write;
+                        type TinyString = ArrayString<[u8; 512]>;
+
+                        let v = $id::splat($elem_ty::default());
+                        let mut rendered = TinyString::new();
+                        write!(&mut rendered, "{:#x}", v).unwrap();
+
+                        // The output must be wrapped as `$id(...)`.
+                        let mut prefix = TinyString::new();
+                        write!(&mut prefix, "{}(", stringify!($id)).unwrap();
+                        assert!(rendered.starts_with(prefix.as_str()));
+                        assert!(rendered.ends_with(")"));
+
+                        // Drop the wrapper and split into per-lane strings.
+                        let inner = rendered
+                            .replace(prefix.as_str(), "")
+                            .replace(")", "");
+                        let lanes: ArrayVec<[TinyString; 64]> = inner
+                            .split(",")
+                            .map(|lane| TinyString::from(lane.trim()).unwrap())
+                            .collect();
+                        assert_eq!(lanes.len(), $id::lanes());
+
+                        // Each lane must match the scalar formatting.
+                        for (index, actual) in lanes.into_iter().enumerate() {
+                            let mut expected = TinyString::new();
+                            write!(&mut expected, "{:#x}", v.extract(index))
+                                .unwrap();
+                            assert_eq!(actual, expected);
+                        }
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/fmt/octal.rs b/vendor/packed_simd_2/src/api/fmt/octal.rs
new file mode 100644
index 000000000..83ac8abc7
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/fmt/octal.rs
@@ -0,0 +1,56 @@
+//! Implement Octal formatting
+
+/// Implements `Octal` formatting for `$id`.
+///
+/// The output has the shape `$id(lane0, lane1, ...)`; every lane is
+/// formatted with the formatter that was handed to the vector.
+macro_rules! impl_fmt_octal {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl crate::fmt::Octal for $id {
+            #[allow(clippy::missing_inline_in_public_items)]
+            fn fmt(
+                &self, f: &mut crate::fmt::Formatter<'_>,
+            ) -> crate::fmt::Result {
+                write!(f, "{}(", stringify!($id))?;
+                for i in 0..$elem_count {
+                    // A separator precedes every lane but the first.
+                    if i > 0 {
+                        write!(f, ", ")?;
+                    }
+                    self.extract(i).fmt(f)?;
+                }
+                write!(f, ")")
+            }
+        }
+        test_if! {
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _fmt_octal>] {
+                    use super::*;
+                    // Named `octal` to match the sibling formatting tests
+                    // (`binary`, `lower_hex`, `upper_hex`, `debug`).
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn octal() {
+                        use arrayvec::{ArrayString,ArrayVec};
+                        type TinyString = ArrayString<[u8; 512]>;
+
+                        use crate::fmt::Write;
+                        let v = $id::splat($elem_ty::default());
+                        let mut s = TinyString::new();
+                        write!(&mut s, "{:#o}", v).unwrap();
+
+                        // The output must be wrapped as `$id(...)`.
+                        let mut beg = TinyString::new();
+                        write!(&mut beg, "{}(", stringify!($id)).unwrap();
+                        assert!(s.starts_with(beg.as_str()));
+                        assert!(s.ends_with(")"));
+                        // Drop the wrapper and split into per-lane strings.
+                        let s: ArrayVec<[TinyString; 64]>
+                            = s.replace(beg.as_str(), "").replace(")", "")
+                            .split(",")
+                            .map(|v| TinyString::from(v.trim()).unwrap())
+                            .collect();
+                        assert_eq!(s.len(), $id::lanes());
+                        // Each lane must match the scalar formatting.
+                        for (index, ss) in s.into_iter().enumerate() {
+                            let mut e = TinyString::new();
+                            write!(&mut e, "{:#o}", v.extract(index)).unwrap();
+                            assert_eq!(ss, e);
+                        }
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/fmt/upper_hex.rs b/vendor/packed_simd_2/src/api/fmt/upper_hex.rs
new file mode 100644
index 000000000..aa88f673a
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/fmt/upper_hex.rs
@@ -0,0 +1,56 @@
+//! Implement `UpperHex` formatting
+
+/// Implements `UpperHex` formatting for `$id`.
+///
+/// The output has the shape `$id(lane0, lane1, ...)`; every lane is
+/// formatted with the formatter that was handed to the vector.
+macro_rules! impl_fmt_upper_hex {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl crate::fmt::UpperHex for $id {
+            #[allow(clippy::missing_inline_in_public_items)]
+            fn fmt(
+                &self, f: &mut crate::fmt::Formatter<'_>,
+            ) -> crate::fmt::Result {
+                write!(f, "{}(", stringify!($id))?;
+                for lane in 0..$elem_count {
+                    // A separator precedes every lane but the first.
+                    if lane != 0 {
+                        write!(f, ", ")?;
+                    }
+                    self.extract(lane).fmt(f)?;
+                }
+                write!(f, ")")
+            }
+        }
+        test_if! {
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _fmt_upper_hex>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn upper_hex() {
+                        use arrayvec::{ArrayString,ArrayVec};
+                        use crate::fmt::Write;
+                        type TinyString = ArrayString<[u8; 512]>;
+
+                        let v = $id::splat($elem_ty::default());
+                        let mut rendered = TinyString::new();
+                        write!(&mut rendered, "{:#X}", v).unwrap();
+
+                        // The output must be wrapped as `$id(...)`.
+                        let mut prefix = TinyString::new();
+                        write!(&mut prefix, "{}(", stringify!($id)).unwrap();
+                        assert!(rendered.starts_with(prefix.as_str()));
+                        assert!(rendered.ends_with(")"));
+
+                        // Drop the wrapper and split into per-lane strings.
+                        let inner = rendered
+                            .replace(prefix.as_str(), "")
+                            .replace(")", "");
+                        let lanes: ArrayVec<[TinyString; 64]> = inner
+                            .split(",")
+                            .map(|lane| TinyString::from(lane.trim()).unwrap())
+                            .collect();
+                        assert_eq!(lanes.len(), $id::lanes());
+
+                        // Each lane must match the scalar formatting.
+                        for (index, actual) in lanes.into_iter().enumerate() {
+                            let mut expected = TinyString::new();
+                            write!(&mut expected, "{:#X}", v.extract(index))
+                                .unwrap();
+                            assert_eq!(actual, expected);
+                        }
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/from.rs b/vendor/packed_simd_2/src/api/from.rs
new file mode 100644
index 000000000..c30c4d6e2
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/from.rs
@@ -0,0 +1,7 @@
+//! Implementations of the `From` and `Into` traits
+
+#[macro_use]
+mod from_array;
+
+#[macro_use]
+mod from_vector;
diff --git a/vendor/packed_simd_2/src/api/from/from_array.rs b/vendor/packed_simd_2/src/api/from/from_array.rs
new file mode 100644
index 000000000..b83f93816
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/from/from_array.rs
@@ -0,0 +1,123 @@
+//! Implements `From<[T; N]>` and `Into<[T; N]>` for vector types.
+
+/// Implements `From<[$elem_ty; $elem_count]>` for `$id` and
+/// `From<$id>` for `[$elem_ty; $elem_count]`.
+///
+/// Both directions move the value through a local union whose two fields
+/// are the array and the vector. `($non_default_array, $non_default_vec)`
+/// is the same non-default value written once as an array element and once
+/// as a vector lane; the generated test stores it in position 0 of both
+/// representations and expects the conversions to agree.
+macro_rules! impl_from_array {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt
+     | ($non_default_array:expr, $non_default_vec:expr)) => {
+        impl From<[$elem_ty; $elem_count]> for $id {
+            #[inline]
+            fn from(array: [$elem_ty; $elem_count]) -> Self {
+                union U {
+                    array: [$elem_ty; $elem_count],
+                    vec: $id,
+                }
+                // NOTE(review): assumes the array and the vector have the
+                // same size and a compatible layout - confirm.
+                unsafe { U { array }.vec }
+            }
+        }
+
+        impl From<$id> for [$elem_ty; $elem_count] {
+            #[inline]
+            fn from(vec: $id) -> Self {
+                union U {
+                    array: [$elem_ty; $elem_count],
+                    vec: $id,
+                }
+                // NOTE(review): assumes the array and the vector have the
+                // same size and a compatible layout - confirm.
+                unsafe { U { vec }.array }
+            }
+        }
+
+        // FIXME: `Into::into` is not inline, but due to
+        // the blanket impl in `std`, which is not
+        // marked `default`, we cannot override it here with
+        // specialization.
+        /*
+        impl Into<[$elem_ty; $elem_count]> for $id {
+            #[inline]
+            fn into(self) -> [$elem_ty; $elem_count] {
+                union U {
+                    array: [$elem_ty; $elem_count],
+                    vec: $id,
+                }
+                unsafe { U { vec: self }.array }
+            }
+        }
+
+        impl Into<$id> for [$elem_ty; $elem_count] {
+            #[inline]
+            fn into(self) -> $id {
+                union U {
+                    array: [$elem_ty; $elem_count],
+                    vec: $id,
+                }
+                unsafe { U { array: self }.vec }
+            }
+        }
+        */
+
+        test_if! {
+            $test_tt:
+            paste::item! {
+                // Comparisons use integer casts within mantissa^1 range.
+                #[allow(clippy::float_cmp)]
+                mod [<$id _from>] {
+                    use super::*;
+                    #[test]
+                    fn array() {
+                        let vec: $id = Default::default();
+
+                        // Build the all-default array with a repeat
+                        // expression. This works for any length (the
+                        // element type is `Copy`, as the array is reused
+                        // after being moved below) and needs no `unsafe`,
+                        // unlike writing the elements through a pointer
+                        // derived from a zero-sized union field.
+                        let default: $elem_ty = Default::default();
+                        let mut array = [default; $elem_count];
+
+                        array[0] = $non_default_array;
+                        let vec = vec.replace(0, $non_default_vec);
+
+                        let vec_from_array = $id::from(array);
+                        assert_eq!(vec_from_array, vec);
+                        let array_from_vec
+                            = <[$elem_ty; $elem_count]>::from(vec);
+                        // FIXME: Workaround for arrays with more than 32
+                        // elements.
+                        for i in 0..$elem_count {
+                            assert_eq!(array_from_vec[i], array[i]);
+                        }
+
+                        let vec_from_into_array: $id = array.into();
+                        assert_eq!(vec_from_into_array, vec);
+                        let array_from_into_vec: [$elem_ty; $elem_count]
+                            = vec.into();
+                        // FIXME: Workaround for arrays with more than 32
+                        // elements.
+                        for i in 0..$elem_count {
+                            assert_eq!(array_from_into_vec[i], array[i]);
+                        }
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/from/from_vector.rs b/vendor/packed_simd_2/src/api/from/from_vector.rs
new file mode 100644
index 000000000..55f70016d
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/from/from_vector.rs
@@ -0,0 +1,67 @@
+//! Implements `From` and `Into` for vector types.
+
+/// Implements `From<$source>` for the vector type `$id` by casting the
+/// source vector with `simd_cast`.
+///
+/// The conversion only type-checks when both vector types report the same
+/// `LanesType` through `crate::sealed::Simd`.
+macro_rules! impl_from_vector {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt
+     | $source:ident) => {
+        impl From<$source> for $id {
+            #[inline]
+            fn from(source: $source) -> Self {
+                use crate::llvm::simd_cast;
+
+                // Compile-time guard: instantiating this helper requires
+                // `Src` and `Dst` to agree on `LanesType`.
+                fn same_lanes_type<Dst, Src>()
+                where
+                    Dst: crate::sealed::Simd,
+                    Src: crate::sealed::Simd<LanesType = Dst::LanesType>,
+                {
+                }
+                same_lanes_type::<$id, $source>();
+
+                Simd(unsafe { simd_cast(source.0) })
+            }
+        }
+
+        // FIXME: `Into::into` is not inline, but due to the blanket impl in
+        // `std`, which is not marked `default`, we cannot override it here
+        // with specialization.
+
+        /*
+           impl Into<$id> for $source {
+               #[inline]
+               fn into(self) -> $id {
+                   unsafe { simd_cast(self) }
+               }
+           }
+        */
+
+        test_if! {
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _from_ $source>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn from() {
+                        assert_eq!($id::lanes(), $source::lanes());
+                        let input: $source = Default::default();
+                        let expected: $id = Default::default();
+
+                        // Explicit `From` call.
+                        let via_from = $id::from(input);
+                        assert_eq!(via_from, expected);
+
+                        // Same conversion through the blanket `Into`.
+                        let via_into: $id = input.into();
+                        assert_eq!(via_into, expected);
+                    }
+                }
+            }
+        }
+    };
+}
+
+/// Invokes `impl_from_vector!` once for every listed `$source` type, so
+/// that `$id` gets one `From<$source>` implementation per source vector
+/// type. An empty source list expands to nothing.
+macro_rules! impl_from_vectors {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt
+     | $($source:ident),*) => {
+        $(
+            impl_from_vector!(
+                [$elem_ty; $elem_count]: $id | $test_tt | $source
+            );
+        )*
+    }
+}
diff --git a/vendor/packed_simd_2/src/api/hash.rs b/vendor/packed_simd_2/src/api/hash.rs
new file mode 100644
index 000000000..ee80eff93
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/hash.rs
@@ -0,0 +1,49 @@
+//! Implements `Hash` for vector types.
+
+/// Implements `Hash` for the vector type `$id` by hashing its lanes as an
+/// array `[$elem_ty; $id::lanes()]`.
+///
+/// The generated test checks that a splatted vector and an array holding
+/// the same value in every element produce the same hash.
+macro_rules! impl_hash {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl crate::hash::Hash for $id {
+            #[inline]
+            fn hash<H: crate::hash::Hasher>(&self, state: &mut H) {
+                // View the vector as an array of its lanes.
+                union Lanes {
+                    data: [$elem_ty; $id::lanes()],
+                    vec: $id,
+                }
+                // NOTE(review): assumes the array and the vector have the
+                // same size and a compatible layout - the test below only
+                // checks the size.
+                let lanes = unsafe { Lanes { vec: *self }.data };
+                lanes.hash(state)
+            }
+        }
+
+        test_if! {
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _hash>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn hash() {
+                        use crate::hash::{Hash, Hasher};
+                        #[allow(deprecated)]
+                        use crate::hash::{SipHasher13};
+                        type Array = [$elem_ty; $id::lanes()];
+                        let array: Array = [42 as $elem_ty; $id::lanes()];
+                        assert_eq!(
+                            crate::mem::size_of::<Array>(),
+                            crate::mem::size_of::<$id>()
+                        );
+                        // Two hashers starting from the same fresh state.
+                        #[allow(deprecated)]
+                        let mut array_hasher = SipHasher13::new();
+                        let mut vec_hasher = array_hasher.clone();
+                        array.hash(&mut array_hasher);
+
+                        // Integer within mantissa^1 range.
+                        #[allow(clippy::float_cmp)]
+                        let v = $id::splat(42 as $elem_ty);
+                        v.hash(&mut vec_hasher);
+                        assert_eq!(
+                            array_hasher.finish(),
+                            vec_hasher.finish()
+                        );
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/into_bits.rs b/vendor/packed_simd_2/src/api/into_bits.rs
new file mode 100644
index 000000000..f2cc1bae5
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/into_bits.rs
@@ -0,0 +1,59 @@
+//! Implementation of `FromBits` and `IntoBits`.
+
+/// Safe lossless bitwise conversion from `T` to `Self`.
+///
+/// Implementing `FromBits<T>` for a type also makes `IntoBits` available on
+/// `T` through the blanket implementation in this module.
+pub trait FromBits<T>: crate::marker::Sized {
+    /// Safe lossless bitwise transmute from `T` to `Self`.
+    fn from_bits(t: T) -> Self;
+}
+
+/// Safe lossless bitwise conversion from `Self` to `T`.
+///
+/// This is the counterpart of `FromBits`; this module provides a blanket
+/// implementation for every `T` whose target type implements
+/// `FromBits<T>`.
+pub trait IntoBits<T>: crate::marker::Sized {
+    /// Safe lossless bitwise transmute from `self` to `T`.
+    fn into_bits(self) -> T;
+}
+
+/// Blanket implementation: whenever `U: FromBits<T>`, `T` gains
+/// `into_bits()` producing a `U`.
+impl<T, U: FromBits<T>> IntoBits<U> for T {
+    #[inline]
+    fn into_bits(self) -> U {
+        // Debug-build check that source and target have the same size.
+        debug_assert!(
+            crate::mem::size_of::<Self>() == crate::mem::size_of::<U>()
+        );
+        <U as FromBits<T>>::from_bits(self)
+    }
+}
+
+/// `FromBits` and `IntoBits` are reflexive: every type converts to itself,
+/// and the conversion returns the value unchanged.
+impl<T> FromBits<T> for T {
+    #[inline]
+    fn from_bits(t: Self) -> Self {
+        t
+    }
+}
+
+#[macro_use]
+mod macros;
+
+mod v16;
+pub use self::v16::*;
+
+mod v32;
+pub use self::v32::*;
+
+mod v64;
+pub use self::v64::*;
+
+mod v128;
+pub use self::v128::*;
+
+mod v256;
+pub use self::v256::*;
+
+mod v512;
+pub use self::v512::*;
+
+mod arch_specific;
+pub use self::arch_specific::*;
diff --git a/vendor/packed_simd_2/src/api/into_bits/arch_specific.rs b/vendor/packed_simd_2/src/api/into_bits/arch_specific.rs
new file mode 100644
index 000000000..fee614005
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/into_bits/arch_specific.rs
@@ -0,0 +1,189 @@
+//! `FromBits` and `IntoBits` between portable vector types and the
+//! architecture-specific vector types.
+#[rustfmt::skip]
+
+// FIXME: MIPS FromBits/IntoBits
+
+#[allow(unused)]
+use crate::*;
+
+/// Implements `FromBits` between the portable vector types and the
+/// architecture-specific vector types of each listed `target_arch`.
+///
+/// The "leaf" case is at the bottom, and the most generic case is at the top.
+/// Lists of architectures and of types are split head/tail recursively.
+macro_rules! impl_arch {
+    ([$arch_head_i:ident[$arch_head_tt:tt]: $($arch_head_ty:ident),*],
+     $([$arch_tail_i:ident[$arch_tail_tt:tt]: $($arch_tail_ty:ident),*]),* |
+     from: $($from_ty:ident),* | into: $($into_ty:ident),* |
+     test: $test_tt:tt) => { // arm 1: several architectures; handle the head, then recurse on the tail
+        impl_arch!(
+            [$arch_head_i[$arch_head_tt]: $($arch_head_ty),*] |
+            from: $($from_ty),* |
+            into: $($into_ty),* |
+            test: $test_tt
+        );
+        impl_arch!(
+            $([$arch_tail_i[$arch_tail_tt]: $($arch_tail_ty),*]),* |
+            from: $($from_ty),* |
+            into: $($into_ty),* |
+            test: $test_tt
+        );
+    };
+    ([$arch:ident[$arch_tt:tt]: $($arch_ty:ident),*] |
+     from: $($from_ty:ident),* | into: $($into_ty:ident),* |
+     test: $test_tt:tt) => { // arm 2: one architecture; import its types and recurse, gated on the cfgs below
+        // note: if target is "arm", "+v7,+neon" must be enabled
+        // and the std library must be recompiled with them
+        #[cfg(any(
+            not(target_arch = "arm"),
+            all(target_feature = "v7", target_feature = "neon",
+                any(feature = "core_arch", libcore_neon)))
+        )]
+        // note: if target is "powerpc", "altivec" must be enabled
+        // and the std library must be recompiled with it
+        #[cfg(any(
+            not(target_arch = "powerpc"),
+            all(target_feature = "altivec", feature = "core_arch"),
+        ))]
+        #[cfg(target_arch = $arch_tt)]
+        use crate::arch::$arch::{
+            $($arch_ty),*
+        };
+
+        #[cfg(any(
+            not(target_arch = "arm"),
+            all(target_feature = "v7", target_feature = "neon",
+                any(feature = "core_arch", libcore_neon)))
+        )]
+        #[cfg(any(
+            not(target_arch = "powerpc"),
+            all(target_feature = "altivec", feature = "core_arch"),
+        ))]
+        #[cfg(target_arch = $arch_tt)]
+        impl_arch!($($arch_ty),* | $($from_ty),* | $($into_ty),* |
+                   test: $test_tt);
+    };
+    ($arch_head:ident, $($arch_tail:ident),* | $($from_ty:ident),*
+     | $($into_ty:ident),* | test: $test_tt:tt) => { // arm 3: several arch types; handle the head, then the tail
+        impl_arch!($arch_head | $($from_ty),* | $($into_ty),* |
+                   test: $test_tt);
+        impl_arch!($($arch_tail),* | $($from_ty),* | $($into_ty),* |
+                   test: $test_tt);
+    };
+    ($arch_head:ident | $($from_ty:ident),* | $($into_ty:ident),* |
+     test: $test_tt:tt) => { // arm 4 (leaf): a single arch type; emit the impls
+        impl_from_bits!($arch_head[$test_tt]: $($from_ty),*); // `$arch_head: FromBits<$from_ty>`
+        impl_into_bits!($arch_head[$test_tt]: $($into_ty),*); // `$into_ty: FromBits<$arch_head>`
+    };
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Implementations for the 64-bit wide vector types:
+
+// FIXME: 64-bit single element types
+// FIXME: arm/aarch float16x4_t missing
+impl_arch!(
+    [arm["arm"]: int8x8_t, uint8x8_t, poly8x8_t, int16x4_t, uint16x4_t,
+     poly16x4_t, int32x2_t, uint32x2_t, float32x2_t, int64x1_t,
+     uint64x1_t],
+    [aarch64["aarch64"]: int8x8_t, uint8x8_t, poly8x8_t, int16x4_t, uint16x4_t,
+     poly16x4_t, int32x2_t, uint32x2_t, float32x2_t, int64x1_t, uint64x1_t,
+     float64x1_t] |
+    from: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2 |
+    into: i8x8, u8x8, i16x4, u16x4, i32x2, u32x2, f32x2 |
+    test: test_v64
+);
+
+////////////////////////////////////////////////////////////////////////////////
+// Implementations for the 128-bit wide vector types:
+
+// FIXME: arm/aarch float16x8_t missing
+// FIXME: ppc vector_pixel missing
+// FIXME: ppc64 vector_Float16 missing
+// FIXME: ppc64 vector_signed_long_long missing
+// FIXME: ppc64 vector_unsigned_long_long missing
+// FIXME: ppc64 vector_bool_long_long missing
+// FIXME: ppc64 vector_signed___int128 missing
+// FIXME: ppc64 vector_unsigned___int128 missing
+impl_arch!(
+    [x86["x86"]: __m128, __m128i, __m128d],
+    [x86_64["x86_64"]:  __m128, __m128i, __m128d],
+    [arm["arm"]: int8x16_t, uint8x16_t, poly8x16_t, int16x8_t, uint16x8_t,
+     poly16x8_t, int32x4_t, uint32x4_t, float32x4_t, int64x2_t, uint64x2_t],
+    [aarch64["aarch64"]: int8x16_t, uint8x16_t, poly8x16_t, int16x8_t,
+     uint16x8_t, poly16x8_t, int32x4_t, uint32x4_t, float32x4_t, int64x2_t,
+     uint64x2_t, float64x2_t],
+    [powerpc["powerpc"]: vector_signed_char, vector_unsigned_char,
+     vector_signed_short, vector_unsigned_short, vector_signed_int,
+     vector_unsigned_int, vector_float],
+    [powerpc64["powerpc64"]: vector_signed_char, vector_unsigned_char,
+     vector_signed_short, vector_unsigned_short, vector_signed_int,
+     vector_unsigned_int,  vector_float, vector_signed_long,
+     vector_unsigned_long, vector_double] |
+    from: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4,
+    i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1 |
+    into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4, i64x2, u64x2, f64x2,
+    i128x1, u128x1 |
+    test: test_v128
+);
+
+impl_arch!(
+    [powerpc["powerpc"]: vector_bool_char],
+    [powerpc64["powerpc64"]: vector_bool_char] |
+    from: m8x16, m16x8, m32x4, m64x2, m128x1 |
+    into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4,
+    i64x2, u64x2, f64x2, i128x1, u128x1,
+    // Masks:
+    m8x16 |
+    test: test_v128
+);
+
+impl_arch!(
+    [powerpc["powerpc"]: vector_bool_short],
+    [powerpc64["powerpc64"]: vector_bool_short] |
+    from: m16x8, m32x4, m64x2, m128x1 |
+    into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4,
+    i64x2, u64x2, f64x2, i128x1, u128x1,
+    // Masks:
+    m8x16, m16x8 |
+    test: test_v128
+);
+
+impl_arch!(
+    [powerpc["powerpc"]: vector_bool_int],
+    [powerpc64["powerpc64"]: vector_bool_int] |
+    from: m32x4, m64x2, m128x1 |
+    into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4,
+    i64x2, u64x2, f64x2, i128x1, u128x1,
+    // Masks:
+    m8x16, m16x8, m32x4 |
+    test: test_v128
+);
+
+impl_arch!(
+    [powerpc64["powerpc64"]: vector_bool_long] |
+    from: m64x2, m128x1 |
+    into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4,
+    i64x2, u64x2, f64x2, i128x1, u128x1,
+    // Masks:
+    m8x16, m16x8, m32x4, m64x2 |
+    test: test_v128
+);
+
+////////////////////////////////////////////////////////////////////////////////
+// Implementations for the 256-bit wide vector types
+
+impl_arch!(
+    [x86["x86"]: __m256, __m256i, __m256d],
+    [x86_64["x86_64"]:  __m256, __m256i, __m256d] |
+    from: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16,
+    i32x8, u32x8, f32x8, m32x8,
+    i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2 |
+    into: i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, f32x8,
+    i64x4, u64x4, f64x4, i128x2, u128x2 |
+    test: test_v256
+);
+
+////////////////////////////////////////////////////////////////////////////////
+// FIXME: Implementations for the 512-bit wide vector types
diff --git a/vendor/packed_simd_2/src/api/into_bits/macros.rs b/vendor/packed_simd_2/src/api/into_bits/macros.rs
new file mode 100644
index 000000000..8cec5b004
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/into_bits/macros.rs
@@ -0,0 +1,74 @@
+//! Macros implementing `FromBits`
+
+macro_rules! impl_from_bits_ { // implements `$id: FromBits<$from_ty>` plus a test of the conversion
+    ($id:ident[$test_tt:tt]: $from_ty:ident) => {
+        impl crate::api::into_bits::FromBits<$from_ty> for $id {
+            #[inline]
+            fn from_bits(x: $from_ty) -> Self {
+                unsafe { crate::mem::transmute(x) } // `transmute` rejects size mismatches at compile time
+            }
+        }
+
+        test_if! {
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _from_bits_ $from_ty>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn test() {
+                        use crate::{
+                            ptr::{read_unaligned},
+                            mem::{size_of, zeroed}
+                        };
+                        use crate::IntoBits;
+                        assert_eq!(size_of::<$id>(),
+                                   size_of::<$from_ty>());
+                        // This is safe because we never create a reference to
+                        // uninitialized memory:
+                        let a: $from_ty = unsafe { zeroed() };
+
+                        let b_0: $id = crate::FromBits::from_bits(a);
+                        let b_1: $id = a.into_bits();
+
+                        // Check that both conversions are byte-wise equal, that
+                        // is, that their bit patterns are identical:
+                        for i in 0..size_of::<$id>() {
+                            // This is safe because we only read initialized
+                            // memory in bounds. Also, taking a reference to
+                            // `b_i` is ok because the fields are initialized.
+                            unsafe {
+                                let b_0_v: u8 = read_unaligned(
+                                    (&b_0 as *const $id as *const u8)
+                                        .wrapping_add(i)
+                                );
+                                let b_1_v: u8 = read_unaligned(
+                                    (&b_1 as *const $id as *const u8)
+                                        .wrapping_add(i)
+                                );
+                                assert_eq!(b_0_v, b_1_v);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    };
+}
+
+macro_rules! impl_from_bits { // list front-end: expands to one `impl_from_bits_!` per `$from_ty`
+    ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => {
+        $(
+            impl_from_bits_!($id[$test_tt]: $from_ty); // `$id: FromBits<$from_ty>`
+        )*
+    }
+}
+
+#[allow(unused)] // presumably unused on targets where no `impl_arch!` arm is enabled; TODO confirm
+macro_rules! impl_into_bits {
+    ($id:ident[$test_tt:tt]: $($into_ty:ident),*) => {
+        $(
+            impl_from_bits_!($into_ty[$test_tt]: $id); // reversed roles: `$into_ty: FromBits<$id>`
+        )*
+    }
+}
diff --git a/vendor/packed_simd_2/src/api/into_bits/v128.rs b/vendor/packed_simd_2/src/api/into_bits/v128.rs
new file mode 100644
index 000000000..e32cd7f9f
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/into_bits/v128.rs
@@ -0,0 +1,28 @@
+//! `FromBits` and `IntoBits` implementations for portable 128-bit wide vectors
+#[rustfmt::skip]
+
+#[allow(unused)]  // wasm_bindgen_test
+use crate::*;
+
+impl_from_bits!(i8x16[test_v128]: u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1);
+impl_from_bits!(u8x16[test_v128]: i8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1);
+impl_from_bits!(m8x16[test_v128]: m16x8, m32x4, m64x2, m128x1);
+
+impl_from_bits!(i16x8[test_v128]: i8x16, u8x16, m8x16, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1);
+impl_from_bits!(u16x8[test_v128]: i8x16, u8x16, m8x16, i16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1);
+impl_from_bits!(m16x8[test_v128]: m32x4, m64x2, m128x1);
+
+impl_from_bits!(i32x4[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1);
+impl_from_bits!(u32x4[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1);
+impl_from_bits!(f32x4[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1);
+impl_from_bits!(m32x4[test_v128]: m64x2, m128x1);
+
+impl_from_bits!(i64x2[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1);
+impl_from_bits!(u64x2[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, f64x2, m64x2, i128x1, u128x1, m128x1);
+impl_from_bits!(f64x2[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, m64x2, i128x1, u128x1, m128x1);
+impl_from_bits!(m64x2[test_v128]: m128x1);
+
+impl_from_bits!(i128x1[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, u128x1, m128x1);
+impl_from_bits!(u128x1[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, m128x1);
+// note: m128x1 cannot be constructed from every bit pattern of the other masks in this file
+
diff --git a/vendor/packed_simd_2/src/api/into_bits/v16.rs b/vendor/packed_simd_2/src/api/into_bits/v16.rs
new file mode 100644
index 000000000..e44d0e7f9
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/into_bits/v16.rs
@@ -0,0 +1,9 @@
+//! `FromBits` and `IntoBits` implementations for portable 16-bit wide vectors
+#[rustfmt::skip]
+
+#[allow(unused)]  // wasm_bindgen_test
+use crate::*;
+
+impl_from_bits!(i8x2[test_v16]: u8x2, m8x2);
+impl_from_bits!(u8x2[test_v16]: i8x2, m8x2);
+// note: m8x2 cannot be constructed from all i8x2 or u8x2 bit patterns
diff --git a/vendor/packed_simd_2/src/api/into_bits/v256.rs b/vendor/packed_simd_2/src/api/into_bits/v256.rs
new file mode 100644
index 000000000..c4c373e0d
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/into_bits/v256.rs
@@ -0,0 +1,27 @@
+//! `FromBits` and `IntoBits` implementations for portable 256-bit wide vectors
+#[rustfmt::skip]
+
+#[allow(unused)]  // wasm_bindgen_test
+use crate::*;
+
+impl_from_bits!(i8x32[test_v256]: u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2);
+impl_from_bits!(u8x32[test_v256]: i8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2);
+impl_from_bits!(m8x32[test_v256]: m16x16, m32x8, m64x4, m128x2);
+
+impl_from_bits!(i16x16[test_v256]: i8x32, u8x32, m8x32, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2);
+impl_from_bits!(u16x16[test_v256]: i8x32, u8x32, m8x32, i16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2);
+impl_from_bits!(m16x16[test_v256]: m32x8, m64x4, m128x2);
+
+impl_from_bits!(i32x8[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2);
+impl_from_bits!(u32x8[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2);
+impl_from_bits!(f32x8[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2);
+impl_from_bits!(m32x8[test_v256]: m64x4, m128x2);
+
+impl_from_bits!(i64x4[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2);
+impl_from_bits!(u64x4[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, f64x4, m64x4, i128x2, u128x2, m128x2);
+impl_from_bits!(f64x4[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, m64x4, i128x2, u128x2, m128x2);
+impl_from_bits!(m64x4[test_v256]: m128x2);
+
+impl_from_bits!(i128x2[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, u128x2, m128x2);
+impl_from_bits!(u128x2[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, m128x2);
+// note: m128x2 cannot be constructed from every bit pattern of the other masks in this file
diff --git a/vendor/packed_simd_2/src/api/into_bits/v32.rs b/vendor/packed_simd_2/src/api/into_bits/v32.rs
new file mode 100644
index 000000000..5dba38a17
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/into_bits/v32.rs
@@ -0,0 +1,13 @@
+//! `FromBits` and `IntoBits` implementations for portable 32-bit wide vectors
+#[rustfmt::skip]
+
+#[allow(unused)]  // wasm_bindgen_test
+use crate::*;
+
+impl_from_bits!(i8x4[test_v32]: u8x4, m8x4, i16x2, u16x2, m16x2);
+impl_from_bits!(u8x4[test_v32]: i8x4, m8x4, i16x2, u16x2, m16x2);
+impl_from_bits!(m8x4[test_v32]: m16x2);
+
+impl_from_bits!(i16x2[test_v32]: i8x4, u8x4, m8x4, u16x2, m16x2);
+impl_from_bits!(u16x2[test_v32]: i8x4, u8x4, m8x4, i16x2, m16x2);
+// note: m16x2 cannot be constructed from all m8x4 bit patterns
diff --git a/vendor/packed_simd_2/src/api/into_bits/v512.rs b/vendor/packed_simd_2/src/api/into_bits/v512.rs
new file mode 100644
index 000000000..4a771962c
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/into_bits/v512.rs
@@ -0,0 +1,27 @@
+//! `FromBits` and `IntoBits` implementations for portable 512-bit wide vectors
+#[rustfmt::skip]
+
+#[allow(unused)]  // wasm_bindgen_test
+use crate::*;
+
+impl_from_bits!(i8x64[test_v512]: u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4);
+impl_from_bits!(u8x64[test_v512]: i8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4);
+impl_from_bits!(m8x64[test_v512]: m16x32, m32x16, m64x8, m128x4);
+
+impl_from_bits!(i16x32[test_v512]: i8x64, u8x64, m8x64, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4);
+impl_from_bits!(u16x32[test_v512]: i8x64, u8x64, m8x64, i16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4);
+impl_from_bits!(m16x32[test_v512]: m32x16, m64x8, m128x4);
+
+impl_from_bits!(i32x16[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4);
+impl_from_bits!(u32x16[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4);
+impl_from_bits!(f32x16[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4);
+impl_from_bits!(m32x16[test_v512]: m64x8, m128x4);
+
+impl_from_bits!(i64x8[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4);
+impl_from_bits!(u64x8[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, f64x8, m64x8, i128x4, u128x4, m128x4);
+impl_from_bits!(f64x8[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, m64x8, i128x4, u128x4, m128x4);
+impl_from_bits!(m64x8[test_v512]: m128x4);
+
+impl_from_bits!(i128x4[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, u128x4, m128x4);
+impl_from_bits!(u128x4[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, m128x4);
+// note: m128x4 cannot be constructed from every bit pattern of the other masks in this file
diff --git a/vendor/packed_simd_2/src/api/into_bits/v64.rs b/vendor/packed_simd_2/src/api/into_bits/v64.rs
new file mode 100644
index 000000000..5b065f1bd
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/into_bits/v64.rs
@@ -0,0 +1,18 @@
+//! `FromBits` and `IntoBits` implementations for portable 64-bit wide vectors
+#[rustfmt::skip]
+
+#[allow(unused)]  // wasm_bindgen_test
+use crate::*;
+
+impl_from_bits!(i8x8[test_v64]: u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2);
+impl_from_bits!(u8x8[test_v64]: i8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2);
+impl_from_bits!(m8x8[test_v64]: m16x4, m32x2);
+
+impl_from_bits!(i16x4[test_v64]: i8x8, u8x8, m8x8, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2);
+impl_from_bits!(u16x4[test_v64]: i8x8, u8x8, m8x8, i16x4, m16x4, i32x2, u32x2, f32x2, m32x2);
+impl_from_bits!(m16x4[test_v64]: m32x2);
+
+impl_from_bits!(i32x2[test_v64]: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, u32x2, f32x2, m32x2);
+impl_from_bits!(u32x2[test_v64]: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, f32x2, m32x2);
+impl_from_bits!(f32x2[test_v64]: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, m32x2);
+// note: m32x2 cannot be constructed from all m16x4 or m8x8 bit patterns
diff --git a/vendor/packed_simd_2/src/api/math.rs b/vendor/packed_simd_2/src/api/math.rs
new file mode 100644
index 000000000..e7a8d256b
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/math.rs
@@ -0,0 +1,4 @@
+//! Implements vertical math operations
+
+#[macro_use]
+mod float;
diff --git a/vendor/packed_simd_2/src/api/math/float.rs b/vendor/packed_simd_2/src/api/math/float.rs
new file mode 100644
index 000000000..d5d2bee2e
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/math/float.rs
@@ -0,0 +1,64 @@
+//! Implements vertical floating-point math operations.
+
+#[macro_use]
+mod abs;
+
+#[macro_use]
+mod consts;
+
+#[macro_use]
+mod cos;
+
+#[macro_use]
+mod exp;
+
+#[macro_use]
+mod powf;
+
+#[macro_use]
+mod ln;
+
+#[macro_use]
+mod mul_add;
+
+#[macro_use]
+mod mul_adde;
+
+#[macro_use]
+mod recpre;
+
+#[macro_use]
+mod rsqrte;
+
+#[macro_use]
+mod sin;
+
+#[macro_use]
+mod sqrt;
+
+#[macro_use]
+mod sqrte;
+
+#[macro_use]
+mod tanh;
+
+macro_rules! impl_float_category { // adds float classification methods returning a `$mask_ty` to `$id`
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident, $mask_ty:ident) => {
+        impl $id {
+            #[inline]
+            pub fn is_nan(self) -> $mask_ty {
+                self.ne(self) // NaN is the only value that compares unequal to itself
+            }
+
+            #[inline]
+            pub fn is_infinite(self) -> $mask_ty {
+                self.eq(Self::INFINITY) | self.eq(Self::NEG_INFINITY) // equal to +inf or to -inf
+            }
+
+            #[inline]
+            pub fn is_finite(self) -> $mask_ty {
+                !(self.is_nan() | self.is_infinite()) // neither NaN nor infinite
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/math/float/abs.rs b/vendor/packed_simd_2/src/api/math/float/abs.rs
new file mode 100644
index 000000000..1865bdb68
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/math/float/abs.rs
@@ -0,0 +1,31 @@
+//! Implements vertical (lane-wise) floating-point `abs`.
+
+macro_rules! impl_math_float_abs { // adds `abs` to `$id` and, under `$test_tt`, a test module for it
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl $id {
+            /// Lane-wise absolute value.
+            #[inline]
+            pub fn abs(self) -> Self {
+                use crate::codegen::math::float::abs::Abs;
+                Abs::abs(self)
+            }
+        }
+
+        test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _math_abs>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn abs() {
+                        let o = $id::splat(1 as $elem_ty); // all lanes +1
+                        assert_eq!(o, o.abs());
+
+                        let mo = $id::splat(-1 as $elem_ty); // all lanes -1
+                        assert_eq!(o, mo.abs());
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/math/float/consts.rs b/vendor/packed_simd_2/src/api/math/float/consts.rs
new file mode 100644
index 000000000..89f93a6d6
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/math/float/consts.rs
@@ -0,0 +1,86 @@
+macro_rules! impl_float_consts { // each constant is the scalar `core` constant splatted into `$id`
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident) => {
+        impl $id {
+            /// Machine epsilon value.
+            pub const EPSILON: $id = $id::splat(core::$elem_ty::EPSILON);
+
+            /// Smallest finite value (the most negative one, not the smallest positive).
+            pub const MIN: $id = $id::splat(core::$elem_ty::MIN);
+
+            /// Smallest positive normal value.
+            pub const MIN_POSITIVE: $id =
+                $id::splat(core::$elem_ty::MIN_POSITIVE);
+
+            /// Largest finite value.
+            pub const MAX: $id = $id::splat(core::$elem_ty::MAX);
+
+            /// Not a Number (NaN).
+            pub const NAN: $id = $id::splat(core::$elem_ty::NAN);
+
+            /// Infinity (∞).
+            pub const INFINITY: $id = $id::splat(core::$elem_ty::INFINITY);
+
+            /// Negative infinity (-∞).
+            pub const NEG_INFINITY: $id =
+                $id::splat(core::$elem_ty::NEG_INFINITY);
+
+            /// Archimedes' constant (π)
+            pub const PI: $id = $id::splat(core::$elem_ty::consts::PI);
+
+            /// π/2
+            pub const FRAC_PI_2: $id =
+                $id::splat(core::$elem_ty::consts::FRAC_PI_2);
+
+            /// π/3
+            pub const FRAC_PI_3: $id =
+                $id::splat(core::$elem_ty::consts::FRAC_PI_3);
+
+            /// π/4
+            pub const FRAC_PI_4: $id =
+                $id::splat(core::$elem_ty::consts::FRAC_PI_4);
+
+            /// π/6
+            pub const FRAC_PI_6: $id =
+                $id::splat(core::$elem_ty::consts::FRAC_PI_6);
+
+            /// π/8
+            pub const FRAC_PI_8: $id =
+                $id::splat(core::$elem_ty::consts::FRAC_PI_8);
+
+            /// 1/π
+            pub const FRAC_1_PI: $id =
+                $id::splat(core::$elem_ty::consts::FRAC_1_PI);
+
+            /// 2/π
+            pub const FRAC_2_PI: $id =
+                $id::splat(core::$elem_ty::consts::FRAC_2_PI);
+
+            /// 2/sqrt(π)
+            pub const FRAC_2_SQRT_PI: $id =
+                $id::splat(core::$elem_ty::consts::FRAC_2_SQRT_PI);
+
+            /// sqrt(2)
+            pub const SQRT_2: $id = $id::splat(core::$elem_ty::consts::SQRT_2);
+
+            /// 1/sqrt(2)
+            pub const FRAC_1_SQRT_2: $id =
+                $id::splat(core::$elem_ty::consts::FRAC_1_SQRT_2);
+
+            /// Euler's number (e)
+            pub const E: $id = $id::splat(core::$elem_ty::consts::E);
+
+            /// log<sub>2</sub>(e)
+            pub const LOG2_E: $id = $id::splat(core::$elem_ty::consts::LOG2_E);
+
+            /// log<sub>10</sub>(e)
+            pub const LOG10_E: $id =
+                $id::splat(core::$elem_ty::consts::LOG10_E);
+
+            /// ln(2)
+            pub const LN_2: $id = $id::splat(core::$elem_ty::consts::LN_2);
+
+            /// ln(10)
+            pub const LN_10: $id = $id::splat(core::$elem_ty::consts::LN_10);
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/math/float/cos.rs b/vendor/packed_simd_2/src/api/math/float/cos.rs
new file mode 100644
index 000000000..e5b8f4603
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/math/float/cos.rs
@@ -0,0 +1,44 @@
+//! Implements vertical (lane-wise) floating-point `cos`.
+
+macro_rules! impl_math_float_cos { // adds `cos`/`cos_pi` to `$id` and, under `$test_tt`, a test for `cos`
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl $id {
+            /// Lane-wise cosine.
+            #[inline]
+            pub fn cos(self) -> Self {
+                use crate::codegen::math::float::cos::Cos;
+                Cos::cos(self)
+            }
+
+            /// Lane-wise cosine of `self * PI`.
+            #[inline]
+            pub fn cos_pi(self) -> Self {
+                use crate::codegen::math::float::cos_pi::CosPi;
+                CosPi::cos_pi(self)
+            }
+        }
+
+        test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _math_cos>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn cos() {
+                        use crate::$elem_ty::consts::PI;
+                        let z = $id::splat(0 as $elem_ty);
+                        let o = $id::splat(1 as $elem_ty);
+                        let p = $id::splat(PI as $elem_ty);
+                        let ph = $id::splat(PI as $elem_ty / 2.);
+                        let z_r = $id::splat((PI as $elem_ty / 2.).cos()); // scalar reference for cos(PI / 2)
+                        let o_r = $id::splat((PI as $elem_ty).cos()); // scalar reference for cos(PI)
+
+                        assert_eq!(o, z.cos());
+                        assert_eq!(z_r, ph.cos());
+                        assert_eq!(o_r, p.cos());
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/math/float/exp.rs b/vendor/packed_simd_2/src/api/math/float/exp.rs
new file mode 100644
index 000000000..e3356d853
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/math/float/exp.rs
@@ -0,0 +1,33 @@
+//! Implements vertical (lane-wise) floating-point `exp`.
+
+macro_rules! impl_math_float_exp { // adds `exp` to `$id` and, under `$test_tt`, a test module for it
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl $id {
+            /// Returns the lane-wise exponential function of `self`: `e^(self)`.
+            #[inline]
+            pub fn exp(self) -> Self {
+                use crate::codegen::math::float::exp::Exp;
+                Exp::exp(self)
+            }
+        }
+
+        test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _math_exp>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn exp() {
+                        let z = $id::splat(0 as $elem_ty);
+                        let o = $id::splat(1 as $elem_ty);
+                        assert_eq!(o, z.exp()); // e^0 is expected to be exactly 1
+
+                        let e = $id::splat(crate::f64::consts::E as $elem_ty);
+                        let tol = $id::splat(2.4e-4 as $elem_ty); // absolute tolerance for e^1
+                        assert!((e - o.exp()).abs().le(tol).all());
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/math/float/ln.rs b/vendor/packed_simd_2/src/api/math/float/ln.rs
new file mode 100644
index 000000000..5ceb9173a
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/math/float/ln.rs
@@ -0,0 +1,33 @@
+//! Implements vertical (lane-wise) floating-point `ln`.
+
+macro_rules! impl_math_float_ln { // adds `ln` to `$id` and, under `$test_tt`, a test module for it
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl $id {
+            /// Returns the lane-wise natural logarithm of `self`.
+            #[inline]
+            pub fn ln(self) -> Self {
+                use crate::codegen::math::float::ln::Ln;
+                Ln::ln(self)
+            }
+        }
+
+        test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _math_ln>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn ln() {
+                        let z = $id::splat(0 as $elem_ty);
+                        let o = $id::splat(1 as $elem_ty);
+                        assert_eq!(z, o.ln()); // ln(1) is expected to be exactly 0
+
+                        let e = $id::splat(crate::f64::consts::E as $elem_ty);
+                        let tol = $id::splat(2.4e-4 as $elem_ty); // absolute tolerance for ln(e)
+                        assert!((o - e.ln()).abs().le(tol).all());
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/math/float/mul_add.rs b/vendor/packed_simd_2/src/api/math/float/mul_add.rs
new file mode 100644
index 000000000..4b170ee2b
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/math/float/mul_add.rs
@@ -0,0 +1,44 @@
+//! Implements vertical (lane-wise) floating-point `mul_add`.
+
+macro_rules! impl_math_float_mul_add {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl $id {
+            /// Lane-wise fused multiply-add, i.e. `self * y + z`.
+            #[inline]
+            pub fn mul_add(self, y: Self, z: Self) -> Self {
+                use crate::codegen::math::float::mul_add::MulAdd as Backend;
+                <Self as Backend>::mul_add(self, y, z)
+            }
+        }
+
+        test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _math_mul_add>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn mul_add() {
+                        let zero = $id::splat(0 as $elem_ty);
+                        let one = $id::splat(1 as $elem_ty);
+                        let two = $id::splat(2 as $elem_ty);
+                        let three = $id::splat(3 as $elem_ty);
+                        let four = $id::splat(4 as $elem_ty);
+                        // Combinations that evaluate to 0 or 1.
+                        assert_eq!(zero, zero.mul_add(zero, zero));
+                        assert_eq!(one, one.mul_add(one, zero));
+                        assert_eq!(one, one.mul_add(zero, one));
+                        assert_eq!(one, zero.mul_add(one, one));
+                        // Combinations that evaluate to 2.
+                        assert_eq!(two, one.mul_add(one, one));
+                        assert_eq!(two, one.mul_add(two, zero));
+                        assert_eq!(two, two.mul_add(one, zero));
+                        // Combinations that evaluate to 4 and 3.
+                        assert_eq!(four, two.mul_add(two, zero));
+                        assert_eq!(four, two.mul_add(one, two));
+                        assert_eq!(three, two.mul_add(one, one));
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/math/float/mul_adde.rs b/vendor/packed_simd_2/src/api/math/float/mul_adde.rs
new file mode 100644
index 000000000..c5b27110f
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/math/float/mul_adde.rs
@@ -0,0 +1,48 @@
+//! Implements vertical (lane-wise) floating-point `mul_adde`.
+
+macro_rules! impl_math_float_mul_adde {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl $id {
+            /// Estimate of the lane-wise multiply-add `self * y + z`.
+            ///
+            /// A true fused multiply-add (`fma`) keeps the intermediate product at infinite
+            /// precision; `mul_adde` only promises to be no less precise than a separate multiply
+            /// followed by an add, which may be cheaper on targets lacking an `fma` instruction.
+            #[inline]
+            pub fn mul_adde(self, y: Self, z: Self) -> Self {
+                use crate::codegen::math::float::mul_adde::MulAddE as Backend;
+                <Self as Backend>::mul_adde(self, y, z)
+            }
+        }
+
+        test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _math_mul_adde>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn mul_adde() {
+                        let zero = $id::splat(0 as $elem_ty);
+                        let one = $id::splat(1 as $elem_ty);
+                        let two = $id::splat(2 as $elem_ty);
+                        let three = $id::splat(3 as $elem_ty);
+                        let four = $id::splat(4 as $elem_ty);
+                        // Combinations that evaluate to 0 or 1.
+                        assert_eq!(zero, zero.mul_adde(zero, zero));
+                        assert_eq!(one, one.mul_adde(one, zero));
+                        assert_eq!(one, one.mul_adde(zero, one));
+                        assert_eq!(one, zero.mul_adde(one, one));
+                        // Combinations that evaluate to 2.
+                        assert_eq!(two, one.mul_adde(one, one));
+                        assert_eq!(two, one.mul_adde(two, zero));
+                        assert_eq!(two, two.mul_adde(one, zero));
+                        // Combinations that evaluate to 4 and 3.
+                        assert_eq!(four, two.mul_adde(two, zero));
+                        assert_eq!(four, two.mul_adde(one, two));
+                        assert_eq!(three, two.mul_adde(one, one));
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/math/float/powf.rs b/vendor/packed_simd_2/src/api/math/float/powf.rs
new file mode 100644
index 000000000..83dc9ff9c
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/math/float/powf.rs
@@ -0,0 +1,36 @@
+//! Implements vertical (lane-wise) floating-point `powf`.
+
+macro_rules! impl_math_float_powf {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl $id {
+            /// Raises each lane of `self` to the floating-point power in the matching lane of `x`.
+            #[inline]
+            pub fn powf(self, x: Self) -> Self {
+                use crate::codegen::math::float::powf::Powf as Backend;
+                <Self as Backend>::powf(self, x)
+            }
+        }
+
+        test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _math_powf>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn powf() {
+                        let zero = $id::splat(0 as $elem_ty);
+                        let one = $id::splat(1 as $elem_ty);
+                        let two = $id::splat(2 as $elem_ty);
+                        let four = $id::splat(4 as $elem_ty);
+                        // Exponents 0 and 1.
+                        assert_eq!(one, one.powf(zero));
+                        assert_eq!(one, two.powf(zero));
+                        assert_eq!(one, one.powf(one));
+                        assert_eq!(two, two.powf(one));
+                        assert_eq!(four, two.powf(two));
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/math/float/recpre.rs b/vendor/packed_simd_2/src/api/math/float/recpre.rs
new file mode 100644
index 000000000..127f0b2ff
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/math/float/recpre.rs
@@ -0,0 +1,36 @@
+//! Implements vertical (lane-wise) floating-point `recpre`.
+
+macro_rules! impl_math_float_recpre {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl $id {
+            /// Lane-wise estimate of the reciprocal, `~= 1. / self`.
+            ///
+            /// FIXME: How precise the estimate is has not been specified yet.
+            #[inline]
+            pub fn recpre(self) -> Self {
+                Self::splat(1.) / self
+            }
+        }
+
+        test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _math_recpre>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn recpre() {
+                        let tolerance = $id::splat(2.4e-4 as $elem_ty);
+                        let one = $id::splat(1 as $elem_ty);
+                        let one_err = (one - one.recpre()).abs();
+                        assert!(one_err.le(tolerance).all());
+                        // 1 / 2 must be within tolerance of 0.5.
+                        let two = $id::splat(2 as $elem_ty);
+                        let half = 0.5;
+                        let two_err = (half - two.recpre()).abs();
+                        assert!(two_err.le(tolerance).all());
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/math/float/rsqrte.rs b/vendor/packed_simd_2/src/api/math/float/rsqrte.rs
new file mode 100644
index 000000000..c77977f7b
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/math/float/rsqrte.rs
@@ -0,0 +1,40 @@
+//! Implements vertical (lane-wise) floating-point `rsqrte`.
+
+macro_rules! impl_math_float_rsqrte {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl $id {
+            /// Lane-wise estimate of the reciprocal square root, `~= 1. / self.sqrt()`.
+            ///
+            /// FIXME: How precise the estimate is has not been specified yet.
+            #[inline]
+            pub fn rsqrte(self) -> Self {
+                use crate::llvm::simd_fsqrt;
+                // `unsafe` is scoped to the `simd_fsqrt` call alone; the division is safe code.
+                let root: Self = Simd(unsafe { simd_fsqrt(self.0) });
+                Self::splat(1.) / root
+            }
+        }
+
+        test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _math_rsqrte>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn rsqrte() {
+                        use crate::$elem_ty::consts::SQRT_2;
+                        let tolerance = $id::splat(2.4e-4 as $elem_ty);
+                        let one = $id::splat(1 as $elem_ty);
+                        let one_err = (one - one.rsqrte()).abs();
+                        assert!(one_err.le(tolerance).all());
+                        // 1 / sqrt(2) must be within tolerance of `1. / SQRT_2`.
+                        let two = $id::splat(2 as $elem_ty);
+                        let inv_sqrt_2 = 1. / SQRT_2;
+                        let two_err = (inv_sqrt_2 - two.rsqrte()).abs();
+                        assert!(two_err.le(tolerance).all());
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/math/float/sin.rs b/vendor/packed_simd_2/src/api/math/float/sin.rs
new file mode 100644
index 000000000..49908319b
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/math/float/sin.rs
@@ -0,0 +1,50 @@
+//! Implements vertical (lane-wise) floating-point `sin`.
+
+macro_rules! impl_math_float_sin {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl $id {
+            /// Lane-wise sine of `self`.
+            #[inline]
+            pub fn sin(self) -> Self {
+                use crate::codegen::math::float::sin::Sin as Backend;
+                <Self as Backend>::sin(self)
+            }
+
+            /// Lane-wise sine of `self * PI`.
+            #[inline]
+            pub fn sin_pi(self) -> Self {
+                use crate::codegen::math::float::sin_pi::SinPi as Backend;
+                <Self as Backend>::sin_pi(self)
+            }
+
+            /// Lane-wise sine and cosine of `self * PI`, returned as a `(sine, cosine)` pair.
+            #[inline]
+            pub fn sin_cos_pi(self) -> (Self, Self) {
+                use crate::codegen::math::float::sin_cos_pi::SinCosPi as Backend;
+                <Self as Backend>::sin_cos_pi(self)
+            }
+        }
+
+        test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _math_sin>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn sin() {
+                        use crate::$elem_ty::consts::PI;
+                        let zero = $id::splat(0 as $elem_ty);
+                        let pi = $id::splat(PI);
+                        let half_pi = $id::splat(PI / 2.);
+                        let sin_half_pi = $id::splat((PI / 2.).sin());
+                        let sin_pi = $id::splat(PI.sin());
+                        // The vector results must match the scalar `sin` of the same inputs.
+                        assert_eq!(zero, zero.sin());
+                        assert_eq!(sin_half_pi, half_pi.sin());
+                        assert_eq!(sin_pi, pi.sin());
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/math/float/sqrt.rs b/vendor/packed_simd_2/src/api/math/float/sqrt.rs
new file mode 100644
index 000000000..ae624122d
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/math/float/sqrt.rs
@@ -0,0 +1,35 @@
+//! Implements vertical (lane-wise) floating-point `sqrt`.
+
+macro_rules! impl_math_float_sqrt {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl $id {
+            /// Lane-wise square root of `self`.
+            #[inline]
+            pub fn sqrt(self) -> Self {
+                use crate::codegen::math::float::sqrt::Sqrt;
+                Sqrt::sqrt(self)
+            }
+        }
+
+        test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _math_sqrt>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn sqrt() {
+                        use crate::$elem_ty::consts::SQRT_2;
+                        let z = $id::splat(0 as $elem_ty);
+                        let o = $id::splat(1 as $elem_ty);
+                        assert_eq!(z, z.sqrt());
+                        assert_eq!(o, o.sqrt());
+                        // sqrt(2) is compared for exact equality with the element type's `SQRT_2`.
+                        let t = $id::splat(2 as $elem_ty);
+                        let e = $id::splat(SQRT_2);
+                        assert_eq!(e, t.sqrt());
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/math/float/sqrte.rs b/vendor/packed_simd_2/src/api/math/float/sqrte.rs
new file mode 100644
index 000000000..f7ffad748
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/math/float/sqrte.rs
@@ -0,0 +1,44 @@
+//! Implements vertical (lane-wise) floating-point `sqrte`.
+
+macro_rules! impl_math_float_sqrte {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl $id {
+            /// Lane-wise estimate of the square root of `self`.
+            ///
+            /// FIXME: How precise the estimate is has not been specified yet.
+            #[inline]
+            pub fn sqrte(self) -> Self {
+                use crate::codegen::math::float::sqrte::Sqrte as Backend;
+                <Self as Backend>::sqrte(self)
+            }
+        }
+
+        test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _math_sqrte>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn sqrte() {
+                        use crate::$elem_ty::consts::SQRT_2;
+                        let tolerance = $id::splat(2.4e-4 as $elem_ty);
+
+                        // sqrte(0) must be within tolerance of 0.
+                        let zero = $id::splat(0 as $elem_ty);
+                        let zero_err = (zero - zero.sqrte()).abs();
+                        assert!(zero_err.le(tolerance).all());
+                        // sqrte(1) must be within tolerance of 1.
+                        let one = $id::splat(1 as $elem_ty);
+                        let one_err = (one - one.sqrte()).abs();
+                        assert!(one_err.le(tolerance).all());
+                        // sqrte(2) must be within tolerance of `SQRT_2`.
+                        let two = $id::splat(2 as $elem_ty);
+                        let expected = $id::splat(SQRT_2 as $elem_ty);
+                        let two_err = (expected - two.sqrte()).abs();
+                        assert!(two_err.le(tolerance).all());
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/math/float/tanh.rs b/vendor/packed_simd_2/src/api/math/float/tanh.rs
new file mode 100644
index 000000000..acfd93caa
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/math/float/tanh.rs
@@ -0,0 +1,29 @@
+//! Implements vertical (lane-wise) floating-point `tanh`.
+
+macro_rules! impl_math_float_tanh {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl $id {
+            /// Lane-wise hyperbolic tangent of `self`.
+            #[inline]
+            pub fn tanh(self) -> Self {
+                use crate::codegen::math::float::tanh::Tanh as Backend;
+                <Self as Backend>::tanh(self)
+            }
+        }
+
+        test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _math_tanh>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn tanh() {
+                        let zero = $id::splat(0 as $elem_ty);
+                        // tanh(0) is expected to be exactly 0 in every lane.
+                        assert_eq!(zero, zero.tanh());
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/minimal.rs b/vendor/packed_simd_2/src/api/minimal.rs
new file mode 100644
index 000000000..840d9e325
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/minimal.rs
@@ -0,0 +1,6 @@
+#[macro_use]
+mod iuf;
+#[macro_use]
+mod mask;
+#[macro_use]
+mod ptr;
diff --git a/vendor/packed_simd_2/src/api/minimal/iuf.rs b/vendor/packed_simd_2/src/api/minimal/iuf.rs
new file mode 100644
index 000000000..a155ac178
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/minimal/iuf.rs
@@ -0,0 +1,169 @@
+//! Minimal API of signed integer, unsigned integer, and floating-point
+//! vectors.
+
+macro_rules! impl_minimal_iuf {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident |
+     $test_tt:tt | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
+
+        $(#[$doc])*
+        pub type $id = Simd<[$elem_ty; $elem_count]>;
+
+        impl sealed::Simd for $id {
+            type Element = $elem_ty;
+            const LANES: usize = $elem_count;
+            type LanesType = [u32; $elem_count];
+        }
+
+        impl $id {
+            /// Creates a new instance with each vector element initialized
+            /// with the provided values.
+            #[inline]
+            #[allow(clippy::too_many_arguments)]
+            pub const fn new($($elem_name: $elem_ty),*) -> Self {
+                Simd(codegen::$id($($elem_name as $ielem_ty),*))
+            }
+
+            /// Returns the number of vector lanes.
+            #[inline]
+            pub const fn lanes() -> usize {
+                $elem_count
+            }
+
+            /// Constructs a new instance with each element initialized to
+            /// `value`.
+            #[inline]
+            pub const fn splat(value: $elem_ty) -> Self {
+                Simd(codegen::$id($({
+                    #[allow(non_camel_case_types, dead_code)]
+                    struct $elem_name; // mentions `$elem_name` so the repetition expands once per lane
+                    value as $ielem_ty
+                }),*))
+            }
+
+            /// Extracts the value at `index`.
+            ///
+            /// # Panics
+            ///
+            /// If `index >= Self::lanes()`.
+            #[inline]
+            pub fn extract(self, index: usize) -> $elem_ty {
+                assert!(index < $elem_count);
+                unsafe { self.extract_unchecked(index) }
+            }
+
+            /// Extracts the value at `index` without bounds checking.
+            ///
+            /// # Safety
+            ///
+            /// If `index >= Self::lanes()` the behavior is undefined.
+            #[inline]
+            pub unsafe fn extract_unchecked(self, index: usize) -> $elem_ty {
+                use crate::llvm::simd_extract;
+                let e: $ielem_ty = simd_extract(self.0, index as u32);
+                e as $elem_ty
+            }
+
+            /// Returns a new vector where the value at `index` is replaced by `new_value`.
+            ///
+            /// # Panics
+            ///
+            /// If `index >= Self::lanes()`.
+            #[inline]
+            #[must_use = "replace does not modify the original value - \
+                          it returns a new vector with the value at `index` \
+                          replaced by `new_value`"
+            ]
+            pub fn replace(self, index: usize, new_value: $elem_ty) -> Self {
+                assert!(index < $elem_count);
+                unsafe { self.replace_unchecked(index, new_value) }
+            }
+
+            /// Like `replace`, but without bounds checking on `index`.
+            ///
+            /// # Safety
+            ///
+            /// If `index >= Self::lanes()` the behavior is undefined.
+            #[inline]
+            #[must_use = "replace_unchecked does not modify the original value - \
+                          it returns a new vector with the value at `index` \
+                          replaced by `new_value`"
+            ]
+            pub unsafe fn replace_unchecked(
+                self,
+                index: usize,
+                new_value: $elem_ty,
+            ) -> Self {
+                use crate::llvm::simd_insert;
+                Simd(simd_insert(self.0, index as u32, new_value as $ielem_ty))
+            }
+        }
+
+        test_if!{
+            $test_tt:
+            paste::item! {
+                // Exact float comparisons are fine here: the values are small integers (7, 42) cast to the element type.
+                #[allow(clippy::float_cmp)]
+                pub mod [<$id _minimal>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn minimal() {
+                        // lanes:
+                        assert_eq!($elem_count, $id::lanes());
+
+                        // splat and extract / extract_unchecked:
+                        const VAL: $elem_ty = 7 as $elem_ty;
+                        const VEC: $id = $id::splat(VAL);
+                        for i in 0..$id::lanes() {
+                            assert_eq!(VAL, VEC.extract(i));
+                            assert_eq!(
+                                VAL, unsafe { VEC.extract_unchecked(i) }
+                            );
+                        }
+
+                        // replace / replace_unchecked
+                        let new_vec = VEC.replace(0, 42 as $elem_ty);
+                        for i in 0..$id::lanes() {
+                            if i == 0 {
+                                assert_eq!(42 as $elem_ty, new_vec.extract(i));
+                            } else {
+                                assert_eq!(VAL, new_vec.extract(i));
+                            }
+                        }
+                        let new_vec = unsafe {
+                            VEC.replace_unchecked(0, 42 as $elem_ty)
+                        };
+                        for i in 0..$id::lanes() {
+                            if i == 0 {
+                                assert_eq!(42 as $elem_ty, new_vec.extract(i));
+                            } else {
+                                assert_eq!(VAL, new_vec.extract(i));
+                            }
+                        }
+                    }
+
+                    // FIXME: wasm-bindgen-test does not support #[should_panic]
+                    // #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    #[cfg(not(target_arch = "wasm32"))]
+                    #[test]
+                    #[should_panic]
+                    fn extract_panic_oob() {
+                        const VAL: $elem_ty = 7 as $elem_ty;
+                        const VEC: $id = $id::splat(VAL);
+                        let _ = VEC.extract($id::lanes());
+                    }
+                    // FIXME: wasm-bindgen-test does not support #[should_panic]
+                    // #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    #[cfg(not(target_arch = "wasm32"))]
+                    #[test]
+                    #[should_panic]
+                    fn replace_panic_oob() {
+                        const VAL: $elem_ty = 7 as $elem_ty;
+                        const VEC: $id = $id::splat(VAL);
+                        let _ = VEC.replace($id::lanes(), 42 as $elem_ty);
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/vendor/packed_simd_2/src/api/minimal/mask.rs b/vendor/packed_simd_2/src/api/minimal/mask.rs
new file mode 100644
index 000000000..a420060b4
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/minimal/mask.rs
@@ -0,0 +1,176 @@
+//! Minimal API of mask vectors.
+
+macro_rules! impl_minimal_mask {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident
+    | $test_tt:tt | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
+        $(#[$doc])*
+        pub type $id = Simd<[$elem_ty; $elem_count]>;
+
+        impl sealed::Simd for $id {
+            type Element = $elem_ty;
+            const LANES: usize = $elem_count;
+            type LanesType = [u32; $elem_count];
+        }
+
+        impl $id {
+            /// Creates a new instance with each vector element initialized
+            /// with the provided values.
+            #[inline]
+            #[allow(clippy::too_many_arguments)]
+            pub const fn new($($elem_name: bool),*) -> Self {
+                Simd(codegen::$id($(Self::bool_to_internal($elem_name)),*))
+            }
+
+            /// Converts a `bool` into the lane representation: `false` -> `0`, `true` -> `!0` (all bits set).
+            #[inline]
+            #[allow(clippy::indexing_slicing)]
+            const fn bool_to_internal(x: bool) -> $ielem_ty {
+                [0 as $ielem_ty, !(0 as $ielem_ty)][x as usize]
+            }
+
+            /// Returns the number of vector lanes.
+            #[inline]
+            pub const fn lanes() -> usize {
+                $elem_count
+            }
+
+            /// Constructs a new instance with each element initialized to
+            /// `value`.
+            #[inline]
+            pub const fn splat(value: bool) -> Self {
+                Simd(codegen::$id($({
+                    #[allow(non_camel_case_types, dead_code)]
+                    struct $elem_name; // mentions `$elem_name` so the repetition expands once per lane
+                    Self::bool_to_internal(value)
+                }),*))
+            }
+
+            /// Extracts the value at `index`.
+            ///
+            /// # Panics
+            ///
+            /// If `index >= Self::lanes()`.
+            #[inline]
+            pub fn extract(self, index: usize) -> bool {
+                assert!(index < $elem_count);
+                unsafe { self.extract_unchecked(index) }
+            }
+
+            /// Extracts the value at `index` without bounds checking.
+            ///
+            /// # Safety
+            ///
+            /// If `index >= Self::lanes()` the behavior is undefined.
+            #[inline]
+            pub unsafe fn extract_unchecked(self, index: usize) -> bool {
+                use crate::llvm::simd_extract;
+                let x: $ielem_ty = simd_extract(self.0, index as u32);
+                x != 0
+            }
+
+            /// Returns a new vector where the value at `index` is replaced by
+            /// `new_value`.
+            ///
+            /// # Panics
+            ///
+            /// If `index >= Self::lanes()`.
+            #[inline]
+            #[must_use = "replace does not modify the original value - \
+                          it returns a new vector with the value at `index` \
+                          replaced by `new_value`"
+            ]
+            pub fn replace(self, index: usize, new_value: bool) -> Self {
+                assert!(index < $elem_count);
+                unsafe { self.replace_unchecked(index, new_value) }
+            }
+
+            /// Returns a new vector where the value at `index` is replaced by
+            /// `new_value`, without bounds checking on `index`.
+            ///
+            /// # Safety
+            ///
+            /// If `index >= Self::lanes()` the behavior is undefined.
+            #[inline]
+            #[must_use = "replace_unchecked does not modify the original value - \
+                          it returns a new vector with the value at `index` \
+                          replaced by `new_value`"
+            ]
+            pub unsafe fn replace_unchecked(
+                self,
+                index: usize,
+                new_value: bool,
+            ) -> Self {
+                use crate::llvm::simd_insert;
+                Simd(simd_insert(self.0, index as u32,
+                                 Self::bool_to_internal(new_value)))
+            }
+        }
+
+        test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _minimal>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn minimal() {
+                        // TODO: test new
+
+                        // lanes:
+                        assert_eq!($elem_count, $id::lanes());
+
+                        // splat and extract / extract_unchecked:
+                        let vec = $id::splat(true);
+                        for i in 0..$id::lanes() {
+                            assert_eq!(true, vec.extract(i));
+                            assert_eq!(true,
+                                       unsafe { vec.extract_unchecked(i) }
+                            );
+                        }
+
+                        // replace / replace_unchecked
+                        let new_vec = vec.replace(0, false);
+                        for i in 0..$id::lanes() {
+                            if i == 0 {
+                                assert_eq!(false, new_vec.extract(i));
+                            } else {
+                                assert_eq!(true, new_vec.extract(i));
+                            }
+                        }
+                        let new_vec = unsafe {
+                            vec.replace_unchecked(0, false)
+                        };
+                        for i in 0..$id::lanes() {
+                            if i == 0 {
+                                assert_eq!(false, new_vec.extract(i));
+                            } else {
+                                assert_eq!(true, new_vec.extract(i));
+                            }
+                        }
+                    }
+
+                    // FIXME: wasm-bindgen-test does not support #[should_panic]
+                    // #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    #[cfg(not(target_arch = "wasm32"))]
+                    #[test]
+                    #[should_panic]
+                    fn extract_panic_oob() {
+                        let vec = $id::splat(false);
+                        let _ = vec.extract($id::lanes());
+                    }
+                    // FIXME: wasm-bindgen-test does not support #[should_panic]
+                    // #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    #[cfg(not(target_arch = "wasm32"))]
+                    #[test]
+                    #[should_panic]
+                    fn replace_panic_oob() {
+                        let vec = $id::splat(false);
+                        let _ = vec.replace($id::lanes(), true);
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/vendor/packed_simd_2/src/api/minimal/ptr.rs b/vendor/packed_simd_2/src/api/minimal/ptr.rs
new file mode 100644
index 000000000..c3d61fbf6
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/minimal/ptr.rs
@@ -0,0 +1,1377 @@
+//! Minimal API of pointer vectors.
+
+macro_rules! impl_minimal_p {
+    ([$elem_ty:ty; $elem_count:expr]: $id:ident, $mask_ty:ident,
+     $usize_ty:ident, $isize_ty:ident | $ref:ident | $test_tt:tt
+     | $($elem_name:ident),+ | ($true:expr, $false:expr) |
+     $(#[$doc:meta])*) => {
+
+        // Public alias naming the pointer vector; the attributes captured as
+        // `$doc` are attached to it.
+        // NOTE(review): `T` is only used if `$elem_ty` mentions it
+        // (presumably `*const T` / `*mut T` at the call sites) - confirm
+        // against the macro invocations.
+        $(#[$doc])*
+        pub type $id<T> = Simd<[$elem_ty; $elem_count]>;
+
+        // Sealed-trait metadata for the pointer vector: its lane element
+        // type and its lane count.
+        impl<T> sealed::Simd for $id<T> {
+            type Element = $elem_ty;
+            const LANES: usize = $elem_count;
+            // NOTE(review): an array of `u32` with one entry per lane; what
+            // it is used for is defined by `sealed::Simd`, which is not
+            // visible from here.
+            type LanesType = [u32; $elem_count];
+        }
+
+        impl<T> $id<T> {
+            /// Creates a new instance with each vector element initialized
+            /// with the provided values.
+            #[inline]
+            #[allow(clippy::too_many_arguments)]
+            pub const fn new($($elem_name: $elem_ty),*) -> Self {
+                Simd(codegen::$id($($elem_name),*))
+            }
+
+            /// Returns the number of vector lanes.
+            #[inline]
+            pub const fn lanes() -> usize {
+                $elem_count
+            }
+
+            /// Constructs a new instance with each element initialized to
+            /// `value`.
+            #[inline]
+            pub const fn splat(value: $elem_ty) -> Self {
+                Simd(codegen::$id($({
+                    // Declaring a unit struct named after the lane is only a
+                    // way to mention `$elem_name` inside the repetition, so
+                    // that this block is emitted once per lane; every copy
+                    // of the block evaluates to `value`.
+                    #[allow(non_camel_case_types, dead_code)]
+                    struct $elem_name;
+                    value
+                }),*))
+            }
+
+            /// Constructs a new instance with each element initialized to
+            /// `null`.
+            #[inline]
+            pub const fn null() -> Self {
+                Self::splat(crate::ptr::null_mut() as $elem_ty)
+            }
+
+            /// Returns a mask that selects those lanes that contain `null`
+            /// pointers.
+            #[inline]
+            pub fn is_null(self) -> $mask_ty {
+                // Lane-wise comparison against the all-null vector.
+                self.eq(Self::null())
+            }
+
+            /// Extracts the value at `index`.
+            ///
+            /// # Panics
+            ///
+            /// If `index >= Self::lanes()`.
+            #[inline]
+            pub fn extract(self, index: usize) -> $elem_ty {
+                assert!(index < $elem_count);
+                // SAFETY: `index` was bounds-checked by the assertion above.
+                unsafe { self.extract_unchecked(index) }
+            }
+
+            /// Extracts the value at `index`.
+            ///
+            /// # Safety
+            ///
+            /// If `index >= Self::lanes()` the behavior is undefined.
+            #[inline]
+            pub unsafe fn extract_unchecked(self, index: usize) -> $elem_ty {
+                use crate::llvm::simd_extract;
+                // The lane index is passed to the intrinsic as a `u32`.
+                simd_extract(self.0, index as u32)
+            }
+
+            /// Returns a new vector where the value at `index` is replaced by
+            /// `new_value`.
+            ///
+            /// # Panics
+            ///
+            /// If `index >= Self::lanes()`.
+            #[inline]
+            #[must_use = "replace does not modify the original value - \
+                          it returns a new vector with the value at `index` \
+                          replaced by `new_value`"
+            ]
+            // `new_value` is a raw pointer that is only stored into a lane;
+            // nothing in this function dereferences it.
+            #[allow(clippy::not_unsafe_ptr_arg_deref)]
+            pub fn replace(self, index: usize, new_value: $elem_ty) -> Self {
+                assert!(index < $elem_count);
+                // SAFETY: `index` was bounds-checked by the assertion above.
+                unsafe { self.replace_unchecked(index, new_value) }
+            }
+
+            /// Returns a new vector where the value at `index` is replaced by `new_value`.
+            ///
+            /// # Safety
+            ///
+            /// If `index >= Self::lanes()` the behavior is undefined.
+            #[inline]
+            #[must_use = "replace_unchecked does not modify the original value - \
+                          it returns a new vector with the value at `index` \
+                          replaced by `new_value`"
+            ]
+            pub unsafe fn replace_unchecked(
+                self,
+                index: usize,
+                new_value: $elem_ty,
+            ) -> Self {
+                use crate::llvm::simd_insert;
+                // The lane index is passed to the intrinsic as a `u32`.
+                Simd(simd_insert(self.0, index as u32, new_value))
+            }
+        }
+
+
+        test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _minimal>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn minimal() {
+                        let lane_count = $id::<i32>::lanes();
+
+                        // lanes() reports the macro's element count:
+                        assert_eq!($elem_count, lane_count);
+
+                        // splat, then read every lane back through both the
+                        // checked and the unchecked accessor:
+                        let seven: <$id<i32> as sealed::Simd>::Element
+                            = $ref!(7);
+                        let forty_two: <$id<i32> as sealed::Simd>::Element
+                            = $ref!(42);
+                        let splatted: $id<i32> = $id::splat(seven);
+                        for lane in 0..lane_count {
+                            assert_eq!(seven, splatted.extract(lane));
+                            assert_eq!(
+                                seven,
+                                unsafe { splatted.extract_unchecked(lane) }
+                            );
+                        }
+
+                        // replace / replace_unchecked change lane 0 only:
+                        let replaced = splatted.replace(0, forty_two);
+                        for lane in 0..lane_count {
+                            let expected =
+                                if lane == 0 { forty_two } else { seven };
+                            assert_eq!(expected, replaced.extract(lane));
+                        }
+                        let replaced = unsafe {
+                            splatted.replace_unchecked(0, forty_two)
+                        };
+                        for lane in 0..lane_count {
+                            let expected =
+                                if lane == 0 { forty_two } else { seven };
+                            assert_eq!(expected, replaced.extract(lane));
+                        }
+
+                        // null / is_null:
+                        let all_null = $id::<i32>::null();
+                        assert_eq!(
+                            all_null,
+                            $id::<i32>::splat(unsafe { crate::mem::zeroed() })
+                        );
+                        assert!(all_null.is_null().all());
+                        let one_set = all_null.replace(
+                            0, unsafe { crate::mem::transmute(1_isize) }
+                        );
+                        assert!(!one_set.is_null().all());
+                        if lane_count > 1 {
+                            assert!(one_set.is_null().any());
+                        } else {
+                            assert!(!one_set.is_null().any());
+                        }
+                    }
+
+                    // FIXME: wasm-bindgen-test does not support
+                    // #[should_panic], so this test is compiled out on
+                    // wasm32 instead of using:
+                    // #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    #[cfg(not(target_arch = "wasm32"))]
+                    #[test]
+                    #[should_panic]
+                    fn extract_panic_oob() {
+                        let seven: <$id<i32> as sealed::Simd>::Element
+                            = $ref!(7);
+                        let splatted: $id<i32> = $id::splat(seven);
+                        // `lanes()` is one past the last valid lane index.
+                        let first_invalid = $id::<i32>::lanes();
+                        let _ = splatted.extract(first_invalid);
+                    }
+
+                    // FIXME: wasm-bindgen-test does not support
+                    // #[should_panic], so this test is compiled out on
+                    // wasm32 instead of using:
+                    // #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    #[cfg(not(target_arch = "wasm32"))]
+                    #[test]
+                    #[should_panic]
+                    fn replace_panic_oob() {
+                        let seven: <$id<i32> as sealed::Simd>::Element
+                            = $ref!(7);
+                        let forty_two: <$id<i32> as sealed::Simd>::Element
+                            = $ref!(42);
+                        let splatted: $id<i32> = $id::splat(seven);
+                        // `lanes()` is one past the last valid lane index.
+                        let first_invalid = $id::<i32>::lanes();
+                        let _ = splatted.replace(first_invalid, forty_two);
+                    }
+                }
+            }
+        }
+
+        impl<T> crate::fmt::Debug for $id<T> {
+            /// Writes the vector as `NAME<POINTEE>(lane, lane, ...)`, where
+            /// `NAME` is the vector type's identifier and `POINTEE` is the
+            /// type name of `T`.
+            #[allow(clippy::missing_inline_in_public_items)]
+            fn fmt(&self, f: &mut crate::fmt::Formatter<'_>)
+                   -> crate::fmt::Result {
+                let vector_name = stringify!($id);
+                let pointee_name = crate::intrinsics::type_name::<T>();
+                write!(f, "{}<{}>(", vector_name, pointee_name)?;
+                for lane in 0..$elem_count {
+                    // Separator goes before every lane except the first.
+                    if lane != 0 {
+                        f.write_str(", ")?;
+                    }
+                    self.extract(lane).fmt(f)?;
+                }
+                f.write_str(")")
+            }
+        }
+
+         test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _fmt_debug>] {
+                    use super::*;
+                    // Checks the `Debug` output of a default vector: the
+                    // `NAME<i32>(` prefix, the `)` suffix, the number of
+                    // comma-separated pieces, and that each piece equals the
+                    // `Debug` output of the corresponding lane.
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn debug() {
+                        use arrayvec::{ArrayString,ArrayVec};
+                        type TinyString = ArrayString<[u8; 512]>;
+
+                        use crate::fmt::Write;
+                        let v = $id::<i32>::default();
+                        // Actual output of the impl under test.
+                        let mut s = TinyString::new();
+                        write!(&mut s, "{:?}", v).unwrap();
+
+                        // Expected prefix, e.g. the vector name followed by
+                        // `<i32>(`.
+                        let mut beg = TinyString::new();
+                        write!(&mut beg, "{}<i32>(", stringify!($id)).unwrap();
+                        assert!(
+                            s.starts_with(beg.as_str()),
+                            "s = {} (should start with = {})", s, beg
+                        );
+                        assert!(s.ends_with(")"));
+                        // Strip prefix and closing parenthesis, then split
+                        // the remainder into one trimmed string per lane.
+                        let s: ArrayVec<[TinyString; 64]>
+                            = s.replace(beg.as_str(), "")
+                            .replace(")", "").split(",")
+                            .map(|v| TinyString::from(v.trim()).unwrap())
+                            .collect();
+                        assert_eq!(s.len(), $id::<i32>::lanes());
+                        // Each piece must match the lane formatted on its own.
+                        for (index, ss) in s.into_iter().enumerate() {
+                            let mut e = TinyString::new();
+                            write!(&mut e, "{:?}", v.extract(index)).unwrap();
+                            assert_eq!(ss, e);
+                        }
+                    }
+                }
+            }
+         }
+
+        impl<T> Default for $id<T> {
+            /// Returns the vector whose lanes are all null pointers.
+            #[inline]
+            fn default() -> Self {
+                // FIXME: ptrs do not implement default, so the all-null
+                // vector is used as the default value.
+                $id::<T>::null()
+            }
+        }
+
+        test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _default>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn default() {
+                        // Every lane of the default vector must compare equal
+                        // to an all-zero-bits element.
+                        let zero_ptr: <$id<i32> as sealed::Simd>::Element
+                            = unsafe { crate::mem::zeroed() };
+                        let defaulted = $id::<i32>::default();
+                        for lane in 0..$id::<i32>::lanes() {
+                            assert_eq!(defaulted.extract(lane), zero_ptr);
+                        }
+                    }
+                }
+            }
+        }
+
+        // All six comparisons reinterpret both pointer vectors as
+        // `$usize_ty` and hand the underlying values to the matching
+        // comparison intrinsic.
+        impl<T> $id<T> {
+            /// Lane-wise `==` comparison.
+            #[inline]
+            pub fn eq(self, other: Self) -> $mask_ty {
+                use crate::llvm::simd_eq;
+                let lhs: $usize_ty = unsafe { crate::mem::transmute(self) };
+                let rhs: $usize_ty = unsafe { crate::mem::transmute(other) };
+                Simd(unsafe { simd_eq(lhs.0, rhs.0) })
+            }
+
+            /// Lane-wise `!=` comparison.
+            #[inline]
+            pub fn ne(self, other: Self) -> $mask_ty {
+                use crate::llvm::simd_ne;
+                let lhs: $usize_ty = unsafe { crate::mem::transmute(self) };
+                let rhs: $usize_ty = unsafe { crate::mem::transmute(other) };
+                Simd(unsafe { simd_ne(lhs.0, rhs.0) })
+            }
+
+            /// Lane-wise `<` comparison.
+            #[inline]
+            pub fn lt(self, other: Self) -> $mask_ty {
+                use crate::llvm::simd_lt;
+                let lhs: $usize_ty = unsafe { crate::mem::transmute(self) };
+                let rhs: $usize_ty = unsafe { crate::mem::transmute(other) };
+                Simd(unsafe { simd_lt(lhs.0, rhs.0) })
+            }
+
+            /// Lane-wise `<=` comparison.
+            #[inline]
+            pub fn le(self, other: Self) -> $mask_ty {
+                use crate::llvm::simd_le;
+                let lhs: $usize_ty = unsafe { crate::mem::transmute(self) };
+                let rhs: $usize_ty = unsafe { crate::mem::transmute(other) };
+                Simd(unsafe { simd_le(lhs.0, rhs.0) })
+            }
+
+            /// Lane-wise `>` comparison.
+            #[inline]
+            pub fn gt(self, other: Self) -> $mask_ty {
+                use crate::llvm::simd_gt;
+                let lhs: $usize_ty = unsafe { crate::mem::transmute(self) };
+                let rhs: $usize_ty = unsafe { crate::mem::transmute(other) };
+                Simd(unsafe { simd_gt(lhs.0, rhs.0) })
+            }
+
+            /// Lane-wise `>=` comparison.
+            #[inline]
+            pub fn ge(self, other: Self) -> $mask_ty {
+                use crate::llvm::simd_ge;
+                let lhs: $usize_ty = unsafe { crate::mem::transmute(self) };
+                let rhs: $usize_ty = unsafe { crate::mem::transmute(other) };
+                Simd(unsafe { simd_ge(lhs.0, rhs.0) })
+            }
+        }
+
+        test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _cmp_vertical>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn cmp() {
+                        // Uniform inputs: all lanes 0 versus all lanes 1.
+                        let zeros = $id::<i32>::null();
+                        let ones = $id::<i32>::splat(unsafe {
+                            crate::mem::transmute(1_isize)
+                        });
+
+                        let all_true = $mask_ty::splat(true);
+                        assert!(zeros.lt(ones) == all_true);
+                        assert!(zeros.le(ones) == all_true);
+
+                        let all_false = $mask_ty::splat(false);
+                        assert!(zeros.gt(ones) == all_false);
+                        assert!(zeros.ge(ones) == all_false);
+                        assert!(zeros.eq(ones) == all_false);
+
+                        // Mixed inputs: even lanes hold 0 < 1, odd lanes
+                        // hold 1 > 0, so `lt` must be true on even lanes
+                        // only.
+                        let mut lhs = zeros;
+                        let mut rhs = ones;
+                        let mut expected = all_false;
+                        for lane in 0..$id::<i32>::lanes() {
+                            let even = lane % 2 == 0;
+                            let (lhs_bits, rhs_bits) = if even {
+                                (0_isize, 1_isize)
+                            } else {
+                                (1_isize, 0_isize)
+                            };
+                            lhs = lhs.replace(
+                                lane,
+                                unsafe { crate::mem::transmute(lhs_bits) }
+                            );
+                            rhs = rhs.replace(
+                                lane,
+                                unsafe { crate::mem::transmute(rhs_bits) }
+                            );
+                            expected = expected.replace(lane, even);
+                        }
+                        assert!(lhs.lt(rhs) == expected);
+                    }
+                }
+            }
+        }
+
+        #[allow(clippy::partialeq_ne_impl)]
+        impl<T> crate::cmp::PartialEq<$id<T>> for $id<T> {
+            /// Two vectors are equal when every lane compares equal.
+            #[inline]
+            fn eq(&self, other: &Self) -> bool {
+                let lanes_equal = $id::<T>::eq(*self, *other);
+                lanes_equal.all()
+            }
+            /// Two vectors differ when at least one lane differs.
+            #[inline]
+            fn ne(&self, other: &Self) -> bool {
+                let lanes_differ = $id::<T>::ne(*self, *other);
+                lanes_differ.any()
+            }
+        }
+
+        // FIXME: https://github.com/rust-lang-nursery/rust-clippy/issues/2892
+        #[allow(clippy::partialeq_ne_impl)]
+        impl<T> crate::cmp::PartialEq<LexicographicallyOrdered<$id<T>>>
+            for LexicographicallyOrdered<$id<T>>
+        {
+            /// Delegates to the equality of the wrapped vectors.
+            #[inline]
+            fn eq(&self, other: &Self) -> bool {
+                crate::cmp::PartialEq::eq(&self.0, &other.0)
+            }
+            /// Delegates to the inequality of the wrapped vectors.
+            #[inline]
+            fn ne(&self, other: &Self) -> bool {
+                crate::cmp::PartialEq::ne(&self.0, &other.0)
+            }
+        }
+
+        test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _cmp_PartialEq>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn partial_eq() {
+                        // A non-null lane value with the bit pattern of 1.
+                        let one: <$id<i32> as sealed::Simd>::Element
+                            = unsafe { crate::mem::transmute(1_isize) };
+
+                        // Vectors that differ in every lane.
+                        let all_null = $id::<i32>::null();
+                        let all_one = $id::<i32>::splat(one);
+
+                        assert!(all_null != all_one);
+                        assert!(!(all_null == all_one));
+                        assert!(all_null == all_null);
+                        assert!(!(all_null != all_null));
+
+                        // Vectors that agree in lane 0 and differ in all
+                        // other lanes; only meaningful with several lanes.
+                        if $id::<i32>::lanes() > 1 {
+                            let mixed = $id::<i32>::null().replace(0, one);
+                            let all_one = $id::<i32>::splat(one);
+
+                            assert!(mixed != all_one);
+                            assert!(!(mixed == all_one));
+                            assert!(mixed == mixed);
+                            assert!(!(mixed != mixed));
+                        }
+                    }
+                }
+            }
+        }
+
+        // Marker impls: declare `Eq` for the pointer vector and for its
+        // `LexicographicallyOrdered` wrapper, on top of the `PartialEq`
+        // impls.
+        impl<T> crate::cmp::Eq for $id<T> {}
+        impl<T> crate::cmp::Eq for LexicographicallyOrdered<$id<T>> {}
+
+        test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _cmp_eq>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn eq() {
+                        // Compile-time check only: the call type-checks iff
+                        // the vector type implements `Eq`.
+                        fn requires_eq<E: crate::cmp::Eq>(_: E) {}
+                        requires_eq($id::<i32>::null());
+                    }
+                }
+            }
+        }
+
+        impl<T> From<[$elem_ty; $elem_count]> for $id<T> {
+            /// Builds a vector by copying `size_of::<Self>()` bytes out of
+            /// `array`.
+            #[inline]
+            fn from(array: [$elem_ty; $elem_count]) -> Self {
+                // FIXME: unnecessary zeroing; better than UB.
+                let mut vector: Self = unsafe { crate::mem::zeroed() };
+                let src = &array as *const [$elem_ty; $elem_count] as *const u8;
+                let dst = &mut vector as *mut Self as *mut u8;
+                unsafe {
+                    crate::ptr::copy_nonoverlapping(
+                        src,
+                        dst,
+                        crate::mem::size_of::<Self>()
+                    );
+                }
+                vector
+            }
+        }
+        impl<T> Into<[$elem_ty; $elem_count]> for $id<T> {
+            /// Builds an array by copying `size_of::<Self>()` bytes out of
+            /// the vector.
+            #[inline]
+            fn into(self) -> [$elem_ty; $elem_count] {
+                // FIXME: unnecessary zeroing; better than UB.
+                let mut array: [$elem_ty; $elem_count] =
+                    unsafe { crate::mem::zeroed() };
+                let src = &self as *const $id<T> as *const u8;
+                let dst = &mut array as *mut [$elem_ty; $elem_count] as *mut u8;
+                unsafe {
+                    crate::ptr::copy_nonoverlapping(
+                        src,
+                        dst,
+                        crate::mem::size_of::<Self>()
+                    );
+                }
+                array
+            }
+        }
+
+        test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _from>] {
+                    use super::*;
+                    // Round-trips between a pointer vector and an array of
+                    // the same pointers, in both directions.
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn array() {
+                        // Backing storage whose element addresses are used
+                        // as the lane values.
+                        let values = [1_i32; $elem_count];
+
+                        let mut vec: $id<i32> = Default::default();
+                        let mut array = [
+                            $id::<i32>::null().extract(0); $elem_count
+                        ];
+
+                        // Fill the vector and the array with the same
+                        // pointers, lane by lane.
+                        for i in 0..$elem_count {
+                            let ptr = &values[i] as *const i32 as *mut i32;
+                            vec = vec.replace(i, ptr);
+                            array[i] = ptr;
+                        }
+
+                        // FIXME: there is no impl of From<$id<T>> for [$elem_ty; N]
+                        // let a0 = From::from(vec);
+                        // assert_eq!(a0, array);
+                        // `a1` is first bound to `array` so that its type is
+                        // known when `vec.into()` is assigned; the initial
+                        // value itself is never read, hence the allow.
+                        #[allow(unused_assignments)]
+                        let mut a1 = array;
+                        a1 = vec.into();
+                        assert_eq!(a1, array);
+
+                        // Array -> vector, through both `From` and `Into`.
+                        let v0: $id<i32> = From::from(array);
+                        assert_eq!(v0, vec);
+                        let v1: $id<i32> = array.into();
+                        assert_eq!(v1, vec);
+                    }
+                }
+            }
+        }
+
+        impl<T> $id<T> {
+            /// Reads a vector from the start of `slice`, checking both the
+            /// slice length and its alignment first.
+            ///
+            /// # Panics
+            ///
+            /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned
+            /// to an `align_of::<Self>()` boundary.
+            #[inline]
+            pub fn from_slice_aligned(slice: &[$elem_ty]) -> Self {
+                assert!(slice.len() >= $elem_count);
+                let target_ptr = slice.as_ptr();
+                assert!(
+                    target_ptr.align_offset(crate::mem::align_of::<Self>())
+                        == 0
+                );
+                // Length and alignment were both verified above.
+                unsafe { Self::from_slice_aligned_unchecked(slice) }
+            }
+
+            /// Reads a vector from the start of `slice`, checking the slice
+            /// length first; no alignment is required.
+            ///
+            /// # Panics
+            ///
+            /// If `slice.len() < Self::lanes()`.
+            #[inline]
+            pub fn from_slice_unaligned(slice: &[$elem_ty]) -> Self {
+                assert!(slice.len() >= $elem_count);
+                // The length was verified above.
+                unsafe { Self::from_slice_unaligned_unchecked(slice) }
+            }
+
+            /// Reads a vector from the start of `slice` without any checks.
+            ///
+            /// # Safety
+            ///
+            /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned
+            /// to an `align_of::<Self>()` boundary, the behavior is undefined.
+            #[inline]
+            pub unsafe fn from_slice_aligned_unchecked(slice: &[$elem_ty])
+                                                       -> Self {
+                let first = slice.get_unchecked(0) as *const $elem_ty;
+                // The caller guarantees the alignment of the vector type.
+                #[allow(clippy::cast_ptr_alignment)]
+                let vector_ptr = first as *const Self;
+                *vector_ptr
+            }
+
+            /// Reads a vector from the start of `slice` with a byte-wise
+            /// copy, without checking the slice length.
+            ///
+            /// # Safety
+            ///
+            /// If `slice.len() < Self::lanes()` the behavior is undefined.
+            #[inline]
+            pub unsafe fn from_slice_unaligned_unchecked(
+                slice: &[$elem_ty],
+            ) -> Self {
+                // Start from the all-null vector and overwrite its bytes
+                // with the bytes found at the start of the slice.
+                let mut loaded = Self::null();
+                let src = slice.get_unchecked(0) as *const $elem_ty as *const u8;
+                let dst = &mut loaded as *mut Self as *mut u8;
+                crate::ptr::copy_nonoverlapping(
+                    src,
+                    dst,
+                    crate::mem::size_of::<Self>(),
+                );
+                loaded
+            }
+        }
+
+        test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _slice_from_slice>] {
+                    use super::*;
+                    use crate::iter::Iterator;
+
+                    // Loads from a sub-slice that starts one element into the
+                    // array and checks every loaded lane.
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn from_slice_unaligned() {
+                        let (null, non_null) = ptr_vals!($id<i32>);
+
+                        // One extra leading element, set to `null`, so the
+                        // loaded window `[1..]` holds only `non_null`.
+                        let mut unaligned = [
+                            non_null; $id::<i32>::lanes() + 1
+                        ];
+                        unaligned[0] = null;
+                        let vec = $id::<i32>::from_slice_unaligned(
+                            &unaligned[1..]
+                        );
+                        for (index, &b) in unaligned.iter().enumerate() {
+                            if index == 0 {
+                                assert_eq!(b, null);
+                            } else {
+                                assert_eq!(b, non_null);
+                                assert_eq!(b, vec.extract(index - 1));
+                            }
+                        }
+                    }
+
+                    // FIXME: wasm-bindgen-test does not support #[should_panic]
+                    // #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    #[cfg(not(target_arch = "wasm32"))]
+                    #[test]
+                    #[should_panic]
+                    fn from_slice_unaligned_fail() {
+                        let (_null, non_null) = ptr_vals!($id<i32>);
+                        let unaligned = [non_null; $id::<i32>::lanes() + 1];
+                        // the slice is not large enough => panic
+                        let _vec = $id::<i32>::from_slice_unaligned(
+                            &unaligned[2..]
+                        );
+                    }
+
+                    // Test fixture: an element array twice as long as the
+                    // vector, overlaid with a vector field.
+                    // NOTE(review): `_vec` is never read; presumably it is
+                    // there so that `data` gets the vector's alignment -
+                    // confirm.
+                    union A {
+                        data: [<$id<i32> as sealed::Simd>::Element;
+                               2 * $id::<i32>::lanes()],
+                        _vec: $id<i32>,
+                    }
+
+                    // Loads the second half of the fixture with the aligned
+                    // loader and checks every loaded lane.
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn from_slice_aligned() {
+                        let (null, non_null) = ptr_vals!($id<i32>);
+                        let mut aligned = A {
+                            data: [null; 2 * $id::<i32>::lanes()],
+                        };
+                        // First half stays `null`, second half becomes
+                        // `non_null`.
+                        for i in
+                            $id::<i32>::lanes()..(2 * $id::<i32>::lanes()) {
+                            unsafe {
+                                aligned.data[i] = non_null;
+                            }
+                        }
+
+                        let vec = unsafe {
+                            $id::<i32>::from_slice_aligned(
+                                &aligned.data[$id::<i32>::lanes()..]
+                            )
+                        };
+                        for (index, &b) in unsafe {
+                            aligned.data.iter().enumerate()
+                        } {
+                            if index < $id::<i32>::lanes() {
+                                assert_eq!(b, null);
+                            } else {
+                                assert_eq!(b, non_null);
+                                assert_eq!(
+                                    b, vec.extract(index - $id::<i32>::lanes())
+                                );
+                            }
+                        }
+                    }
+
+                    // FIXME: wasm-bindgen-test does not support #[should_panic]
+                    // #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    #[cfg(not(target_arch = "wasm32"))]
+                    #[test]
+                    #[should_panic]
+                    fn from_slice_aligned_fail_lanes() {
+                        let (_null, non_null) = ptr_vals!($id<i32>);
+                        let aligned = A {
+                            data: [non_null; 2 * $id::<i32>::lanes()],
+                        };
+                        // the slice is not large enough => panic
+                        let _vec = unsafe {
+                            $id::<i32>::from_slice_aligned(
+                                &aligned.data[2 * $id::<i32>::lanes()..]
+                            )
+                        };
+                    }
+
+                    // FIXME: wasm-bindgen-test does not support #[should_panic]
+                    // #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    #[cfg(not(target_arch = "wasm32"))]
+                    #[test]
+                    #[should_panic]
+                    fn from_slice_aligned_fail_align() {
+                        unsafe {
+                            let (null, _non_null) = ptr_vals!($id<i32>);
+                            let aligned = A {
+                                data: [null; 2 * $id::<i32>::lanes()],
+                            };
+
+                            // get a pointer to the front of data
+                            let ptr = aligned.data.as_ptr();
+                            // offset pointer by one element
+                            let ptr = ptr.wrapping_add(1);
+
+                            if ptr.align_offset(
+                                crate::mem::align_of::<$id<i32>>()
+                            ) == 0 {
+                                // the pointer is properly aligned, so
+                                // from_slice_aligned won't fail here (e.g. this
+                                // can happen for i128x1). So we panic to make
+                                // the "should_panic" test pass:
+                                panic!("ok");
+                            }
+
+                            // create a slice - this is safe, because the
+                            // elements of the slice exist, are properly
+                            // initialized, and properly aligned:
+                            let s = slice::from_raw_parts(
+                                ptr, $id::<i32>::lanes()
+                            );
+                            // this should always panic because the slice
+                            // alignment does not match the alignment
+                            // requirements for the vector type:
+                            let _vec = $id::<i32>::from_slice_aligned(s);
+                        }
+                    }
+                }
+            }
+        }
+
+        impl<T> $id<T> {
+            /// Writes the values of the vector to the `slice`.
+            ///
+            /// # Panics
+            ///
+            /// If `slice.len() < Self::lanes()` or `&slice[0]` is not
+            /// aligned to an `align_of::<Self>()` boundary.
+            #[inline]
+            pub fn write_to_slice_aligned(self, slice: &mut [$elem_ty]) {
+                assert!(slice.len() >= $elem_count);
+                let target_ptr = slice.as_ptr();
+                assert!(
+                    target_ptr.align_offset(crate::mem::align_of::<Self>())
+                        == 0
+                );
+                // SAFETY: the length and alignment preconditions of the
+                // unchecked variant were both asserted above.
+                unsafe { self.write_to_slice_aligned_unchecked(slice) }
+            }
+
+            /// Writes the values of the vector to the `slice`.
+            ///
+            /// # Panics
+            ///
+            /// If `slice.len() < Self::lanes()`.
+            #[inline]
+            pub fn write_to_slice_unaligned(self, slice: &mut [$elem_ty]) {
+                assert!(slice.len() >= $elem_count);
+                // SAFETY: the length precondition of the unchecked variant
+                // was asserted above.
+                unsafe { self.write_to_slice_unaligned_unchecked(slice) }
+            }
+
+            /// Writes the values of the vector to the `slice`.
+            ///
+            /// # Safety
+            ///
+            /// If `slice.len() < Self::lanes()` or `&slice[0]` is not
+            /// aligned to an `align_of::<Self>()` boundary, the behavior is
+            /// undefined.
+            #[inline]
+            pub unsafe fn write_to_slice_aligned_unchecked(
+                self, slice: &mut [$elem_ty],
+            ) {
+                // Derive the pointer from the whole slice (`as_mut_ptr`)
+                // rather than from a reference to element 0, so that it is
+                // valid for writing every lane and not just the first
+                // element.
+                #[allow(clippy::cast_ptr_alignment)]
+                let target_ptr = slice.as_mut_ptr() as *mut Self;
+                *target_ptr = self;
+            }
+
+            /// Writes the values of the vector to the `slice`.
+            ///
+            /// # Safety
+            ///
+            /// If `slice.len() < Self::lanes()` the behavior is undefined.
+            #[inline]
+            pub unsafe fn write_to_slice_unaligned_unchecked(
+                self, slice: &mut [$elem_ty],
+            ) {
+                // As above, the pointer must come from the whole slice so
+                // that the byte-wise copy below may touch all lanes.
+                let target_ptr = slice.as_mut_ptr() as *mut u8;
+                let self_ptr = &self as *const Self as *const u8;
+                // A byte-wise copy places no alignment requirement on the
+                // destination.
+                crate::ptr::copy_nonoverlapping(
+                    self_ptr,
+                    target_ptr,
+                    crate::mem::size_of::<Self>(),
+                );
+            }
+        }
+
+        test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _slice_write_to_slice>] {
+                    use super::*;
+                    use crate::iter::Iterator;
+
+                    // Writes a splatted vector into a buffer starting at
+                    // element 1 and checks that element 0 is untouched while
+                    // every following element holds the written value.
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn write_to_slice_unaligned() {
+                        let (null, non_null) = ptr_vals!($id<i32>);
+                        let mut unaligned = [null; $id::<i32>::lanes() + 1];
+                        let vec = $id::<i32>::splat(non_null);
+                        vec.write_to_slice_unaligned(&mut unaligned[1..]);
+                        for (index, &b) in unaligned.iter().enumerate() {
+                            if index == 0 {
+                                assert_eq!(b, null);
+                            } else {
+                                assert_eq!(b, non_null);
+                                assert_eq!(b, vec.extract(index - 1));
+                            }
+                        }
+                    }
+
+                    // FIXME: wasm-bindgen-test does not support #[should_panic]
+                    // #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    #[cfg(not(target_arch = "wasm32"))]
+                    #[test]
+                    #[should_panic]
+                    fn write_to_slice_unaligned_fail() {
+                        let (null, non_null) = ptr_vals!($id<i32>);
+                        let mut unaligned = [null; $id::<i32>::lanes() + 1];
+                        let vec = $id::<i32>::splat(non_null);
+                        // the slice is not large enough (it holds only
+                        // `lanes() - 1` elements) => panic
+                        vec.write_to_slice_unaligned(&mut unaligned[2..]);
+                    }
+
+                    // Backing storage for the aligned tests. The `_vec` field
+                    // is never read; as a union member it gives `A` (and
+                    // therefore the start of `data`) the alignment of the
+                    // vector type.
+                    union A {
+                        data: [<$id<i32> as sealed::Simd>::Element;
+                               2 * $id::<i32>::lanes()],
+                        _vec: $id<i32>,
+                    }
+
+                    // Writes a splatted vector into the second half of `data`
+                    // and checks that the first half is untouched while the
+                    // second half holds the written value.
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn write_to_slice_aligned() {
+                        let (null, non_null) = ptr_vals!($id<i32>);
+                        let mut aligned = A {
+                            data: [null; 2 * $id::<i32>::lanes()],
+                        };
+                        let vec = $id::<i32>::splat(non_null);
+                        // `unsafe` is required here because a union field is
+                        // accessed, not because the write itself is unchecked.
+                        unsafe {
+                            vec.write_to_slice_aligned(
+                                &mut aligned.data[$id::<i32>::lanes()..]
+                            )
+                        };
+                        for (index, &b) in
+                            unsafe { aligned.data.iter().enumerate() } {
+                            if index < $id::<i32>::lanes() {
+                                assert_eq!(b, null);
+                            } else {
+                                assert_eq!(b, non_null);
+                                assert_eq!(
+                                    b, vec.extract(index - $id::<i32>::lanes())
+                                );
+                            }
+                        }
+                    }
+
+                    // FIXME: wasm-bindgen-test does not support #[should_panic]
+                    // #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    #[cfg(not(target_arch = "wasm32"))]
+                    #[test]
+                    #[should_panic]
+                    fn write_to_slice_aligned_fail_lanes() {
+                        let (null, non_null) = ptr_vals!($id<i32>);
+                        let mut aligned = A {
+                            data: [null; 2 * $id::<i32>::lanes()],
+                        };
+                        let vec = $id::<i32>::splat(non_null);
+                        // the slice is not large enough (it is empty) => panic
+                        unsafe {
+                            vec.write_to_slice_aligned(
+                                &mut aligned.data[2 * $id::<i32>::lanes()..]
+                            )
+                        };
+                    }
+
+                    // FIXME: wasm-bindgen-test does not support #[should_panic]
+                    // #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    #[cfg(not(target_arch = "wasm32"))]
+                    #[test]
+                    #[should_panic]
+                    fn write_to_slice_aligned_fail_align() {
+                        let (null, non_null) = ptr_vals!($id<i32>);
+                        unsafe {
+                            let mut aligned = A {
+                                data: [null; 2 * $id::<i32>::lanes()],
+                            };
+
+                            // get a pointer to the front of data
+                            let ptr = aligned.data.as_mut_ptr();
+                            // offset pointer by one element
+                            let ptr = ptr.wrapping_add(1);
+
+                            if ptr.align_offset(
+                                crate::mem::align_of::<$id<i32>>()
+                            ) == 0 {
+                                // the pointer is still properly aligned for
+                                // the vector type, so write_to_slice_aligned
+                                // won't fail here (e.g. this can happen for
+                                // i128x1). So we panic explicitly to make
+                                // this `#[should_panic]` test pass:
+                                panic!("ok");
+                            }
+
+                            // create a slice - this is safe, because the
+                            // elements of the slice exist (`data` holds
+                            // `2 * lanes()` elements), are properly
+                            // initialized, and properly aligned for the
+                            // element type:
+                            let s = slice::from_raw_parts_mut(
+                                ptr, $id::<i32>::lanes()
+                            );
+                            // this should always panic because the slice
+                            // alignment does not match the alignment
+                            // requirements for the vector type:
+                            let vec = $id::<i32>::splat(non_null);
+                            vec.write_to_slice_aligned(s);
+                        }
+                    }
+                }
+            }
+        }
+
+        impl<T> crate::hash::Hash for $id<T> {
+            /// Feeds the bit pattern of the pointer vector into `state`.
+            #[inline]
+            fn hash<H: crate::hash::Hasher>(&self, state: &mut H) {
+                // Reinterpret the pointer vector as the macro's `$usize_ty`
+                // type and delegate to that type's `Hash` implementation.
+                let bits: $usize_ty = unsafe { crate::mem::transmute(*self) };
+                crate::hash::Hash::hash(&bits, state);
+            }
+        }
+
+        test_if! {
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _hash>] {
+                    use super::*;
+                    // Checks that hashing a pointer vector produces the same
+                    // digest as hashing an array holding the same pointers.
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn hash() {
+                        use crate::hash::{Hash, Hasher};
+                        // `SipHasher13` is deprecated, hence the `allow`s.
+                        #[allow(deprecated)]
+                        use crate::hash::{SipHasher13};
+
+                        // Storage whose element addresses serve as distinct
+                        // pointer values.
+                        let values = [1_i32; $elem_count];
+
+                        let mut vec: $id<i32> = Default::default();
+                        let mut array = [
+                            $id::<i32>::null().extract(0);
+                            $elem_count
+                        ];
+
+                        // Fill the vector and the array with the same
+                        // pointers, lane by lane.
+                        for i in 0..$elem_count {
+                            let ptr = &values[i] as *const i32 as *mut i32;
+                            vec = vec.replace(i, ptr);
+                            array[i] = ptr;
+                        }
+
+                        // Both hashers start from the same state: `v_hash`
+                        // is a clone of the fresh `a_hash`.
+                        #[allow(deprecated)]
+                        let mut a_hash = SipHasher13::new();
+                        let mut v_hash = a_hash.clone();
+                        array.hash(&mut a_hash);
+                        vec.hash(&mut v_hash);
+                        assert_eq!(a_hash.finish(), v_hash.finish());
+                    }
+                }
+            }
+        }
+
+        impl<T> $id<T> {
+            /// Calculates the offset from a pointer.
+            ///
+            /// `count` is in units of `T`; e.g. a count of `3` represents a
+            /// pointer offset of `3 * size_of::<T>()` bytes.
+            ///
+            /// # Safety
+            ///
+            /// If any of the following conditions are violated, the result is
+            /// Undefined Behavior:
+            ///
+            /// * Both the starting and resulting pointer must be either in
+            /// bounds or one byte past the end of an allocated object.
+            ///
+            /// * The computed offset, in bytes, cannot overflow an `isize`.
+            ///
+            /// * The offset being in bounds cannot rely on "wrapping around"
+            /// the address space. That is, the infinite-precision sum, in
+            /// bytes, must fit in a `usize`.
+            ///
+            /// The compiler and standard library generally try to ensure
+            /// allocations never reach a size where an offset is a concern. For
+            /// instance, `Vec` and `Box` ensure they never allocate more than
+            /// `isize::MAX` bytes, so `vec.as_ptr().offset(vec.len() as isize)`
+            /// is always safe.
+            ///
+            /// Most platforms fundamentally can't even construct such an
+            /// allocation. For instance, no known 64-bit platform can ever
+            /// serve a request for 2<sup>63</sup> bytes due to page-table
+            /// limitations or splitting the address space. However, some 32-bit
+            /// and 16-bit platforms may successfully serve a request for more
+            /// than `isize::MAX` bytes with things like Physical Address
+            /// Extension. As such, memory acquired directly from allocators or
+            /// memory mapped files may be too large to handle with this
+            /// function.
+            ///
+            /// Consider using `wrapping_offset` instead if these constraints
+            /// are difficult to satisfy. The only advantage of this method is
+            /// that it enables more aggressive compiler optimizations.
+            #[inline]
+            pub unsafe fn offset(self, count: $isize_ty) -> Self {
+                // FIXME: should use LLVM's `add nsw nuw`
+                // Until then this simply forwards to the wrapping variant.
+                self.wrapping_offset(count)
+            }
+
+            /// Computes `self` advanced by `count` elements, wrapping on
+            /// overflow.
+            ///
+            /// `count` is expressed in units of `T`: a count of `3` moves the
+            /// pointer by `3 * size_of::<T>()` bytes.
+            ///
+            /// # Safety
+            ///
+            /// The result is allowed to be out of bounds, but dereferencing
+            /// such a pointer (which requires unsafe) is potentially
+            /// hazardous.
+            ///
+            /// Prefer `.offset(count)` whenever its requirements can be met,
+            /// since it gives the compiler more room to optimize.
+            #[inline]
+            pub fn wrapping_offset(self, count: $isize_ty) -> Self {
+                let elem_size = crate::mem::size_of::<T>() as isize;
+                // note: {+,*} currently performs a `wrapping_{add, mul}`
+                let byte_delta: $isize_ty = count * elem_size;
+                unsafe {
+                    let addr: $isize_ty = crate::mem::transmute(self);
+                    crate::mem::transmute(addr + byte_delta)
+                }
+            }
+
+            /// Calculates the distance between two pointers.
+            ///
+            /// The returned value is in units of `T`: the distance in bytes is
+            /// divided by `mem::size_of::<T>()`.
+            ///
+            /// This function is the inverse of offset.
+            ///
+            /// # Safety
+            ///
+            /// If any of the following conditions are violated, the result is
+            /// Undefined Behavior:
+            ///
+            /// * Both the starting and other pointer must be either in bounds
+            /// or one byte past the end of the same allocated object.
+            ///
+            /// * The distance between the pointers, in bytes, cannot overflow
+            /// an `isize`.
+            ///
+            /// * The distance between the pointers, in bytes, must be an exact
+            /// multiple of the size of `T`.
+            ///
+            /// * The distance being in bounds cannot rely on "wrapping around"
+            /// the address space.
+            ///
+            /// The compiler and standard library generally try to ensure
+            /// allocations never reach a size where an offset is a concern. For
+            /// instance, `Vec` and `Box` ensure they never allocate more than
+            /// `isize::MAX` bytes, so `ptr_into_vec.offset_from(vec.as_ptr())`
+            /// is always safe.
+            ///
+            /// Most platforms fundamentally can't even construct such an
+            /// allocation. For instance, no known 64-bit platform can ever
+            /// serve a request for 2<sup>63</sup> bytes due to page-table
+            /// limitations or splitting the address space. However, some 32-bit
+            /// and 16-bit platforms may successfully serve a request for more
+            /// than `isize::MAX` bytes with things like Physical Address
+            /// Extension. As such, memory acquired directly from allocators or
+            /// memory mapped files may be too large to handle with this
+            /// function.
+            ///
+            /// Consider using `wrapping_offset_from` instead if these constraints
+            /// are difficult to satisfy. The only advantage of this method is
+            /// that it enables more aggressive compiler optimizations.
+            #[inline]
+            pub unsafe fn offset_from(self, origin: Self) -> $isize_ty {
+                // FIXME: should use LLVM's `sub nsw nuw`.
+                // Until then this simply forwards to the wrapping variant.
+                self.wrapping_offset_from(origin)
+            }
+
+            /// Calculates the distance between two pointers.
+            ///
+            /// The returned value is in units of `T`: the distance in bytes is
+            /// divided by `mem::size_of::<T>()`.
+            ///
+            /// The distance is measured from `origin` to `self` (that is,
+            /// `self - origin`), which makes this the inverse of
+            /// `wrapping_offset`: as long as the offset does not overflow,
+            /// `p.wrapping_offset(n).wrapping_offset_from(p)` yields `n`.
+            ///
+            /// If the address difference between the two pointers is not a
+            /// multiple of `mem::size_of::<T>()` then the result of the
+            /// division is rounded towards zero.
+            ///
+            /// Though this method is safe for any two pointers, note that its
+            /// result will be mostly useless if the two pointers aren't into
+            /// the same allocated object, for example if they point to two
+            /// different local variables.
+            #[inline]
+            pub fn wrapping_offset_from(self, origin: Self) -> $isize_ty {
+                let x: $isize_ty = unsafe { crate::mem::transmute(self) };
+                let y: $isize_ty = unsafe { crate::mem::transmute(origin) };
+                // note: {-,/} currently perform wrapping_{sub, div}
+                // `self - origin`, not the other way round: the result must
+                // be positive when `self` lies above `origin`.
+                // NOTE(review): for a zero-sized `T` the divisor is 0 -
+                // confirm how the vector division behaves in that case.
+                (x - y) / (crate::mem::size_of::<T>() as isize)
+            }
+
+            /// Calculates the offset from a pointer (convenience for
+            /// `.offset(count as isize)`).
+            ///
+            /// `count` is in units of `T`; e.g. a count of 3 represents a
+            /// pointer offset of `3 * size_of::<T>()` bytes.
+            ///
+            /// # Safety
+            ///
+            /// If any of the following conditions are violated, the result is
+            /// Undefined Behavior:
+            ///
+            /// * Both the starting and resulting pointer must be either in
+            /// bounds or one byte past the end of an allocated object.
+            ///
+            /// * The computed offset, in bytes, cannot overflow an `isize`.
+            ///
+            /// * The offset being in bounds cannot rely on "wrapping around"
+            /// the address space. That is, the infinite-precision sum must fit
+            /// in a `usize`.
+            ///
+            /// The compiler and standard library generally try to ensure
+            /// allocations never reach a size where an offset is a concern. For
+            /// instance, `Vec` and `Box` ensure they never allocate more than
+            /// `isize::MAX` bytes, so `vec.as_ptr().add(vec.len())` is always
+            /// safe.
+            ///
+            /// Most platforms fundamentally can't even construct such an
+            /// allocation. For instance, no known 64-bit platform can ever
+            /// serve a request for 2<sup>63</sup> bytes due to page-table
+            /// limitations or splitting the address space. However, some 32-bit
+            /// and 16-bit platforms may successfully serve a request for more
+            /// than `isize::MAX` bytes with things like Physical Address
+            /// Extension. As such, memory acquired directly from allocators or
+            /// memory mapped files may be too large to handle with this
+            /// function.
+            ///
+            /// Consider using `wrapping_offset` instead if these constraints
+            /// are difficult to satisfy. The only advantage of this method is
+            /// that it enables more aggressive compiler optimizations.
+            #[inline]
+            #[allow(clippy::should_implement_trait)]
+            pub unsafe fn add(self, count: $usize_ty) -> Self {
+                // Convert the unsigned count to the signed type `offset`
+                // takes.
+                self.offset(count.cast())
+            }
+
+            /// Calculates the offset from a pointer (convenience for
+            /// `.offset((count as isize).wrapping_neg())`).
+            ///
+            /// `count` is in units of `T`; e.g. a `count` of 3 represents a
+            /// pointer offset of `3 * size_of::<T>()` bytes.
+            ///
+            /// # Safety
+            ///
+            /// If any of the following conditions are violated, the result is
+            /// Undefined Behavior:
+            ///
+            /// * Both the starting and resulting pointer must be either in
+            /// bounds or one byte past the end of an allocated object.
+            ///
+            /// * The computed offset cannot exceed `isize::MAX` **bytes**.
+            ///
+            /// * The offset being in bounds cannot rely on "wrapping around"
+            /// the address space. That is, the infinite-precision sum must fit
+            /// in a `usize`.
+            ///
+            /// The compiler and standard library generally try to ensure
+            /// allocations never reach a size where an offset is a concern. For
+            /// instance, `Vec` and `Box` ensure they never allocate more than
+            /// `isize::MAX` bytes, so
+            /// `vec.as_ptr().add(vec.len()).sub(vec.len())` is always safe.
+            ///
+            /// Most platforms fundamentally can't even construct such an
+            /// allocation. For instance, no known 64-bit platform can ever
+            /// serve a request for 2<sup>63</sup> bytes due to page-table
+            /// limitations or splitting the address space. However, some 32-bit
+            /// and 16-bit platforms may successfully serve a request for more
+            /// than `isize::MAX` bytes with things like Physical Address
+            /// Extension. As such, memory acquired directly from allocators or
+            /// memory mapped files *may* be too large to handle with this
+            /// function.
+            ///
+            /// Consider using `wrapping_offset` instead if these constraints
+            /// are difficult to satisfy. The only advantage of this method is
+            /// that it enables more aggressive compiler optimizations.
+            #[inline]
+            #[allow(clippy::should_implement_trait)]
+            pub unsafe fn sub(self, count: $usize_ty) -> Self {
+                // Convert the unsigned count to the signed type `offset`
+                // takes, then negate it to move backwards.
+                let x: $isize_ty = count.cast();
+                // note: - is currently wrapping_neg
+                self.offset(-x)
+            }
+
+            /// Calculates the offset from a pointer using wrapping arithmetic
+            /// (convenience for `.wrapping_offset(count as isize)`).
+            ///
+            /// `count` is in units of `T`; e.g. a `count` of 3 represents a
+            /// pointer offset of `3 * size_of::<T>()` bytes.
+            ///
+            /// # Safety
+            ///
+            /// The resulting pointer does not need to be in bounds, but it is
+            /// potentially hazardous to dereference (which requires `unsafe`).
+            ///
+            /// Always use `.add(count)` instead when possible, because `add`
+            /// allows the compiler to optimize better.
+            #[inline]
+            pub fn wrapping_add(self, count: $usize_ty) -> Self {
+                // Convert the unsigned count to the signed type
+                // `wrapping_offset` takes.
+                self.wrapping_offset(count.cast())
+            }
+
+            /// Calculates the offset from a pointer using wrapping arithmetic
+            /// (convenience for
+            /// `.wrapping_offset((count as isize).wrapping_neg())`).
+            ///
+            /// `count` is in units of `T`; e.g. a `count` of 3 represents a
+            /// pointer offset of `3 * size_of::<T>()` bytes.
+            ///
+            /// # Safety
+            ///
+            /// The resulting pointer does not need to be in bounds, but it is
+            /// potentially hazardous to dereference (which requires `unsafe`).
+            ///
+            /// Always use `.sub(count)` instead when possible, because `sub`
+            /// allows the compiler to optimize better.
+            #[inline]
+            pub fn wrapping_sub(self, count: $usize_ty) -> Self {
+                // Convert the unsigned count to the signed type
+                // `wrapping_offset` takes, then negate it to move backwards.
+                let x: $isize_ty = count.cast();
+                // note: - is currently wrapping_neg (same form as in `sub`)
+                self.wrapping_offset(-x)
+            }
+        }
+
+        impl<T> $id<T> {
+            /// Rearranges the vector's elements as selected by `indices`.
+            ///
+            /// Only available when the vector type implements
+            /// `codegen::shuffle1_dyn::Shuffle1Dyn`; `I` is that
+            /// implementation's `Indices` type.
+            #[inline]
+            pub fn shuffle1_dyn<I>(self, indices: I) -> Self
+            where
+                Self: codegen::shuffle1_dyn::Shuffle1Dyn<Indices = I>,
+            {
+                // Name the trait explicitly so the call resolves to the
+                // trait method rather than to this inherent method.
+                <Self as codegen::shuffle1_dyn::Shuffle1Dyn>::shuffle1_dyn(
+                    self, indices,
+                )
+            }
+        }
+
+        test_if! {
+                $test_tt:
+            paste::item! {
+                pub mod [<$id _shuffle1_dyn>] {
+                    use super::*;
+                    // Shuffles a vector of alternating non-null / null
+                    // pointers with index vectors that select only the even
+                    // or only the odd lanes, and checks that the result is a
+                    // uniform vector.
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn shuffle1_dyn() {
+                        let (null, non_null) = ptr_vals!($id<i32>);
+
+                        // alternating = [non_null, null, non_null, null, ...]
+                        let mut alternating = $id::<i32>::splat(null);
+                        for i in 0..$id::<i32>::lanes() {
+                            if i % 2 == 0 {
+                                alternating = alternating.replace(i, non_null);
+                            }
+                        }
+
+                        type Indices = <$id<i32>
+                            as codegen::shuffle1_dyn::Shuffle1Dyn>::Indices;
+                        // even = [0, 0, 2, 2, 4, 4, ..]
+                        let even = {
+                            let mut v = Indices::splat(0);
+                            for i in 0..$id::<i32>::lanes() {
+                                if i % 2 == 0 {
+                                    v = v.replace(i, (i as u8).into());
+                                } else {
+                                v = v.replace(i, (i as u8 - 1).into());
+                                }
+                            }
+                            v
+                        };
+                        // odd = [1, 1, 3, 3, 5, 5, ...]
+                        let odd = {
+                            let mut v = Indices::splat(0);
+                            for i in 0..$id::<i32>::lanes() {
+                                if i % 2 != 0 {
+                                    v = v.replace(i, (i as u8).into());
+                                } else {
+                                    v = v.replace(i, (i as u8 + 1).into());
+                                }
+                            }
+                            v
+                        };
+
+                        // every even lane of `alternating` is non-null
+                        assert_eq!(
+                            alternating.shuffle1_dyn(even),
+                            $id::<i32>::splat(non_null)
+                        );
+                        // with a single lane, `odd` holds index 1, which is
+                        // out of range, so the check is skipped in that case
+                        if $id::<i32>::lanes() > 1 {
+                            assert_eq!(
+                                alternating.shuffle1_dyn(odd),
+                                $id::<i32>::splat(null)
+                            );
+                        }
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/ops.rs b/vendor/packed_simd_2/src/api/ops.rs
new file mode 100644
index 000000000..f71c98795
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/ops.rs
@@ -0,0 +1,32 @@
+//! Implementation of the `ops` traits
+#[macro_use]
+mod vector_mask_bitwise;
+#[macro_use]
+mod scalar_mask_bitwise;
+
+#[macro_use]
+mod vector_arithmetic;
+#[macro_use]
+mod scalar_arithmetic;
+
+#[macro_use]
+mod vector_bitwise;
+#[macro_use]
+mod scalar_bitwise;
+
+#[macro_use]
+mod vector_shifts;
+#[macro_use]
+mod scalar_shifts;
+
+#[macro_use]
+mod vector_rotates;
+
+#[macro_use]
+mod vector_neg;
+
+#[macro_use]
+mod vector_int_min_max;
+
+#[macro_use]
+mod vector_float_min_max;
diff --git a/vendor/packed_simd_2/src/api/ops/scalar_arithmetic.rs b/vendor/packed_simd_2/src/api/ops/scalar_arithmetic.rs
new file mode 100644
index 000000000..da1a2037e
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/ops/scalar_arithmetic.rs
@@ -0,0 +1,203 @@
+//! Vertical (lane-wise) vector-scalar / scalar-vector arithmetic operations.
+
+macro_rules! impl_ops_scalar_arithmetic { // Implements `+ - * / %` and their `*Assign` forms between the vector `$id` and the scalar `$elem_ty`.
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { // `$elem_count` is matched but not used in this expansion.
+        impl crate::ops::Add<$elem_ty> for $id { // vector + scalar
+            type Output = Self;
+            #[inline]
+            fn add(self, other: $elem_ty) -> Self {
+                self + $id::splat(other) // splat the scalar, then apply `+` to two `$id` values
+            }
+        }
+        impl crate::ops::Add<$id> for $elem_ty { // scalar + vector
+            type Output = $id;
+            #[inline]
+            fn add(self, other: $id) -> $id {
+                $id::splat(self) + other
+            }
+        }
+
+        impl crate::ops::Sub<$elem_ty> for $id { // vector - scalar
+            type Output = Self;
+            #[inline]
+            fn sub(self, other: $elem_ty) -> Self {
+                self - $id::splat(other)
+            }
+        }
+        impl crate::ops::Sub<$id> for $elem_ty { // scalar - vector (the splatted scalar is the left operand)
+            type Output = $id;
+            #[inline]
+            fn sub(self, other: $id) -> $id {
+                $id::splat(self) - other
+            }
+        }
+
+        impl crate::ops::Mul<$elem_ty> for $id { // vector * scalar
+            type Output = Self;
+            #[inline]
+            fn mul(self, other: $elem_ty) -> Self {
+                self * $id::splat(other)
+            }
+        }
+        impl crate::ops::Mul<$id> for $elem_ty { // scalar * vector
+            type Output = $id;
+            #[inline]
+            fn mul(self, other: $id) -> $id {
+                $id::splat(self) * other
+            }
+        }
+
+        impl crate::ops::Div<$elem_ty> for $id { // vector / scalar
+            type Output = Self;
+            #[inline]
+            fn div(self, other: $elem_ty) -> Self {
+                self / $id::splat(other)
+            }
+        }
+        impl crate::ops::Div<$id> for $elem_ty { // scalar / vector (the splatted scalar is the dividend)
+            type Output = $id;
+            #[inline]
+            fn div(self, other: $id) -> $id {
+                $id::splat(self) / other
+            }
+        }
+
+        impl crate::ops::Rem<$elem_ty> for $id { // vector % scalar
+            type Output = Self;
+            #[inline]
+            fn rem(self, other: $elem_ty) -> Self {
+                self % $id::splat(other)
+            }
+        }
+        impl crate::ops::Rem<$id> for $elem_ty { // scalar % vector (the splatted scalar is the dividend)
+            type Output = $id;
+            #[inline]
+            fn rem(self, other: $id) -> $id {
+                $id::splat(self) % other
+            }
+        }
+
+        impl crate::ops::AddAssign<$elem_ty> for $id { // `v += s`
+            #[inline]
+            fn add_assign(&mut self, other: $elem_ty) {
+                *self = *self + other; // goes through the `Add<$elem_ty>` impl above
+            }
+        }
+
+        impl crate::ops::SubAssign<$elem_ty> for $id { // `v -= s`
+            #[inline]
+            fn sub_assign(&mut self, other: $elem_ty) {
+                *self = *self - other;
+            }
+        }
+
+        impl crate::ops::MulAssign<$elem_ty> for $id { // `v *= s`
+            #[inline]
+            fn mul_assign(&mut self, other: $elem_ty) {
+                *self = *self * other;
+            }
+        }
+
+        impl crate::ops::DivAssign<$elem_ty> for $id { // `v /= s`
+            #[inline]
+            fn div_assign(&mut self, other: $elem_ty) {
+                *self = *self / other;
+            }
+        }
+
+        impl crate::ops::RemAssign<$elem_ty> for $id { // `v %= s`
+            #[inline]
+            fn rem_assign(&mut self, other: $elem_ty) {
+                *self = *self % other;
+            }
+        }
+
+        test_if!{ // the test module below is passed to `test_if!` together with `$test_tt`
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _ops_scalar_arith>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn ops_scalar_arithmetic() {
+                        let zi = 0 as $elem_ty; // scalar constants 0, 1, 2 and 4 ...
+                        let oi = 1 as $elem_ty;
+                        let ti = 2 as $elem_ty;
+                        let fi = 4 as $elem_ty;
+                        let z = $id::splat(zi); // ... and the same constants as splatted vectors
+                        let o = $id::splat(oi);
+                        let t = $id::splat(ti);
+                        let f = $id::splat(fi);
+
+                        // add (each identity is checked as scalar-op-vector and as vector-op-scalar)
+                        assert_eq!(zi + z, z);
+                        assert_eq!(z + zi, z);
+                        assert_eq!(oi + z, o);
+                        assert_eq!(o + zi, o);
+                        assert_eq!(ti + z, t);
+                        assert_eq!(t + zi, t);
+                        assert_eq!(ti + t, f);
+                        assert_eq!(t + ti, f);
+                        // sub
+                        assert_eq!(zi - z, z);
+                        assert_eq!(z - zi, z);
+                        assert_eq!(oi - z, o);
+                        assert_eq!(o - zi, o);
+                        assert_eq!(ti - z, t);
+                        assert_eq!(t - zi, t);
+                        assert_eq!(fi - t, t);
+                        assert_eq!(f - ti, t);
+                        assert_eq!(f - o - o, t);
+                        assert_eq!(f - oi - oi, t);
+                        // mul
+                        assert_eq!(zi * z, z);
+                        assert_eq!(z * zi, z);
+                        assert_eq!(zi * o, z);
+                        assert_eq!(z * oi, z);
+                        assert_eq!(zi * t, z);
+                        assert_eq!(z * ti, z);
+                        assert_eq!(oi * t, t);
+                        assert_eq!(o * ti, t);
+                        assert_eq!(ti * t, f);
+                        assert_eq!(t * ti, f);
+                        // div (divisors are always 1 or 2, never zero)
+                        assert_eq!(zi / o, z);
+                        assert_eq!(z / oi, z);
+                        assert_eq!(ti / o, t);
+                        assert_eq!(t / oi, t);
+                        assert_eq!(fi / o, f);
+                        assert_eq!(f / oi, f);
+                        assert_eq!(ti / t, o);
+                        assert_eq!(t / ti, o);
+                        assert_eq!(fi / t, t);
+                        assert_eq!(f / ti, t);
+                        // rem
+                        assert_eq!(oi % o, z);
+                        assert_eq!(o % oi, z);
+                        assert_eq!(fi % t, z);
+                        assert_eq!(f % ti, z);
+
+                        { // compound assignment with a scalar right-hand side
+                            let mut v = z;
+                            assert_eq!(v, z);
+                            v += oi; // add_assign
+                            assert_eq!(v, o);
+                            v -= oi; // sub_assign
+                            assert_eq!(v, z);
+                            v = t;
+                            v *= oi; // mul_assign
+                            assert_eq!(v, t);
+                            v *= ti;
+                            assert_eq!(v, f);
+                            v /= oi; // div_assign
+                            assert_eq!(v, f);
+                            v /= ti;
+                            assert_eq!(v, t);
+                            v %= ti; // rem_assign
+                            assert_eq!(v, z);
+                        }
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/ops/scalar_bitwise.rs b/vendor/packed_simd_2/src/api/ops/scalar_bitwise.rs
new file mode 100644
index 000000000..88216769a
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/ops/scalar_bitwise.rs
@@ -0,0 +1,162 @@
+//! Vertical (lane-wise) vector-scalar / scalar-vector bitwise operations.
+
+macro_rules! impl_ops_scalar_bitwise { // Implements `^ & |` and their `*Assign` forms between the vector `$id` and the scalar `$elem_ty`.
+    (
+        [$elem_ty:ident; $elem_count:expr]:
+        $id:ident | $test_tt:tt |
+        ($true:expr, $false:expr)
+    ) => { // `$elem_count`, `$true` and `$false` are matched but not used in this expansion.
+        impl crate::ops::BitXor<$elem_ty> for $id { // vector ^ scalar
+            type Output = Self;
+            #[inline]
+            fn bitxor(self, other: $elem_ty) -> Self {
+                self ^ $id::splat(other) // splat the scalar, then apply `^` to two `$id` values
+            }
+        }
+        impl crate::ops::BitXor<$id> for $elem_ty { // scalar ^ vector
+            type Output = $id;
+            #[inline]
+            fn bitxor(self, other: $id) -> $id {
+                $id::splat(self) ^ other
+            }
+        }
+
+        impl crate::ops::BitAnd<$elem_ty> for $id { // vector & scalar
+            type Output = Self;
+            #[inline]
+            fn bitand(self, other: $elem_ty) -> Self {
+                self & $id::splat(other)
+            }
+        }
+        impl crate::ops::BitAnd<$id> for $elem_ty { // scalar & vector
+            type Output = $id;
+            #[inline]
+            fn bitand(self, other: $id) -> $id {
+                $id::splat(self) & other
+            }
+        }
+
+        impl crate::ops::BitOr<$elem_ty> for $id { // vector | scalar
+            type Output = Self;
+            #[inline]
+            fn bitor(self, other: $elem_ty) -> Self {
+                self | $id::splat(other)
+            }
+        }
+        impl crate::ops::BitOr<$id> for $elem_ty { // scalar | vector
+            type Output = $id;
+            #[inline]
+            fn bitor(self, other: $id) -> $id {
+                $id::splat(self) | other
+            }
+        }
+
+        impl crate::ops::BitAndAssign<$elem_ty> for $id { // `v &= s`
+            #[inline]
+            fn bitand_assign(&mut self, other: $elem_ty) {
+                *self = *self & other; // goes through the `BitAnd<$elem_ty>` impl above
+            }
+        }
+        impl crate::ops::BitOrAssign<$elem_ty> for $id { // `v |= s`
+            #[inline]
+            fn bitor_assign(&mut self, other: $elem_ty) {
+                *self = *self | other;
+            }
+        }
+        impl crate::ops::BitXorAssign<$elem_ty> for $id { // `v ^= s`
+            #[inline]
+            fn bitxor_assign(&mut self, other: $elem_ty) {
+                *self = *self ^ other;
+            }
+        }
+
+        test_if!{ // the test module below is passed to `test_if!` together with `$test_tt`
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _ops_scalar_bitwise>] {
+                    use super::*;
+
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn ops_scalar_bitwise() {
+                        let zi = 0 as $elem_ty; // scalar constants 0, 1 and 2 ...
+                        let oi = 1 as $elem_ty;
+                        let ti = 2 as $elem_ty;
+                        let z = $id::splat(zi); // ... and the same constants as splatted vectors
+                        let o = $id::splat(oi);
+                        let t = $id::splat(ti);
+
+                        // BitAnd (each identity is checked as scalar-op-vector and as vector-op-scalar):
+                        assert_eq!(oi & o, o);
+                        assert_eq!(o & oi, o);
+                        assert_eq!(oi & z, z);
+                        assert_eq!(o & zi, z);
+                        assert_eq!(zi & o, z);
+                        assert_eq!(z & oi, z);
+                        assert_eq!(zi & z, z);
+                        assert_eq!(z & zi, z);
+
+                        assert_eq!(ti & t, t);
+                        assert_eq!(t & ti, t);
+                        assert_eq!(ti & o, z); // 2 & 1 == 0
+                        assert_eq!(t & oi, z);
+                        assert_eq!(oi & t, z);
+                        assert_eq!(o & ti, z);
+
+                        // BitOr:
+                        assert_eq!(oi | o, o);
+                        assert_eq!(o | oi, o);
+                        assert_eq!(oi | z, o);
+                        assert_eq!(o | zi, o);
+                        assert_eq!(zi | o, o);
+                        assert_eq!(z | oi, o);
+                        assert_eq!(zi | z, z);
+                        assert_eq!(z | zi, z);
+
+                        assert_eq!(ti | t, t);
+                        assert_eq!(t | ti, t);
+                        assert_eq!(zi | t, t);
+                        assert_eq!(z | ti, t);
+                        assert_eq!(ti | z, t);
+                        assert_eq!(t | zi, t);
+
+                        // BitXOR:
+                        assert_eq!(oi ^ o, z);
+                        assert_eq!(o ^ oi, z);
+                        assert_eq!(zi ^ z, z);
+                        assert_eq!(z ^ zi, z);
+                        assert_eq!(zi ^ o, o);
+                        assert_eq!(z ^ oi, o);
+                        assert_eq!(oi ^ z, o);
+                        assert_eq!(o ^ zi, o);
+
+                        assert_eq!(ti ^ t, z);
+                        assert_eq!(t ^ ti, z);
+                        assert_eq!(ti ^ z, t);
+                        assert_eq!(t ^ zi, t);
+                        assert_eq!(zi ^ t, t);
+                        assert_eq!(z ^ ti, t);
+
+                        {
+                            // AndAssign (1 & 2 == 0):
+                            let mut v = o;
+                            v &= ti;
+                            assert_eq!(v, z);
+                        }
+                        {
+                            // OrAssign (0 | 1 == 1):
+                            let mut v = z;
+                            v |= oi;
+                            assert_eq!(v, o);
+                        }
+                        {
+                            // XORAssign (0 ^ 1 == 1):
+                            let mut v = z;
+                            v ^= oi;
+                            assert_eq!(v, o);
+                        }
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/ops/scalar_mask_bitwise.rs b/vendor/packed_simd_2/src/api/ops/scalar_mask_bitwise.rs
new file mode 100644
index 000000000..523a85207
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/ops/scalar_mask_bitwise.rs
@@ -0,0 +1,140 @@
+//! Vertical (lane-wise) vector-scalar / scalar-vector bitwise operations with `bool` scalars.
+
+macro_rules! impl_ops_scalar_mask_bitwise { // Implements `^ & |` and their `*Assign` forms between the vector `$id` and a `bool` scalar.
+    (
+        [$elem_ty:ident; $elem_count:expr]:
+        $id:ident | $test_tt:tt |
+        ($true:expr, $false:expr)
+    ) => { // only `$id` and `$test_tt` are used in this expansion; the other captures are matched but unused.
+        impl crate::ops::BitXor<bool> for $id { // vector ^ bool
+            type Output = Self;
+            #[inline]
+            fn bitxor(self, other: bool) -> Self {
+                self ^ $id::splat(other) // splat the `bool`, then apply `^` to two `$id` values
+            }
+        }
+        impl crate::ops::BitXor<$id> for bool { // bool ^ vector
+            type Output = $id;
+            #[inline]
+            fn bitxor(self, other: $id) -> $id {
+                $id::splat(self) ^ other
+            }
+        }
+
+        impl crate::ops::BitAnd<bool> for $id { // vector & bool
+            type Output = Self;
+            #[inline]
+            fn bitand(self, other: bool) -> Self {
+                self & $id::splat(other)
+            }
+        }
+        impl crate::ops::BitAnd<$id> for bool { // bool & vector
+            type Output = $id;
+            #[inline]
+            fn bitand(self, other: $id) -> $id {
+                $id::splat(self) & other
+            }
+        }
+
+        impl crate::ops::BitOr<bool> for $id { // vector | bool
+            type Output = Self;
+            #[inline]
+            fn bitor(self, other: bool) -> Self {
+                self | $id::splat(other)
+            }
+        }
+        impl crate::ops::BitOr<$id> for bool { // bool | vector
+            type Output = $id;
+            #[inline]
+            fn bitor(self, other: $id) -> $id {
+                $id::splat(self) | other
+            }
+        }
+
+        impl crate::ops::BitAndAssign<bool> for $id { // `v &= b`
+            #[inline]
+            fn bitand_assign(&mut self, other: bool) {
+                *self = *self & other; // goes through the `BitAnd<bool>` impl above
+            }
+        }
+        impl crate::ops::BitOrAssign<bool> for $id { // `v |= b`
+            #[inline]
+            fn bitor_assign(&mut self, other: bool) {
+                *self = *self | other;
+            }
+        }
+        impl crate::ops::BitXorAssign<bool> for $id { // `v ^= b`
+            #[inline]
+            fn bitxor_assign(&mut self, other: bool) {
+                *self = *self ^ other;
+            }
+        }
+
+        test_if!{ // the test module below is passed to `test_if!` together with `$test_tt`
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _ops_scalar_mask_bitwise>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn ops_scalar_mask_bitwise() {
+                        let ti = true; // scalar operands
+                        let fi = false;
+                        let t = $id::splat(ti); // vector operands built from the same scalars
+                        let f = $id::splat(fi);
+                        assert!(t != f); // `splat(true)` and `splat(false)` must differ under both `!=` and `==`
+                        assert!(!(t == f));
+
+                        // BitAnd (each identity is checked as bool-op-vector and as vector-op-bool):
+                        assert_eq!(ti & f, f);
+                        assert_eq!(t & fi, f);
+                        assert_eq!(fi & t, f);
+                        assert_eq!(f & ti, f);
+                        assert_eq!(ti & t, t);
+                        assert_eq!(t & ti, t);
+                        assert_eq!(fi & f, f);
+                        assert_eq!(f & fi, f);
+
+                        // BitOr:
+                        assert_eq!(ti | f, t);
+                        assert_eq!(t | fi, t);
+                        assert_eq!(fi | t, t);
+                        assert_eq!(f | ti, t);
+                        assert_eq!(ti | t, t);
+                        assert_eq!(t | ti, t);
+                        assert_eq!(fi | f, f);
+                        assert_eq!(f | fi, f);
+
+                        // BitXOR:
+                        assert_eq!(ti ^ f, t);
+                        assert_eq!(t ^ fi, t);
+                        assert_eq!(fi ^ t, t);
+                        assert_eq!(f ^ ti, t);
+                        assert_eq!(ti ^ t, f);
+                        assert_eq!(t ^ ti, f);
+                        assert_eq!(fi ^ f, f);
+                        assert_eq!(f ^ fi, f);
+
+                        {
+                            // AndAssign (false & true == false):
+                            let mut v = f;
+                            v &= ti;
+                            assert_eq!(v, f);
+                        }
+                        {
+                            // OrAssign (false | true == true):
+                            let mut v = f;
+                            v |= ti;
+                            assert_eq!(v, t);
+                        }
+                        {
+                            // XORAssign (false ^ true == true):
+                            let mut v = f;
+                            v ^= ti;
+                            assert_eq!(v, t);
+                        }
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/ops/scalar_shifts.rs b/vendor/packed_simd_2/src/api/ops/scalar_shifts.rs
new file mode 100644
index 000000000..9c164ad56
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/ops/scalar_shifts.rs
@@ -0,0 +1,107 @@
+//! Vertical (lane-wise) vector-scalar shift operations.
+
+macro_rules! impl_ops_scalar_shifts { // Implements `<<`, `>>`, `<<=` and `>>=` on the vector `$id` with a `u32` shift amount.
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { // `$elem_count` is matched but not used in this expansion.
+        impl crate::ops::Shl<u32> for $id { // vector << u32
+            type Output = Self;
+            #[inline]
+            fn shl(self, other: u32) -> Self {
+                self << $id::splat(other as $elem_ty) // the amount is cast with `as $elem_ty`, splatted, then `<<` is applied to two `$id` values
+            }
+        }
+        impl crate::ops::Shr<u32> for $id { // vector >> u32
+            type Output = Self;
+            #[inline]
+            fn shr(self, other: u32) -> Self {
+                self >> $id::splat(other as $elem_ty) // NOTE(review): no range check on `other` is visible here — confirm behavior for amounts >= the lane bit width
+            }
+        }
+
+        impl crate::ops::ShlAssign<u32> for $id { // `v <<= n`
+            #[inline]
+            fn shl_assign(&mut self, other: u32) {
+                *self = *self << other; // goes through the `Shl<u32>` impl above
+            }
+        }
+        impl crate::ops::ShrAssign<u32> for $id { // `v >>= n`
+            #[inline]
+            fn shr_assign(&mut self, other: u32) {
+                *self = *self >> other;
+            }
+        }
+        test_if!{ // the test module below is passed to `test_if!` together with `$test_tt`
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _ops_scalar_shifts>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    #[cfg_attr(any(target_arch = "s390x", target_arch = "sparc64"),
+                               allow(unreachable_code,
+                                     unused_variables,
+                                     unused_mut)
+                    )]
+                    // ^^^ FIXME: https://github.com/rust-lang/rust/issues/55344
+                    fn ops_scalar_shifts() {
+                        let z = $id::splat(0 as $elem_ty); // vectors holding 0, 1, 2 and 4
+                        let o = $id::splat(1 as $elem_ty);
+                        let t = $id::splat(2 as $elem_ty);
+                        let f = $id::splat(4 as $elem_ty);
+
+                        {
+                            let zi = 0 as u32; // shift amounts 0, 1, 2 ...
+                            let oi = 1 as u32;
+                            let ti = 2 as u32;
+                            let maxi // ... and the bit width of `$elem_ty` minus one
+                                = (mem::size_of::<$elem_ty>() * 8 - 1) as u32;
+
+                            // shr
+                            assert_eq!(z >> zi, z);
+                            assert_eq!(z >> oi, z);
+                            assert_eq!(z >> ti, z);
+                            assert_eq!(z >> ti, z); // NOTE(review): repeats the previous assertion — possibly meant another amount such as `maxi`; confirm
+
+                            #[cfg(any(target_arch = "s390x", target_arch = "sparc64"))] {
+                                // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/13
+                                return; // on these targets every assertion below is skipped
+                            }
+
+                            assert_eq!(o >> zi, o);
+                            assert_eq!(t >> zi, t);
+                            assert_eq!(f >> zi, f);
+                            assert_eq!(f >> maxi, z);
+
+                            assert_eq!(o >> oi, z);
+                            assert_eq!(t >> oi, o);
+                            assert_eq!(t >> ti, z);
+                            assert_eq!(f >> oi, t);
+                            assert_eq!(f >> ti, o);
+                            assert_eq!(f >> maxi, z); // same check as a few lines above
+
+                            // shl
+                            assert_eq!(z << zi, z);
+                            assert_eq!(o << zi, o);
+                            assert_eq!(t << zi, t);
+                            assert_eq!(f << zi, f);
+                            assert_eq!(f << maxi, z); // expects the single set bit of 4 to be shifted out
+
+                            assert_eq!(o << oi, t);
+                            assert_eq!(o << ti, f);
+                            assert_eq!(t << oi, f);
+
+                            {  // shr_assign
+                                let mut v = o;
+                                v >>= oi;
+                                assert_eq!(v, z);
+                            }
+                            {  // shl_assign
+                                let mut v = o;
+                                v <<= oi;
+                                assert_eq!(v, t);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/ops/vector_arithmetic.rs b/vendor/packed_simd_2/src/api/ops/vector_arithmetic.rs
new file mode 100644
index 000000000..7057f52d0
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/ops/vector_arithmetic.rs
@@ -0,0 +1,148 @@
+//! Vertical (lane-wise) vector-vector arithmetic operations.
+
+macro_rules! impl_ops_vector_arithmetic { // Implements `+ - * / %` and their `*Assign` forms between two `$id` vectors.
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { // `$elem_count` is matched but unused; `$elem_ty` is only used by the test below.
+        impl crate::ops::Add for $id { // vector + vector
+            type Output = Self;
+            #[inline]
+            fn add(self, other: Self) -> Self {
+                use crate::llvm::simd_add;
+                unsafe { Simd(simd_add(self.0, other.0)) } // applies `simd_add` to the wrapped `.0` values and re-wraps the result; NOTE(review): no SAFETY justification is given — confirm
+            }
+        }
+
+        impl crate::ops::Sub for $id { // vector - vector
+            type Output = Self;
+            #[inline]
+            fn sub(self, other: Self) -> Self {
+                use crate::llvm::simd_sub;
+                unsafe { Simd(simd_sub(self.0, other.0)) }
+            }
+        }
+
+        impl crate::ops::Mul for $id { // vector * vector
+            type Output = Self;
+            #[inline]
+            fn mul(self, other: Self) -> Self {
+                use crate::llvm::simd_mul;
+                unsafe { Simd(simd_mul(self.0, other.0)) }
+            }
+        }
+
+        impl crate::ops::Div for $id { // vector / vector
+            type Output = Self;
+            #[inline]
+            fn div(self, other: Self) -> Self {
+                use crate::llvm::simd_div;
+                unsafe { Simd(simd_div(self.0, other.0)) } // NOTE(review): behavior when a lane of `other` is zero is not visible here — confirm
+            }
+        }
+
+        impl crate::ops::Rem for $id { // vector % vector
+            type Output = Self;
+            #[inline]
+            fn rem(self, other: Self) -> Self {
+                use crate::llvm::simd_rem;
+                unsafe { Simd(simd_rem(self.0, other.0)) } // NOTE(review): behavior when a lane of `other` is zero is not visible here — confirm
+            }
+        }
+
+        impl crate::ops::AddAssign for $id { // `a += b`
+            #[inline]
+            fn add_assign(&mut self, other: Self) {
+                *self = *self + other; // goes through the `Add` impl above
+            }
+        }
+
+        impl crate::ops::SubAssign for $id { // `a -= b`
+            #[inline]
+            fn sub_assign(&mut self, other: Self) {
+                *self = *self - other;
+            }
+        }
+
+        impl crate::ops::MulAssign for $id { // `a *= b`
+            #[inline]
+            fn mul_assign(&mut self, other: Self) {
+                *self = *self * other;
+            }
+        }
+
+        impl crate::ops::DivAssign for $id { // `a /= b`
+            #[inline]
+            fn div_assign(&mut self, other: Self) {
+                *self = *self / other;
+            }
+        }
+
+        impl crate::ops::RemAssign for $id { // `a %= b`
+            #[inline]
+            fn rem_assign(&mut self, other: Self) {
+                *self = *self % other;
+            }
+        }
+
+        test_if!{ // the test module below is passed to `test_if!` together with `$test_tt`
+            $test_tt:
+            paste::item! {
+               pub mod [<$id _ops_vector_arith>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn ops_vector_arithmetic() {
+                        let z = $id::splat(0 as $elem_ty); // vectors holding 0, 1, 2 and 4
+                        let o = $id::splat(1 as $elem_ty);
+                        let t = $id::splat(2 as $elem_ty);
+                        let f = $id::splat(4 as $elem_ty);
+
+                        // add
+                        assert_eq!(z + z, z);
+                        assert_eq!(o + z, o);
+                        assert_eq!(t + z, t);
+                        assert_eq!(t + t, f);
+                        // sub
+                        assert_eq!(z - z, z);
+                        assert_eq!(o - z, o);
+                        assert_eq!(t - z, t);
+                        assert_eq!(f - t, t);
+                        assert_eq!(f - o - o, t);
+                        // mul
+                        assert_eq!(z * z, z);
+                        assert_eq!(z * o, z);
+                        assert_eq!(z * t, z);
+                        assert_eq!(o * t, t);
+                        assert_eq!(t * t, f);
+                        // div (divisors are always 1 or 2, never zero)
+                        assert_eq!(z / o, z);
+                        assert_eq!(t / o, t);
+                        assert_eq!(f / o, f);
+                        assert_eq!(t / t, o);
+                        assert_eq!(f / t, t);
+                        // rem
+                        assert_eq!(o % o, z);
+                        assert_eq!(f % t, z);
+
+                        { // compound assignment with a vector right-hand side
+                            let mut v = z;
+                            assert_eq!(v, z);
+                            v += o; // add_assign
+                            assert_eq!(v, o);
+                            v -= o; // sub_assign
+                            assert_eq!(v, z);
+                            v = t;
+                            v *= o; // mul_assign
+                            assert_eq!(v, t);
+                            v *= t;
+                            assert_eq!(v, f);
+                            v /= o; // div_assign
+                            assert_eq!(v, f);
+                            v /= t;
+                            assert_eq!(v, t);
+                            v %= t; // rem_assign
+                            assert_eq!(v, z);
+                        }
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/ops/vector_bitwise.rs b/vendor/packed_simd_2/src/api/ops/vector_bitwise.rs
new file mode 100644
index 000000000..7be9603fa
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/ops/vector_bitwise.rs
@@ -0,0 +1,129 @@
+//! Vertical (lane-wise) vector-vector bitwise operations.
+
+macro_rules! impl_ops_vector_bitwise { // Implements `!`, `^ & |` and the `*Assign` forms between two `$id` vectors.
+    (
+        [$elem_ty:ident; $elem_count:expr]:
+        $id:ident | $test_tt:tt |
+        ($true:expr, $false:expr)
+    ) => { // `$true` feeds the `Not` impl; `$elem_count` and `$false` are matched but not used in this expansion.
+        impl crate::ops::Not for $id { // !vector
+            type Output = Self;
+            #[inline]
+            fn not(self) -> Self {
+                Self::splat($true) ^ self // NOT is expressed as XOR with a splat of `$true`
+            }
+        }
+        impl crate::ops::BitXor for $id { // vector ^ vector
+            type Output = Self;
+            #[inline]
+            fn bitxor(self, other: Self) -> Self {
+                use crate::llvm::simd_xor;
+                unsafe { Simd(simd_xor(self.0, other.0)) } // applies `simd_xor` to the wrapped `.0` values and re-wraps the result; NOTE(review): no SAFETY justification is given — confirm
+            }
+        }
+        impl crate::ops::BitAnd for $id { // vector & vector
+            type Output = Self;
+            #[inline]
+            fn bitand(self, other: Self) -> Self {
+                use crate::llvm::simd_and;
+                unsafe { Simd(simd_and(self.0, other.0)) }
+            }
+        }
+        impl crate::ops::BitOr for $id { // vector | vector
+            type Output = Self;
+            #[inline]
+            fn bitor(self, other: Self) -> Self {
+                use crate::llvm::simd_or;
+                unsafe { Simd(simd_or(self.0, other.0)) }
+            }
+        }
+        impl crate::ops::BitAndAssign for $id { // `a &= b`
+            #[inline]
+            fn bitand_assign(&mut self, other: Self) {
+                *self = *self & other; // goes through the `BitAnd` impl above
+            }
+        }
+        impl crate::ops::BitOrAssign for $id { // `a |= b`
+            #[inline]
+            fn bitor_assign(&mut self, other: Self) {
+                *self = *self | other;
+            }
+        }
+        impl crate::ops::BitXorAssign for $id { // `a ^= b`
+            #[inline]
+            fn bitxor_assign(&mut self, other: Self) {
+                *self = *self ^ other;
+            }
+        }
+
+        test_if!{ // the test module below is passed to `test_if!` together with `$test_tt`
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _ops_vector_bitwise>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn ops_vector_bitwise() {
+
+                        let z = $id::splat(0 as $elem_ty); // vectors holding 0, 1 and 2
+                        let o = $id::splat(1 as $elem_ty);
+                        let t = $id::splat(2 as $elem_ty);
+                        let m = $id::splat(!z.extract(0)); // expected `!z`: scalar NOT of lane 0 of `z`, splatted
+
+                        // Not:
+                        assert_eq!(!z, m);
+                        assert_eq!(!m, z);
+
+                        // BitAnd:
+                        assert_eq!(o & o, o);
+                        assert_eq!(o & z, z);
+                        assert_eq!(z & o, z);
+                        assert_eq!(z & z, z);
+
+                        assert_eq!(t & t, t);
+                        assert_eq!(t & o, z); // 2 & 1 == 0
+                        assert_eq!(o & t, z);
+
+                        // BitOr:
+                        assert_eq!(o | o, o);
+                        assert_eq!(o | z, o);
+                        assert_eq!(z | o, o);
+                        assert_eq!(z | z, z);
+
+                        assert_eq!(t | t, t);
+                        assert_eq!(z | t, t);
+                        assert_eq!(t | z, t);
+
+                        // BitXOR:
+                        assert_eq!(o ^ o, z);
+                        assert_eq!(z ^ z, z);
+                        assert_eq!(z ^ o, o);
+                        assert_eq!(o ^ z, o);
+
+                        assert_eq!(t ^ t, z);
+                        assert_eq!(t ^ z, t);
+                        assert_eq!(z ^ t, t);
+
+                        {
+                            // AndAssign (1 & 2 == 0):
+                            let mut v = o;
+                            v &= t;
+                            assert_eq!(v, z);
+                        }
+                        {
+                            // OrAssign (0 | 1 == 1):
+                            let mut v = z;
+                            v |= o;
+                            assert_eq!(v, o);
+                        }
+                        {
+                            // XORAssign (0 ^ 1 == 1):
+                            let mut v = z;
+                            v ^= o;
+                            assert_eq!(v, o);
+                        }
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/ops/vector_float_min_max.rs b/vendor/packed_simd_2/src/api/ops/vector_float_min_max.rs
new file mode 100644
index 000000000..8310667b7
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/ops/vector_float_min_max.rs
@@ -0,0 +1,74 @@
+//! Vertical (lane-wise) vector `min` and `max` for floating-point vectors.
+
+macro_rules! impl_ops_vector_float_min_max {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl $id {
+            /// Lane-wise minimum of two vectors.
+            ///
+            /// Returns a new vector containing the minimum value of each of the
+            /// input vector lanes; if only one lane is NaN, the other lane is used.
+            #[inline]
+            pub fn min(self, x: Self) -> Self {
+                use crate::llvm::simd_fmin;
+                unsafe { Simd(simd_fmin(self.0, x.0)) }
+            }
+
+            /// Lane-wise maximum of two vectors.
+            ///
+            /// Returns a new vector containing the maximum value of each of the
+            /// input vector lanes; if only one lane is NaN, the other lane is used.
+            #[inline]
+            pub fn max(self, x: Self) -> Self {
+                use crate::llvm::simd_fmax;
+                unsafe { Simd(simd_fmax(self.0, x.0)) }
+            }
+        }
+        test_if!{
+            $test_tt:
+            paste::item! {
+                #[cfg(not(any(
+                    // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/223
+                    all(target_arch = "mips", target_endian = "big"),
+                    target_arch = "mips64",
+                )))]
+                pub mod [<$id _ops_vector_min_max>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn min_max() {
+                        let n = crate::$elem_ty::NAN;
+                        let o = $id::splat(1. as $elem_ty);
+                        let t = $id::splat(2. as $elem_ty);
+
+                        let mut m = o; // becomes [2., 1., 2., 1., ...] in the loop below
+                        let mut on = o; // becomes [NaN, 1., NaN, 1., ...] in the loop below
+                        for i in 0..$id::lanes() {
+                            if i % 2 == 0 {
+                                m = m.replace(i, 2. as $elem_ty);
+                                on = on.replace(i, n);
+                            }
+                        }
+
+                        assert_eq!(o.min(t), o);
+                        assert_eq!(t.min(o), o);
+                        assert_eq!(m.min(o), o);
+                        assert_eq!(o.min(m), o);
+                        assert_eq!(m.min(t), m);
+                        assert_eq!(t.min(m), m);
+
+                        assert_eq!(o.max(t), t);
+                        assert_eq!(t.max(o), t);
+                        assert_eq!(m.max(o), m);
+                        assert_eq!(o.max(m), m);
+                        assert_eq!(m.max(t), t);
+                        assert_eq!(t.max(m), t);
+
+                        assert_eq!(on.min(o), o); // NaN lanes fall back to the non-NaN operand
+                        assert_eq!(o.min(on), o);
+                        assert_eq!(on.max(o), o);
+                        assert_eq!(o.max(on), o);
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/ops/vector_int_min_max.rs b/vendor/packed_simd_2/src/api/ops/vector_int_min_max.rs
new file mode 100644
index 000000000..36ea98e6b
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/ops/vector_int_min_max.rs
@@ -0,0 +1,57 @@
+//! Vertical (lane-wise) vector `min` and `max` for integer vectors.
+
+macro_rules! impl_ops_vector_int_min_max {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl $id {
+            /// Lane-wise minimum of two vectors.
+            ///
+            /// Returns a new vector containing the minimum value of each of
+            /// the input vector lanes.
+            #[inline]
+            pub fn min(self, x: Self) -> Self {
+                self.lt(x).select(self, x) // lanes where `self < x` come from `self`, the rest from `x`
+            }
+
+            /// Lane-wise maximum of two vectors.
+            ///
+            /// Returns a new vector containing the maximum value of each of
+            /// the input vector lanes.
+            #[inline]
+            pub fn max(self, x: Self) -> Self {
+                self.gt(x).select(self, x) // lanes where `self > x` come from `self`, the rest from `x`
+            }
+        }
+        test_if!{$test_tt:
+        paste::item! {
+            pub mod [<$id _ops_vector_min_max>] {
+                use super::*;
+                #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                fn min_max() {
+                    let o = $id::splat(1 as $elem_ty);
+                    let t = $id::splat(2 as $elem_ty);
+
+                    let mut m = o; // becomes [2, 1, 2, 1, ...] in the loop below
+                    for i in 0..$id::lanes() {
+                        if i % 2 == 0 {
+                            m = m.replace(i, 2 as $elem_ty);
+                        }
+                    }
+                    assert_eq!(o.min(t), o);
+                    assert_eq!(t.min(o), o);
+                    assert_eq!(m.min(o), o);
+                    assert_eq!(o.min(m), o);
+                    assert_eq!(m.min(t), m);
+                    assert_eq!(t.min(m), m);
+
+                    assert_eq!(o.max(t), t);
+                    assert_eq!(t.max(o), t);
+                    assert_eq!(m.max(o), m);
+                    assert_eq!(o.max(m), m);
+                    assert_eq!(m.max(t), t);
+                    assert_eq!(t.max(m), t);
+                }
+            }
+        }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/ops/vector_mask_bitwise.rs b/vendor/packed_simd_2/src/api/ops/vector_mask_bitwise.rs
new file mode 100644
index 000000000..295fc1ca8
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/ops/vector_mask_bitwise.rs
@@ -0,0 +1,116 @@
+//! Vertical (lane-wise) vector-vector bitwise operations.
+
+macro_rules! impl_ops_vector_mask_bitwise {
+    (
+        [$elem_ty:ident; $elem_count:expr]:
+        $id:ident | $test_tt:tt |
+        ($true:expr, $false:expr)
+    ) => {
+        impl crate::ops::Not for $id {
+            type Output = Self;
+            #[inline]
+            fn not(self) -> Self {
+                Self::splat($true) ^ self // XOR against an all-`$true` vector
+            }
+        }
+        impl crate::ops::BitXor for $id {
+            type Output = Self;
+            #[inline]
+            fn bitxor(self, other: Self) -> Self {
+                use crate::llvm::simd_xor;
+                unsafe { Simd(simd_xor(self.0, other.0)) }
+            }
+        }
+        impl crate::ops::BitAnd for $id {
+            type Output = Self;
+            #[inline]
+            fn bitand(self, other: Self) -> Self {
+                use crate::llvm::simd_and;
+                unsafe { Simd(simd_and(self.0, other.0)) }
+            }
+        }
+        impl crate::ops::BitOr for $id {
+            type Output = Self;
+            #[inline]
+            fn bitor(self, other: Self) -> Self {
+                use crate::llvm::simd_or;
+                unsafe { Simd(simd_or(self.0, other.0)) }
+            }
+        }
+        impl crate::ops::BitAndAssign for $id {
+            #[inline]
+            fn bitand_assign(&mut self, other: Self) {
+                *self = *self & other;
+            }
+        }
+        impl crate::ops::BitOrAssign for $id {
+            #[inline]
+            fn bitor_assign(&mut self, other: Self) {
+                *self = *self | other;
+            }
+        }
+        impl crate::ops::BitXorAssign for $id {
+            #[inline]
+            fn bitxor_assign(&mut self, other: Self) {
+                *self = *self ^ other;
+            }
+        }
+
+        test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _ops_vector_mask_bitwise>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn ops_vector_mask_bitwise() {
+                        let t = $id::splat(true);
+                        let f = $id::splat(false);
+                        assert!(t != f);
+                        assert!(!(t == f));
+
+                        // Not: all-true and all-false are each other's complement.
+                        assert_eq!(!t, f);
+                        assert_eq!(t, !f);
+
+                        // BitAnd: full truth table.
+                        assert_eq!(t & f, f);
+                        assert_eq!(f & t, f);
+                        assert_eq!(t & t, t);
+                        assert_eq!(f & f, f);
+
+                        // BitOr: full truth table.
+                        assert_eq!(t | f, t);
+                        assert_eq!(f | t, t);
+                        assert_eq!(t | t, t);
+                        assert_eq!(f | f, f);
+
+                        // BitXor: full truth table.
+                        assert_eq!(t ^ f, t);
+                        assert_eq!(f ^ t, t);
+                        assert_eq!(t ^ t, f);
+                        assert_eq!(f ^ f, f);
+
+                        {
+                            // BitAndAssign: false & true stays false.
+                            let mut v = f;
+                            v &= t;
+                            assert_eq!(v, f);
+                        }
+                        {
+                            // BitOrAssign: false | true becomes true.
+                            let mut v = f;
+                            v |= t;
+                            assert_eq!(v, t);
+                        }
+                        {
+                            // BitXorAssign: false ^ true becomes true.
+                            let mut v = f;
+                            v ^= t;
+                            assert_eq!(v, t);
+                        }
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/ops/vector_neg.rs b/vendor/packed_simd_2/src/api/ops/vector_neg.rs
new file mode 100644
index 000000000..e2d91fd2f
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/ops/vector_neg.rs
@@ -0,0 +1,43 @@
+//! Vertical (lane-wise) vector `Neg`.
+
+macro_rules! impl_ops_vector_neg {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl crate::ops::Neg for $id {
+            type Output = Self;
+            #[inline]
+            fn neg(self) -> Self {
+                Self::splat(-1 as $elem_ty) * self // negate by multiplying every lane by -1
+            }
+        }
+        test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _ops_vector_neg>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn neg() {
+                        let z = $id::splat(0 as $elem_ty);
+                        let o = $id::splat(1 as $elem_ty);
+                        let t = $id::splat(2 as $elem_ty);
+                        let f = $id::splat(4 as $elem_ty);
+
+                        let nz = $id::splat(-(0 as $elem_ty)); // expected values use scalar negation
+                        let no = $id::splat(-(1 as $elem_ty));
+                        let nt = $id::splat(-(2 as $elem_ty));
+                        let nf = $id::splat(-(4 as $elem_ty));
+
+                        assert_eq!(-z, nz);
+                        assert_eq!(-o, no);
+                        assert_eq!(-t, nt);
+                        assert_eq!(-f, nf);
+
+                        assert_eq!(z, -nz); // negating twice round-trips
+                        assert_eq!(o, -no);
+                        assert_eq!(t, -nt);
+                        assert_eq!(f, -nf);
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/ops/vector_rotates.rs b/vendor/packed_simd_2/src/api/ops/vector_rotates.rs
new file mode 100644
index 000000000..6c794ecf4
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/ops/vector_rotates.rs
@@ -0,0 +1,90 @@
+//! Vertical (lane-wise) vector rotate operations.
+#![allow(unused)]
+
+macro_rules! impl_ops_vector_rotates {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl $id {
+            /// Shifts the bits of each lane to the left by the specified
+            /// amount in the corresponding lane of `n`, wrapping the
+            /// truncated bits to the end of the resulting integer.
+            ///
+            /// Note: this is neither the same operation as `<<` nor equivalent
+            /// to `slice::rotate_left`.
+            #[inline]
+            pub fn rotate_left(self, n: $id) -> $id {
+                const LANE_WIDTH: $elem_ty =
+                    crate::mem::size_of::<$elem_ty>() as $elem_ty * 8;
+                // Protect against undefined behavior for over-long bit shifts
+                let n = n % LANE_WIDTH;
+                (self << n) | (self >> ((LANE_WIDTH - n) % LANE_WIDTH))
+            }
+
+            /// Shifts the bits of each lane to the right by the specified
+            /// amount in the corresponding lane of `n`, wrapping the
+            /// truncated bits to the beginning of the resulting integer.
+            ///
+            /// Note: this is neither the same operation as `>>` nor equivalent
+            /// to `slice::rotate_right`.
+            #[inline]
+            pub fn rotate_right(self, n: $id) -> $id {
+                const LANE_WIDTH: $elem_ty =
+                    crate::mem::size_of::<$elem_ty>() as $elem_ty * 8;
+                // Protect against undefined behavior for over-long bit shifts
+                let n = n % LANE_WIDTH;
+                (self >> n) | (self << ((LANE_WIDTH - n) % LANE_WIDTH))
+            }
+        }
+
+        test_if!{
+            $test_tt:
+            paste::item! {
+                // FIXME:
+                // https://github.com/rust-lang-nursery/packed_simd/issues/75
+                #[cfg(not(any(
+                    target_arch = "s390x",
+                    target_arch = "sparc64",
+                )))]
+                pub mod [<$id _ops_vector_rotate>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn rotate_ops() {
+                        let z = $id::splat(0 as $elem_ty);
+                        let o = $id::splat(1 as $elem_ty);
+                        let t = $id::splat(2 as $elem_ty);
+                        let f = $id::splat(4 as $elem_ty);
+
+                        let max = $id::splat(
+                            (mem::size_of::<$elem_ty>() * 8 - 1) as $elem_ty);
+
+                        // rotate_right
+                        assert_eq!(z.rotate_right(z), z);
+                        assert_eq!(z.rotate_right(o), z);
+                        assert_eq!(z.rotate_right(t), z);
+
+                        assert_eq!(o.rotate_right(z), o);
+                        assert_eq!(t.rotate_right(z), t);
+                        assert_eq!(f.rotate_right(z), f);
+                        assert_eq!(f.rotate_right(max), f << 1); // right by width-1 == left by 1
+
+                        assert_eq!(o.rotate_right(o), o << max); // low bit wraps to the top bit
+                        assert_eq!(t.rotate_right(o), o);
+                        assert_eq!(t.rotate_right(t), o << max);
+                        assert_eq!(f.rotate_right(o), t);
+                        assert_eq!(f.rotate_right(t), o);
+
+                        // rotate_left
+                        assert_eq!(z.rotate_left(z), z);
+                        assert_eq!(o.rotate_left(z), o);
+                        assert_eq!(t.rotate_left(z), t);
+                        assert_eq!(f.rotate_left(z), f);
+                        assert_eq!(f.rotate_left(max), t); // left by width-1 == right by 1
+
+                        assert_eq!(o.rotate_left(o), t);
+                        assert_eq!(o.rotate_left(t), f);
+                        assert_eq!(t.rotate_left(o), f);
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/ops/vector_shifts.rs b/vendor/packed_simd_2/src/api/ops/vector_shifts.rs
new file mode 100644
index 000000000..22e1fbc0e
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/ops/vector_shifts.rs
@@ -0,0 +1,107 @@
+//! Vertical (lane-wise) vector-vector shift operations.
+
+macro_rules! impl_ops_vector_shifts {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl crate::ops::Shl<$id> for $id {
+            type Output = Self;
+            #[inline]
+            fn shl(self, other: Self) -> Self {
+                use crate::llvm::simd_shl;
+                unsafe { Simd(simd_shl(self.0, other.0)) } // per-lane shift amounts come from `other`
+            }
+        }
+        impl crate::ops::Shr<$id> for $id {
+            type Output = Self;
+            #[inline]
+            fn shr(self, other: Self) -> Self {
+                use crate::llvm::simd_shr;
+                unsafe { Simd(simd_shr(self.0, other.0)) } // per-lane shift amounts come from `other`
+            }
+        }
+        impl crate::ops::ShlAssign<$id> for $id {
+            #[inline]
+            fn shl_assign(&mut self, other: Self) {
+                *self = *self << other;
+            }
+        }
+        impl crate::ops::ShrAssign<$id> for $id {
+            #[inline]
+            fn shr_assign(&mut self, other: Self) {
+                *self = *self >> other;
+            }
+        }
+        test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _ops_vector_shifts>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    #[cfg_attr(any(target_arch = "s390x", target_arch = "sparc64"),
+                               allow(unreachable_code,
+                                     unused_variables,
+                                     unused_mut)
+                    )]
+                    // ^^^ FIXME: https://github.com/rust-lang/rust/issues/55344
+                    fn ops_vector_shifts() {
+                        let z = $id::splat(0 as $elem_ty);
+                        let o = $id::splat(1 as $elem_ty);
+                        let t = $id::splat(2 as $elem_ty);
+                        let f = $id::splat(4 as $elem_ty);
+
+                        let max = $id::splat(
+                            (mem::size_of::<$elem_ty>() * 8 - 1) as $elem_ty
+                        );
+
+                        // shr: shifting zero by any in-range amount stays zero
+                        assert_eq!(z >> z, z);
+                        assert_eq!(z >> o, z);
+                        assert_eq!(z >> t, z);
+                        assert_eq!(z >> f, z);
+
+                        #[cfg(any(target_arch = "s390x", target_arch = "sparc64"))] {
+                            // FIXME: rust produces bad codegen for shifts:
+                            // https://github.com/rust-lang-nursery/packed_simd/issues/13
+                            return;
+                        }
+
+                        assert_eq!(o >> z, o);
+                        assert_eq!(t >> z, t);
+                        assert_eq!(f >> z, f);
+                        assert_eq!(f >> max, z);
+
+                        assert_eq!(o >> o, z);
+                        assert_eq!(t >> o, o);
+                        assert_eq!(t >> t, z);
+                        assert_eq!(f >> o, t);
+                        assert_eq!(f >> t, o);
+                        assert_eq!(f >> max, z);
+
+                        // shl
+                        assert_eq!(z << z, z);
+                        assert_eq!(o << z, o);
+                        assert_eq!(t << z, t);
+                        assert_eq!(f << z, f);
+                        assert_eq!(f << max, z);
+
+                        assert_eq!(o << o, t);
+                        assert_eq!(o << t, f);
+                        assert_eq!(t << o, f);
+
+                        {
+                            // shr_assign
+                            let mut v = o;
+                            v >>= o;
+                            assert_eq!(v, z);
+                        }
+                        {
+                            // shl_assign
+                            let mut v = o;
+                            v <<= o;
+                            assert_eq!(v, t);
+                        }
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/ptr.rs b/vendor/packed_simd_2/src/api/ptr.rs
new file mode 100644
index 000000000..d2e523a49
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/ptr.rs
@@ -0,0 +1,4 @@
+//! Vector of pointers
+
+#[macro_use]
+mod gather_scatter;
diff --git a/vendor/packed_simd_2/src/api/ptr/gather_scatter.rs b/vendor/packed_simd_2/src/api/ptr/gather_scatter.rs
new file mode 100644
index 000000000..430435620
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/ptr/gather_scatter.rs
@@ -0,0 +1,217 @@
+//! Implements masked gathers and scatters for vectors of pointers
+
+macro_rules! impl_ptr_read {
+    ([$elem_ty:ty; $elem_count:expr]: $id:ident, $mask_ty:ident
+     | $test_tt:tt) => {
+        impl<T> $id<T>
+        where
+            [T; $elem_count]: sealed::SimdArray,
+        {
+            /// Reads selected vector elements from memory.
+            ///
+            /// Instantiates a new vector by reading through the pointers in
+            /// `self` for those lanes whose `mask` is `true`, and using the
+            /// corresponding lanes of `value` otherwise.
+            ///
+            /// No memory is accessed for those lanes of `self` whose `mask` is
+            /// `false`.
+            ///
+            /// # Safety
+            ///
+            /// This method is unsafe because it dereferences raw pointers. The
+            /// pointers must be aligned to `mem::align_of::<T>()`.
+            #[inline]
+            pub unsafe fn read<M>(
+                self, mask: Simd<[M; $elem_count]>,
+                value: Simd<[T; $elem_count]>,
+            ) -> Simd<[T; $elem_count]>
+            where
+                M: sealed::Mask,
+                [M; $elem_count]: sealed::SimdArray,
+            {
+                use crate::llvm::simd_gather;
+                Simd(simd_gather(value.0, self.0, mask.0))
+            }
+        }
+
+        test_if! {
+            $test_tt:
+            paste::item! {
+                mod [<$id _read>] {
+                    use super::*;
+                    #[test]
+                    fn read() {
+                        let mut v = [0_i32; $elem_count];
+                        for i in 0..$elem_count {
+                            v[i] = i as i32;
+                        }
+
+                        let mut ptr = $id::<i32>::null();
+
+                        for i in 0..$elem_count {
+                            ptr = ptr.replace(i,
+                                &v[i] as *const i32 as *mut i32
+                            );
+                        }
+
+                        // all mask elements are true: every lane is read from `v`
+                        let mask = $mask_ty::splat(true);
+                        let def = Simd::<[i32; $elem_count]>::splat(42_i32);
+                        let r: Simd<[i32; $elem_count]> = unsafe {
+                            ptr.read(mask, def)
+                        };
+                        assert_eq!(
+                            r,
+                            Simd::<[i32; $elem_count]>::from_slice_unaligned(
+                                &v
+                            )
+                        );
+
+                        let mut mask = mask;
+                        for i in 0..$elem_count {
+                            if i % 2 != 0 {
+                                mask = mask.replace(i, false);
+                            }
+                        }
+
+                        // even mask elements are true, odd ones are false:
+                        let r: Simd<[i32; $elem_count]> = unsafe {
+                            ptr.read(mask, def)
+                        };
+                        let mut e = v; // expected: odd lanes take the default 42
+                        for i in 0..$elem_count {
+                            if i % 2 != 0 {
+                                e[i] = 42;
+                            }
+                        }
+                        assert_eq!(
+                            r,
+                            Simd::<[i32; $elem_count]>::from_slice_unaligned(
+                                &e
+                            )
+                        );
+
+                        // all mask elements are false: the result is just `def`
+                        let mask = $mask_ty::splat(false);
+                        let def = Simd::<[i32; $elem_count]>::splat(42_i32);
+                        let r: Simd<[i32; $elem_count]> = unsafe {
+                            ptr.read(mask, def) }
+                        ;
+                        assert_eq!(r, def);
+                    }
+                }
+            }
+        }
+    };
+}
+
+macro_rules! impl_ptr_write {
+    ([$elem_ty:ty; $elem_count:expr]: $id:ident, $mask_ty:ident
+     | $test_tt:tt) => {
+        impl<T> $id<T>
+        where
+            [T; $elem_count]: sealed::SimdArray,
+        {
+            /// Writes selected vector elements to memory.
+            ///
+            /// Writes the lanes of `value` for which the mask is `true` to
+            /// their corresponding memory addresses in `self`.
+            ///
+            /// No memory is accessed for those lanes of `self` whose `mask` is
+            /// `false`.
+            ///
+            /// Overlapping memory addresses of `self` are written to in order
+            /// from the least-significant to the most-significant element.
+            ///
+            /// # Safety
+            ///
+            /// This method is unsafe because it dereferences raw pointers. The
+            /// pointers must be aligned to `mem::align_of::<T>()`.
+            #[inline]
+            pub unsafe fn write<M>(
+                self, mask: Simd<[M; $elem_count]>,
+                value: Simd<[T; $elem_count]>,
+            ) where
+                M: sealed::Mask,
+                [M; $elem_count]: sealed::SimdArray,
+            {
+                use crate::llvm::simd_scatter;
+                simd_scatter(value.0, self.0, mask.0)
+            }
+        }
+
+        test_if! {
+            $test_tt:
+            paste::item! {
+                mod [<$id _write>] {
+                    use super::*;
+                    #[test]
+                    fn write() {
+                        // fourty_two = [42, 42, 42, ...]
+                        let fourty_two
+                            = Simd::<[i32; $elem_count]>::splat(42_i32);
+
+                        // This test will write to this array
+                        let mut arr = [0_i32; $elem_count];
+                        for i in 0..$elem_count {
+                            arr[i] = i as i32;
+                        }
+                        // arr = [0, 1, 2, ...]
+
+                        let mut ptr = $id::<i32>::null();
+                        for i in 0..$elem_count {
+                            ptr = ptr.replace(i, unsafe {
+                                arr.as_ptr().add(i) as *mut i32
+                            });
+                        }
+                        // ptr = [&arr[0], &arr[1], ...] (NOTE(review): from `as_ptr`, yet written through below)
+
+                        // write `fourty_two` to all elements of `arr`
+                        {
+                            let backup = arr;
+                            unsafe {
+                                ptr.write($mask_ty::splat(true), fourty_two)
+                            };
+                            assert_eq!(arr, [42_i32; $elem_count]);
+                            arr = backup;  // arr = [0, 1, 2, ...]
+                        }
+
+                        // write 42 to even elements of arr:
+                        {
+                            // set odd elements of the mask to false
+                            let mut mask = $mask_ty::splat(true);
+                            for i in 0..$elem_count {
+                                if i % 2 != 0 {
+                                    mask = mask.replace(i, false);
+                                }
+                            }
+                            // mask = [true, false, true, false, ...]
+
+                            // expected result r = [42, 1, 42, 3, 42, 5, ...]
+                            let mut r = arr;
+                            for i in 0..$elem_count {
+                                if i % 2 == 0 {
+                                    r[i] = 42;
+                                }
+                            }
+
+                            let backup = arr;
+                            unsafe { ptr.write(mask, fourty_two) };
+                            assert_eq!(arr, r);
+                            arr = backup;  // arr = [0, 1, 2, 3, ...]
+                        }
+
+                        // write 42 to no elements of arr
+                        {
+                            let backup = arr;
+                            unsafe {
+                                ptr.write($mask_ty::splat(false), fourty_two)
+                            };
+                            assert_eq!(arr, backup);
+                        }
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/reductions.rs b/vendor/packed_simd_2/src/api/reductions.rs
new file mode 100644
index 000000000..54d2f0cc7
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/reductions.rs
@@ -0,0 +1,12 @@
+//! Reductions
+
+#[macro_use]
+mod float_arithmetic;
+#[macro_use]
+mod integer_arithmetic;
+#[macro_use]
+mod bitwise;
+#[macro_use]
+mod mask;
+#[macro_use]
+mod min_max;
diff --git a/vendor/packed_simd_2/src/api/reductions/bitwise.rs b/vendor/packed_simd_2/src/api/reductions/bitwise.rs
new file mode 100644
index 000000000..5bad4f474
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/reductions/bitwise.rs
@@ -0,0 +1,151 @@
+//! Implements portable horizontal bitwise vector reductions.
+#![allow(unused)]
+
+macro_rules! impl_reduction_bitwise {
+    (
+        [$elem_ty:ident; $elem_count:expr]:
+        $id:ident | $ielem_ty:ident | $test_tt:tt |
+        ($convert:expr) |
+        ($true:expr, $false:expr)
+    ) => {
+        impl $id {
+            /// Horizontal bitwise `and` of all the vector elements.
+            ///
+            /// Note: if the vector has one lane, the first element of the
+            /// vector is returned.
+            #[inline]
+            pub fn and(self) -> $elem_ty {
+                #[cfg(not(target_arch = "aarch64"))]
+                {
+                    use crate::llvm::simd_reduce_and;
+                    let r: $ielem_ty = unsafe { simd_reduce_and(self.0) };
+                    $convert(r)
+                }
+                #[cfg(target_arch = "aarch64")]
+                {
+                    // FIXME: broken on aarch64, so fold lane by lane:
+                    // https://github.com/rust-lang-nursery/packed_simd/issues/15
+                    let mut x = self.extract(0) as $elem_ty;
+                    for i in 1..$id::lanes() {
+                        x &= self.extract(i) as $elem_ty;
+                    }
+                    x
+                }
+            }
+
+            /// Horizontal bitwise `or` of all the vector elements.
+            ///
+            /// Note: if the vector has one lane, the first element of the
+            /// vector is returned.
+            #[inline]
+            pub fn or(self) -> $elem_ty {
+                #[cfg(not(target_arch = "aarch64"))]
+                {
+                    use crate::llvm::simd_reduce_or;
+                    let r: $ielem_ty = unsafe { simd_reduce_or(self.0) };
+                    $convert(r)
+                }
+                #[cfg(target_arch = "aarch64")]
+                {
+                    // FIXME: broken on aarch64, so fold lane by lane:
+                    // https://github.com/rust-lang-nursery/packed_simd/issues/15
+                    let mut x = self.extract(0) as $elem_ty;
+                    for i in 1..$id::lanes() {
+                        x |= self.extract(i) as $elem_ty;
+                    }
+                    x
+                }
+            }
+
+            /// Horizontal bitwise `xor` of all the vector elements.
+            ///
+            /// Note: if the vector has one lane, the first element of the
+            /// vector is returned.
+            #[inline]
+            pub fn xor(self) -> $elem_ty {
+                #[cfg(not(target_arch = "aarch64"))]
+                {
+                    use crate::llvm::simd_reduce_xor;
+                    let r: $ielem_ty = unsafe { simd_reduce_xor(self.0) };
+                    $convert(r)
+                }
+                #[cfg(target_arch = "aarch64")]
+                {
+                    // FIXME: broken on aarch64, so fold lane by lane:
+                    // https://github.com/rust-lang-nursery/packed_simd/issues/15
+                    let mut x = self.extract(0) as $elem_ty;
+                    for i in 1..$id::lanes() {
+                        x ^= self.extract(i) as $elem_ty;
+                    }
+                    x
+                }
+            }
+        }
+
+        test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _reduction_bitwise>] {
+                    use super::*;
+
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn and() {
+                        let v = $id::splat($false);
+                        assert_eq!(v.and(), $false);
+                        let v = $id::splat($true);
+                        assert_eq!(v.and(), $true);
+                        let v = $id::splat($false);
+                        let v = v.replace(0, $true);
+                        if $id::lanes() > 1 {
+                            assert_eq!(v.and(), $false);
+                        } else {
+                            assert_eq!(v.and(), $true);
+                        }
+                        let v = $id::splat($true);
+                        let v = v.replace(0, $false);
+                        assert_eq!(v.and(), $false);
+
+                    }
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn or() {
+                        let v = $id::splat($false);
+                        assert_eq!(v.or(), $false);
+                        let v = $id::splat($true);
+                        assert_eq!(v.or(), $true);
+                        let v = $id::splat($false);
+                        let v = v.replace(0, $true);
+                        assert_eq!(v.or(), $true);
+                        let v = $id::splat($true);
+                        let v = v.replace(0, $false);
+                        if $id::lanes() > 1 {
+                            assert_eq!(v.or(), $true);
+                        } else {
+                            assert_eq!(v.or(), $false);
+                        }
+                    }
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn xor() {
+                        let v = $id::splat($false);
+                        assert_eq!(v.xor(), $false);
+                        let v = $id::splat($true);
+                        if $id::lanes() > 1 {
+                            assert_eq!(v.xor(), $false);
+                        } else {
+                            assert_eq!(v.xor(), $true);
+                        }
+                        let v = $id::splat($false);
+                        let v = v.replace(0, $true);
+                        assert_eq!(v.xor(), $true);
+                        let v = $id::splat($true);
+                        let v = v.replace(0, $false);
+                        if $id::lanes() > 1 {
+                            assert_eq!(v.xor(), $true);
+                        } else {
+                            assert_eq!(v.xor(), $false);
+                        }
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/reductions/float_arithmetic.rs b/vendor/packed_simd_2/src/api/reductions/float_arithmetic.rs
new file mode 100644
index 000000000..4a47452e5
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/reductions/float_arithmetic.rs
@@ -0,0 +1,317 @@
+//! Implements portable horizontal float vector arithmetic reductions.
+
+macro_rules! impl_reduction_float_arithmetic {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl $id {
+            /// Horizontal sum of the vector elements.
+            ///
+            /// The intrinsic performs a tree-reduction of the vector elements.
+            /// That is, for an 8 element vector:
+            ///
+            /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
+            ///
+            /// If one of the vector elements is `NaN` the reduction returns
+            /// `NaN`. The resulting `NaN` is not required to be equal to any
+            /// of the `NaN`s in the vector.
+            #[inline]
+            pub fn sum(self) -> $elem_ty {
+                #[cfg(not(target_arch = "aarch64"))]
+                {
+                    use crate::llvm::simd_reduce_add_ordered;
+                    unsafe { simd_reduce_add_ordered(self.0, 0 as $elem_ty) }
+                }
+                #[cfg(target_arch = "aarch64")]
+                {
+                    // FIXME: broken on AArch64; sequential fold over lanes:
+                    // https://github.com/rust-lang-nursery/packed_simd/issues/15
+                    let mut x = self.extract(0) as $elem_ty;
+                    for i in 1..$id::lanes() {
+                        x += self.extract(i) as $elem_ty;
+                    }
+                    x
+                }
+            }
+
+            /// Horizontal product of the vector elements.
+            ///
+            /// The intrinsic performs a tree-reduction of the vector elements.
+            /// That is, for an 8 element vector:
+            ///
+            /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
+            ///
+            /// If one of the vector elements is `NaN` the reduction returns
+            /// `NaN`. The resulting `NaN` is not required to be equal to any
+            /// of the `NaN`s in the vector.
+            #[inline]
+            pub fn product(self) -> $elem_ty {
+                #[cfg(not(target_arch = "aarch64"))]
+                {
+                    use crate::llvm::simd_reduce_mul_ordered;
+                    unsafe { simd_reduce_mul_ordered(self.0, 1 as $elem_ty) }
+                }
+                #[cfg(target_arch = "aarch64")]
+                {
+                    // FIXME: broken on AArch64; sequential fold over lanes:
+                    // https://github.com/rust-lang-nursery/packed_simd/issues/15
+                    let mut x = self.extract(0) as $elem_ty;
+                    for i in 1..$id::lanes() {
+                        x *= self.extract(i) as $elem_ty;
+                    }
+                    x
+                }
+            }
+        }
+
+        impl crate::iter::Sum for $id {
+            #[inline]
+            fn sum<I: Iterator<Item = $id>>(iter: I) -> $id {
+                iter.fold($id::splat(0.), crate::ops::Add::add)
+            }
+        }
+
+        impl crate::iter::Product for $id {
+            #[inline]
+            fn product<I: Iterator<Item = $id>>(iter: I) -> $id {
+                iter.fold($id::splat(1.), crate::ops::Mul::mul)
+            }
+        }
+
+        impl<'a> crate::iter::Sum<&'a $id> for $id {
+            #[inline]
+            fn sum<I: Iterator<Item = &'a $id>>(iter: I) -> $id {
+                iter.fold($id::splat(0.), |a, b| crate::ops::Add::add(a, *b))
+            }
+        }
+
+        impl<'a> crate::iter::Product<&'a $id> for $id {
+            #[inline]
+            fn product<I: Iterator<Item = &'a $id>>(iter: I) -> $id {
+                iter.fold($id::splat(1.), |a, b| crate::ops::Mul::mul(a, *b))
+            }
+        }
+
+        test_if! {
+            $test_tt:
+            paste::item! {
+                // Exact compares are OK: operands are integers cast to floats.
+                #[allow(clippy::float_cmp)]
+                pub mod [<$id _reduction_float_arith>] {
+                    use super::*;
+                    fn alternating(x: usize) -> $id {
+                        let mut v = $id::splat(1 as $elem_ty);
+                        for i in 0..$id::lanes() {
+                            if i % x == 0 {
+                                v = v.replace(i, 2 as $elem_ty);
+                            }
+                        }
+                        v
+                    }
+
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn sum() {
+                        let v = $id::splat(0 as $elem_ty);
+                        assert_eq!(v.sum(), 0 as $elem_ty);
+                        let v = $id::splat(1 as $elem_ty);
+                        assert_eq!(v.sum(), $id::lanes() as $elem_ty);
+                        let v = alternating(2);
+                        assert_eq!(
+                            v.sum(),
+                            ($id::lanes() / 2 + $id::lanes()) as $elem_ty
+                        );
+                    }
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn product() {
+                        let v = $id::splat(0 as $elem_ty);
+                        assert_eq!(v.product(), 0 as $elem_ty);
+                        let v = $id::splat(1 as $elem_ty);
+                        assert_eq!(v.product(), 1 as $elem_ty);
+                        let f = match $id::lanes() {
+                            64 => 16,
+                            32 => 8,
+                            16 => 4,
+                            _ => 2,
+                        };
+                        let v = alternating(f);
+                        assert_eq!(
+                            v.product(),
+                            (2_usize.pow(($id::lanes() / f) as u32)
+                             as $elem_ty)
+                        );
+                    }
+
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    #[allow(unreachable_code)]
+                    #[allow(unused_mut)]
+                    // ^^^ FIXME: https://github.com/rust-lang/rust/issues/55344
+                    fn sum_nan() {
+                        // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36732
+                        // https://github.com/rust-lang-nursery/packed_simd/issues/6
+                        return;
+
+                        let n0 = crate::$elem_ty::NAN;
+                        let v0 = $id::splat(-3.0);
+                        for i in 0..$id::lanes() {
+                            let mut v = v0.replace(i, n0);
+                            // If the vector contains a NaN the result is NaN:
+                            assert!(
+                                v.sum().is_nan(),
+                                "nan at {} => {} | {:?}",
+                                i,
+                                v.sum(),
+                                v
+                            );
+                            for j in 0..i {
+                                v = v.replace(j, n0);
+                                assert!(v.sum().is_nan());
+                            }
+                        }
+                        let v = $id::splat(n0);
+                        assert!(v.sum().is_nan(), "all nans | {:?}", v);
+                    }
+
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    #[allow(unreachable_code)]
+                    #[allow(unused_mut)]
+                    // ^^^ FIXME: https://github.com/rust-lang/rust/issues/55344
+                    fn product_nan() {
+                        // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36732
+                        // https://github.com/rust-lang-nursery/packed_simd/issues/6
+                        return;
+
+                        let n0 = crate::$elem_ty::NAN;
+                        let v0 = $id::splat(-3.0);
+                        for i in 0..$id::lanes() {
+                            let mut v = v0.replace(i, n0);
+                            // If the vector contains a NaN the result is NaN:
+                            assert!(
+                                v.product().is_nan(),
+                                "nan at {} => {} | {:?}",
+                                i,
+                                v.product(),
+                                v
+                            );
+                            for j in 0..i {
+                                v = v.replace(j, n0);
+                                assert!(v.product().is_nan());
+                            }
+                        }
+                        let v = $id::splat(n0);
+                        assert!(v.product().is_nan(), "all nans | {:?}", v);
+                    }
+
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    #[allow(unused, dead_code)]
+                    fn sum_roundoff() {
+                        // Reference tree-reduction: split in halves and recurse
+                        fn tree_reduce_sum(a: &[$elem_ty]) -> $elem_ty {
+                            assert!(!a.is_empty());
+                            if a.len() == 1 {
+                                a[0]
+                            } else if a.len() == 2 {
+                                a[0] + a[1]
+                            } else {
+                                let mid = a.len() / 2;
+                                let (left, right) = a.split_at(mid);
+                                tree_reduce_sum(left) + tree_reduce_sum(right)
+                            }
+                        }
+
+                        let mut start = crate::$elem_ty::EPSILON;
+                        let mut scalar_reduction = 0. as $elem_ty;
+
+                        let mut v = $id::splat(0. as $elem_ty);
+                        for i in 0..$id::lanes() {
+                            let c = if i % 2 == 0 { 1e3 } else { -1. };
+                            start *= ::core::$elem_ty::consts::PI * c;
+                            scalar_reduction += start;
+                            v = v.replace(i, start);
+                        }
+                        let simd_reduction = v.sum();
+
+                        let mut a = [0. as $elem_ty; $id::lanes()];
+                        v.write_to_slice_unaligned(&mut a);
+                        let tree_reduction = tree_reduce_sum(&a);
+
+                        // tolerate 1 ULP difference (compared on the raw bits):
+                        let red_bits = simd_reduction.to_bits();
+                        let tree_bits = tree_reduction.to_bits();
+                        assert!(
+                            if red_bits > tree_bits {
+                                red_bits - tree_bits
+                            } else {
+                                tree_bits - red_bits
+                            } < 2,
+                            "vector: {:?} | simd_reduction: {:?} | \
+                             tree_reduction: {} | scalar_reduction: {}",
+                            v,
+                            simd_reduction,
+                            tree_reduction,
+                            scalar_reduction
+                        );
+                    }
+
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    #[allow(unused, dead_code)]
+                    fn product_roundoff() {
+                        use ::core::convert::TryInto;
+                        // Reference tree-reduction: split in halves and recurse
+                        fn tree_reduce_product(a: &[$elem_ty]) -> $elem_ty {
+                            assert!(!a.is_empty());
+                            if a.len() == 1 {
+                                a[0]
+                            } else if a.len() == 2 {
+                                a[0] * a[1]
+                            } else {
+                                let mid = a.len() / 2;
+                                let (left, right) = a.split_at(mid);
+                                tree_reduce_product(left)
+                                    * tree_reduce_product(right)
+                            }
+                        }
+
+                        let mut start = crate::$elem_ty::EPSILON;
+                        let mut scalar_reduction = 1. as $elem_ty;
+
+                        let mut v = $id::splat(0. as $elem_ty);
+                        for i in 0..$id::lanes() {
+                            let c = if i % 2 == 0 { 1e3 } else { -1. };
+                            start *= ::core::$elem_ty::consts::PI * c;
+                            scalar_reduction *= start;
+                            v = v.replace(i, start);
+                        }
+                        let simd_reduction = v.product();
+
+                        let mut a = [0. as $elem_ty; $id::lanes()];
+                        v.write_to_slice_unaligned(&mut a);
+                        let tree_reduction = tree_reduce_product(&a);
+
+                        // FIXME: Too imprecise, even only for product(f32x8).
+                        // Figure out how to narrow this down.
+                        let ulp_limit = $id::lanes() / 2;
+                        let red_bits = simd_reduction.to_bits();
+                        let tree_bits = tree_reduction.to_bits();
+                        assert!(
+                            if red_bits > tree_bits {
+                                red_bits - tree_bits
+                            } else {
+                                tree_bits - red_bits
+                            } < ulp_limit.try_into().unwrap(),
+                            "vector: {:?} | simd_reduction: {:?} | \
+                             tree_reduction: {} | scalar_reduction: {}",
+                            v,
+                            simd_reduction,
+                            tree_reduction,
+                            scalar_reduction
+                        );
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/reductions/integer_arithmetic.rs b/vendor/packed_simd_2/src/api/reductions/integer_arithmetic.rs
new file mode 100644
index 000000000..91dffad31
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/reductions/integer_arithmetic.rs
@@ -0,0 +1,197 @@
+//! Implements portable horizontal integer vector arithmetic reductions.
+
+macro_rules! impl_reduction_integer_arithmetic {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident
+     | $test_tt:tt) => {
+        impl $id {
+            /// Horizontal wrapping sum of the vector elements.
+            ///
+            /// The intrinsic performs a tree-reduction of the vector elements.
+            /// That is, for an 8 element vector:
+            ///
+            /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
+            ///
+            /// If an operation overflows it returns the mathematical result
+            /// modulo `2^n` where `n` is the bit width of the element type.
+            #[inline]
+            pub fn wrapping_sum(self) -> $elem_ty {
+                #[cfg(not(target_arch = "aarch64"))]
+                {
+                    use crate::llvm::simd_reduce_add_ordered;
+                    let v: $ielem_ty = unsafe {
+                        simd_reduce_add_ordered(self.0, 0 as $ielem_ty)
+                    };
+                    v as $elem_ty
+                }
+                #[cfg(target_arch = "aarch64")]
+                {
+                    // FIXME: broken on AArch64, so fold lane by lane:
+                    // https://github.com/rust-lang-nursery/packed_simd/issues/15
+                    let mut x = self.extract(0) as $elem_ty;
+                    for i in 1..$id::lanes() {
+                        x = x.wrapping_add(self.extract(i) as $elem_ty);
+                    }
+                    x
+                }
+            }
+
+            /// Horizontal wrapping product of the vector elements.
+            ///
+            /// The intrinsic performs a tree-reduction of the vector elements.
+            /// That is, for an 8 element vector:
+            ///
+            /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
+            ///
+            /// If an operation overflows it returns the mathematical result
+            /// modulo `2^n` where `n` is the bit width of the element type.
+            #[inline]
+            pub fn wrapping_product(self) -> $elem_ty {
+                #[cfg(not(target_arch = "aarch64"))]
+                {
+                    use crate::llvm::simd_reduce_mul_ordered;
+                    let v: $ielem_ty = unsafe {
+                        simd_reduce_mul_ordered(self.0, 1 as $ielem_ty)
+                    };
+                    v as $elem_ty
+                }
+                #[cfg(target_arch = "aarch64")]
+                {
+                    // FIXME: broken on AArch64, so fold lane by lane:
+                    // https://github.com/rust-lang-nursery/packed_simd/issues/15
+                    let mut x = self.extract(0) as $elem_ty;
+                    for i in 1..$id::lanes() {
+                        x = x.wrapping_mul(self.extract(i) as $elem_ty);
+                    }
+                    x
+                }
+            }
+        }
+
+        impl crate::iter::Sum for $id {
+            #[inline]
+            fn sum<I: Iterator<Item = $id>>(iter: I) -> $id {
+                iter.fold($id::splat(0), crate::ops::Add::add)
+            }
+        }
+
+        impl crate::iter::Product for $id {
+            #[inline]
+            fn product<I: Iterator<Item = $id>>(iter: I) -> $id {
+                iter.fold($id::splat(1), crate::ops::Mul::mul)
+            }
+        }
+
+        impl<'a> crate::iter::Sum<&'a $id> for $id {
+            #[inline]
+            fn sum<I: Iterator<Item = &'a $id>>(iter: I) -> $id {
+                iter.fold($id::splat(0), |a, b| crate::ops::Add::add(a, *b))
+            }
+        }
+
+        impl<'a> crate::iter::Product<&'a $id> for $id {
+            #[inline]
+            fn product<I: Iterator<Item = &'a $id>>(iter: I) -> $id {
+                iter.fold($id::splat(1), |a, b| crate::ops::Mul::mul(a, *b))
+            }
+        }
+
+        test_if! {
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _reduction_int_arith>] {
+                    use super::*;
+
+                    fn alternating(x: usize) -> $id {
+                        let mut v = $id::splat(1 as $elem_ty);
+                        for i in 0..$id::lanes() {
+                            if i % x == 0 {
+                                v = v.replace(i, 2 as $elem_ty);
+                            }
+                        }
+                        v
+                    }
+
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn wrapping_sum() {
+                        let v = $id::splat(0 as $elem_ty);
+                        assert_eq!(v.wrapping_sum(), 0 as $elem_ty);
+                        let v = $id::splat(1 as $elem_ty);
+                        assert_eq!(v.wrapping_sum(), $id::lanes() as $elem_ty);
+                        let v = alternating(2);
+                        if $id::lanes() > 1 {
+                            assert_eq!(
+                                v.wrapping_sum(),
+                                ($id::lanes() / 2 + $id::lanes()) as $elem_ty
+                            );
+                        } else {
+                            assert_eq!(
+                                v.wrapping_sum(),
+                                2 as $elem_ty
+                            );
+                        }
+                    }
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn wrapping_sum_overflow() {
+                        let start = $elem_ty::max_value()
+                            - ($id::lanes() as $elem_ty / 2);
+
+                        let v = $id::splat(start as $elem_ty);
+                        let vwrapping_sum = v.wrapping_sum();
+
+                        let mut wrapping_sum = start;
+                        for _ in 1..$id::lanes() {
+                            wrapping_sum = wrapping_sum.wrapping_add(start);
+                        }
+                        assert_eq!(wrapping_sum, vwrapping_sum, "v = {:?}", v);
+                    }
+
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn wrapping_product() {
+                        let v = $id::splat(0 as $elem_ty);
+                        assert_eq!(v.wrapping_product(), 0 as $elem_ty);
+                        let v = $id::splat(1 as $elem_ty);
+                        assert_eq!(v.wrapping_product(), 1 as $elem_ty);
+                        let f = match $id::lanes() {
+                            64 => 16,
+                            32 => 8,
+                            16 => 4,
+                            _ => 2,
+                        };
+                        let v = alternating(f);
+                        if $id::lanes() > 1 {
+                            assert_eq!(
+                                v.wrapping_product(),
+                                (2_usize.pow(($id::lanes() / f) as u32)
+                                 as $elem_ty)
+                            );
+                        } else {
+                            assert_eq!(
+                                v.wrapping_product(),
+                                2 as $elem_ty
+                            );
+                        }
+                    }
+
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn wrapping_product_overflow() {
+                        let start = $elem_ty::max_value()
+                            - ($id::lanes() as $elem_ty / 2);
+
+                        let v = $id::splat(start as $elem_ty);
+                        let vmul = v.wrapping_product();
+
+                        let mut mul = start;
+                        for _ in 1..$id::lanes() {
+                            mul = mul.wrapping_mul(start);
+                        }
+                        assert_eq!(mul, vmul, "v = {:?}", v);
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/reductions/mask.rs b/vendor/packed_simd_2/src/api/reductions/mask.rs
new file mode 100644
index 000000000..0dd6a84e7
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/reductions/mask.rs
@@ -0,0 +1,89 @@
+//! Implements portable horizontal mask reductions.
+
+macro_rules! impl_reduction_mask {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl $id {
+            /// Returns `true` if all vector lanes are `true`.
+            #[inline]
+            pub fn all(self) -> bool {
+                unsafe { crate::codegen::reductions::mask::All::all(self) }
+            }
+            /// Returns `true` if at least one vector lane is `true`.
+            #[inline]
+            pub fn any(self) -> bool {
+                unsafe { crate::codegen::reductions::mask::Any::any(self) }
+            }
+            /// Returns `true` if all vector lanes are `false` (`!self.any()`).
+            #[inline]
+            pub fn none(self) -> bool {
+                !self.any()
+            }
+        }
+
+        test_if! {
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _reduction>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn all() {
+                        let a = $id::splat(true);
+                        assert!(a.all());
+                        let a = $id::splat(false);
+                        assert!(!a.all());
+
+                        if $id::lanes() > 1 {
+                            for i in 0..$id::lanes() {
+                                let mut a = $id::splat(true);
+                                a = a.replace(i, false);
+                                assert!(!a.all());
+                                let mut a = $id::splat(false);
+                                a = a.replace(i, true);
+                                assert!(!a.all());
+                            }
+                        }
+                    }
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn any() {
+                        let a = $id::splat(true);
+                        assert!(a.any());
+                        let a = $id::splat(false);
+                        assert!(!a.any());
+
+                        if $id::lanes() > 1 {
+                            for i in 0..$id::lanes() {
+                                let mut a = $id::splat(true);
+                                a = a.replace(i, false);
+                                assert!(a.any());
+                                let mut a = $id::splat(false);
+                                a = a.replace(i, true);
+                                assert!(a.any());
+                            }
+                        }
+                    }
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn none() {
+                        let a = $id::splat(true);
+                        assert!(!a.none());
+                        let a = $id::splat(false);
+                        assert!(a.none());
+
+                        if $id::lanes() > 1 {
+                            for i in 0..$id::lanes() {
+                                let mut a = $id::splat(true);
+                                a = a.replace(i, false);
+                                assert!(!a.none());
+                                let mut a = $id::splat(false);
+                                a = a.replace(i, true);
+                                assert!(!a.none());
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/reductions/min_max.rs b/vendor/packed_simd_2/src/api/reductions/min_max.rs
new file mode 100644
index 000000000..c4c1400a8
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/reductions/min_max.rs
@@ -0,0 +1,381 @@
+//! Implements portable horizontal vector min/max reductions.
+
+macro_rules! impl_reduction_min_max {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident
+     | $ielem_ty:ident | $test_tt:tt) => {
+        impl $id {
+            /// Returns the largest element of the vector.
+            #[inline]
+            pub fn max_element(self) -> $elem_ty {
+                #[cfg(not(any(
+                    target_arch = "aarch64",
+                    target_arch = "arm",
+                    target_arch = "powerpc64",
+                    target_arch = "wasm32",
+                )))]
+                {
+                    use crate::llvm::simd_reduce_max;
+                    let v: $ielem_ty = unsafe { simd_reduce_max(self.0) };
+                    v as $elem_ty
+                }
+                #[cfg(any(
+                    target_arch = "aarch64",
+                    target_arch = "arm",
+                    target_arch = "powerpc64",
+                    target_arch = "wasm32",
+                ))]
+                {
+                    // Scalar fallback. FIXME: broken on AArch64
+                    // https://github.com/rust-lang-nursery/packed_simd/issues/15
+                    // FIXME: broken on WASM32
+                    // https://github.com/rust-lang-nursery/packed_simd/issues/91
+                    let mut x = self.extract(0);
+                    for i in 1..$id::lanes() {
+                        x = x.max(self.extract(i));
+                    }
+                    x
+                }
+            }
+
+            /// Returns the smallest element of the vector.
+            #[inline]
+            pub fn min_element(self) -> $elem_ty {
+                #[cfg(not(any(
+                    target_arch = "aarch64",
+                    target_arch = "arm",
+                    all(target_arch = "x86", not(target_feature = "sse2")),
+                    target_arch = "powerpc64",
+                    target_arch = "wasm32",
+                ),))]
+                {
+                    use crate::llvm::simd_reduce_min;
+                    let v: $ielem_ty = unsafe { simd_reduce_min(self.0) };
+                    v as $elem_ty
+                }
+                #[cfg(any(
+                    target_arch = "aarch64",
+                    target_arch = "arm",
+                    all(target_arch = "x86", not(target_feature = "sse2")),
+                    target_arch = "powerpc64",
+                    target_arch = "wasm32",
+                ))]
+                {
+                    // Scalar fallback. FIXME: broken on AArch64
+                    // https://github.com/rust-lang-nursery/packed_simd/issues/15
+                    // FIXME: broken on i586-unknown-linux-gnu
+                    // https://github.com/rust-lang-nursery/packed_simd/issues/22
+                    // FIXME: broken on WASM32
+                    // https://github.com/rust-lang-nursery/packed_simd/issues/91
+                    let mut x = self.extract(0);
+                    for i in 1..$id::lanes() {
+                        x = x.min(self.extract(i));
+                    }
+                    x
+                }
+            }
+        }
+        test_if! {$test_tt:
+        paste::item! {
+            // Exact `==` is fine here: lanes only hold 0, 1 or 2.
+            #[allow(clippy::float_cmp)]
+            pub mod [<$id _reduction_min_max>] {
+                use super::*;
+                #[cfg_attr(not(target_arch = "wasm32"), test)]
+                #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                pub fn max_element() {
+                    let v = $id::splat(0 as $elem_ty);
+                    assert_eq!(v.max_element(), 0 as $elem_ty);
+                    if $id::lanes() > 1 {
+                        let v = v.replace(1, 1 as $elem_ty);
+                        assert_eq!(v.max_element(), 1 as $elem_ty);
+                    }
+                    let v = v.replace(0, 2 as $elem_ty);
+                    assert_eq!(v.max_element(), 2 as $elem_ty);
+                }
+
+                #[cfg_attr(not(target_arch = "wasm32"), test)]
+                #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                pub fn min_element() {
+                    let v = $id::splat(0 as $elem_ty);
+                    assert_eq!(v.min_element(), 0 as $elem_ty);
+                    if $id::lanes() > 1 {
+                        let v = v.replace(1, 1 as $elem_ty);
+                        assert_eq!(v.min_element(), 0 as $elem_ty);
+                    }
+                    let v = $id::splat(1 as $elem_ty);
+                    let v = v.replace(0, 2 as $elem_ty);
+                    if $id::lanes() > 1 {
+                        assert_eq!(v.min_element(), 1 as $elem_ty);
+                    } else {
+                        assert_eq!(v.min_element(), 2 as $elem_ty);
+                    }
+                    if $id::lanes() > 1 {
+                        let v = $id::splat(2 as $elem_ty);
+                        let v = v.replace(1, 1 as $elem_ty);
+                        assert_eq!(v.min_element(), 1 as $elem_ty);
+                    }
+                }
+            }
+        }
+        }
+    };
+}
+
+macro_rules! test_reduction_float_min_max {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        test_if!{
+            $test_tt:
+            paste::item! {
+                // Exact `==` is fine here: lanes only hold -3 or NaN.
+                #[allow(clippy::float_cmp)]
+                pub mod [<$id _reduction_min_max_nan>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn min_element_test() {
+                        let n = crate::$elem_ty::NAN;
+
+                        assert_eq!(n.min(-3.), -3.);
+                        assert_eq!((-3. as $elem_ty).min(n), -3.);
+
+                        let v0 = $id::splat(-3.);
+
+                        let target_with_broken_last_lane_nan = !cfg!(any(
+                            target_arch = "arm", target_arch = "aarch64",
+                            all(target_arch = "x86",
+                                not(target_feature = "sse2")
+                            ),
+                            target_arch = "powerpc64",
+                            target_arch = "wasm32",
+                        ));
+
+                        // The vector is initialized to `-3.`s: [-3, -3, -3, -3]
+                        for i in 0..$id::lanes() {
+                            // We replace the i-th element of the vector with
+                            // `NaN`: [-3, -3, -3, NaN]
+                            let mut v = v0.replace(i, n);
+
+                            // If the NaN is in the last place, the LLVM
+                            // implementation of these methods is broken on some
+                            // targets:
+                            if i == $id::lanes() - 1 &&
+                                target_with_broken_last_lane_nan {
+                                // FIXME:
+                                // https://github.com/rust-lang-nursery/packed_simd/issues/5
+                                //
+                                // If there is a NaN, the result should always be
+                                // the smallest element, but currently when the
+                                // last element is NaN the current
+                                // implementation incorrectly returns NaN.
+                                //
+                                // The targets mentioned above use different
+                                // codegen that produces the correct result.
+                                //
+                                // These asserts detect if this behavior changes
+                                assert!(v.min_element().is_nan(),
+                                        // FIXME: ^^^ should be -3.
+                                        "[A]: nan at {} => {} | {:?}",
+                                        i, v.min_element(), v);
+
+                                // If we replace all the elements in the vector
+                                // up-to the `i-th` lane with `NaN`s, the result
+                                // is still always `-3.` unless all elements of
+                                // the vector are `NaN`s:
+                                //
+                                // This is also broken:
+                                for j in 0..i {
+                                    v = v.replace(j, n);
+                                    assert!(v.min_element().is_nan(),
+                                            // FIXME: ^^^ should be -3.
+                                            "[B]: nan at {} => {} | {:?}",
+                                            i, v.min_element(), v);
+                                }
+
+                                // We are done here, since we were in the last
+                                // lane which is the last iteration of the loop.
+                                break
+                            }
+
+                            // We are not in the last lane, and there is only
+                            // one `NaN` in the vector.
+
+                            // If the vector has one lane, the result is `NaN`:
+                            if $id::lanes() == 1 {
+                                assert!(v.min_element().is_nan(),
+                                        "[C]: all nans | v={:?} | min={} | \
+                                         is_nan: {}",
+                                        v, v.min_element(),
+                                        v.min_element().is_nan()
+                                );
+
+                                // And we are done, since the vector only has
+                                // one lane anyways.
+                                break;
+                            }
+
+                            // The vector has more than one lane, since there is
+                            // only one `NaN` in the vector, the result is
+                            // always `-3`.
+                            assert_eq!(v.min_element(), -3.,
+                                       "[D]: nan at {} => {} | {:?}",
+                                       i, v.min_element(), v);
+
+                            // If we replace all the elements in the vector
+                            // up-to the `i-th` lane with `NaN`s, the result is
+                            // still always `-3.` unless all elements of the
+                            // vector are `NaN`s:
+                            for j in 0..i {
+                                v = v.replace(j, n);
+
+                                if i == $id::lanes() - 1 && j == i - 1 {
+                                    // All elements of the vector are `NaN`s,
+                                    // therefore the result is NaN as well.
+                                    //
+                                    // Note: the #lanes of the vector is > 1, so
+                                    // "i - 1" does not overflow.
+                                    assert!(v.min_element().is_nan(),
+                                            "[E]: all nans | v={:?} | min={} | \
+                                             is_nan: {}",
+                                            v, v.min_element(),
+                                            v.min_element().is_nan());
+                                } else {
+                                    // There are non-`NaN` elements in the
+                                    // vector, therefore the result is `-3.`:
+                                    assert_eq!(v.min_element(), -3.,
+                                               "[F]: nan at {} => {} | {:?}",
+                                               i, v.min_element(), v);
+                                }
+                            }
+                        }
+
+                        // If the vector contains all NaNs the result is NaN:
+                        assert!($id::splat(n).min_element().is_nan(),
+                                "all nans | v={:?} | min={} | is_nan: {}",
+                                $id::splat(n), $id::splat(n).min_element(),
+                                $id::splat(n).min_element().is_nan());
+                    }
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn max_element_test() {
+                        let n = crate::$elem_ty::NAN;
+
+                        assert_eq!(n.max(-3.), -3.);
+                        assert_eq!((-3. as $elem_ty).max(n), -3.);
+
+                        let v0 = $id::splat(-3.);
+
+                        let target_with_broken_last_lane_nan = !cfg!(any(
+                            target_arch = "arm", target_arch = "aarch64",
+                            target_arch = "powerpc64", target_arch = "wasm32",
+                        ));
+
+                        // The vector is initialized to `-3.`s: [-3, -3, -3, -3]
+                        for i in 0..$id::lanes() {
+                            // We replace the i-th element of the vector with
+                            // `NaN`: [-3, -3, -3, NaN]
+                            let mut v = v0.replace(i, n);
+
+                            // If the NaN is in the last place, the LLVM
+                            // implementation of these methods is broken on some
+                            // targets:
+                            if i == $id::lanes() - 1 &&
+                              target_with_broken_last_lane_nan {
+                                // FIXME:
+                                // https://github.com/rust-lang-nursery/packed_simd/issues/5
+                                //
+                                // If there is a NaN, the result should
+                                // always be the largest element, but currently
+                                // when the last element is NaN the current
+                                // implementation incorrectly returns NaN.
+                                //
+                                // The targets mentioned above use different
+                                // codegen that produces the correct result.
+                                //
+                                // These asserts detect if this behavior
+                                // changes
+                                assert!(v.max_element().is_nan(),
+                                        // FIXME: ^^^ should be -3.
+                                        "[A]: nan at {} => {} | {:?}",
+                                        i, v.max_element(), v);
+
+                                // If we replace all the elements in the vector
+                                // up-to the `i-th` lane with `NaN`s, the result
+                                // is still always `-3.` unless all elements of
+                                // the vector are `NaN`s:
+                                //
+                                // This is also broken:
+                                for j in 0..i {
+                                    v = v.replace(j, n);
+                                    assert!(v.max_element().is_nan(),
+                                            // FIXME: ^^^ should be -3.
+                                            "[B]: nan at {} => {} | {:?}",
+                                            i, v.max_element(), v);
+                                }
+
+                                // We are done here, since we were in the last
+                                // lane which is the last iteration of the loop.
+                                break
+                            }
+
+                            // We are not in the last lane, and there is only
+                            // one `NaN` in the vector.
+
+                            // If the vector has one lane, the result is `NaN`:
+                            if $id::lanes() == 1 {
+                                assert!(v.max_element().is_nan(),
+                                        "[C]: all nans | v={:?} | max={} | \
+                                         is_nan: {}",
+                                        v, v.max_element(),
+                                        v.max_element().is_nan());
+
+                                // And we are done, since the vector only has
+                                // one lane anyways.
+                                break;
+                            }
+
+                            // The vector has more than one lane, since there is
+                            // only one `NaN` in the vector, the result is
+                            // always `-3`.
+                            assert_eq!(v.max_element(), -3.,
+                                       "[D]: nan at {} => {} | {:?}",
+                                       i, v.max_element(), v);
+
+                            // If we replace all the elements in the vector
+                            // up-to the `i-th` lane with `NaN`s, the result is
+                            // still always `-3.` unless all elements of the
+                            // vector are `NaN`s:
+                            for j in 0..i {
+                                v = v.replace(j, n);
+
+                                if i == $id::lanes() - 1 && j == i - 1 {
+                                    // All elements of the vector are `NaN`s,
+                                    // therefore the result is NaN as well.
+                                    //
+                                    // Note: the #lanes of the vector is > 1, so
+                                    // "i - 1" does not overflow.
+                                    assert!(v.max_element().is_nan(),
+                                            "[E]: all nans | v={:?} | max={} | \
+                                             is_nan: {}",
+                                            v, v.max_element(),
+                                            v.max_element().is_nan());
+                                } else {
+                                    // There are non-`NaN` elements in the
+                                    // vector, therefore the result is `-3.`:
+                                    assert_eq!(v.max_element(), -3.,
+                                               "[F]: nan at {} => {} | {:?}",
+                                               i, v.max_element(), v);
+                                }
+                            }
+                        }
+
+                        // If the vector contains all NaNs the result is NaN:
+                        assert!($id::splat(n).max_element().is_nan(),
+                                "all nans | v={:?} | max={} | is_nan: {}",
+                                $id::splat(n), $id::splat(n).max_element(),
+                                $id::splat(n).max_element().is_nan());
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/vendor/packed_simd_2/src/api/select.rs b/vendor/packed_simd_2/src/api/select.rs
new file mode 100644
index 000000000..24525df56
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/select.rs
@@ -0,0 +1,75 @@
+//! Implements mask's `select`.
+
+/// Generates the `select` method for a mask vector type.
+macro_rules! impl_select {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl $id {
+            /// Picks each lane from `a` or `b` according to this mask.
+            ///
+            /// Where a mask lane is `true` the result holds the matching
+            /// lane of `a`; every other lane of the result is taken from
+            /// `b`.
+            #[inline]
+            pub fn select<T>(self, a: Simd<T>, b: Simd<T>) -> Simd<T>
+            where
+                T: sealed::SimdArray<
+                    NT = <[$elem_ty; $elem_count] as sealed::SimdArray>::NT,
+                >,
+            {
+                let (mask, on_true, on_false) = (self.0, a.0, b.0);
+                Simd(unsafe { crate::llvm::simd_select(mask, on_true, on_false) })
+            }
+        }
+
+        test_select!(bool, $id, $id, (false, true) | $test_tt);
+    };
+}
+
+macro_rules! test_select {
+    (
+        $elem_ty:ident,
+        $mask_ty:ident,
+        $vec_ty:ident,($small:expr, $large:expr) |
+        $test_tt:tt
+    ) => {
+        test_if! {
+            $test_tt:
+            paste::item! {
+                pub mod [<$vec_ty _select>] {
+                    use super::*;
+
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn select() {
+                        let lo = $small as $elem_ty;
+                        let hi = $large as $elem_ty;
+                        // Uniform operands: every lane compares the same way.
+                        let lows = $vec_ty::splat(lo);
+                        let highs = $vec_ty::splat(hi);
+                        let fwd = lows.lt(highs);
+                        assert_eq!(fwd.select(lows, highs), lows);
+
+                        let rev = highs.lt(lows);
+                        assert_eq!(rev.select(highs, lows), lows);
+                        // Swap the even lanes of the two operands.
+                        let mut lhs = lows;
+                        let mut rhs = highs;
+                        let mut expected = $mask_ty::splat(false);
+                        for i in 0..$vec_ty::lanes() {
+                            if i % 2 == 1 {
+                                expected = expected.replace(i, true);
+                            } else {
+                                let (x, y) = (lhs.extract(i), rhs.extract(i));
+                                lhs = lhs.replace(i, y);
+                                rhs = rhs.replace(i, x);
+                            }
+                        }
+
+                        let mixed = lhs.lt(rhs);
+                        assert_eq!(expected, mixed);
+                        assert_eq!(mixed.select(lhs, rhs), lows);
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/shuffle.rs b/vendor/packed_simd_2/src/api/shuffle.rs
new file mode 100644
index 000000000..13a7fae5f
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/shuffle.rs
@@ -0,0 +1,190 @@
+//! Implements portable vector shuffles with immediate indices.
+
+// FIXME: comprehensive tests
+// https://github.com/rust-lang-nursery/packed_simd/issues/20
+
+/// Shuffles vector elements.
+///
+/// This macro returns a new vector that contains a shuffle of the elements in
+/// one (`shuffle!(vec, [indices...])`) or two (`shuffle!(vec0, vec1,
+/// [indices...])`) input vectors.
+///
+/// The type of `vec0` and `vec1` must be equal, and the element type of the
+/// resulting vector is the element type of the input vector.
+///
+/// The number of `indices` must be a power-of-two in range `[2, 64]`, since
+/// currently, the largest vector supported by the library has 64 lanes. The
+/// length of the resulting vector equals the number of indices provided.
+///
+/// The indices must be in range `[0, M * N)` where `M` is the number of input
+/// vectors (`1` or `2`) and `N` is the number of lanes of the input vectors.
+/// The indices `i` in range `[0, N)` refer to the `i`-th element of `vec0`,
+/// while the indices in range `[N, 2*N)` refer to the `i - N`-th element of
+/// `vec1`.
+///
+/// # Examples
+///
+/// Shuffling elements of two vectors:
+///
+/// ```
+/// # #[macro_use]
+/// # extern crate packed_simd;
+/// # use packed_simd::*;
+/// # fn main() {
+/// // Shuffle allows reordering the elements:
+/// let x = i32x4::new(1, 2, 3, 4);
+/// let y = i32x4::new(5, 6, 7, 8);
+/// let r = shuffle!(x, y, [4, 0, 5, 1]);
+/// assert_eq!(r, i32x4::new(5, 1, 6, 2));
+///
+/// // The resulting vector can also be smaller than the input:
+/// let r = shuffle!(x, y, [1, 6]);
+/// assert_eq!(r, i32x2::new(2, 7));
+///
+/// // Or larger:
+/// let r = shuffle!(x, y, [1, 3, 4, 2, 1, 7, 2, 2]);
+/// assert_eq!(r, i32x8::new(2, 4, 5, 3, 2, 8, 3, 3));
+/// // At most 2 * the number of lanes in the input vector.
+/// # }
+/// ```
+///
+/// Shuffling elements of one vector:
+///
+/// ```
+/// # #[macro_use]
+/// # extern crate packed_simd;
+/// # use packed_simd::*;
+/// # fn main() {
+/// // Shuffle allows reordering the elements of a vector:
+/// let x = i32x4::new(1, 2, 3, 4);
+/// let r = shuffle!(x, [2, 1, 3, 0]);
+/// assert_eq!(r, i32x4::new(3, 2, 4, 1));
+///
+/// // The resulting vector can be smaller than the input:
+/// let r = shuffle!(x, [1, 3]);
+/// assert_eq!(r, i32x2::new(2, 4));
+///
+/// // Equal:
+/// let r = shuffle!(x, [1, 3, 2, 0]);
+/// assert_eq!(r, i32x4::new(2, 4, 3, 1));
+///
+/// // Or larger:
+/// let r = shuffle!(x, [1, 3, 2, 2, 1, 3, 2, 2]);
+/// assert_eq!(r, i32x8::new(2, 4, 3, 3, 2, 4, 3, 3));
+/// // At most 2 * the number of lanes in the input vector.
+/// # }
+/// ```
+#[macro_export]
+macro_rules! shuffle {
+    ($vec0:expr, $vec1:expr, [$l0:expr, $l1:expr]) => {{
+        #[allow(unused_unsafe)]
+        unsafe {
+            $crate::Simd($crate::__shuffle_vector2(
+                $vec0.0,
+                $vec1.0,
+                [$l0, $l1],
+            ))
+        }
+    }};
+    ($vec0:expr, $vec1:expr, [$l0:expr, $l1:expr, $l2:expr, $l3:expr]) => {{
+        #[allow(unused_unsafe)]
+        unsafe {
+            $crate::Simd($crate::__shuffle_vector4(
+                $vec0.0,
+                $vec1.0,
+                [$l0, $l1, $l2, $l3],
+            ))
+        }
+    }};
+    ($vec0:expr, $vec1:expr,
+     [$l0:expr, $l1:expr, $l2:expr, $l3:expr,
+      $l4:expr, $l5:expr, $l6:expr, $l7:expr]) => {{
+        #[allow(unused_unsafe)]
+        unsafe {
+            $crate::Simd($crate::__shuffle_vector8(
+                $vec0.0,
+                $vec1.0,
+                [$l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7],
+            ))
+        }
+    }};
+    ($vec0:expr, $vec1:expr,
+     [$l0:expr, $l1:expr, $l2:expr, $l3:expr,
+      $l4:expr, $l5:expr, $l6:expr, $l7:expr,
+      $l8:expr, $l9:expr, $l10:expr, $l11:expr,
+      $l12:expr, $l13:expr, $l14:expr, $l15:expr]) => {{
+        #[allow(unused_unsafe)]
+        unsafe {
+            $crate::Simd($crate::__shuffle_vector16(
+                $vec0.0,
+                $vec1.0,
+                [
+                    $l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7, $l8, $l9, $l10,
+                    $l11, $l12, $l13, $l14, $l15,
+                ],
+            ))
+        }
+    }};
+    ($vec0:expr, $vec1:expr,
+     [$l0:expr, $l1:expr, $l2:expr, $l3:expr,
+      $l4:expr, $l5:expr, $l6:expr, $l7:expr,
+      $l8:expr, $l9:expr, $l10:expr, $l11:expr,
+      $l12:expr, $l13:expr, $l14:expr, $l15:expr,
+      $l16:expr, $l17:expr, $l18:expr, $l19:expr,
+      $l20:expr, $l21:expr, $l22:expr, $l23:expr,
+      $l24:expr, $l25:expr, $l26:expr, $l27:expr,
+      $l28:expr, $l29:expr, $l30:expr, $l31:expr]) => {{
+        #[allow(unused_unsafe)]
+        unsafe {
+            $crate::Simd($crate::__shuffle_vector32(
+                $vec0.0,
+                $vec1.0,
+                [
+                    $l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7, $l8, $l9, $l10,
+                    $l11, $l12, $l13, $l14, $l15, $l16, $l17, $l18, $l19,
+                    $l20, $l21, $l22, $l23, $l24, $l25, $l26, $l27, $l28,
+                    $l29, $l30, $l31,
+                ],
+            ))
+        }
+    }};
+    ($vec0:expr, $vec1:expr,
+     [$l0:expr, $l1:expr, $l2:expr, $l3:expr,
+      $l4:expr, $l5:expr, $l6:expr, $l7:expr,
+      $l8:expr, $l9:expr, $l10:expr, $l11:expr,
+      $l12:expr, $l13:expr, $l14:expr, $l15:expr,
+      $l16:expr, $l17:expr, $l18:expr, $l19:expr,
+      $l20:expr, $l21:expr, $l22:expr, $l23:expr,
+      $l24:expr, $l25:expr, $l26:expr, $l27:expr,
+      $l28:expr, $l29:expr, $l30:expr, $l31:expr,
+      $l32:expr, $l33:expr, $l34:expr, $l35:expr,
+      $l36:expr, $l37:expr, $l38:expr, $l39:expr,
+      $l40:expr, $l41:expr, $l42:expr, $l43:expr,
+      $l44:expr, $l45:expr, $l46:expr, $l47:expr,
+      $l48:expr, $l49:expr, $l50:expr, $l51:expr,
+      $l52:expr, $l53:expr, $l54:expr, $l55:expr,
+      $l56:expr, $l57:expr, $l58:expr, $l59:expr,
+      $l60:expr, $l61:expr, $l62:expr, $l63:expr]) => {{
+        #[allow(unused_unsafe)]
+        unsafe {
+            $crate::Simd($crate::__shuffle_vector64(
+                $vec0.0,
+                $vec1.0,
+                [
+                    $l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7, $l8, $l9, $l10,
+                    $l11, $l12, $l13, $l14, $l15, $l16, $l17, $l18, $l19,
+                    $l20, $l21, $l22, $l23, $l24, $l25, $l26, $l27, $l28,
+                    $l29, $l30, $l31, $l32, $l33, $l34, $l35, $l36, $l37,
+                    $l38, $l39, $l40, $l41, $l42, $l43, $l44, $l45, $l46,
+                    $l47, $l48, $l49, $l50, $l51, $l52, $l53, $l54, $l55,
+                    $l56, $l57, $l58, $l59, $l60, $l61, $l62, $l63,
+                ],
+            ))
+        }
+    }};
+    ($vec:expr, [$($l:expr),*]) => {
+        match $vec {
+            v => $crate::shuffle!(v, v, [$($l),*])
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/shuffle1_dyn.rs b/vendor/packed_simd_2/src/api/shuffle1_dyn.rs
new file mode 100644
index 000000000..64536be6c
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/shuffle1_dyn.rs
@@ -0,0 +1,159 @@
+//! Shuffle vector elements according to a dynamic vector of indices.
+
+macro_rules! impl_shuffle1_dyn {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl $id {
+            /// Permutes the lanes of `self` using run-time `indices`.
+            ///
+            /// Lane `i` of the result holds the lane of `self` that is
+            /// selected by lane `i` of `indices`. The index vector type `I`
+            /// is the one named by this type's `Shuffle1Dyn` implementation.
+            #[inline]
+            pub fn shuffle1_dyn<I>(self, indices: I) -> Self
+            where
+                Self: codegen::shuffle1_dyn::Shuffle1Dyn<Indices = I>,
+            {
+                // Forward to the code-generation layer, which provides the
+                // actual implementation for this vector type.
+                <Self as codegen::shuffle1_dyn::Shuffle1Dyn>::shuffle1_dyn(
+                    self, indices,
+                )
+            }
+        }
+    };
+}
+
+// Generates the `shuffle1_dyn` test module for the vector type `$id`.
+//
+// The module is wrapped in `test_if!` (controlled by `$test_tt`) and its name
+// is built by pasting `$id` and `_shuffle1_dyn` together. Lane values are
+// produced with `as $elem_ty` casts; `$elem_count` is accepted but not used.
+macro_rules! test_shuffle1_dyn {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        test_if! {
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _shuffle1_dyn>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn shuffle1_dyn() {
+                        // increasing = [0, 1, 2, ..., lanes - 1]
+                        let increasing = {
+                            let mut v = $id::splat(0 as $elem_ty);
+                            for i in 0..$id::lanes() {
+                                v = v.replace(i, i as $elem_ty);
+                            }
+                            v
+                        };
+                        // decreasing = [lanes - 1, ..., 2, 1, 0]
+                        let decreasing = {
+                            let mut v = $id::splat(0 as $elem_ty);
+                            for i in 0..$id::lanes() {
+                                v = v.replace(
+                                    i,
+                                    ($id::lanes() - 1 - i) as $elem_ty
+                                );
+                            }
+                            v
+                        };
+
+                        // The index vector type is dictated by the
+                        // `Shuffle1Dyn` implementation of `$id`; the value
+                        // vectors are cast into it to serve as indices.
+                        type Indices = <
+                            $id as codegen::shuffle1_dyn::Shuffle1Dyn
+                            >::Indices;
+                        let increasing_ids: Indices = increasing.cast();
+                        let decreasing_ids: Indices = decreasing.cast();
+
+                        // Increasing indices are the identity permutation,
+                        // decreasing indices reverse the lane order. The
+                        // messages read "(values, indices) => expected".
+                        assert_eq!(
+                            increasing.shuffle1_dyn(increasing_ids),
+                            increasing,
+                            "(i,i)=>i"
+                        );
+                        assert_eq!(
+                            decreasing.shuffle1_dyn(increasing_ids),
+                            decreasing,
+                            "(d,i)=>d"
+                        );
+                        assert_eq!(
+                            increasing.shuffle1_dyn(decreasing_ids),
+                            decreasing,
+                            "(i,d)=>d"
+                        );
+                        assert_eq!(
+                            decreasing.shuffle1_dyn(decreasing_ids),
+                            increasing,
+                            "(d,d)=>i"
+                        );
+
+                        for i in 0..$id::lanes() {
+                            // Selecting lane `i` everywhere must broadcast
+                            // that lane of the source vector.
+                            let v_ids: Indices
+                                = $id::splat(i as $elem_ty).cast();
+                            assert_eq!(increasing.shuffle1_dyn(v_ids),
+                                       $id::splat(increasing.extract(i))
+                            );
+                            assert_eq!(decreasing.shuffle1_dyn(v_ids),
+                                       $id::splat(decreasing.extract(i))
+                            );
+                            // Shuffling a splatted vector leaves it unchanged
+                            // for both index vectors.
+                            assert_eq!(
+                                $id::splat(i as $elem_ty)
+                                    .shuffle1_dyn(increasing_ids),
+                                $id::splat(i as $elem_ty)
+                            );
+                            assert_eq!(
+                                $id::splat(i as $elem_ty)
+                                    .shuffle1_dyn(decreasing_ids),
+                                $id::splat(i as $elem_ty)
+                            );
+                        }
+                    }
+                }
+            }
+        }
+    };
+}
+
+// Generates the `shuffle1_dyn` test module for a vector type `$id` whose lanes
+// are set from `bool` values (`splat(false)`, `replace(i, true)`).
+//
+// The module is wrapped in `test_if!` (controlled by `$test_tt`) and carries
+// the same pasted name as the one produced by `test_shuffle1_dyn`.
+// `$elem_ty` and `$elem_count` are accepted but not used here.
+macro_rules! test_shuffle1_dyn_mask {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        test_if! {
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _shuffle1_dyn>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn shuffle1_dyn() {
+                        // alternating = [true, false, true, false, ...]
+                        let mut alternating = $id::splat(false);
+                        for i in 0..$id::lanes() {
+                            if i % 2 == 0 {
+                                alternating = alternating.replace(i, true);
+                            }
+                        }
+
+                        // The index vector type is dictated by the
+                        // `Shuffle1Dyn` implementation of `$id`.
+                        type Indices = <
+                            $id as codegen::shuffle1_dyn::Shuffle1Dyn
+                            >::Indices;
+                        // even = [0, 0, 2, 2, 4, 4, ..]
+                        let even = {
+                            let mut v = Indices::splat(0);
+                            for i in 0..$id::lanes() {
+                                if i % 2 == 0 {
+                                    v = v.replace(i, (i as u8).into());
+                                } else {
+                                    // `i` is odd here, so `i - 1` cannot
+                                    // underflow.
+                                    v = v.replace(i, (i as u8 - 1).into());
+                                }
+                            }
+                            v
+                        };
+                        // odd = [1, 1, 3, 3, 5, 5, ...]
+                        let odd = {
+                            let mut v = Indices::splat(0);
+                            for i in 0..$id::lanes() {
+                                if i % 2 != 0 {
+                                    v = v.replace(i, (i as u8).into());
+                                } else {
+                                    v = v.replace(i, (i as u8 + 1).into());
+                                }
+                            }
+                            v
+                        };
+
+                        // Every even lane of `alternating` is `true`.
+                        assert_eq!(
+                            alternating.shuffle1_dyn(even),
+                            $id::splat(true)
+                        );
+                        // Every odd lane of `alternating` is `false`. With a
+                        // single lane, `odd` would hold the index 1, which
+                        // does not name an existing lane, so the check is
+                        // skipped.
+                        if $id::lanes() > 1 {
+                            assert_eq!(
+                                alternating.shuffle1_dyn(odd),
+                                $id::splat(false)
+                            );
+                        }
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/slice.rs b/vendor/packed_simd_2/src/api/slice.rs
new file mode 100644
index 000000000..526b848b5
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/slice.rs
@@ -0,0 +1,7 @@
+//! Slice from/to methods
+
+#[macro_use]
+mod from_slice;
+
+#[macro_use]
+mod write_to_slice;
diff --git a/vendor/packed_simd_2/src/api/slice/from_slice.rs b/vendor/packed_simd_2/src/api/slice/from_slice.rs
new file mode 100644
index 000000000..25082d1e6
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/slice/from_slice.rs
@@ -0,0 +1,218 @@
+//! Implements methods to read a vector type from a slice.
+
+// Implements the `from_slice_*` constructors for the vector type `$id` with
+// element type `$elem_ty` and `$elem_count` lanes, plus their tests (emitted
+// through `test_if!` under the control of `$test_tt`).
+macro_rules! impl_slice_from_slice {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl $id {
+            /// Instantiates a new vector with the values of the `slice`.
+            ///
+            /// The vector is read from the start of `slice`.
+            ///
+            /// # Panics
+            ///
+            /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned
+            /// to an `align_of::<Self>()` boundary.
+            #[inline]
+            pub fn from_slice_aligned(slice: &[$elem_ty]) -> Self {
+                assert!(slice.len() >= $elem_count);
+                assert_eq!(
+                    slice
+                        .as_ptr()
+                        .align_offset(crate::mem::align_of::<Self>()),
+                    0
+                );
+                // SAFETY: the length and alignment preconditions of
+                // `from_slice_aligned_unchecked` were asserted just above.
+                unsafe { Self::from_slice_aligned_unchecked(slice) }
+            }
+
+            /// Instantiates a new vector with the values of the `slice`.
+            ///
+            /// The vector is read from the start of `slice`; no alignment
+            /// is required.
+            ///
+            /// # Panics
+            ///
+            /// If `slice.len() < Self::lanes()`.
+            #[inline]
+            pub fn from_slice_unaligned(slice: &[$elem_ty]) -> Self {
+                assert!(slice.len() >= $elem_count);
+                // SAFETY: the slice was just checked to be long enough.
+                unsafe { Self::from_slice_unaligned_unchecked(slice) }
+            }
+
+            /// Instantiates a new vector with the values of the `slice`.
+            ///
+            /// # Safety
+            ///
+            /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned
+            /// to an `align_of::<Self>()` boundary, the behavior is undefined.
+            #[inline]
+            pub unsafe fn from_slice_aligned_unchecked(
+                slice: &[$elem_ty],
+            ) -> Self {
+                debug_assert!(slice.len() >= $elem_count);
+                // Take the pointer from the slice itself: a pointer derived
+                // from a reference to element 0 only covers that one element,
+                // but a whole vector is read through it below.
+                let target_ptr = slice.as_ptr();
+                debug_assert_eq!(
+                    target_ptr.align_offset(crate::mem::align_of::<Self>()),
+                    0
+                );
+
+                #[allow(clippy::cast_ptr_alignment)]
+                *(target_ptr as *const Self)
+            }
+
+            /// Instantiates a new vector with the values of the `slice`.
+            ///
+            /// # Safety
+            ///
+            /// If `slice.len() < Self::lanes()` the behavior is undefined.
+            #[inline]
+            pub unsafe fn from_slice_unaligned_unchecked(
+                slice: &[$elem_ty],
+            ) -> Self {
+                use crate::mem::size_of;
+                debug_assert!(slice.len() >= $elem_count);
+                // Pointer taken from the slice itself so that it covers all
+                // of the bytes copied below, not just the first element.
+                let target_ptr = slice.as_ptr() as *const u8;
+                // Byte-wise copy into a local vector: this places no
+                // alignment requirement on the source pointer.
+                let mut x = Self::splat(0 as $elem_ty);
+                let self_ptr = &mut x as *mut Self as *mut u8;
+                crate::ptr::copy_nonoverlapping(
+                    target_ptr,
+                    self_ptr,
+                    size_of::<Self>(),
+                );
+                x
+            }
+        }
+
+        test_if! {
+            $test_tt:
+            paste::item! {
+                // Comparisons use integer casts within mantissa^1 range.
+                #[allow(clippy::float_cmp)]
+                pub mod [<$id _slice_from_slice>] {
+                    use super::*;
+                    use crate::iter::Iterator;
+
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn from_slice_unaligned() {
+                        let mut unaligned = [42 as $elem_ty; $id::lanes() + 1];
+                        unaligned[0] = 0 as $elem_ty;
+                        let vec = $id::from_slice_unaligned(&unaligned[1..]);
+                        for (index, &b) in unaligned.iter().enumerate() {
+                            if index == 0 {
+                                assert_eq!(b, 0 as $elem_ty);
+                            } else {
+                                assert_eq!(b, 42 as $elem_ty);
+                                assert_eq!(b, vec.extract(index - 1));
+                            }
+                        }
+                    }
+
+                    // FIXME: wasm-bindgen-test does not support #[should_panic]
+                    // #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    #[cfg(not(target_arch = "wasm32"))]
+                    #[test]
+                    #[should_panic]
+                    fn from_slice_unaligned_fail() {
+                        let mut unaligned = [42 as $elem_ty; $id::lanes() + 1];
+                        unaligned[0] = 0 as $elem_ty;
+                        // the slice is not large enough => panic
+                        let _vec = $id::from_slice_unaligned(&unaligned[2..]);
+                    }
+
+                    // Storage for two vectors' worth of elements; the `_vec`
+                    // member gives the union the alignment of `$id`.
+                    union A {
+                        data: [$elem_ty; 2 * $id::lanes()],
+                        _vec: $id,
+                    }
+
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn from_slice_aligned() {
+                        let mut aligned = A {
+                            data: [0 as $elem_ty; 2 * $id::lanes()],
+                        };
+                        for i in $id::lanes()..(2 * $id::lanes()) {
+                            unsafe {
+                                aligned.data[i] = 42 as $elem_ty;
+                            }
+                        }
+
+                        let vec = unsafe {
+                            $id::from_slice_aligned(
+                                &aligned.data[$id::lanes()..]
+                            )
+                        };
+                        for (index, &b) in
+                            unsafe { aligned.data.iter().enumerate() } {
+                            if index < $id::lanes() {
+                                assert_eq!(b, 0 as $elem_ty);
+                            } else {
+                                assert_eq!(b, 42 as $elem_ty);
+                                assert_eq!(
+                                    b, vec.extract(index - $id::lanes())
+                                );
+                            }
+                        }
+                    }
+
+                    // FIXME: wasm-bindgen-test does not support #[should_panic]
+                    // #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    #[cfg(not(target_arch = "wasm32"))]
+                    #[test]
+                    #[should_panic]
+                    fn from_slice_aligned_fail_lanes() {
+                        let aligned = A {
+                            data: [0 as $elem_ty; 2 * $id::lanes()],
+                        };
+                        // the slice is empty => panic
+                        let _vec = unsafe {
+                            $id::from_slice_aligned(
+                                &aligned.data[2 * $id::lanes()..]
+                            )
+                        };
+                    }
+
+                    // FIXME: wasm-bindgen-test does not support #[should_panic]
+                    // #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    #[cfg(not(target_arch = "wasm32"))]
+                    #[test]
+                    #[should_panic]
+                    fn from_slice_aligned_fail_align() {
+                        unsafe {
+                            let aligned = A {
+                                data: [0 as $elem_ty; 2 * $id::lanes()],
+                            };
+
+                            // get a pointer to the front of data
+                            let ptr: *const $elem_ty = aligned.data.as_ptr()
+                                as *const $elem_ty;
+                            // offset pointer by one element
+                            let ptr = ptr.wrapping_add(1);
+
+                            if ptr.align_offset(
+                                crate::mem::align_of::<$id>()
+                            ) == 0 {
+                                // the pointer is properly aligned, so
+                                // from_slice_aligned won't fail here (e.g. this
+                                // can happen for i128x1). So we panic to make
+                                // the "should_panic" test pass:
+                                panic!("ok");
+                            }
+
+                            // create a slice - this is safe, because the
+                            // elements of the slice exist, are properly
+                            // initialized, and properly aligned:
+                            let s: &[$elem_ty] = slice::from_raw_parts(
+                                ptr, $id::lanes()
+                            );
+                            // this should always panic because the slice
+                            // alignment does not match the alignment
+                            // requirements for the vector type:
+                            let _vec = $id::from_slice_aligned(s);
+                        }
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/slice/write_to_slice.rs b/vendor/packed_simd_2/src/api/slice/write_to_slice.rs
new file mode 100644
index 000000000..b634d98b9
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/slice/write_to_slice.rs
@@ -0,0 +1,213 @@
+//! Implements methods to write a vector type to a slice.
+
+// Implements the `write_to_slice_*` methods for the vector type `$id` with
+// element type `$elem_ty` and `$elem_count` lanes, plus their tests (emitted
+// through `test_if!` under the control of `$test_tt`).
+macro_rules! impl_slice_write_to_slice {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl $id {
+            /// Writes the values of the vector to the `slice`.
+            ///
+            /// The vector is written to the start of `slice`.
+            ///
+            /// # Panics
+            ///
+            /// If `slice.len() < Self::lanes()` or `&slice[0]` is not
+            /// aligned to an `align_of::<Self>()` boundary.
+            #[inline]
+            pub fn write_to_slice_aligned(self, slice: &mut [$elem_ty]) {
+                assert!(slice.len() >= $elem_count);
+                assert_eq!(
+                    slice
+                        .as_ptr()
+                        .align_offset(crate::mem::align_of::<Self>()),
+                    0
+                );
+                // SAFETY: the length and alignment preconditions of
+                // `write_to_slice_aligned_unchecked` were asserted just
+                // above.
+                unsafe {
+                    self.write_to_slice_aligned_unchecked(slice);
+                }
+            }
+
+            /// Writes the values of the vector to the `slice`.
+            ///
+            /// The vector is written to the start of `slice`; no alignment
+            /// is required.
+            ///
+            /// # Panics
+            ///
+            /// If `slice.len() < Self::lanes()`.
+            #[inline]
+            pub fn write_to_slice_unaligned(self, slice: &mut [$elem_ty]) {
+                assert!(slice.len() >= $elem_count);
+                // SAFETY: the slice was just checked to be long enough.
+                unsafe {
+                    self.write_to_slice_unaligned_unchecked(slice);
+                }
+            }
+
+            /// Writes the values of the vector to the `slice`.
+            ///
+            /// # Safety
+            ///
+            /// If `slice.len() < Self::lanes()` or `&slice[0]` is not
+            /// aligned to an `align_of::<Self>()` boundary, the behavior is
+            /// undefined.
+            #[inline]
+            pub unsafe fn write_to_slice_aligned_unchecked(
+                self, slice: &mut [$elem_ty],
+            ) {
+                debug_assert!(slice.len() >= $elem_count);
+                // Take the pointer from the slice itself: a pointer derived
+                // from a reference to element 0 only covers that one element,
+                // but a whole vector is written through it below.
+                let target_ptr = slice.as_mut_ptr();
+                debug_assert_eq!(
+                    target_ptr.align_offset(crate::mem::align_of::<Self>()),
+                    0
+                );
+
+                #[allow(clippy::cast_ptr_alignment)]
+                *(target_ptr as *mut Self) = self;
+            }
+
+            /// Writes the values of the vector to the `slice`.
+            ///
+            /// # Safety
+            ///
+            /// If `slice.len() < Self::lanes()` the behavior is undefined.
+            #[inline]
+            pub unsafe fn write_to_slice_unaligned_unchecked(
+                self, slice: &mut [$elem_ty],
+            ) {
+                debug_assert!(slice.len() >= $elem_count);
+                // Pointer taken from the slice itself so that it covers all
+                // of the bytes written below, not just the first element.
+                let target_ptr = slice.as_mut_ptr() as *mut u8;
+                // Byte-wise copy out of `self`: this places no alignment
+                // requirement on the destination pointer.
+                let self_ptr = &self as *const Self as *const u8;
+                crate::ptr::copy_nonoverlapping(
+                    self_ptr,
+                    target_ptr,
+                    crate::mem::size_of::<Self>(),
+                );
+            }
+        }
+
+        test_if! {
+            $test_tt:
+            paste::item! {
+                // Comparisons use integer casts within mantissa^1 range.
+                #[allow(clippy::float_cmp)]
+                pub mod [<$id _slice_write_to_slice>] {
+                    use super::*;
+                    use crate::iter::Iterator;
+
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn write_to_slice_unaligned() {
+                        let mut unaligned = [0 as $elem_ty; $id::lanes() + 1];
+                        let vec = $id::splat(42 as $elem_ty);
+                        vec.write_to_slice_unaligned(&mut unaligned[1..]);
+                        for (index, &b) in unaligned.iter().enumerate() {
+                            if index == 0 {
+                                assert_eq!(b, 0 as $elem_ty);
+                            } else {
+                                assert_eq!(b, 42 as $elem_ty);
+                                assert_eq!(b, vec.extract(index - 1));
+                            }
+                        }
+                    }
+
+                    // FIXME: wasm-bindgen-test does not support #[should_panic]
+                    // #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    #[cfg(not(target_arch = "wasm32"))]
+                    #[test]
+                    #[should_panic]
+                    fn write_to_slice_unaligned_fail() {
+                        let mut unaligned = [0 as $elem_ty; $id::lanes() + 1];
+                        let vec = $id::splat(42 as $elem_ty);
+                        // the slice is not large enough => panic
+                        vec.write_to_slice_unaligned(&mut unaligned[2..]);
+                    }
+
+                    // Storage for two vectors' worth of elements; the `_vec`
+                    // member gives the union the alignment of `$id`.
+                    union A {
+                        data: [$elem_ty; 2 * $id::lanes()],
+                        _vec: $id,
+                    }
+
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn write_to_slice_aligned() {
+                        let mut aligned = A {
+                            data: [0 as $elem_ty; 2 * $id::lanes()],
+                        };
+                        let vec = $id::splat(42 as $elem_ty);
+                        unsafe {
+                            vec.write_to_slice_aligned(
+                                &mut aligned.data[$id::lanes()..]
+                            );
+                            for (idx, &b) in aligned.data.iter().enumerate() {
+                                if idx < $id::lanes() {
+                                    assert_eq!(b, 0 as $elem_ty);
+                                } else {
+                                    assert_eq!(b, 42 as $elem_ty);
+                                    assert_eq!(
+                                        b, vec.extract(idx - $id::lanes())
+                                    );
+                                }
+                            }
+                        }
+                    }
+
+                    // FIXME: wasm-bindgen-test does not support #[should_panic]
+                    // #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    #[cfg(not(target_arch = "wasm32"))]
+                    #[test]
+                    #[should_panic]
+                    fn write_to_slice_aligned_fail_lanes() {
+                        let mut aligned = A {
+                            data: [0 as $elem_ty; 2 * $id::lanes()],
+                        };
+                        let vec = $id::splat(42 as $elem_ty);
+                        // the slice is empty => panic
+                        unsafe {
+                            vec.write_to_slice_aligned(
+                                &mut aligned.data[2 * $id::lanes()..]
+                            )
+                        };
+                    }
+
+                    // FIXME: wasm-bindgen-test does not support #[should_panic]
+                    // #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    #[cfg(not(target_arch = "wasm32"))]
+                    #[test]
+                    #[should_panic]
+                    fn write_to_slice_aligned_fail_align() {
+                        unsafe {
+                            let mut aligned = A {
+                                data: [0 as $elem_ty; 2 * $id::lanes()],
+                            };
+
+                            // get a pointer to the front of data
+                            let ptr: *mut $elem_ty
+                                = aligned.data.as_mut_ptr() as *mut $elem_ty;
+                            // offset pointer by one element
+                            let ptr = ptr.wrapping_add(1);
+
+                            if ptr.align_offset(crate::mem::align_of::<$id>())
+                                == 0 {
+                                // the pointer is properly aligned, so
+                                // write_to_slice_aligned won't fail here (e.g.
+                                // this can happen for i128x1). So we panic to
+                                // make the "should_panic" test pass:
+                                panic!("ok");
+                            }
+
+                            // create a slice - this is safe, because the
+                            // elements of the slice exist, are properly
+                            // initialized, and properly aligned:
+                            let s: &mut [$elem_ty]
+                                = slice::from_raw_parts_mut(ptr, $id::lanes());
+                            // this should always panic because the slice
+                            // alignment does not match the alignment
+                            // requirements for the vector type:
+                            let vec = $id::splat(42 as $elem_ty);
+                            vec.write_to_slice_aligned(s);
+                        }
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/api/swap_bytes.rs b/vendor/packed_simd_2/src/api/swap_bytes.rs
new file mode 100644
index 000000000..53bba25bd
--- /dev/null
+++ b/vendor/packed_simd_2/src/api/swap_bytes.rs
@@ -0,0 +1,192 @@
+//! Horizontal swap bytes
+
+// Implements byte-order conversion methods for the vector type `$id`, plus
+// their tests (emitted through `test_if!` under the control of `$test_tt`).
+// `$elem_ty` is only used by the tests; `$elem_count` is not used.
+macro_rules! impl_swap_bytes {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl $id {
+            /// Reverses the byte order of the vector.
+            ///
+            /// The bytes of the vector as a whole are reversed, not the
+            /// bytes of each lane individually.
+            #[inline]
+            pub fn swap_bytes(self) -> Self {
+                super::codegen::swap_bytes::SwapBytes::swap_bytes(self)
+            }
+
+            /// Converts self to little endian from the target's endianness.
+            ///
+            /// On little endian this is a no-op. On big endian the bytes are
+            /// swapped.
+            #[inline]
+            pub fn to_le(self) -> Self {
+                #[cfg(target_endian = "little")]
+                {
+                    self
+                }
+                #[cfg(not(target_endian = "little"))]
+                {
+                    self.swap_bytes()
+                }
+            }
+
+            /// Converts self to big endian from the target's endianness.
+            ///
+            /// On big endian this is a no-op. On little endian the bytes are
+            /// swapped.
+            #[inline]
+            pub fn to_be(self) -> Self {
+                #[cfg(target_endian = "big")]
+                {
+                    self
+                }
+                #[cfg(not(target_endian = "big"))]
+                {
+                    self.swap_bytes()
+                }
+            }
+
+            /// Converts a vector from little endian to the target's endianness.
+            ///
+            /// On little endian this is a no-op. On big endian the bytes are
+            /// swapped.
+            #[inline]
+            pub fn from_le(x: Self) -> Self {
+                #[cfg(target_endian = "little")]
+                {
+                    x
+                }
+                #[cfg(not(target_endian = "little"))]
+                {
+                    x.swap_bytes()
+                }
+            }
+
+            /// Converts a vector from big endian to the target's endianness.
+            ///
+            /// On big endian this is a no-op. On little endian the bytes are
+            /// swapped.
+            #[inline]
+            pub fn from_be(x: Self) -> Self {
+                #[cfg(target_endian = "big")]
+                {
+                    x
+                }
+                #[cfg(not(target_endian = "big"))]
+                {
+                    x.swap_bytes()
+                }
+            }
+        }
+
+        test_if! {
+            $test_tt:
+            paste::item_with_macros! {
+                pub mod [<$id _swap_bytes>] {
+                    use super::*;
+
+                    // Over-aligned byte storage. A plain `[u8; 64]` only has
+                    // an alignment of 1, which is not enough to reinterpret
+                    // it as a slice of wider elements.
+                    #[repr(align(64))]
+                    struct AlignedBytes([u8; 64]);
+
+                    // Reference pattern: byte `i` has the value `i`.
+                    const BYTES: [u8; 64] = [
+                        0, 1, 2, 3, 4, 5, 6, 7,
+                        8, 9, 10, 11, 12, 13, 14, 15,
+                        16, 17, 18, 19, 20, 21, 22, 23,
+                        24, 25, 26, 27, 28, 29, 30, 31,
+                        32, 33, 34, 35, 36, 37, 38, 39,
+                        40, 41, 42, 43, 44, 45, 46, 47,
+                        48, 49, 50, 51, 52, 53, 54, 55,
+                        56, 57, 58, 59, 60, 61, 62, 63,
+                    ];
+
+                    // Loads a vector from the start of the reference pattern,
+                    // applies `$func` to it, writes it back in place, and
+                    // yields the resulting 64 bytes.
+                    macro_rules! swap {
+                        ($func: ident) => {{
+                            // catch possible future >512 vectors
+                            assert!(crate::mem::size_of::<$id>() <= 64);
+
+                            let mut actual = AlignedBytes(BYTES);
+                            // SAFETY: the storage is 64 bytes long and
+                            // 64-byte aligned, and the vector was checked
+                            // above to occupy at most 64 bytes.
+                            let elems: &mut [$elem_ty] = unsafe {
+                                slice::from_raw_parts_mut(
+                                    actual.0.as_mut_ptr() as *mut $elem_ty,
+                                    $id::lanes(),
+                                )
+                            };
+
+                            let vec = $id::from_slice_unaligned(elems);
+                            $id::$func(vec).write_to_slice_unaligned(elems);
+
+                            actual.0
+                        }};
+                    }
+
+                    // Checks that `$func` reverses the bytes of the vector.
+                    macro_rules! test_swap {
+                        ($func: ident) => {{
+                            let actual = swap!($func);
+                            // The first `size_of::<$id>()` pattern bytes in
+                            // reverse order.
+                            let expected =
+                                BYTES.iter().rev()
+                                .skip(64 - crate::mem::size_of::<$id>());
+                            assert!(actual.iter().zip(expected)
+                                    .all(|(x, y)| x == y));
+                        }};
+                    }
+
+                    // Checks that `$func` leaves the bytes of the vector
+                    // untouched.
+                    macro_rules! test_no_swap {
+                        ($func: ident) => {{
+                            let actual = swap!($func);
+                            let expected = BYTES.iter()
+                                .take(crate::mem::size_of::<$id>());
+
+                            assert!(actual.iter().zip(expected)
+                                    .all(|(x, y)| x == y));
+                        }};
+                    }
+
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn swap_bytes() {
+                        test_swap!(swap_bytes);
+                    }
+
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn to_le() {
+                        #[cfg(target_endian = "little")]
+                        {
+                            test_no_swap!(to_le);
+                        }
+                        #[cfg(not(target_endian = "little"))]
+                        {
+                            test_swap!(to_le);
+                        }
+                    }
+
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn to_be() {
+                        #[cfg(target_endian = "big")]
+                        {
+                            test_no_swap!(to_be);
+                        }
+                        #[cfg(not(target_endian = "big"))]
+                        {
+                            test_swap!(to_be);
+                        }
+                    }
+
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn from_le() {
+                        #[cfg(target_endian = "little")]
+                        {
+                            test_no_swap!(from_le);
+                        }
+                        #[cfg(not(target_endian = "little"))]
+                        {
+                            test_swap!(from_le);
+                        }
+                    }
+
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn from_be() {
+                        #[cfg(target_endian = "big")]
+                        {
+                            test_no_swap!(from_be);
+                        }
+                        #[cfg(not(target_endian = "big"))]
+                        {
+                            test_swap!(from_be);
+                        }
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/codegen.rs b/vendor/packed_simd_2/src/codegen.rs
new file mode 100644
index 000000000..9d1517e20
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen.rs
@@ -0,0 +1,62 @@
+//! Code-generation utilities
+
+crate mod bit_manip;
+crate mod llvm;
+crate mod math;
+crate mod reductions;
+crate mod shuffle;
+crate mod shuffle1_dyn;
+crate mod swap_bytes;
+
+macro_rules! impl_simd_array { // Generates a `#[repr(simd)]` tuple struct plus the sealed-trait impls tying it to its array type.
+    ([$elem_ty:ident; $elem_count:expr]: // element type and lane count, written like an array type
+     $tuple_id:ident | $($elem_tys:ident),*) => { // struct name | comma-separated list of field types
+        #[derive(Copy, Clone)]
+        #[repr(simd)]
+        pub struct $tuple_id($(crate $elem_tys),*);
+        //^^^^^^^ `pub` because the type is leaked through the `SimdArray` impl below (`type Tuple`)
+
+        impl crate::sealed::Seal for [$elem_ty; $elem_count] {}
+
+        impl crate::sealed::SimdArray for [$elem_ty; $elem_count] { // array type -> tuple struct mapping
+            type Tuple = $tuple_id;
+            type T = $elem_ty;
+            const N: usize = $elem_count;
+            type NT = [u32; $elem_count];
+        }
+
+        impl crate::sealed::Seal for $tuple_id {}
+        impl crate::sealed::Simd for $tuple_id { // tuple struct -> element type / lane count
+            type Element = $elem_ty;
+            const LANES: usize = $elem_count;
+            type LanesType = [u32; $elem_count];
+        }
+
+    }
+}
+
+crate mod pointer_sized_int;
+
+crate mod v16;
+crate use self::v16::*;
+
+crate mod v32;
+crate use self::v32::*;
+
+crate mod v64;
+crate use self::v64::*;
+
+crate mod v128;
+crate use self::v128::*;
+
+crate mod v256;
+crate use self::v256::*;
+
+crate mod v512;
+crate use self::v512::*;
+
+crate mod vSize;
+crate use self::vSize::*;
+
+crate mod vPtr;
+crate use self::vPtr::*;
diff --git a/vendor/packed_simd_2/src/codegen/bit_manip.rs b/vendor/packed_simd_2/src/codegen/bit_manip.rs
new file mode 100644
index 000000000..83c7d1987
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/bit_manip.rs
@@ -0,0 +1,354 @@
+//! LLVM bit manipulation intrinsics.
+#[rustfmt::skip]
+
+use crate::*;
+
+#[allow(improper_ctypes, dead_code)]
+extern "C" { // Bindings to LLVM's vector ctlz/cttz/ctpop intrinsics, selected by `#[link_name]`.
+    #[link_name = "llvm.ctlz.v2i8"] // ctlz: the scalar fallback in this file uses `leading_zeros()`
+    fn ctlz_u8x2(x: u8x2, is_zero_undef: bool) -> u8x2;
+    #[link_name = "llvm.ctlz.v4i8"]
+    fn ctlz_u8x4(x: u8x4, is_zero_undef: bool) -> u8x4;
+    #[link_name = "llvm.ctlz.v8i8"]
+    fn ctlz_u8x8(x: u8x8, is_zero_undef: bool) -> u8x8;
+    #[link_name = "llvm.ctlz.v16i8"]
+    fn ctlz_u8x16(x: u8x16, is_zero_undef: bool) -> u8x16;
+    #[link_name = "llvm.ctlz.v32i8"]
+    fn ctlz_u8x32(x: u8x32, is_zero_undef: bool) -> u8x32;
+    #[link_name = "llvm.ctlz.v64i8"]
+    fn ctlz_u8x64(x: u8x64, is_zero_undef: bool) -> u8x64;
+
+    #[link_name = "llvm.ctlz.v2i16"]
+    fn ctlz_u16x2(x: u16x2, is_zero_undef: bool) -> u16x2;
+    #[link_name = "llvm.ctlz.v4i16"]
+    fn ctlz_u16x4(x: u16x4, is_zero_undef: bool) -> u16x4;
+    #[link_name = "llvm.ctlz.v8i16"]
+    fn ctlz_u16x8(x: u16x8, is_zero_undef: bool) -> u16x8;
+    #[link_name = "llvm.ctlz.v16i16"]
+    fn ctlz_u16x16(x: u16x16, is_zero_undef: bool) -> u16x16;
+    #[link_name = "llvm.ctlz.v32i16"]
+    fn ctlz_u16x32(x: u16x32, is_zero_undef: bool) -> u16x32;
+
+    #[link_name = "llvm.ctlz.v2i32"]
+    fn ctlz_u32x2(x: u32x2, is_zero_undef: bool) -> u32x2;
+    #[link_name = "llvm.ctlz.v4i32"]
+    fn ctlz_u32x4(x: u32x4, is_zero_undef: bool) -> u32x4;
+    #[link_name = "llvm.ctlz.v8i32"]
+    fn ctlz_u32x8(x: u32x8, is_zero_undef: bool) -> u32x8;
+    #[link_name = "llvm.ctlz.v16i32"]
+    fn ctlz_u32x16(x: u32x16, is_zero_undef: bool) -> u32x16;
+
+    #[link_name = "llvm.ctlz.v2i64"]
+    fn ctlz_u64x2(x: u64x2, is_zero_undef: bool) -> u64x2;
+    #[link_name = "llvm.ctlz.v4i64"]
+    fn ctlz_u64x4(x: u64x4, is_zero_undef: bool) -> u64x4;
+    #[link_name = "llvm.ctlz.v8i64"]
+    fn ctlz_u64x8(x: u64x8, is_zero_undef: bool) -> u64x8;
+
+    #[link_name = "llvm.ctlz.v1i128"]
+    fn ctlz_u128x1(x: u128x1, is_zero_undef: bool) -> u128x1;
+    #[link_name = "llvm.ctlz.v2i128"]
+    fn ctlz_u128x2(x: u128x2, is_zero_undef: bool) -> u128x2;
+    #[link_name = "llvm.ctlz.v4i128"]
+    fn ctlz_u128x4(x: u128x4, is_zero_undef: bool) -> u128x4;
+
+    #[link_name = "llvm.cttz.v2i8"] // cttz: the scalar fallback in this file uses `trailing_zeros()`
+    fn cttz_u8x2(x: u8x2, is_zero_undef: bool) -> u8x2;
+    #[link_name = "llvm.cttz.v4i8"]
+    fn cttz_u8x4(x: u8x4, is_zero_undef: bool) -> u8x4;
+    #[link_name = "llvm.cttz.v8i8"]
+    fn cttz_u8x8(x: u8x8, is_zero_undef: bool) -> u8x8;
+    #[link_name = "llvm.cttz.v16i8"]
+    fn cttz_u8x16(x: u8x16, is_zero_undef: bool) -> u8x16;
+    #[link_name = "llvm.cttz.v32i8"]
+    fn cttz_u8x32(x: u8x32, is_zero_undef: bool) -> u8x32;
+    #[link_name = "llvm.cttz.v64i8"]
+    fn cttz_u8x64(x: u8x64, is_zero_undef: bool) -> u8x64;
+
+    #[link_name = "llvm.cttz.v2i16"]
+    fn cttz_u16x2(x: u16x2, is_zero_undef: bool) -> u16x2;
+    #[link_name = "llvm.cttz.v4i16"]
+    fn cttz_u16x4(x: u16x4, is_zero_undef: bool) -> u16x4;
+    #[link_name = "llvm.cttz.v8i16"]
+    fn cttz_u16x8(x: u16x8, is_zero_undef: bool) -> u16x8;
+    #[link_name = "llvm.cttz.v16i16"]
+    fn cttz_u16x16(x: u16x16, is_zero_undef: bool) -> u16x16;
+    #[link_name = "llvm.cttz.v32i16"]
+    fn cttz_u16x32(x: u16x32, is_zero_undef: bool) -> u16x32;
+
+    #[link_name = "llvm.cttz.v2i32"]
+    fn cttz_u32x2(x: u32x2, is_zero_undef: bool) -> u32x2;
+    #[link_name = "llvm.cttz.v4i32"]
+    fn cttz_u32x4(x: u32x4, is_zero_undef: bool) -> u32x4;
+    #[link_name = "llvm.cttz.v8i32"]
+    fn cttz_u32x8(x: u32x8, is_zero_undef: bool) -> u32x8;
+    #[link_name = "llvm.cttz.v16i32"]
+    fn cttz_u32x16(x: u32x16, is_zero_undef: bool) -> u32x16;
+
+    #[link_name = "llvm.cttz.v2i64"]
+    fn cttz_u64x2(x: u64x2, is_zero_undef: bool) -> u64x2;
+    #[link_name = "llvm.cttz.v4i64"]
+    fn cttz_u64x4(x: u64x4, is_zero_undef: bool) -> u64x4;
+    #[link_name = "llvm.cttz.v8i64"]
+    fn cttz_u64x8(x: u64x8, is_zero_undef: bool) -> u64x8;
+
+    #[link_name = "llvm.cttz.v1i128"]
+    fn cttz_u128x1(x: u128x1, is_zero_undef: bool) -> u128x1;
+    #[link_name = "llvm.cttz.v2i128"]
+    fn cttz_u128x2(x: u128x2, is_zero_undef: bool) -> u128x2;
+    #[link_name = "llvm.cttz.v4i128"]
+    fn cttz_u128x4(x: u128x4, is_zero_undef: bool) -> u128x4;
+
+    #[link_name = "llvm.ctpop.v2i8"] // ctpop: the scalar fallback in this file uses `count_ones()`
+    fn ctpop_u8x2(x: u8x2) -> u8x2;
+    #[link_name = "llvm.ctpop.v4i8"]
+    fn ctpop_u8x4(x: u8x4) -> u8x4;
+    #[link_name = "llvm.ctpop.v8i8"]
+    fn ctpop_u8x8(x: u8x8) -> u8x8;
+    #[link_name = "llvm.ctpop.v16i8"]
+    fn ctpop_u8x16(x: u8x16) -> u8x16;
+    #[link_name = "llvm.ctpop.v32i8"]
+    fn ctpop_u8x32(x: u8x32) -> u8x32;
+    #[link_name = "llvm.ctpop.v64i8"]
+    fn ctpop_u8x64(x: u8x64) -> u8x64;
+
+    #[link_name = "llvm.ctpop.v2i16"]
+    fn ctpop_u16x2(x: u16x2) -> u16x2;
+    #[link_name = "llvm.ctpop.v4i16"]
+    fn ctpop_u16x4(x: u16x4) -> u16x4;
+    #[link_name = "llvm.ctpop.v8i16"]
+    fn ctpop_u16x8(x: u16x8) -> u16x8;
+    #[link_name = "llvm.ctpop.v16i16"]
+    fn ctpop_u16x16(x: u16x16) -> u16x16;
+    #[link_name = "llvm.ctpop.v32i16"]
+    fn ctpop_u16x32(x: u16x32) -> u16x32;
+
+    #[link_name = "llvm.ctpop.v2i32"]
+    fn ctpop_u32x2(x: u32x2) -> u32x2;
+    #[link_name = "llvm.ctpop.v4i32"]
+    fn ctpop_u32x4(x: u32x4) -> u32x4;
+    #[link_name = "llvm.ctpop.v8i32"]
+    fn ctpop_u32x8(x: u32x8) -> u32x8;
+    #[link_name = "llvm.ctpop.v16i32"]
+    fn ctpop_u32x16(x: u32x16) -> u32x16;
+
+    #[link_name = "llvm.ctpop.v2i64"]
+    fn ctpop_u64x2(x: u64x2) -> u64x2;
+    #[link_name = "llvm.ctpop.v4i64"]
+    fn ctpop_u64x4(x: u64x4) -> u64x4;
+    #[link_name = "llvm.ctpop.v8i64"]
+    fn ctpop_u64x8(x: u64x8) -> u64x8;
+
+    #[link_name = "llvm.ctpop.v1i128"]
+    fn ctpop_u128x1(x: u128x1) -> u128x1;
+    #[link_name = "llvm.ctpop.v2i128"]
+    fn ctpop_u128x2(x: u128x2) -> u128x2;
+    #[link_name = "llvm.ctpop.v4i128"]
+    fn ctpop_u128x4(x: u128x4) -> u128x4;
+}
+
+crate trait BitManip { // Crate-internal per-lane bit-counting operations on integer vectors.
+    fn ctpop(self) -> Self; // per lane: number of set bits (`count_ones` in the scalar fallback)
+    fn ctlz(self) -> Self; // per lane: number of leading zero bits (`leading_zeros` in the scalar fallback)
+    fn cttz(self) -> Self; // per lane: number of trailing zero bits (`trailing_zeros` in the scalar fallback)
+}
+
+macro_rules! impl_bit_manip { // Implements `BitManip` for integer vector types; one arm per strategy.
+    (inner: $ty:ident, $scalar:ty, $uty:ident, // arm: cast `$ty` to `$uty` and call the LLVM intrinsics
+     $ctpop:ident, $ctlz:ident, $cttz:ident) => {
+        // FIXME: several LLVM intrinsics break on s390x https://github.com/rust-lang-nursery/packed_simd/issues/192
+        #[cfg(target_arch = "s390x")]
+        impl_bit_manip! { scalar: $ty, $scalar }
+        #[cfg(not(target_arch = "s390x"))]
+        impl BitManip for $ty {
+            #[inline]
+            fn ctpop(self) -> Self {
+                let y: $uty = self.cast();
+                unsafe { $ctpop(y).cast() }
+            }
+
+            #[inline]
+            fn ctlz(self) -> Self {
+                let y: $uty = self.cast();
+                // the ctxx intrinsics need a compile-time constant
+                // `is_zero_undef`; `false` is passed here and in `cttz`
+                unsafe { $ctlz(y, false).cast() }
+            }
+
+            #[inline]
+            fn cttz(self) -> Self {
+                let y: $uty = self.cast();
+                unsafe { $cttz(y, false).cast() }
+            }
+        }
+    };
+    (sized_inner: $ty:ident, $scalar:ty, $uty:ident) => { // arm: cast to `$uty` and forward to `$uty`'s ctpop/ctlz/cttz
+        #[cfg(target_arch = "s390x")]
+        impl_bit_manip! { scalar: $ty, $scalar }
+        #[cfg(not(target_arch = "s390x"))]
+        impl BitManip for $ty {
+            #[inline]
+            fn ctpop(self) -> Self {
+                let y: $uty = self.cast();
+                $uty::ctpop(y).cast()
+            }
+
+            #[inline]
+            fn ctlz(self) -> Self {
+                let y: $uty = self.cast();
+                $uty::ctlz(y).cast()
+            }
+
+            #[inline]
+            fn cttz(self) -> Self {
+                let y: $uty = self.cast();
+                $uty::cttz(y).cast()
+            }
+        }
+    };
+    (scalar: $ty:ident, $scalar:ty) => { // arm: lane-by-lane fallback built on `extract`/`replace`
+        impl BitManip for $ty {
+            #[inline]
+            fn ctpop(self) -> Self {
+                let mut ones = self; // starts as a copy of `self`; every lane is overwritten in the loop
+                for i in 0..Self::lanes() {
+                    ones = ones
+                        .replace(i, self.extract(i).count_ones() as $scalar);
+                }
+                ones
+            }
+
+            #[inline]
+            fn ctlz(self) -> Self {
+                let mut lz = self; // starts as a copy of `self`; every lane is overwritten in the loop
+                for i in 0..Self::lanes() {
+                    lz = lz.replace(
+                        i,
+                        self.extract(i).leading_zeros() as $scalar,
+                    );
+                }
+                lz
+            }
+
+            #[inline]
+            fn cttz(self) -> Self {
+                let mut tz = self; // starts as a copy of `self`; every lane is overwritten in the loop
+                for i in 0..Self::lanes() {
+                    tz = tz.replace(
+                        i,
+                        self.extract(i).trailing_zeros() as $scalar,
+                    );
+                }
+                tz
+            }
+        }
+    };
+    ($uty:ident, $uscalar:ty, $ity:ident, $iscalar:ty, // entry arm: unsigned + signed vector sharing the unsigned intrinsics
+     $ctpop:ident, $ctlz:ident, $cttz:ident) => {
+        impl_bit_manip! { inner: $uty, $uscalar, $uty, $ctpop, $ctlz, $cttz }
+        impl_bit_manip! { inner: $ity, $iscalar, $uty, $ctpop, $ctlz, $cttz }
+    };
+    (sized: $usize:ident, $uscalar:ty, $isize:ident, // entry arm: pointer-sized vectors forwarding to fixed-width `$ty`
+     $iscalar:ty, $ty:ident) => {
+        impl_bit_manip! { sized_inner: $usize, $uscalar, $ty }
+        impl_bit_manip! { sized_inner: $isize, $iscalar, $ty }
+    };
+}
+
+impl_bit_manip! { u8x2   ,   u8, i8x2, i8,   ctpop_u8x2,   ctlz_u8x2,   cttz_u8x2   }
+impl_bit_manip! { u8x4   ,   u8, i8x4, i8,   ctpop_u8x4,   ctlz_u8x4,   cttz_u8x4   }
+#[cfg(not(target_arch = "aarch64"))] // aarch64 uses the hand-written u8x8/i8x8 impls below (cttz.v8i8 workaround)
+impl_bit_manip! { u8x8   ,   u8, i8x8, i8,   ctpop_u8x8,   ctlz_u8x8,   cttz_u8x8   }
+impl_bit_manip! { u8x16  ,  u8, i8x16, i8,  ctpop_u8x16,  ctlz_u8x16,  cttz_u8x16  }
+impl_bit_manip! { u8x32  ,  u8, i8x32, i8,  ctpop_u8x32,  ctlz_u8x32,  cttz_u8x32  }
+impl_bit_manip! { u8x64  ,  u8, i8x64, i8,  ctpop_u8x64,  ctlz_u8x64,  cttz_u8x64  }
+impl_bit_manip! { u16x2  ,  u16, i16x2, i16,  ctpop_u16x2,  ctlz_u16x2,  cttz_u16x2  }
+impl_bit_manip! { u16x4  ,  u16, i16x4, i16,  ctpop_u16x4,  ctlz_u16x4,  cttz_u16x4  }
+impl_bit_manip! { u16x8  ,  u16, i16x8, i16,  ctpop_u16x8,  ctlz_u16x8,  cttz_u16x8  }
+impl_bit_manip! { u16x16 , u16, i16x16, i16, ctpop_u16x16, ctlz_u16x16, cttz_u16x16 }
+impl_bit_manip! { u16x32 , u16, i16x32, i16, ctpop_u16x32, ctlz_u16x32, cttz_u16x32 }
+impl_bit_manip! { u32x2  ,  u32, i32x2, i32,  ctpop_u32x2,  ctlz_u32x2,  cttz_u32x2  }
+impl_bit_manip! { u32x4  ,  u32, i32x4, i32,  ctpop_u32x4,  ctlz_u32x4,  cttz_u32x4  }
+impl_bit_manip! { u32x8  ,  u32, i32x8, i32,  ctpop_u32x8,  ctlz_u32x8,  cttz_u32x8  }
+impl_bit_manip! { u32x16 , u32, i32x16, i32, ctpop_u32x16, ctlz_u32x16, cttz_u32x16 }
+impl_bit_manip! { u64x2  ,  u64, i64x2, i64,  ctpop_u64x2,  ctlz_u64x2,  cttz_u64x2  }
+impl_bit_manip! { u64x4  ,  u64, i64x4, i64,  ctpop_u64x4,  ctlz_u64x4,  cttz_u64x4  }
+impl_bit_manip! { u64x8  ,  u64, i64x8, i64,  ctpop_u64x8,  ctlz_u64x8,  cttz_u64x8  }
+impl_bit_manip! { u128x1 , u128, i128x1, i128, ctpop_u128x1, ctlz_u128x1, cttz_u128x1 }
+impl_bit_manip! { u128x2 , u128, i128x2, i128, ctpop_u128x2, ctlz_u128x2, cttz_u128x2 }
+impl_bit_manip! { u128x4 , u128, i128x4, i128, ctpop_u128x4, ctlz_u128x4, cttz_u128x4 }
+
+#[cfg(target_arch = "aarch64")] // aarch64-only: replaces the macro-generated u8x8 impl
+impl BitManip for u8x8 {
+    #[inline]
+    fn ctpop(self) -> Self {
+        let y: u8x8 = self.cast(); // cast to the same type (u8x8 -> u8x8)
+        unsafe { ctpop_u8x8(y).cast() }
+    }
+
+    #[inline]
+    fn ctlz(self) -> Self {
+        let y: u8x8 = self.cast(); // cast to the same type (u8x8 -> u8x8)
+        unsafe { ctlz_u8x8(y, false).cast() }
+    }
+
+    #[inline]
+    fn cttz(self) -> Self {
+        // FIXME: LLVM cttz.v8i8 broken on aarch64 https://github.com/rust-lang-nursery/packed_simd/issues/191
+        // OPTIMIZE: adapt the algorithm used for v8i16/etc to Rust's aarch64
+        // intrinsics
+        // Until then, compute each lane with the scalar `trailing_zeros`.
+        let mut tz = self;
+        for i in 0..Self::lanes() {
+            tz = tz.replace(i, self.extract(i).trailing_zeros() as u8);
+        }
+        tz
+    }
+}
+#[cfg(target_arch = "aarch64")] // aarch64-only: replaces the macro-generated i8x8 impl
+impl BitManip for i8x8 {
+    #[inline]
+    fn ctpop(self) -> Self {
+        let y: u8x8 = self.cast(); // the intrinsic is declared on the unsigned vector type
+        unsafe { ctpop_u8x8(y).cast() }
+    }
+
+    #[inline]
+    fn ctlz(self) -> Self {
+        let y: u8x8 = self.cast(); // the intrinsic is declared on the unsigned vector type
+        unsafe { ctlz_u8x8(y, false).cast() }
+    }
+
+    #[inline]
+    fn cttz(self) -> Self {
+        // FIXME: LLVM cttz.v8i8 broken on aarch64 https://github.com/rust-lang-nursery/packed_simd/issues/191
+        // OPTIMIZE: adapt the algorithm used for v8i16/etc to Rust's aarch64
+        // intrinsics
+        // Until then, compute each lane with the scalar `trailing_zeros`.
+        let mut tz = self;
+        for i in 0..Self::lanes() {
+            tz = tz.replace(i, self.extract(i).trailing_zeros() as i8);
+        }
+        tz
+    }
+}
+
+cfg_if! { // usize/isize vectors forward to the unsigned fixed-width vector matching `target_pointer_width`.
+    if #[cfg(target_pointer_width = "8")] {
+        impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u8x2 }
+        impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u8x4 }
+        impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u8x8 }
+    } else if #[cfg(target_pointer_width = "16")] {
+        impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u16x2 }
+        impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u16x4 }
+        impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u16x8 }
+    } else if #[cfg(target_pointer_width = "32")] {
+        impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u32x2 }
+        impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u32x4 }
+        impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u32x8 }
+    } else if #[cfg(target_pointer_width = "64")] {
+        impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u64x2 }
+        impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u64x4 }
+        impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u64x8 }
+    } else {
+        compile_error!("unsupported target_pointer_width"); // any other pointer width fails the build
+    }
+}
diff --git a/vendor/packed_simd_2/src/codegen/llvm.rs b/vendor/packed_simd_2/src/codegen/llvm.rs
new file mode 100644
index 000000000..93c6ce6b7
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/llvm.rs
@@ -0,0 +1,107 @@
+//! LLVM's platform intrinsics
+#![allow(dead_code)]
+
+use crate::sealed::Shuffle;
+#[allow(unused_imports)] // FIXME: spurious warning?
+use crate::sealed::Simd;
+
+// Shuffle intrinsics: expanded in users' crates, therefore public.
+extern "platform-intrinsic" {
+    // FIXME: Passing these intrinsics an `idx` array with an index that is
+    // out-of-bounds will produce a monomorphization-time error.
+    // https://github.com/rust-lang-nursery/packed_simd/issues/21
+    #[rustc_args_required_const(2)] // argument 2 (`idx`) must be a compile-time constant
+    pub fn simd_shuffle2<T, U>(x: T, y: T, idx: [u32; 2]) -> U
+    where
+        T: Simd,
+        <T as Simd>::Element: Shuffle<[u32; 2], Output = U>; // `U` is fixed by the element type and index count
+
+    #[rustc_args_required_const(2)]
+    pub fn simd_shuffle4<T, U>(x: T, y: T, idx: [u32; 4]) -> U
+    where
+        T: Simd,
+        <T as Simd>::Element: Shuffle<[u32; 4], Output = U>;
+
+    #[rustc_args_required_const(2)]
+    pub fn simd_shuffle8<T, U>(x: T, y: T, idx: [u32; 8]) -> U
+    where
+        T: Simd,
+        <T as Simd>::Element: Shuffle<[u32; 8], Output = U>;
+
+    #[rustc_args_required_const(2)]
+    pub fn simd_shuffle16<T, U>(x: T, y: T, idx: [u32; 16]) -> U
+    where
+        T: Simd,
+        <T as Simd>::Element: Shuffle<[u32; 16], Output = U>;
+
+    #[rustc_args_required_const(2)]
+    pub fn simd_shuffle32<T, U>(x: T, y: T, idx: [u32; 32]) -> U
+    where
+        T: Simd,
+        <T as Simd>::Element: Shuffle<[u32; 32], Output = U>;
+
+    #[rustc_args_required_const(2)]
+    pub fn simd_shuffle64<T, U>(x: T, y: T, idx: [u32; 64]) -> U
+    where
+        T: Simd,
+        <T as Simd>::Element: Shuffle<[u32; 64], Output = U>;
+}
+
+pub use self::simd_shuffle16 as __shuffle_vector16;
+pub use self::simd_shuffle2 as __shuffle_vector2;
+pub use self::simd_shuffle32 as __shuffle_vector32;
+pub use self::simd_shuffle4 as __shuffle_vector4;
+pub use self::simd_shuffle64 as __shuffle_vector64;
+pub use self::simd_shuffle8 as __shuffle_vector8;
+
+extern "platform-intrinsic" { // Crate-internal intrinsic declarations; unlike the shuffles above, none are `pub`.
+    crate fn simd_eq<T, U>(x: T, y: T) -> U; // comparisons: result type `U` differs from the operand type `T`
+    crate fn simd_ne<T, U>(x: T, y: T) -> U;
+    crate fn simd_lt<T, U>(x: T, y: T) -> U;
+    crate fn simd_le<T, U>(x: T, y: T) -> U;
+    crate fn simd_gt<T, U>(x: T, y: T) -> U;
+    crate fn simd_ge<T, U>(x: T, y: T) -> U;
+
+    crate fn simd_insert<T, U>(x: T, idx: u32, val: U) -> T; // lane access: `T` is the vector, `U` the lane value
+    crate fn simd_extract<T, U>(x: T, idx: u32) -> U;
+
+    crate fn simd_cast<T, U>(x: T) -> U;
+
+    crate fn simd_add<T>(x: T, y: T) -> T; // binary operations: operands and result share one type
+    crate fn simd_sub<T>(x: T, y: T) -> T;
+    crate fn simd_mul<T>(x: T, y: T) -> T;
+    crate fn simd_div<T>(x: T, y: T) -> T;
+    crate fn simd_rem<T>(x: T, y: T) -> T;
+    crate fn simd_shl<T>(x: T, y: T) -> T;
+    crate fn simd_shr<T>(x: T, y: T) -> T;
+    crate fn simd_and<T>(x: T, y: T) -> T;
+    crate fn simd_or<T>(x: T, y: T) -> T;
+    crate fn simd_xor<T>(x: T, y: T) -> T;
+
+    crate fn simd_reduce_add_unordered<T, U>(x: T) -> U; // reductions: take a `T`, return a `U` (or `bool`)
+    crate fn simd_reduce_mul_unordered<T, U>(x: T) -> U;
+    crate fn simd_reduce_add_ordered<T, U>(x: T, acc: U) -> U; // the ordered variants also take an accumulator
+    crate fn simd_reduce_mul_ordered<T, U>(x: T, acc: U) -> U;
+    crate fn simd_reduce_min<T, U>(x: T) -> U;
+    crate fn simd_reduce_max<T, U>(x: T) -> U;
+    crate fn simd_reduce_min_nanless<T, U>(x: T) -> U;
+    crate fn simd_reduce_max_nanless<T, U>(x: T) -> U;
+    crate fn simd_reduce_and<T, U>(x: T) -> U;
+    crate fn simd_reduce_or<T, U>(x: T) -> U;
+    crate fn simd_reduce_xor<T, U>(x: T) -> U;
+    crate fn simd_reduce_all<T>(x: T) -> bool;
+    crate fn simd_reduce_any<T>(x: T) -> bool;
+
+    crate fn simd_select<M, T>(m: M, a: T, b: T) -> T; // `M` is the mask type
+
+    crate fn simd_fmin<T>(a: T, b: T) -> T;
+    crate fn simd_fmax<T>(a: T, b: T) -> T;
+
+    crate fn simd_fsqrt<T>(a: T) -> T;
+    crate fn simd_fma<T>(a: T, b: T, c: T) -> T;
+
+    crate fn simd_gather<T, P, M>(value: T, pointers: P, mask: M) -> T; // `P` is the pointer-vector type, `M` the mask type
+    crate fn simd_scatter<T, P, M>(value: T, pointers: P, mask: M);
+
+    crate fn simd_bitmask<T, U>(value: T) -> U;
+}
diff --git a/vendor/packed_simd_2/src/codegen/math.rs b/vendor/packed_simd_2/src/codegen/math.rs
new file mode 100644
index 000000000..f3997c7f1
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/math.rs
@@ -0,0 +1,3 @@
+//! Vertical math operations
+
+crate mod float;
diff --git a/vendor/packed_simd_2/src/codegen/math/float.rs b/vendor/packed_simd_2/src/codegen/math/float.rs
new file mode 100644
index 000000000..3743b4990
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/math/float.rs
@@ -0,0 +1,19 @@
+//! Vertical floating-point math operations.
+#![allow(clippy::useless_transmute)]
+
+#[macro_use]
+crate mod macros;
+crate mod abs;
+crate mod cos;
+crate mod cos_pi;
+crate mod exp;
+crate mod ln;
+crate mod mul_add;
+crate mod mul_adde;
+crate mod powf;
+crate mod sin;
+crate mod sin_cos_pi;
+crate mod sin_pi;
+crate mod sqrt;
+crate mod sqrte;
+crate mod tanh;
diff --git a/vendor/packed_simd_2/src/codegen/math/float/abs.rs b/vendor/packed_simd_2/src/codegen/math/float/abs.rs
new file mode 100644
index 000000000..bc4421f61
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/math/float/abs.rs
@@ -0,0 +1,103 @@
+//! Vertical floating-point `fabs`
+#![allow(unused)]
+
+// FIXME 64-bit 1 elem vectors fabs
+
+use crate::*;
+
+crate trait Abs { // Crate-internal hook for the vertical `fabs`; impls are generated per vector type below.
+    fn abs(self) -> Self;
+}
+
+#[allow(improper_ctypes)]
+extern "C" { // Bindings to LLVM's `llvm.fabs.*` intrinsics, selected by `#[link_name]`.
+    #[link_name = "llvm.fabs.v2f32"]
+    fn fabs_v2f32(x: f32x2) -> f32x2;
+    #[link_name = "llvm.fabs.v4f32"]
+    fn fabs_v4f32(x: f32x4) -> f32x4;
+    #[link_name = "llvm.fabs.v8f32"]
+    fn fabs_v8f32(x: f32x8) -> f32x8;
+    #[link_name = "llvm.fabs.v16f32"]
+    fn fabs_v16f32(x: f32x16) -> f32x16;
+    /* FIXME 64-bit single elem vectors
+    #[link_name = "llvm.fabs.v1f64"]
+    fn fabs_v1f64(x: f64x1) -> f64x1;
+     */
+    #[link_name = "llvm.fabs.v2f64"]
+    fn fabs_v2f64(x: f64x2) -> f64x2;
+    #[link_name = "llvm.fabs.v4f64"]
+    fn fabs_v4f64(x: f64x4) -> f64x4;
+    #[link_name = "llvm.fabs.v8f64"]
+    fn fabs_v8f64(x: f64x8) -> f64x8;
+
+    #[link_name = "llvm.fabs.f32"] // scalar variants, used by the `[f32; N]` / `[f64; N]` forms of `impl_unary!` below
+    fn fabs_f32(x: f32) -> f32;
+    #[link_name = "llvm.fabs.f64"]
+    fn fabs_f64(x: f64) -> f64;
+}
+
+gen_unary_impl_table!(Abs, abs);
+
+cfg_if! { // Picks the `Abs` backend per target: scalar intrinsics on s390x, Sleef on x86_64 + `sleef-sys`, LLVM intrinsics otherwise.
+    if #[cfg(target_arch = "s390x")] {
+        // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+        impl_unary!(f32x2[f32; 2]: fabs_f32); // NOTE(review): `[f32; N]` form presumably applies the scalar fn per lane — confirm in `macros`
+        impl_unary!(f32x4[f32; 4]: fabs_f32);
+        impl_unary!(f32x8[f32; 8]: fabs_f32);
+        impl_unary!(f32x16[f32; 16]: fabs_f32);
+
+        impl_unary!(f64x2[f64; 2]: fabs_f64);
+        impl_unary!(f64x4[f64; 4]: fabs_f64);
+        impl_unary!(f64x8[f64; 8]: fabs_f64);
+    } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+        use sleef_sys::*;
+        cfg_if! { // widest available feature first: avx2, then avx, then sse4.2
+            if #[cfg(target_feature = "avx2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_fabsf4_avx2128); // NOTE(review): `t`/`h`/`q` presumably mean widen / split in halves / split in quarters — confirm in `macros`
+                impl_unary!(f32x16[h => f32x8]: Sleef_fabsf8_avx2);
+                impl_unary!(f64x8[h => f64x4]: Sleef_fabsd4_avx2);
+
+                impl_unary!(f32x4: Sleef_fabsf4_avx2128);
+                impl_unary!(f32x8: Sleef_fabsf8_avx2);
+                impl_unary!(f64x2: Sleef_fabsd2_avx2128);
+                impl_unary!(f64x4: Sleef_fabsd4_avx2);
+            } else if #[cfg(target_feature = "avx")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_fabsf4_sse4);
+                impl_unary!(f32x16[h => f32x8]: Sleef_fabsf8_avx);
+                impl_unary!(f64x8[h => f64x4]: Sleef_fabsd4_avx);
+
+                impl_unary!(f32x4: Sleef_fabsf4_sse4);
+                impl_unary!(f32x8: Sleef_fabsf8_avx);
+                impl_unary!(f64x2: Sleef_fabsd2_sse4);
+                impl_unary!(f64x4: Sleef_fabsd4_avx);
+            } else if #[cfg(target_feature = "sse4.2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_fabsf4_sse4);
+                impl_unary!(f32x16[q => f32x4]: Sleef_fabsf4_sse4);
+                impl_unary!(f64x8[q => f64x2]: Sleef_fabsd2_sse4);
+
+                impl_unary!(f32x4: Sleef_fabsf4_sse4);
+                impl_unary!(f32x8[h => f32x4]: Sleef_fabsf4_sse4);
+                impl_unary!(f64x2: Sleef_fabsd2_sse4);
+                impl_unary!(f64x4[h => f64x2]: Sleef_fabsd2_sse4);
+            } else { // none of the features above: same mapping as the outer fallback
+                impl_unary!(f32x2[f32; 2]: fabs_f32);
+                impl_unary!(f32x16: fabs_v16f32);
+                impl_unary!(f64x8: fabs_v8f64);
+
+                impl_unary!(f32x4: fabs_v4f32);
+                impl_unary!(f32x8: fabs_v8f32);
+                impl_unary!(f64x2: fabs_v2f64);
+                impl_unary!(f64x4: fabs_v4f64);
+            }
+        }
+    } else { // generic fallback: LLVM vector intrinsics, except f32x2 which goes through the scalar `fabs_f32`
+        impl_unary!(f32x2[f32; 2]: fabs_f32);
+        impl_unary!(f32x4: fabs_v4f32);
+        impl_unary!(f32x8: fabs_v8f32);
+        impl_unary!(f32x16: fabs_v16f32);
+
+        impl_unary!(f64x2: fabs_v2f64);
+        impl_unary!(f64x4: fabs_v4f64);
+        impl_unary!(f64x8: fabs_v8f64);
+    }
+}
diff --git a/vendor/packed_simd_2/src/codegen/math/float/cos.rs b/vendor/packed_simd_2/src/codegen/math/float/cos.rs
new file mode 100644
index 000000000..50f6c16da
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/math/float/cos.rs
@@ -0,0 +1,103 @@
+//! Vertical floating-point `cos`
+#![allow(unused)]
+
+// FIXME 64-bit 1 elem vector cos
+
+use crate::*;
+
+crate trait Cos { // Crate-internal hook for the vertical `cos`; impls are generated per vector type below.
+    fn cos(self) -> Self;
+}
+
+#[allow(improper_ctypes)]
+extern "C" { // Bindings to LLVM's `llvm.cos.*` intrinsics, selected by `#[link_name]`.
+    #[link_name = "llvm.cos.v2f32"]
+    fn cos_v2f32(x: f32x2) -> f32x2;
+    #[link_name = "llvm.cos.v4f32"]
+    fn cos_v4f32(x: f32x4) -> f32x4;
+    #[link_name = "llvm.cos.v8f32"]
+    fn cos_v8f32(x: f32x8) -> f32x8;
+    #[link_name = "llvm.cos.v16f32"]
+    fn cos_v16f32(x: f32x16) -> f32x16;
+    /* FIXME 64-bit single elem vectors
+    #[link_name = "llvm.cos.v1f64"]
+    fn cos_v1f64(x: f64x1) -> f64x1;
+     */
+    #[link_name = "llvm.cos.v2f64"]
+    fn cos_v2f64(x: f64x2) -> f64x2;
+    #[link_name = "llvm.cos.v4f64"]
+    fn cos_v4f64(x: f64x4) -> f64x4;
+    #[link_name = "llvm.cos.v8f64"]
+    fn cos_v8f64(x: f64x8) -> f64x8;
+
+    #[link_name = "llvm.cos.f32"] // scalar variants, used by the `[f32; N]` / `[f64; N]` forms of `impl_unary!` below
+    fn cos_f32(x: f32) -> f32;
+    #[link_name = "llvm.cos.f64"]
+    fn cos_f64(x: f64) -> f64;
+}
+
+gen_unary_impl_table!(Cos, cos);
+
+cfg_if! { // Picks the `Cos` backend per target: scalar intrinsics on s390x, Sleef on x86_64 + `sleef-sys`, LLVM intrinsics otherwise.
+    if #[cfg(target_arch = "s390x")] {
+        // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+        impl_unary!(f32x2[f32; 2]: cos_f32); // NOTE(review): `[f32; N]` form presumably applies the scalar fn per lane — confirm in `macros`
+        impl_unary!(f32x4[f32; 4]: cos_f32);
+        impl_unary!(f32x8[f32; 8]: cos_f32);
+        impl_unary!(f32x16[f32; 16]: cos_f32);
+
+        impl_unary!(f64x2[f64; 2]: cos_f64);
+        impl_unary!(f64x4[f64; 4]: cos_f64);
+        impl_unary!(f64x8[f64; 8]: cos_f64);
+    } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+        use sleef_sys::*;
+        cfg_if! { // widest available feature first: avx2, then avx, then sse4.2
+            if #[cfg(target_feature = "avx2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_cosf4_u10avx2128); // NOTE(review): `t`/`h`/`q` presumably mean widen / split in halves / split in quarters — confirm in `macros`
+                impl_unary!(f32x16[h => f32x8]: Sleef_cosf8_u10avx2);
+                impl_unary!(f64x8[h => f64x4]: Sleef_cosd4_u10avx2);
+
+                impl_unary!(f32x4: Sleef_cosf4_u10avx2128);
+                impl_unary!(f32x8: Sleef_cosf8_u10avx2);
+                impl_unary!(f64x2: Sleef_cosd2_u10avx2128);
+                impl_unary!(f64x4: Sleef_cosd4_u10avx2);
+            } else if #[cfg(target_feature = "avx")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_cosf4_u10sse4);
+                impl_unary!(f32x16[h => f32x8]: Sleef_cosf8_u10avx);
+                impl_unary!(f64x8[h => f64x4]: Sleef_cosd4_u10avx);
+
+                impl_unary!(f32x4: Sleef_cosf4_u10sse4);
+                impl_unary!(f32x8: Sleef_cosf8_u10avx);
+                impl_unary!(f64x2: Sleef_cosd2_u10sse4);
+                impl_unary!(f64x4: Sleef_cosd4_u10avx);
+            } else if #[cfg(target_feature = "sse4.2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_cosf4_u10sse4);
+                impl_unary!(f32x16[q => f32x4]: Sleef_cosf4_u10sse4);
+                impl_unary!(f64x8[q => f64x2]: Sleef_cosd2_u10sse4);
+
+                impl_unary!(f32x4: Sleef_cosf4_u10sse4);
+                impl_unary!(f32x8[h => f32x4]: Sleef_cosf4_u10sse4);
+                impl_unary!(f64x2: Sleef_cosd2_u10sse4);
+                impl_unary!(f64x4[h => f64x2]: Sleef_cosd2_u10sse4);
+            } else { // none of the features above: same mapping as the outer fallback
+                impl_unary!(f32x2[f32; 2]: cos_f32);
+                impl_unary!(f32x16: cos_v16f32);
+                impl_unary!(f64x8: cos_v8f64);
+
+                impl_unary!(f32x4: cos_v4f32);
+                impl_unary!(f32x8: cos_v8f32);
+                impl_unary!(f64x2: cos_v2f64);
+                impl_unary!(f64x4: cos_v4f64);
+            }
+        }
+    } else { // generic fallback: LLVM vector intrinsics, except f32x2 which goes through the scalar `cos_f32`
+        impl_unary!(f32x2[f32; 2]: cos_f32);
+        impl_unary!(f32x4: cos_v4f32);
+        impl_unary!(f32x8: cos_v8f32);
+        impl_unary!(f32x16: cos_v16f32);
+
+        impl_unary!(f64x2: cos_v2f64);
+        impl_unary!(f64x4: cos_v4f64);
+        impl_unary!(f64x8: cos_v8f64);
+    }
+}
diff --git a/vendor/packed_simd_2/src/codegen/math/float/cos_pi.rs b/vendor/packed_simd_2/src/codegen/math/float/cos_pi.rs
new file mode 100644
index 000000000..ebff5fd1c
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/math/float/cos_pi.rs
@@ -0,0 +1,87 @@
+//! Vertical floating-point `cos_pi`
+#![allow(unused)]
+
+// FIXME 64-bit 1 elem vectors cos_pi
+
+use crate::*;
+
+crate trait CosPi { // Crate-internal hook for the vertical `cos_pi`; impls are generated per vector type below.
+    fn cos_pi(self) -> Self;
+}
+
+gen_unary_impl_table!(CosPi, cos_pi);
+
+macro_rules! impl_def { // Default `CosPi` impl: multiply every lane by `$PI`, then take `cos`.
+    ($vid:ident, $PI:path) => { // vector type, path to the PI constant of matching precision
+        impl CosPi for $vid {
+            #[inline]
+            fn cos_pi(self) -> Self {
+                (self * Self::splat($PI)).cos()
+            }
+        }
+    };
+}
+macro_rules! impl_def32 { // `impl_def!` with the `f32` PI constant.
+    ($vid:ident) => {
+        impl_def!($vid, crate::f32::consts::PI);
+    };
+}
+macro_rules! impl_def64 { // `impl_def!` with the `f64` PI constant.
+    ($vid:ident) => {
+        impl_def!($vid, crate::f64::consts::PI);
+    };
+}
+
+cfg_if! { // Picks the `CosPi` backend: Sleef `cospi` on x86_64 + `sleef-sys`, otherwise the `impl_def*` fallback.
+    if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+        use sleef_sys::*;
+        cfg_if! { // widest available feature first: avx2, then avx, then sse4.2
+            if #[cfg(target_feature = "avx2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_cospif4_u05avx2128); // NOTE(review): `t`/`h`/`q` presumably mean widen / split in halves / split in quarters — confirm in `macros`
+                impl_unary!(f32x16[h => f32x8]: Sleef_cospif8_u05avx2);
+                impl_unary!(f64x8[h => f64x4]: Sleef_cospid4_u05avx2);
+
+                impl_unary!(f32x4: Sleef_cospif4_u05avx2128);
+                impl_unary!(f32x8: Sleef_cospif8_u05avx2);
+                impl_unary!(f64x2: Sleef_cospid2_u05avx2128);
+                impl_unary!(f64x4: Sleef_cospid4_u05avx2);
+            } else if #[cfg(target_feature = "avx")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_cospif4_u05sse4);
+                impl_unary!(f32x16[h => f32x8]: Sleef_cospif8_u05avx);
+                impl_unary!(f64x8[h => f64x4]: Sleef_cospid4_u05avx);
+
+                impl_unary!(f32x4: Sleef_cospif4_u05sse4);
+                impl_unary!(f32x8: Sleef_cospif8_u05avx);
+                impl_unary!(f64x2: Sleef_cospid2_u05sse4);
+                impl_unary!(f64x4: Sleef_cospid4_u05avx);
+            } else if #[cfg(target_feature = "sse4.2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_cospif4_u05sse4);
+                impl_unary!(f32x16[q => f32x4]: Sleef_cospif4_u05sse4);
+                impl_unary!(f64x8[q => f64x2]: Sleef_cospid2_u05sse4);
+
+                impl_unary!(f32x4: Sleef_cospif4_u05sse4);
+                impl_unary!(f32x8[h => f32x4]: Sleef_cospif4_u05sse4);
+                impl_unary!(f64x2: Sleef_cospid2_u05sse4);
+                impl_unary!(f64x4[h => f64x2]: Sleef_cospid2_u05sse4);
+            } else { // none of the features above: `(x * PI).cos()` fallback
+                impl_def32!(f32x2);
+                impl_def32!(f32x4);
+                impl_def32!(f32x8);
+                impl_def32!(f32x16);
+
+                impl_def64!(f64x2);
+                impl_def64!(f64x4);
+                impl_def64!(f64x8);
+            }
+        }
+    } else { // every other target: `(x * PI).cos()` fallback
+        impl_def32!(f32x2);
+        impl_def32!(f32x4);
+        impl_def32!(f32x8);
+        impl_def32!(f32x16);
+
+        impl_def64!(f64x2);
+        impl_def64!(f64x4);
+        impl_def64!(f64x8);
+    }
+}
diff --git a/vendor/packed_simd_2/src/codegen/math/float/exp.rs b/vendor/packed_simd_2/src/codegen/math/float/exp.rs
new file mode 100644
index 000000000..00d10e9fa
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/math/float/exp.rs
@@ -0,0 +1,112 @@
+//! Vertical floating-point `exp`
+#![allow(unused)]
+
+// FIXME 64-bit single elem vectors missing
+
+use crate::*;
+
+crate trait Exp {
+    fn exp(self) -> Self;
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.exp.v2f32"]
+    fn exp_v2f32(x: f32x2) -> f32x2;
+    #[link_name = "llvm.exp.v4f32"]
+    fn exp_v4f32(x: f32x4) -> f32x4;
+    #[link_name = "llvm.exp.v8f32"]
+    fn exp_v8f32(x: f32x8) -> f32x8;
+    #[link_name = "llvm.exp.v16f32"]
+    fn exp_v16f32(x: f32x16) -> f32x16;
+    /* FIXME 64-bit single elem vectors
+    #[link_name = "llvm.exp.v1f64"]
+    fn exp_v1f64(x: f64x1) -> f64x1;
+     */
+    #[link_name = "llvm.exp.v2f64"]
+    fn exp_v2f64(x: f64x2) -> f64x2;
+    #[link_name = "llvm.exp.v4f64"]
+    fn exp_v4f64(x: f64x4) -> f64x4;
+    #[link_name = "llvm.exp.v8f64"]
+    fn exp_v8f64(x: f64x8) -> f64x8;
+
+    #[link_name = "llvm.exp.f32"]
+    fn exp_f32(x: f32) -> f32;
+    #[link_name = "llvm.exp.f64"]
+    fn exp_f64(x: f64) -> f64;
+}
+
+gen_unary_impl_table!(Exp, exp);
+
+cfg_if! {
+    if #[cfg(target_arch = "s390x")] {
+        // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+        impl_unary!(f32x2[f32; 2]: exp_f32);
+        impl_unary!(f32x4[f32; 4]: exp_f32);
+        impl_unary!(f32x8[f32; 8]: exp_f32);
+        impl_unary!(f32x16[f32; 16]: exp_f32);
+
+        impl_unary!(f64x2[f64; 2]: exp_f64);
+        impl_unary!(f64x4[f64; 4]: exp_f64);
+        impl_unary!(f64x8[f64; 8]: exp_f64);
+    } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+        use sleef_sys::*;
+        cfg_if! {
+            if #[cfg(target_feature = "avx2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10avx2128);
+                impl_unary!(f32x16[h => f32x8]: Sleef_expf8_u10avx2);
+                impl_unary!(f64x8[h => f64x4]: Sleef_expd4_u10avx2);
+
+                impl_unary!(f32x4: Sleef_expf4_u10avx2128);
+                impl_unary!(f32x8: Sleef_expf8_u10avx2);
+                impl_unary!(f64x2: Sleef_expd2_u10avx2128);
+                impl_unary!(f64x4: Sleef_expd4_u10avx2);
+            } else if #[cfg(target_feature = "avx")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10sse4);
+                impl_unary!(f32x16[h => f32x8]: Sleef_expf8_u10avx);
+                impl_unary!(f64x8[h => f64x4]: Sleef_expd4_u10avx);
+
+                impl_unary!(f32x4: Sleef_expf4_u10sse4);
+                impl_unary!(f32x8: Sleef_expf8_u10avx);
+                impl_unary!(f64x2: Sleef_expd2_u10sse4);
+                impl_unary!(f64x4: Sleef_expd4_u10avx);
+            } else if #[cfg(target_feature = "sse4.2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10sse4);
+                impl_unary!(f32x16[q => f32x4]: Sleef_expf4_u10sse4);
+                impl_unary!(f64x8[q => f64x2]: Sleef_expd2_u10sse4);
+
+                impl_unary!(f32x4: Sleef_expf4_u10sse4);
+                impl_unary!(f32x8[h => f32x4]: Sleef_expf4_u10sse4);
+                impl_unary!(f64x2: Sleef_expd2_u10sse4);
+                impl_unary!(f64x4[h => f64x2]: Sleef_expd2_u10sse4);
+            } else if #[cfg(target_feature = "sse2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10sse2);
+                impl_unary!(f32x16[q => f32x4]: Sleef_expf4_u10sse2);
+                impl_unary!(f64x8[q => f64x2]: Sleef_expd2_u10sse2);
+
+                impl_unary!(f32x4: Sleef_expf4_u10sse2);
+                impl_unary!(f32x8[h => f32x4]: Sleef_expf4_u10sse2);
+                impl_unary!(f64x2: Sleef_expd2_u10sse2);
+                impl_unary!(f64x4[h => f64x2]: Sleef_expd2_u10sse2);
+            } else {
+                impl_unary!(f32x2[f32; 2]: exp_f32);
+                impl_unary!(f32x16: exp_v16f32);
+                impl_unary!(f64x8: exp_v8f64);
+
+                impl_unary!(f32x4: exp_v4f32);
+                impl_unary!(f32x8: exp_v8f32);
+                impl_unary!(f64x2: exp_v2f64);
+                impl_unary!(f64x4: exp_v4f64);
+            }
+        }
+    } else {
+        impl_unary!(f32x2[f32; 2]: exp_f32);
+        impl_unary!(f32x4: exp_v4f32);
+        impl_unary!(f32x8: exp_v8f32);
+        impl_unary!(f32x16: exp_v16f32);
+
+        impl_unary!(f64x2: exp_v2f64);
+        impl_unary!(f64x4: exp_v4f64);
+        impl_unary!(f64x8: exp_v8f64);
+    }
+}
diff --git a/vendor/packed_simd_2/src/codegen/math/float/ln.rs b/vendor/packed_simd_2/src/codegen/math/float/ln.rs
new file mode 100644
index 000000000..88a5a6c6c
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/math/float/ln.rs
@@ -0,0 +1,112 @@
+//! Vertical floating-point `ln`
+#![allow(unused)]
+
+// FIXME 64-bit single elem vectors missing
+
+use crate::*;
+
+crate trait Ln {
+    fn ln(self) -> Self;
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.log.v2f32"]
+    fn ln_v2f32(x: f32x2) -> f32x2;
+    #[link_name = "llvm.log.v4f32"]
+    fn ln_v4f32(x: f32x4) -> f32x4;
+    #[link_name = "llvm.log.v8f32"]
+    fn ln_v8f32(x: f32x8) -> f32x8;
+    #[link_name = "llvm.log.v16f32"]
+    fn ln_v16f32(x: f32x16) -> f32x16;
+    /* FIXME 64-bit single elem vectors
+    #[link_name = "llvm.log.v1f64"]
+    fn ln_v1f64(x: f64x1) -> f64x1;
+     */
+    #[link_name = "llvm.log.v2f64"]
+    fn ln_v2f64(x: f64x2) -> f64x2;
+    #[link_name = "llvm.log.v4f64"]
+    fn ln_v4f64(x: f64x4) -> f64x4;
+    #[link_name = "llvm.log.v8f64"]
+    fn ln_v8f64(x: f64x8) -> f64x8;
+
+    #[link_name = "llvm.log.f32"]
+    fn ln_f32(x: f32) -> f32;
+    #[link_name = "llvm.log.f64"]
+    fn ln_f64(x: f64) -> f64;
+}
+
+gen_unary_impl_table!(Ln, ln);
+
+cfg_if! {
+    if #[cfg(target_arch = "s390x")] {
+        // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+        impl_unary!(f32x2[f32; 2]: ln_f32);
+        impl_unary!(f32x4[f32; 4]: ln_f32);
+        impl_unary!(f32x8[f32; 8]: ln_f32);
+        impl_unary!(f32x16[f32; 16]: ln_f32);
+
+        impl_unary!(f64x2[f64; 2]: ln_f64);
+        impl_unary!(f64x4[f64; 4]: ln_f64);
+        impl_unary!(f64x8[f64; 8]: ln_f64);
+    } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+        use sleef_sys::*;
+        cfg_if! {
+            if #[cfg(target_feature = "avx2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10avx2128);
+                impl_unary!(f32x16[h => f32x8]: Sleef_logf8_u10avx2);
+                impl_unary!(f64x8[h => f64x4]: Sleef_logd4_u10avx2);
+
+                impl_unary!(f32x4: Sleef_logf4_u10avx2128);
+                impl_unary!(f32x8: Sleef_logf8_u10avx2);
+                impl_unary!(f64x2: Sleef_logd2_u10avx2128);
+                impl_unary!(f64x4: Sleef_logd4_u10avx2);
+            } else if #[cfg(target_feature = "avx")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10sse4);
+                impl_unary!(f32x16[h => f32x8]: Sleef_logf8_u10avx);
+                impl_unary!(f64x8[h => f64x4]: Sleef_logd4_u10avx);
+
+                impl_unary!(f32x4: Sleef_logf4_u10sse4);
+                impl_unary!(f32x8: Sleef_logf8_u10avx);
+                impl_unary!(f64x2: Sleef_logd2_u10sse4);
+                impl_unary!(f64x4: Sleef_logd4_u10avx);
+            } else if #[cfg(target_feature = "sse4.2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10sse4);
+                impl_unary!(f32x16[q => f32x4]: Sleef_logf4_u10sse4);
+                impl_unary!(f64x8[q => f64x2]: Sleef_logd2_u10sse4);
+
+                impl_unary!(f32x4: Sleef_logf4_u10sse4);
+                impl_unary!(f32x8[h => f32x4]: Sleef_logf4_u10sse4);
+                impl_unary!(f64x2: Sleef_logd2_u10sse4);
+                impl_unary!(f64x4[h => f64x2]: Sleef_logd2_u10sse4);
+            } else if #[cfg(target_feature = "sse2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10sse2);
+                impl_unary!(f32x16[q => f32x4]: Sleef_logf4_u10sse2);
+                impl_unary!(f64x8[q => f64x2]: Sleef_logd2_u10sse2);
+
+                impl_unary!(f32x4: Sleef_logf4_u10sse2);
+                impl_unary!(f32x8[h => f32x4]: Sleef_logf4_u10sse2);
+                impl_unary!(f64x2: Sleef_logd2_u10sse2);
+                impl_unary!(f64x4[h => f64x2]: Sleef_logd2_u10sse2);
+            } else {
+                impl_unary!(f32x2[f32; 2]: ln_f32);
+                impl_unary!(f32x16: ln_v16f32);
+                impl_unary!(f64x8: ln_v8f64);
+
+                impl_unary!(f32x4: ln_v4f32);
+                impl_unary!(f32x8: ln_v8f32);
+                impl_unary!(f64x2: ln_v2f64);
+                impl_unary!(f64x4: ln_v4f64);
+            }
+        }
+    } else {
+        impl_unary!(f32x2[f32; 2]: ln_f32);
+        impl_unary!(f32x4: ln_v4f32);
+        impl_unary!(f32x8: ln_v8f32);
+        impl_unary!(f32x16: ln_v16f32);
+
+        impl_unary!(f64x2: ln_v2f64);
+        impl_unary!(f64x4: ln_v4f64);
+        impl_unary!(f64x8: ln_v8f64);
+    }
+}
diff --git a/vendor/packed_simd_2/src/codegen/math/float/macros.rs b/vendor/packed_simd_2/src/codegen/math/float/macros.rs
new file mode 100644
index 000000000..02d0ca3f5
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/math/float/macros.rs
@@ -0,0 +1,559 @@
+//! Utility macros
+#![allow(unused)]
+
+
+macro_rules! impl_unary_ {
+    // 1:1 mapping: a single call to `$fun` on the whole vector
+    (vec | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+     $fun:ident) => {
+        impl $trait_id for $vec_id {
+            #[inline]
+            fn $trait_method(self) -> Self {
+                unsafe {
+                    use crate::mem::transmute;
+                    transmute($fun(transmute(self)))
+                }
+            }
+        }
+    };
+    // implementation mapping 1:1 for when `$fun` is a generic function
+    // like some of the fp math rustc intrinsics (e.g. `fn fun<T>(x: T) -> T`).
+    (gen | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+     $fun:ident) => {
+        impl $trait_id for $vec_id {
+            #[inline]
+            fn $trait_method(self) -> Self {
+                unsafe {
+                    use crate::mem::transmute;
+                    transmute($fun(self.0))
+                }
+            }
+        }
+    };
+    (scalar | $trait_id:ident, $trait_method:ident, // per-lane scalar call
+     $vec_id:ident, [$sid:ident; $scount:expr], $fun:ident) => {
+        impl $trait_id for $vec_id {
+            #[inline]
+            fn $trait_method(self) -> Self {
+                unsafe {
+                    union U {
+                        vec: $vec_id,
+                        scalars: [$sid; $scount],
+                    }
+                    let mut scalars = U { vec: self }.scalars;
+                    for i in &mut scalars {
+                        *i = $fun(*i);
+                    }
+                    U { scalars }.vec
+                }
+            }
+        }
+    };
+    // implementation calling fun twice on each of the vector halves:
+    (halves | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+     $vech_id:ident, $fun:ident) => {
+        impl $trait_id for $vec_id {
+            #[inline]
+            fn $trait_method(self) -> Self {
+                unsafe {
+                    use crate::mem::transmute;
+                    union U {
+                        vec: $vec_id,
+                        halves: [$vech_id; 2],
+                    }
+
+                    let mut halves = U { vec: self }.halves;
+
+                    *halves.get_unchecked_mut(0) =
+                        transmute($fun(transmute(*halves.get_unchecked(0))));
+                    *halves.get_unchecked_mut(1) =
+                        transmute($fun(transmute(*halves.get_unchecked(1))));
+
+                    U { halves }.vec
+                }
+            }
+        }
+    };
+    // implementation calling fun four times on each of the vector quarters:
+    (quarter | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+     $vecq_id:ident, $fun:ident) => {
+        impl $trait_id for $vec_id {
+            #[inline]
+            fn $trait_method(self) -> Self {
+                unsafe {
+                    use crate::mem::transmute;
+                    union U {
+                        vec: $vec_id,
+                        quarters: [$vecq_id; 4],
+                    }
+
+                    let mut quarters = U { vec: self }.quarters;
+
+                    *quarters.get_unchecked_mut(0) =
+                        transmute($fun(transmute(*quarters.get_unchecked(0))));
+                    *quarters.get_unchecked_mut(1) =
+                        transmute($fun(transmute(*quarters.get_unchecked(1))));
+                    *quarters.get_unchecked_mut(2) =
+                        transmute($fun(transmute(*quarters.get_unchecked(2))));
+                    *quarters.get_unchecked_mut(3) =
+                        transmute($fun(transmute(*quarters.get_unchecked(3))));
+
+                    U { quarters }.vec
+                }
+            }
+        }
+    };
+    // calls fun once on a vector twice as large (upper half zero-padded):
+    (twice | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+     $vect_id:ident, $fun:ident) => {
+        impl $trait_id for $vec_id {
+            #[inline]
+            fn $trait_method(self) -> Self {
+                unsafe {
+                    use crate::mem::{transmute, zeroed};
+
+                    union U {
+                        vec: [$vec_id; 2],
+                        twice: $vect_id,
+                    }
+
+                    let twice = U { vec: [self, zeroed()] }.twice;
+                    let twice = transmute($fun(transmute(twice)));
+
+                    *(U { twice }.vec.get_unchecked(0))
+                }
+            }
+        }
+    };
+}
+
+macro_rules! gen_unary_impl_table {
+    ($trait_id:ident, $trait_method:ident) => {
+        macro_rules! impl_unary {
+            ($vid:ident: $fun:ident) => {
+                impl_unary_!(vec | $trait_id, $trait_method, $vid, $fun);
+            };
+            ($vid:ident[g]: $fun:ident) => {
+                impl_unary_!(gen | $trait_id, $trait_method, $vid, $fun);
+            };
+            ($vid:ident[$sid:ident; $sc:expr]: $fun:ident) => {
+                impl_unary_!(
+                    scalar | $trait_id,
+                    $trait_method,
+                    $vid,
+                    [$sid; $sc],
+                    $fun
+                );
+            };
+            ($vid:ident[s]: $fun:ident) => { // FIXME: no 4-arg `scalar` arm
+                impl_unary_!(scalar | $trait_id, $trait_method, $vid, $fun);
+            };
+            ($vid:ident[h => $vid_h:ident]: $fun:ident) => {
+                impl_unary_!(
+                    halves | $trait_id,
+                    $trait_method,
+                    $vid,
+                    $vid_h,
+                    $fun
+                );
+            };
+            ($vid:ident[q => $vid_q:ident]: $fun:ident) => {
+                impl_unary_!(
+                    quarter | $trait_id,
+                    $trait_method,
+                    $vid,
+                    $vid_q,
+                    $fun
+                );
+            };
+            ($vid:ident[t => $vid_t:ident]: $fun:ident) => {
+                impl_unary_!(
+                    twice | $trait_id,
+                    $trait_method,
+                    $vid,
+                    $vid_t,
+                    $fun
+                );
+            };
+        }
+    };
+}
+
+macro_rules! impl_tertiary_ {
+    // 1:1 mapping: a single call to `$fun` on the whole vectors
+    (vec | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+     $fun:ident) => {
+        impl $trait_id for $vec_id {
+            #[inline]
+            fn $trait_method(self, y: Self, z: Self) -> Self {
+                unsafe {
+                    use crate::mem::transmute;
+                    transmute($fun(
+                        transmute(self),
+                        transmute(y),
+                        transmute(z),
+                    ))
+                }
+            }
+        }
+    };
+    (scalar | $trait_id:ident, $trait_method:ident, // per-lane scalar call
+     $vec_id:ident, [$sid:ident; $scount:expr], $fun:ident) => {
+        impl $trait_id for $vec_id {
+            #[inline]
+            fn $trait_method(self, y: Self, z: Self) -> Self {
+                unsafe {
+                    union U {
+                        vec: $vec_id,
+                        scalars: [$sid; $scount],
+                    }
+                    let mut x = U { vec: self }.scalars;
+                    let y = U { vec: y }.scalars;
+                    let z = U { vec: z }.scalars;
+                    for ((x, y), z) in x.iter_mut().zip(&y).zip(&z) {
+                        *x = $fun(*x, *y, *z);
+                    }
+                    U { scalars: x }.vec
+                }
+            }
+        }
+    };
+    // implementation calling fun twice on each of the vector halves:
+    (halves | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+     $vech_id:ident, $fun:ident) => {
+        impl $trait_id for $vec_id {
+            #[inline]
+            fn $trait_method(self, y: Self, z: Self) -> Self {
+                unsafe {
+                    use crate::mem::transmute;
+                    union U {
+                        vec: $vec_id,
+                        halves: [$vech_id; 2],
+                    }
+
+                    let mut x_halves = U { vec: self }.halves;
+                    let y_halves = U { vec: y }.halves;
+                    let z_halves = U { vec: z }.halves;
+
+                    *x_halves.get_unchecked_mut(0) = transmute($fun(
+                        transmute(*x_halves.get_unchecked(0)),
+                        transmute(*y_halves.get_unchecked(0)),
+                        transmute(*z_halves.get_unchecked(0)),
+                    ));
+                    *x_halves.get_unchecked_mut(1) = transmute($fun(
+                        transmute(*x_halves.get_unchecked(1)),
+                        transmute(*y_halves.get_unchecked(1)),
+                        transmute(*z_halves.get_unchecked(1)),
+                    ));
+
+                    U { halves: x_halves }.vec
+                }
+            }
+        }
+    };
+    // implementation calling fun four times on each of the vector quarters:
+    (quarter | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+     $vecq_id:ident, $fun:ident) => {
+        impl $trait_id for $vec_id {
+            #[inline]
+            fn $trait_method(self, y: Self, z: Self) -> Self {
+                unsafe {
+                    use crate::mem::transmute;
+                    union U {
+                        vec: $vec_id,
+                        quarters: [$vecq_id; 4],
+                    }
+
+                    let mut x_quarters = U { vec: self }.quarters;
+                    let y_quarters = U { vec: y }.quarters;
+                    let z_quarters = U { vec: z }.quarters;
+
+                    *x_quarters.get_unchecked_mut(0) = transmute($fun(
+                        transmute(*x_quarters.get_unchecked(0)),
+                        transmute(*y_quarters.get_unchecked(0)),
+                        transmute(*z_quarters.get_unchecked(0)),
+                    ));
+
+                    *x_quarters.get_unchecked_mut(1) = transmute($fun(
+                        transmute(*x_quarters.get_unchecked(1)),
+                        transmute(*y_quarters.get_unchecked(1)),
+                        transmute(*z_quarters.get_unchecked(1)),
+                    ));
+
+                    *x_quarters.get_unchecked_mut(2) = transmute($fun(
+                        transmute(*x_quarters.get_unchecked(2)),
+                        transmute(*y_quarters.get_unchecked(2)),
+                        transmute(*z_quarters.get_unchecked(2)),
+                    ));
+
+                    *x_quarters.get_unchecked_mut(3) = transmute($fun(
+                        transmute(*x_quarters.get_unchecked(3)),
+                        transmute(*y_quarters.get_unchecked(3)),
+                        transmute(*z_quarters.get_unchecked(3)),
+                    ));
+
+                    U { quarters: x_quarters }.vec
+                }
+            }
+        }
+    };
+    // calls fun once on vectors twice as large (upper halves zero-padded):
+    (twice | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+     $vect_id:ident, $fun:ident) => {
+        impl $trait_id for $vec_id {
+            #[inline]
+            fn $trait_method(self, y: Self, z: Self) -> Self {
+                unsafe {
+                    use crate::mem::{transmute, zeroed};
+
+                    union U {
+                        vec: [$vec_id; 2],
+                        twice: $vect_id,
+                    }
+
+                    let x_twice = U { vec: [self, zeroed()] }.twice;
+                    let y_twice = U { vec: [y, zeroed()] }.twice;
+                    let z_twice = U { vec: [z, zeroed()] }.twice;
+                    let twice: $vect_id = transmute($fun(
+                        transmute(x_twice),
+                        transmute(y_twice),
+                        transmute(z_twice),
+                    ));
+
+                    *(U { twice }.vec.get_unchecked(0))
+                }
+            }
+        }
+    };
+}
+
+macro_rules! gen_tertiary_impl_table {
+    ($trait_id:ident, $trait_method:ident) => {
+        macro_rules! impl_tertiary {
+            ($vid:ident: $fun:ident) => {
+                impl_tertiary_!(vec | $trait_id, $trait_method, $vid, $fun);
+            };
+            ($vid:ident[$sid:ident; $sc:expr]: $fun:ident) => {
+                impl_tertiary_!(
+                    scalar | $trait_id,
+                    $trait_method,
+                    $vid,
+                    [$sid; $sc],
+                    $fun
+                );
+            };
+            ($vid:ident[s]: $fun:ident) => { // FIXME: no 4-arg `scalar` arm
+                impl_tertiary_!(scalar | $trait_id, $trait_method, $vid, $fun);
+            };
+            ($vid:ident[h => $vid_h:ident]: $fun:ident) => {
+                impl_tertiary_!(
+                    halves | $trait_id,
+                    $trait_method,
+                    $vid,
+                    $vid_h,
+                    $fun
+                );
+            };
+            ($vid:ident[q => $vid_q:ident]: $fun:ident) => {
+                impl_tertiary_!(
+                    quarter | $trait_id,
+                    $trait_method,
+                    $vid,
+                    $vid_q,
+                    $fun
+                );
+            };
+            ($vid:ident[t => $vid_t:ident]: $fun:ident) => {
+                impl_tertiary_!(
+                    twice | $trait_id,
+                    $trait_method,
+                    $vid,
+                    $vid_t,
+                    $fun
+                );
+            };
+        }
+    };
+}
+
+macro_rules! impl_binary_ {
+    // 1:1 mapping: a single call to `$fun` on the whole vectors
+    (vec | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+     $fun:ident) => {
+        impl $trait_id for $vec_id {
+            #[inline]
+            fn $trait_method(self, y: Self) -> Self {
+                unsafe {
+                    use crate::mem::transmute;
+                    transmute($fun(transmute(self), transmute(y)))
+                }
+            }
+        }
+    };
+    (scalar | $trait_id:ident, $trait_method:ident, // per-lane scalar call
+     $vec_id:ident, [$sid:ident; $scount:expr], $fun:ident) => {
+        impl $trait_id for $vec_id {
+            #[inline]
+            fn $trait_method(self, y: Self) -> Self {
+                unsafe {
+                    union U {
+                        vec: $vec_id,
+                        scalars: [$sid; $scount],
+                    }
+                    let mut x = U { vec: self }.scalars;
+                    let y = U { vec: y }.scalars;
+                    for (x, y) in x.iter_mut().zip(&y) {
+                        *x = $fun(*x, *y);
+                    }
+                    U { scalars: x }.vec
+                }
+            }
+        }
+    };
+    // implementation calling fun twice on each of the vector halves:
+    (halves | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+     $vech_id:ident, $fun:ident) => {
+        impl $trait_id for $vec_id {
+            #[inline]
+            fn $trait_method(self, y: Self) -> Self {
+                unsafe {
+                    use crate::mem::transmute;
+                    union U {
+                        vec: $vec_id,
+                        halves: [$vech_id; 2],
+                    }
+
+                    let mut x_halves = U { vec: self }.halves;
+                    let y_halves = U { vec: y }.halves;
+
+                    *x_halves.get_unchecked_mut(0) = transmute($fun(
+                        transmute(*x_halves.get_unchecked(0)),
+                        transmute(*y_halves.get_unchecked(0)),
+                    ));
+                    *x_halves.get_unchecked_mut(1) = transmute($fun(
+                        transmute(*x_halves.get_unchecked(1)),
+                        transmute(*y_halves.get_unchecked(1)),
+                    ));
+
+                    U { halves: x_halves }.vec
+                }
+            }
+        }
+    };
+    // implementation calling fun four times on each of the vector quarters:
+    (quarter | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+     $vecq_id:ident, $fun:ident) => {
+        impl $trait_id for $vec_id {
+            #[inline]
+            fn $trait_method(self, y: Self) -> Self {
+                unsafe {
+                    use crate::mem::transmute;
+                    union U {
+                        vec: $vec_id,
+                        quarters: [$vecq_id; 4],
+                    }
+
+                    let mut x_quarters = U { vec: self }.quarters;
+                    let y_quarters = U { vec: y }.quarters;
+
+                    *x_quarters.get_unchecked_mut(0) = transmute($fun(
+                        transmute(*x_quarters.get_unchecked(0)),
+                        transmute(*y_quarters.get_unchecked(0)),
+                    ));
+
+                    *x_quarters.get_unchecked_mut(1) = transmute($fun(
+                        transmute(*x_quarters.get_unchecked(1)),
+                        transmute(*y_quarters.get_unchecked(1)),
+                    ));
+
+                    *x_quarters.get_unchecked_mut(2) = transmute($fun(
+                        transmute(*x_quarters.get_unchecked(2)),
+                        transmute(*y_quarters.get_unchecked(2)),
+                    ));
+
+                    *x_quarters.get_unchecked_mut(3) = transmute($fun(
+                        transmute(*x_quarters.get_unchecked(3)),
+                        transmute(*y_quarters.get_unchecked(3)),
+                    ));
+
+                    U { quarters: x_quarters }.vec
+                }
+            }
+        }
+    };
+    // calls fun once on vectors twice as large (upper halves zero-padded):
+    (twice | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+     $vect_id:ident, $fun:ident) => {
+        impl $trait_id for $vec_id {
+            #[inline]
+            fn $trait_method(self, y: Self) -> Self {
+                unsafe {
+                    use crate::mem::{transmute, zeroed};
+
+                    union U {
+                        vec: [$vec_id; 2],
+                        twice: $vect_id,
+                    }
+
+                    let x_twice = U { vec: [self, zeroed()] }.twice;
+                    let y_twice = U { vec: [y, zeroed()] }.twice;
+                    let twice: $vect_id = transmute($fun(
+                        transmute(x_twice),
+                        transmute(y_twice),
+                    ));
+
+                    *(U { twice }.vec.get_unchecked(0))
+                }
+            }
+        }
+    };
+}
+
+macro_rules! gen_binary_impl_table {
+    ($trait_id:ident, $trait_method:ident) => {
+        macro_rules! impl_binary {
+            ($vid:ident: $fun:ident) => {
+                impl_binary_!(vec | $trait_id, $trait_method, $vid, $fun);
+            };
+            ($vid:ident[$sid:ident; $sc:expr]: $fun:ident) => {
+                impl_binary_!(
+                    scalar | $trait_id,
+                    $trait_method,
+                    $vid,
+                    [$sid; $sc],
+                    $fun
+                );
+            };
+            ($vid:ident[s]: $fun:ident) => { // FIXME: no 4-arg `scalar` arm
+                impl_binary_!(scalar | $trait_id, $trait_method, $vid, $fun);
+            };
+            ($vid:ident[h => $vid_h:ident]: $fun:ident) => {
+                impl_binary_!(
+                    halves | $trait_id,
+                    $trait_method,
+                    $vid,
+                    $vid_h,
+                    $fun
+                );
+            };
+            ($vid:ident[q => $vid_q:ident]: $fun:ident) => {
+                impl_binary_!(
+                    quarter | $trait_id,
+                    $trait_method,
+                    $vid,
+                    $vid_q,
+                    $fun
+                );
+            };
+            ($vid:ident[t => $vid_t:ident]: $fun:ident) => {
+                impl_binary_!(
+                    twice | $trait_id,
+                    $trait_method,
+                    $vid,
+                    $vid_t,
+                    $fun
+                );
+            };
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/codegen/math/float/mul_add.rs b/vendor/packed_simd_2/src/codegen/math/float/mul_add.rs
new file mode 100644
index 000000000..f48a57dc4
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/math/float/mul_add.rs
@@ -0,0 +1,109 @@
+//! Vertical floating-point `mul_add`
+#![allow(unused)]
+use crate::*;
+
+// FIXME: 64-bit 1 element mul_add
+
+crate trait MulAdd {
+    fn mul_add(self, y: Self, z: Self) -> Self;
+}
+
+#[cfg(not(target_arch = "s390x"))]
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.fma.v2f32"]
+    fn fma_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2;
+    #[link_name = "llvm.fma.v4f32"]
+    fn fma_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4;
+    #[link_name = "llvm.fma.v8f32"]
+    fn fma_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8;
+    #[link_name = "llvm.fma.v16f32"]
+    fn fma_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16;
+    /* FIXME 64-bit single elem vectors
+    #[link_name = "llvm.fma.v1f64"]
+    fn fma_v1f64(x: f64x1, y: f64x1, z: f64x1) -> f64x1;
+    */
+    #[link_name = "llvm.fma.v2f64"]
+    fn fma_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2;
+    #[link_name = "llvm.fma.v4f64"]
+    fn fma_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4;
+    #[link_name = "llvm.fma.v8f64"]
+    fn fma_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8;
+}
+
+gen_tertiary_impl_table!(MulAdd, mul_add);
+
+cfg_if! {
+    if #[cfg(target_arch = "s390x")] {
+        // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+        macro_rules! impl_broken {
+            ($id:ident) => {
+                impl MulAdd for $id {
+                    #[inline]
+                    fn mul_add(self, y: Self, z: Self) -> Self {
+                        self * y + z
+                    }
+                }
+            };
+        }
+
+        impl_broken!(f32x2);
+        impl_broken!(f32x4);
+        impl_broken!(f32x8);
+        impl_broken!(f32x16);
+
+        impl_broken!(f64x2);
+        impl_broken!(f64x4);
+        impl_broken!(f64x8);
+    } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+        use sleef_sys::*;
+        cfg_if! {
+            if #[cfg(target_feature = "avx2")] {
+                impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_avx2128);
+                impl_tertiary!(f32x16[h => f32x8]: Sleef_fmaf8_avx2);
+                impl_tertiary!(f64x8[h => f64x4]: Sleef_fmad4_avx2);
+
+                impl_tertiary!(f32x4: Sleef_fmaf4_avx2128);
+                impl_tertiary!(f32x8: Sleef_fmaf8_avx2);
+                impl_tertiary!(f64x2: Sleef_fmad2_avx2128);
+                impl_tertiary!(f64x4: Sleef_fmad4_avx2);
+            } else if #[cfg(target_feature = "avx")] {
+                impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_sse4);
+                impl_tertiary!(f32x16[h => f32x8]: Sleef_fmaf8_avx);
+                impl_tertiary!(f64x8[h => f64x4]: Sleef_fmad4_avx);
+
+                impl_tertiary!(f32x4: Sleef_fmaf4_sse4);
+                impl_tertiary!(f32x8: Sleef_fmaf8_avx);
+                impl_tertiary!(f64x2: Sleef_fmad2_sse4);
+                impl_tertiary!(f64x4: Sleef_fmad4_avx);
+            } else if #[cfg(target_feature = "sse4.2")] {
+                impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_sse4);
+                impl_tertiary!(f32x16[q => f32x4]: Sleef_fmaf4_sse4);
+                impl_tertiary!(f64x8[q => f64x2]: Sleef_fmad2_sse4);
+
+                impl_tertiary!(f32x4: Sleef_fmaf4_sse4);
+                impl_tertiary!(f32x8[h => f32x4]: Sleef_fmaf4_sse4);
+                impl_tertiary!(f64x2: Sleef_fmad2_sse4);
+                impl_tertiary!(f64x4[h => f64x2]: Sleef_fmad2_sse4);
+            } else {
+                impl_tertiary!(f32x2: fma_v2f32);
+                impl_tertiary!(f32x16: fma_v16f32);
+                impl_tertiary!(f64x8: fma_v8f64);
+
+                impl_tertiary!(f32x4: fma_v4f32);
+                impl_tertiary!(f32x8: fma_v8f32);
+                impl_tertiary!(f64x2: fma_v2f64);
+                impl_tertiary!(f64x4: fma_v4f64);
+            }
+        }
+    } else {
+        impl_tertiary!(f32x2: fma_v2f32);
+        impl_tertiary!(f32x4: fma_v4f32);
+        impl_tertiary!(f32x8: fma_v8f32);
+        impl_tertiary!(f32x16: fma_v16f32);
+        // impl_tertiary!(f64x1: fma_v1f64); // FIXME 64-bit single elem vectors
+        impl_tertiary!(f64x2: fma_v2f64);
+        impl_tertiary!(f64x4: fma_v4f64);
+        impl_tertiary!(f64x8: fma_v8f64);
+    }
+}
diff --git a/vendor/packed_simd_2/src/codegen/math/float/mul_adde.rs b/vendor/packed_simd_2/src/codegen/math/float/mul_adde.rs
new file mode 100644
index 000000000..8c41fb131
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/math/float/mul_adde.rs
@@ -0,0 +1,66 @@
+//! Approximation for floating-point `mul_add`
+use crate::*;
+
+// FIXME: 64-bit 1 element mul_adde
+
+crate trait MulAddE {
+    fn mul_adde(self, y: Self, z: Self) -> Self;
+}
+
+#[cfg(not(target_arch = "s390x"))]
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.fmuladd.v2f32"]
+    fn fmuladd_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2;
+    #[link_name = "llvm.fmuladd.v4f32"]
+    fn fmuladd_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4;
+    #[link_name = "llvm.fmuladd.v8f32"]
+    fn fmuladd_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8;
+    #[link_name = "llvm.fmuladd.v16f32"]
+    fn fmuladd_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16;
+    /* FIXME 64-bit single elem vectors
+    #[link_name = "llvm.fmuladd.v1f64"]
+    fn fmuladd_v1f64(x: f64x1, y: f64x1, z: f64x1) -> f64x1;
+    */
+    #[link_name = "llvm.fmuladd.v2f64"]
+    fn fmuladd_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2;
+    #[link_name = "llvm.fmuladd.v4f64"]
+    fn fmuladd_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4;
+    #[link_name = "llvm.fmuladd.v8f64"]
+    fn fmuladd_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8;
+}
+
+macro_rules! impl_mul_adde {
+    ($id:ident : $fn:ident) => {
+        impl MulAddE for $id {
+            #[inline]
+            fn mul_adde(self, y: Self, z: Self) -> Self {
+                #[cfg(not(target_arch = "s390x"))]
+                {
+                    use crate::mem::transmute;
+                    unsafe {
+                        transmute($fn(
+                            transmute(self),
+                            transmute(y),
+                            transmute(z),
+                        ))
+                    }
+                }
+                #[cfg(target_arch = "s390x")]
+                {
+                    // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+                    self * y + z
+                }
+            }
+        }
+    };
+}
+
+impl_mul_adde!(f32x2: fmuladd_v2f32);
+impl_mul_adde!(f32x4: fmuladd_v4f32);
+impl_mul_adde!(f32x8: fmuladd_v8f32);
+impl_mul_adde!(f32x16: fmuladd_v16f32);
+// impl_mul_adde!(f64x1: fmuladd_v1f64); // FIXME 64-bit single elem vectors
+impl_mul_adde!(f64x2: fmuladd_v2f64);
+impl_mul_adde!(f64x4: fmuladd_v4f64);
+impl_mul_adde!(f64x8: fmuladd_v8f64);
diff --git a/vendor/packed_simd_2/src/codegen/math/float/powf.rs b/vendor/packed_simd_2/src/codegen/math/float/powf.rs
new file mode 100644
index 000000000..bc15067d7
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/math/float/powf.rs
@@ -0,0 +1,112 @@
+//! Vertical floating-point `powf`
+#![allow(unused)]
+
+// FIXME 64-bit single elem vectors missing
+
+use crate::*;
+
+crate trait Powf {
+    fn powf(self, x: Self) -> Self;
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.pow.v2f32"]
+    fn powf_v2f32(x: f32x2, y: f32x2) -> f32x2;
+    #[link_name = "llvm.pow.v4f32"]
+    fn powf_v4f32(x: f32x4, y: f32x4) -> f32x4;
+    #[link_name = "llvm.pow.v8f32"]
+    fn powf_v8f32(x: f32x8, y: f32x8) -> f32x8;
+    #[link_name = "llvm.pow.v16f32"]
+    fn powf_v16f32(x: f32x16, y: f32x16) -> f32x16;
+    /* FIXME 64-bit single elem vectors
+    #[link_name = "llvm.pow.v1f64"]
+    fn powf_v1f64(x: f64x1, y: f64x1) -> f64x1;
+     */
+    #[link_name = "llvm.pow.v2f64"]
+    fn powf_v2f64(x: f64x2, y: f64x2) -> f64x2;
+    #[link_name = "llvm.pow.v4f64"]
+    fn powf_v4f64(x: f64x4, y: f64x4) -> f64x4;
+    #[link_name = "llvm.pow.v8f64"]
+    fn powf_v8f64(x: f64x8, y: f64x8) -> f64x8;
+    // Scalar variants, referenced by the `[f32; N]` / `[f64; N]` table entries below.
+    #[link_name = "llvm.pow.f32"]
+    fn powf_f32(x: f32, y: f32) -> f32;
+    #[link_name = "llvm.pow.f64"]
+    fn powf_f64(x: f64, y: f64) -> f64;
+}
+
+gen_binary_impl_table!(Powf, powf);
+
+cfg_if! {
+    if #[cfg(target_arch = "s390x")] {
+        // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+        impl_binary!(f32x2[f32; 2]: powf_f32);
+        impl_binary!(f32x4[f32; 4]: powf_f32);
+        impl_binary!(f32x8[f32; 8]: powf_f32);
+        impl_binary!(f32x16[f32; 16]: powf_f32);
+
+        impl_binary!(f64x2[f64; 2]: powf_f64);
+        impl_binary!(f64x4[f64; 4]: powf_f64);
+        impl_binary!(f64x8[f64; 8]: powf_f64);
+    } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+        use sleef_sys::*;
+        cfg_if! {
+            if #[cfg(target_feature = "avx2")] {
+                impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10avx2128);
+                impl_binary!(f32x16[h => f32x8]: Sleef_powf8_u10avx2);
+                impl_binary!(f64x8[h => f64x4]: Sleef_powd4_u10avx2);
+
+                impl_binary!(f32x4: Sleef_powf4_u10avx2128);
+                impl_binary!(f32x8: Sleef_powf8_u10avx2);
+                impl_binary!(f64x2: Sleef_powd2_u10avx2128);
+                impl_binary!(f64x4: Sleef_powd4_u10avx2);
+            } else if #[cfg(target_feature = "avx")] {
+                impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10sse4);
+                impl_binary!(f32x16[h => f32x8]: Sleef_powf8_u10avx);
+                impl_binary!(f64x8[h => f64x4]: Sleef_powd4_u10avx);
+
+                impl_binary!(f32x4: Sleef_powf4_u10sse4);
+                impl_binary!(f32x8: Sleef_powf8_u10avx);
+                impl_binary!(f64x2: Sleef_powd2_u10sse4);
+                impl_binary!(f64x4: Sleef_powd4_u10avx);
+            } else if #[cfg(target_feature = "sse4.2")] {
+                impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10sse4);
+                impl_binary!(f32x16[q => f32x4]: Sleef_powf4_u10sse4);
+                impl_binary!(f64x8[q => f64x2]: Sleef_powd2_u10sse4);
+
+                impl_binary!(f32x4: Sleef_powf4_u10sse4);
+                impl_binary!(f32x8[h => f32x4]: Sleef_powf4_u10sse4);
+                impl_binary!(f64x2: Sleef_powd2_u10sse4);
+                impl_binary!(f64x4[h => f64x2]: Sleef_powd2_u10sse4);
+            } else if #[cfg(target_feature = "sse2")] {
+                impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10sse2);
+                impl_binary!(f32x16[q => f32x4]: Sleef_powf4_u10sse2);
+                impl_binary!(f64x8[q => f64x2]: Sleef_powd2_u10sse2);
+
+                impl_binary!(f32x4: Sleef_powf4_u10sse2);
+                impl_binary!(f32x8[h => f32x4]: Sleef_powf4_u10sse2);
+                impl_binary!(f64x2: Sleef_powd2_u10sse2);
+                impl_binary!(f64x4[h => f64x2]: Sleef_powd2_u10sse2);
+            } else {
+                impl_binary!(f32x2[f32; 2]: powf_f32);
+                impl_binary!(f32x4: powf_v4f32);
+                impl_binary!(f32x8: powf_v8f32);
+                impl_binary!(f32x16: powf_v16f32);
+
+                impl_binary!(f64x2: powf_v2f64);
+                impl_binary!(f64x4: powf_v4f64);
+                impl_binary!(f64x8: powf_v8f64);
+            }
+        }
+    } else {
+        impl_binary!(f32x2[f32; 2]: powf_f32);
+        impl_binary!(f32x4: powf_v4f32);
+        impl_binary!(f32x8: powf_v8f32);
+        impl_binary!(f32x16: powf_v16f32);
+
+        impl_binary!(f64x2: powf_v2f64);
+        impl_binary!(f64x4: powf_v4f64);
+        impl_binary!(f64x8: powf_v8f64);
+    }
+}
diff --git a/vendor/packed_simd_2/src/codegen/math/float/sin.rs b/vendor/packed_simd_2/src/codegen/math/float/sin.rs
new file mode 100644
index 000000000..7b014d07d
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/math/float/sin.rs
@@ -0,0 +1,103 @@
+//! Vertical floating-point `sin`
+#![allow(unused)]
+
+// FIXME 64-bit 1 elem vectors sin
+
+use crate::*;
+
+crate trait Sin {
+    fn sin(self) -> Self;
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.sin.v2f32"]
+    fn sin_v2f32(x: f32x2) -> f32x2;
+    #[link_name = "llvm.sin.v4f32"]
+    fn sin_v4f32(x: f32x4) -> f32x4;
+    #[link_name = "llvm.sin.v8f32"]
+    fn sin_v8f32(x: f32x8) -> f32x8;
+    #[link_name = "llvm.sin.v16f32"]
+    fn sin_v16f32(x: f32x16) -> f32x16;
+    /* FIXME 64-bit single elem vectors
+    #[link_name = "llvm.sin.v1f64"]
+    fn sin_v1f64(x: f64x1) -> f64x1;
+     */
+    #[link_name = "llvm.sin.v2f64"]
+    fn sin_v2f64(x: f64x2) -> f64x2;
+    #[link_name = "llvm.sin.v4f64"]
+    fn sin_v4f64(x: f64x4) -> f64x4;
+    #[link_name = "llvm.sin.v8f64"]
+    fn sin_v8f64(x: f64x8) -> f64x8;
+
+    #[link_name = "llvm.sin.f32"]
+    fn sin_f32(x: f32) -> f32;
+    #[link_name = "llvm.sin.f64"]
+    fn sin_f64(x: f64) -> f64;
+}
+
+gen_unary_impl_table!(Sin, sin);
+
+cfg_if! {
+    if #[cfg(target_arch = "s390x")] {
+        // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+        impl_unary!(f32x2[f32; 2]: sin_f32);
+        impl_unary!(f32x4[f32; 4]: sin_f32);
+        impl_unary!(f32x8[f32; 8]: sin_f32);
+        impl_unary!(f32x16[f32; 16]: sin_f32);
+
+        impl_unary!(f64x2[f64; 2]: sin_f64);
+        impl_unary!(f64x4[f64; 4]: sin_f64);
+        impl_unary!(f64x8[f64; 8]: sin_f64);
+    } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+        use sleef_sys::*;
+        cfg_if! {
+            if #[cfg(target_feature = "avx2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_sinf4_u10avx2128);
+                impl_unary!(f32x16[h => f32x8]: Sleef_sinf8_u10avx2);
+                impl_unary!(f64x8[h => f64x4]: Sleef_sind4_u10avx2);
+
+                impl_unary!(f32x4: Sleef_sinf4_u10avx2128);
+                impl_unary!(f32x8: Sleef_sinf8_u10avx2);
+                impl_unary!(f64x2: Sleef_sind2_u10avx2128);
+                impl_unary!(f64x4: Sleef_sind4_u10avx2);
+            } else if #[cfg(target_feature = "avx")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_sinf4_u10sse4);
+                impl_unary!(f32x16[h => f32x8]: Sleef_sinf8_u10avx);
+                impl_unary!(f64x8[h => f64x4]: Sleef_sind4_u10avx);
+
+                impl_unary!(f32x4: Sleef_sinf4_u10sse4);
+                impl_unary!(f32x8: Sleef_sinf8_u10avx);
+                impl_unary!(f64x2: Sleef_sind2_u10sse4);
+                impl_unary!(f64x4: Sleef_sind4_u10avx);
+            } else if #[cfg(target_feature = "sse4.2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_sinf4_u10sse4);
+                impl_unary!(f32x16[q => f32x4]: Sleef_sinf4_u10sse4);
+                impl_unary!(f64x8[q => f64x2]: Sleef_sind2_u10sse4);
+
+                impl_unary!(f32x4: Sleef_sinf4_u10sse4);
+                impl_unary!(f32x8[h => f32x4]: Sleef_sinf4_u10sse4);
+                impl_unary!(f64x2: Sleef_sind2_u10sse4);
+                impl_unary!(f64x4[h => f64x2]: Sleef_sind2_u10sse4);
+            } else {
+                impl_unary!(f32x2[f32; 2]: sin_f32);
+                impl_unary!(f32x16: sin_v16f32);
+                impl_unary!(f64x8: sin_v8f64);
+
+                impl_unary!(f32x4: sin_v4f32);
+                impl_unary!(f32x8: sin_v8f32);
+                impl_unary!(f64x2: sin_v2f64);
+                impl_unary!(f64x4: sin_v4f64);
+            }
+        }
+    } else {
+        impl_unary!(f32x2[f32; 2]: sin_f32);
+        impl_unary!(f32x4: sin_v4f32);
+        impl_unary!(f32x8: sin_v8f32);
+        impl_unary!(f32x16: sin_v16f32);
+
+        impl_unary!(f64x2: sin_v2f64);
+        impl_unary!(f64x4: sin_v4f64);
+        impl_unary!(f64x8: sin_v8f64);
+    }
+}
diff --git a/vendor/packed_simd_2/src/codegen/math/float/sin_cos_pi.rs b/vendor/packed_simd_2/src/codegen/math/float/sin_cos_pi.rs
new file mode 100644
index 000000000..0f1249ec8
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/math/float/sin_cos_pi.rs
@@ -0,0 +1,195 @@
+//! Vertical floating-point `sin_cos_pi`
+#![allow(unused)]
+
+// FIXME 64-bit 1 elem vectors sin_cos_pi
+
+use crate::*;
+
+crate trait SinCosPi: Sized {
+    type Output;
+    fn sin_cos_pi(self) -> Self::Output;
+}
+
+macro_rules! impl_def {
+    ($vid:ident, $PI:path) => {
+        impl SinCosPi for $vid {
+            type Output = (Self, Self);
+            #[inline]
+            fn sin_cos_pi(self) -> Self::Output {
+                let v = self * Self::splat($PI);
+                (v.sin(), v.cos())
+            }
+        }
+    };
+}
+
+macro_rules! impl_def32 {
+    ($vid:ident) => {
+        impl_def!($vid, crate::f32::consts::PI);
+    };
+}
+macro_rules! impl_def64 {
+    ($vid:ident) => {
+        impl_def!($vid, crate::f64::consts::PI);
+    };
+}
+
+macro_rules! impl_unary_t {
+    ($vid:ident: $fun:ident) => {
+        impl SinCosPi for $vid {
+            type Output = (Self, Self);
+            fn sin_cos_pi(self) -> Self::Output {
+                unsafe {
+                    use crate::mem::transmute;
+                    transmute($fun(transmute(self)))
+                }
+            }
+        }
+    };
+    ($vid:ident[t => $vid_t:ident]: $fun:ident) => {
+        impl SinCosPi for $vid {
+            type Output = (Self, Self);
+            fn sin_cos_pi(self) -> Self::Output {
+                unsafe {
+                    use crate::mem::transmute;
+                    // Widen to `$vid_t`, the vector type `$fun` takes, by repeating `self`.
+                    union U {
+                        vec: [$vid; 2],
+                        twice: $vid_t,
+                    }
+                    // Both halves hold initialized lanes; the upper results are discarded.
+                    let twice = U { vec: [self, self] }.twice;
+                    let twice = transmute($fun(transmute(twice)));
+                    // View each vector of the returned (sin, cos) pair as two `$vid` halves.
+                    union R {
+                        twice: ($vid_t, $vid_t),
+                        vecs: ([$vid; 2], [$vid; 2]),
+                    }
+                    let r = R { twice }.vecs;
+                    (*r.0.get_unchecked(0), *r.1.get_unchecked(0))
+                }
+            }
+        }
+    };
+    ($vid:ident[h => $vid_h:ident]: $fun:ident) => {
+        impl SinCosPi for $vid {
+            type Output = (Self, Self);
+            fn sin_cos_pi(self) -> Self::Output {
+                unsafe {
+                    use crate::mem::transmute;
+
+                    union U {
+                        vec: $vid,
+                        halves: [$vid_h; 2],
+                    }
+
+                    let halves = U { vec: self }.halves;
+
+                    let res_0: ($vid_h, $vid_h) =
+                        transmute($fun(transmute(*halves.get_unchecked(0))));
+                    let res_1: ($vid_h, $vid_h) =
+                        transmute($fun(transmute(*halves.get_unchecked(1))));
+
+                    union R {
+                        result: ($vid, $vid),
+                        halves: ([$vid_h; 2], [$vid_h; 2]),
+                    }
+                    R { halves: ([res_0.0, res_1.0], [res_0.1, res_1.1]) }
+                        .result
+                }
+            }
+        }
+    };
+    ($vid:ident[q => $vid_q:ident]: $fun:ident) => {
+        impl SinCosPi for $vid {
+            type Output = (Self, Self);
+            fn sin_cos_pi(self) -> Self::Output {
+                unsafe {
+                    use crate::mem::transmute;
+
+                    union U {
+                        vec: $vid,
+                        quarters: [$vid_q; 4],
+                    }
+
+                    let quarters = U { vec: self }.quarters;
+
+                    let res_0: ($vid_q, $vid_q) =
+                        transmute($fun(transmute(*quarters.get_unchecked(0))));
+                    let res_1: ($vid_q, $vid_q) =
+                        transmute($fun(transmute(*quarters.get_unchecked(1))));
+                    let res_2: ($vid_q, $vid_q) =
+                        transmute($fun(transmute(*quarters.get_unchecked(2))));
+                    let res_3: ($vid_q, $vid_q) =
+                        transmute($fun(transmute(*quarters.get_unchecked(3))));
+
+                    union R {
+                        result: ($vid, $vid),
+                        quarters: ([$vid_q; 4], [$vid_q; 4]),
+                    }
+                    R {
+                        quarters: (
+                            [res_0.0, res_1.0, res_2.0, res_3.0],
+                            [res_0.1, res_1.1, res_2.1, res_3.1],
+                        ),
+                    }
+                    .result
+                }
+            }
+        }
+    };
+}
+
+cfg_if! {
+    if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+        use sleef_sys::*;
+        cfg_if! {
+            if #[cfg(target_feature = "avx2")] {
+                impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05avx2128);
+                impl_unary_t!(f32x16[h => f32x8]: Sleef_sincospif8_u05avx2);
+                impl_unary_t!(f64x8[h => f64x4]: Sleef_sincospid4_u05avx2);
+
+                impl_unary_t!(f32x4: Sleef_sincospif4_u05avx2128);
+                impl_unary_t!(f32x8: Sleef_sincospif8_u05avx2);
+                impl_unary_t!(f64x2: Sleef_sincospid2_u05avx2128);
+                impl_unary_t!(f64x4: Sleef_sincospid4_u05avx2);
+            } else if #[cfg(target_feature = "avx")] {
+                impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05sse4);
+                impl_unary_t!(f32x16[h => f32x8]: Sleef_sincospif8_u05avx);
+                impl_unary_t!(f64x8[h => f64x4]: Sleef_sincospid4_u05avx);
+
+                impl_unary_t!(f32x4: Sleef_sincospif4_u05sse4);
+                impl_unary_t!(f32x8: Sleef_sincospif8_u05avx);
+                impl_unary_t!(f64x2: Sleef_sincospid2_u05sse4);
+                impl_unary_t!(f64x4: Sleef_sincospid4_u05avx);
+            } else if #[cfg(target_feature = "sse4.2")] {
+                impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05sse4);
+                impl_unary_t!(f32x16[q => f32x4]: Sleef_sincospif4_u05sse4);
+                impl_unary_t!(f64x8[q => f64x2]: Sleef_sincospid2_u05sse4);
+
+                impl_unary_t!(f32x4: Sleef_sincospif4_u05sse4);
+                impl_unary_t!(f32x8[h => f32x4]: Sleef_sincospif4_u05sse4);
+                impl_unary_t!(f64x2: Sleef_sincospid2_u05sse4);
+                impl_unary_t!(f64x4[h => f64x2]: Sleef_sincospid2_u05sse4);
+            } else {
+                impl_def32!(f32x2);
+                impl_def32!(f32x4);
+                impl_def32!(f32x8);
+                impl_def32!(f32x16);
+
+                impl_def64!(f64x2);
+                impl_def64!(f64x4);
+                impl_def64!(f64x8);
+            }
+        }
+    } else {
+        impl_def32!(f32x2);
+        impl_def32!(f32x4);
+        impl_def32!(f32x8);
+        impl_def32!(f32x16);
+
+        impl_def64!(f64x2);
+        impl_def64!(f64x4);
+        impl_def64!(f64x8);
+    }
+}
diff --git a/vendor/packed_simd_2/src/codegen/math/float/sin_pi.rs b/vendor/packed_simd_2/src/codegen/math/float/sin_pi.rs
new file mode 100644
index 000000000..72df98c93
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/math/float/sin_pi.rs
@@ -0,0 +1,87 @@
+//! Vertical floating-point `sin_pi`
+#![allow(unused)]
+
+// FIXME 64-bit 1 elem vectors sin_pi
+
+use crate::*;
+
+crate trait SinPi {
+    fn sin_pi(self) -> Self;
+}
+
+gen_unary_impl_table!(SinPi, sin_pi);
+
+macro_rules! impl_def {
+    ($vid:ident, $PI:path) => {
+        impl SinPi for $vid {
+            #[inline]
+            fn sin_pi(self) -> Self {
+                (self * Self::splat($PI)).sin()
+            }
+        }
+    };
+}
+macro_rules! impl_def32 {
+    ($vid:ident) => {
+        impl_def!($vid, crate::f32::consts::PI);
+    };
+}
+macro_rules! impl_def64 {
+    ($vid:ident) => {
+        impl_def!($vid, crate::f64::consts::PI);
+    };
+}
+
+cfg_if! {
+    if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+        use sleef_sys::*;
+        cfg_if! {
+            if #[cfg(target_feature = "avx2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_sinpif4_u05avx2128);
+                impl_unary!(f32x16[h => f32x8]: Sleef_sinpif8_u05avx2);
+                impl_unary!(f64x8[h => f64x4]: Sleef_sinpid4_u05avx2);
+
+                impl_unary!(f32x4: Sleef_sinpif4_u05avx2128);
+                impl_unary!(f32x8: Sleef_sinpif8_u05avx2);
+                impl_unary!(f64x2: Sleef_sinpid2_u05avx2128);
+                impl_unary!(f64x4: Sleef_sinpid4_u05avx2);
+            } else if #[cfg(target_feature = "avx")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_sinpif4_u05sse4);
+                impl_unary!(f32x16[h => f32x8]: Sleef_sinpif8_u05avx);
+                impl_unary!(f64x8[h => f64x4]: Sleef_sinpid4_u05avx);
+
+                impl_unary!(f32x4: Sleef_sinpif4_u05sse4);
+                impl_unary!(f32x8: Sleef_sinpif8_u05avx);
+                impl_unary!(f64x2: Sleef_sinpid2_u05sse4);
+                impl_unary!(f64x4: Sleef_sinpid4_u05avx);
+            } else if #[cfg(target_feature = "sse4.2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_sinpif4_u05sse4);
+                impl_unary!(f32x16[q => f32x4]: Sleef_sinpif4_u05sse4);
+                impl_unary!(f64x8[q => f64x2]: Sleef_sinpid2_u05sse4);
+
+                impl_unary!(f32x4: Sleef_sinpif4_u05sse4);
+                impl_unary!(f32x8[h => f32x4]: Sleef_sinpif4_u05sse4);
+                impl_unary!(f64x2: Sleef_sinpid2_u05sse4);
+                impl_unary!(f64x4[h => f64x2]: Sleef_sinpid2_u05sse4);
+            } else {
+                impl_def32!(f32x2);
+                impl_def32!(f32x4);
+                impl_def32!(f32x8);
+                impl_def32!(f32x16);
+
+                impl_def64!(f64x2);
+                impl_def64!(f64x4);
+                impl_def64!(f64x8);
+            }
+        }
+    } else {
+        impl_def32!(f32x2);
+        impl_def32!(f32x4);
+        impl_def32!(f32x8);
+        impl_def32!(f32x16);
+
+        impl_def64!(f64x2);
+        impl_def64!(f64x4);
+        impl_def64!(f64x8);
+    }
+}
diff --git a/vendor/packed_simd_2/src/codegen/math/float/sqrt.rs b/vendor/packed_simd_2/src/codegen/math/float/sqrt.rs
new file mode 100644
index 000000000..7ce31df62
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/math/float/sqrt.rs
@@ -0,0 +1,103 @@
+//! Vertical floating-point `sqrt`
+#![allow(unused)]
+
+// FIXME 64-bit 1 elem vectors sqrt
+
+use crate::*;
+
+crate trait Sqrt {
+    fn sqrt(self) -> Self;
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.sqrt.v2f32"]
+    fn sqrt_v2f32(x: f32x2) -> f32x2;
+    #[link_name = "llvm.sqrt.v4f32"]
+    fn sqrt_v4f32(x: f32x4) -> f32x4;
+    #[link_name = "llvm.sqrt.v8f32"]
+    fn sqrt_v8f32(x: f32x8) -> f32x8;
+    #[link_name = "llvm.sqrt.v16f32"]
+    fn sqrt_v16f32(x: f32x16) -> f32x16;
+    /* FIXME 64-bit single elem vectors
+    #[link_name = "llvm.sqrt.v1f64"]
+    fn sqrt_v1f64(x: f64x1) -> f64x1;
+     */
+    #[link_name = "llvm.sqrt.v2f64"]
+    fn sqrt_v2f64(x: f64x2) -> f64x2;
+    #[link_name = "llvm.sqrt.v4f64"]
+    fn sqrt_v4f64(x: f64x4) -> f64x4;
+    #[link_name = "llvm.sqrt.v8f64"]
+    fn sqrt_v8f64(x: f64x8) -> f64x8;
+    // Scalar variants, referenced by the `[f32; N]` / `[f64; N]` table entries below.
+    #[link_name = "llvm.sqrt.f32"]
+    fn sqrt_f32(x: f32) -> f32;
+    #[link_name = "llvm.sqrt.f64"]
+    fn sqrt_f64(x: f64) -> f64;
+}
+
+gen_unary_impl_table!(Sqrt, sqrt);
+
+cfg_if! {
+    if #[cfg(target_arch = "s390x")] {
+        // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+        impl_unary!(f32x2[f32; 2]: sqrt_f32);
+        impl_unary!(f32x4[f32; 4]: sqrt_f32);
+        impl_unary!(f32x8[f32; 8]: sqrt_f32);
+        impl_unary!(f32x16[f32; 16]: sqrt_f32);
+
+        impl_unary!(f64x2[f64; 2]: sqrt_f64);
+        impl_unary!(f64x4[f64; 4]: sqrt_f64);
+        impl_unary!(f64x8[f64; 8]: sqrt_f64);
+    } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+        use sleef_sys::*;
+        cfg_if! {
+            if #[cfg(target_feature = "avx2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_avx2128);
+                impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_avx2);
+                impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_avx2);
+
+                impl_unary!(f32x4: Sleef_sqrtf4_avx2128);
+                impl_unary!(f32x8: Sleef_sqrtf8_avx2);
+                impl_unary!(f64x2: Sleef_sqrtd2_avx2128);
+                impl_unary!(f64x4: Sleef_sqrtd4_avx2);
+            } else if #[cfg(target_feature = "avx")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_sse4);
+                impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_avx);
+                impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_avx);
+
+                impl_unary!(f32x4: Sleef_sqrtf4_sse4);
+                impl_unary!(f32x8: Sleef_sqrtf8_avx);
+                impl_unary!(f64x2: Sleef_sqrtd2_sse4);
+                impl_unary!(f64x4: Sleef_sqrtd4_avx);
+            } else if #[cfg(target_feature = "sse4.2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_sse4);
+                impl_unary!(f32x16[q => f32x4]: Sleef_sqrtf4_sse4);
+                impl_unary!(f64x8[q => f64x2]: Sleef_sqrtd2_sse4);
+
+                impl_unary!(f32x4: Sleef_sqrtf4_sse4);
+                impl_unary!(f32x8[h => f32x4]: Sleef_sqrtf4_sse4);
+                impl_unary!(f64x2: Sleef_sqrtd2_sse4);
+                impl_unary!(f64x4[h => f64x2]: Sleef_sqrtd2_sse4);
+            } else {
+                impl_unary!(f32x2[f32; 2]: sqrt_f32);
+                impl_unary!(f32x16: sqrt_v16f32);
+                impl_unary!(f64x8: sqrt_v8f64);
+
+                impl_unary!(f32x4: sqrt_v4f32);
+                impl_unary!(f32x8: sqrt_v8f32);
+                impl_unary!(f64x2: sqrt_v2f64);
+                impl_unary!(f64x4: sqrt_v4f64);
+            }
+        }
+    } else {
+        impl_unary!(f32x2[f32; 2]: sqrt_f32);
+        impl_unary!(f32x4: sqrt_v4f32);
+        impl_unary!(f32x8: sqrt_v8f32);
+        impl_unary!(f32x16: sqrt_v16f32);
+
+        impl_unary!(f64x2: sqrt_v2f64);
+        impl_unary!(f64x4: sqrt_v4f64);
+        impl_unary!(f64x8: sqrt_v8f64);
+    }
+}
diff --git a/vendor/packed_simd_2/src/codegen/math/float/sqrte.rs b/vendor/packed_simd_2/src/codegen/math/float/sqrte.rs
new file mode 100644
index 000000000..c1e379c34
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/math/float/sqrte.rs
@@ -0,0 +1,67 @@
+//! Vertical floating-point `sqrte`
+#![allow(unused)]
+
+// FIXME 64-bit 1 elem vectors sqrte
+
+use crate::llvm::simd_fsqrt;
+use crate::*;
+
+crate trait Sqrte {
+    fn sqrte(self) -> Self;
+}
+
+gen_unary_impl_table!(Sqrte, sqrte);
+
+cfg_if! {
+    if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+        use sleef_sys::*;
+        cfg_if! {
+            if #[cfg(target_feature = "avx2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_u35avx2128);
+                impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_u35avx2);
+                impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_u35avx2);
+
+                impl_unary!(f32x4: Sleef_sqrtf4_u35avx2128);
+                impl_unary!(f32x8: Sleef_sqrtf8_u35avx2);
+                impl_unary!(f64x2: Sleef_sqrtd2_u35avx2128);
+                impl_unary!(f64x4: Sleef_sqrtd4_u35avx2);
+            } else if #[cfg(target_feature = "avx")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_u35sse4);
+                impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_u35avx);
+                impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_u35avx);
+
+                impl_unary!(f32x4: Sleef_sqrtf4_u35sse4);
+                impl_unary!(f32x8: Sleef_sqrtf8_u35avx);
+                impl_unary!(f64x2: Sleef_sqrtd2_u35sse4);
+                impl_unary!(f64x4: Sleef_sqrtd4_u35avx);
+            } else if #[cfg(target_feature = "sse4.2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_u35sse4);
+                impl_unary!(f32x16[q => f32x4]: Sleef_sqrtf4_u35sse4);
+                impl_unary!(f64x8[q => f64x2]: Sleef_sqrtd2_u35sse4);
+
+                impl_unary!(f32x4: Sleef_sqrtf4_u35sse4);
+                impl_unary!(f32x8[h => f32x4]: Sleef_sqrtf4_u35sse4);
+                impl_unary!(f64x2: Sleef_sqrtd2_u35sse4);
+                impl_unary!(f64x4[h => f64x2]: Sleef_sqrtd2_u35sse4);
+            } else {
+                impl_unary!(f32x2[g]: simd_fsqrt);
+                impl_unary!(f32x16[g]: simd_fsqrt);
+                impl_unary!(f64x8[g]: simd_fsqrt);
+
+                impl_unary!(f32x4[g]: simd_fsqrt);
+                impl_unary!(f32x8[g]: simd_fsqrt);
+                impl_unary!(f64x2[g]: simd_fsqrt);
+                impl_unary!(f64x4[g]: simd_fsqrt);
+            }
+        }
+    } else {
+        impl_unary!(f32x2[g]: simd_fsqrt);
+        impl_unary!(f32x4[g]: simd_fsqrt);
+        impl_unary!(f32x8[g]: simd_fsqrt);
+        impl_unary!(f32x16[g]: simd_fsqrt);
+
+        impl_unary!(f64x2[g]: simd_fsqrt);
+        impl_unary!(f64x4[g]: simd_fsqrt);
+        impl_unary!(f64x8[g]: simd_fsqrt);
+    }
+}
diff --git a/vendor/packed_simd_2/src/codegen/math/float/tanh.rs b/vendor/packed_simd_2/src/codegen/math/float/tanh.rs
new file mode 100644
index 000000000..5220c7d10
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/math/float/tanh.rs
@@ -0,0 +1,117 @@
+//! Vertical floating-point `tanh`
+#![allow(unused)]
+
+// FIXME 64-bit 1 elem vectors tanh
+
+use crate::*;
+
+crate trait Tanh {
+    fn tanh(self) -> Self;
+}
+
+macro_rules! define_tanh {
+    // Generic arm: defines `$name`, applying `$trait::tanh` to every lane of `x`.
+    ($name:ident, $basetype:ty, $simdtype:ty, $lanes:expr, $trait:path) => {
+        fn $name(x: $simdtype) -> $simdtype {
+            use core::intrinsics::transmute;
+            let mut buf: [$basetype; $lanes] = unsafe { transmute(x) };
+            for elem in &mut buf {
+                *elem = <$basetype as $trait>::tanh(*elem);
+            }
+            unsafe { transmute(buf) }
+        }
+    };
+    // `f32` arm: forwards to the generic arm with `libm::F32Ext`.
+    (f32 => $name:ident, $type:ty, $lanes:expr) => {
+        define_tanh!($name, f32, $type, $lanes, libm::F32Ext);
+    };
+    // `f64` arm: forwards to the generic arm with `libm::F64Ext`.
+    (f64 => $name:ident, $type:ty, $lanes:expr) => {
+        define_tanh!($name, f64, $type, $lanes, libm::F64Ext);
+    };
+}
+
+// llvm does not seem to expose the hyperbolic versions of trigonometric functions;
+// we thus call the scalar `libm` crate implementations on each lane instead.
+define_tanh!(f32 => tanh_v2f32, f32x2, 2);
+define_tanh!(f32 => tanh_v4f32, f32x4, 4);
+define_tanh!(f32 => tanh_v8f32, f32x8, 8);
+define_tanh!(f32 => tanh_v16f32, f32x16, 16);
+
+define_tanh!(f64 => tanh_v2f64, f64x2, 2);
+define_tanh!(f64 => tanh_v4f64, f64x4, 4);
+define_tanh!(f64 => tanh_v8f64, f64x8, 8);
+
+fn tanh_f32(x: f32) -> f32 {
+    libm::F32Ext::tanh(x)
+}
+
+fn tanh_f64(x: f64) -> f64 {
+    libm::F64Ext::tanh(x)
+}
+
+gen_unary_impl_table!(Tanh, tanh);
+
+cfg_if! {
+    if #[cfg(target_arch = "s390x")] {
+        // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+        impl_unary!(f32x2[f32; 2]: tanh_f32);
+        impl_unary!(f32x4[f32; 4]: tanh_f32);
+        impl_unary!(f32x8[f32; 8]: tanh_f32);
+        impl_unary!(f32x16[f32; 16]: tanh_f32);
+
+        impl_unary!(f64x2[f64; 2]: tanh_f64);
+        impl_unary!(f64x4[f64; 4]: tanh_f64);
+        impl_unary!(f64x8[f64; 8]: tanh_f64);
+    } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+        use sleef_sys::*;
+        cfg_if! {
+            if #[cfg(target_feature = "avx2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_tanhf4_u10avx2128);
+                impl_unary!(f32x16[h => f32x8]: Sleef_tanhf8_u10avx2);
+                impl_unary!(f64x8[h => f64x4]: Sleef_tanhd4_u10avx2);
+
+                impl_unary!(f32x4: Sleef_tanhf4_u10avx2128);
+                impl_unary!(f32x8: Sleef_tanhf8_u10avx2);
+                impl_unary!(f64x2: Sleef_tanhd2_u10avx2128);
+                impl_unary!(f64x4: Sleef_tanhd4_u10avx2);
+            } else if #[cfg(target_feature = "avx")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_tanhf4_u10sse4);
+                impl_unary!(f32x16[h => f32x8]: Sleef_tanhf8_u10avx);
+                impl_unary!(f64x8[h => f64x4]: Sleef_tanhd4_u10avx);
+
+                impl_unary!(f32x4: Sleef_tanhf4_u10sse4);
+                impl_unary!(f32x8: Sleef_tanhf8_u10avx);
+                impl_unary!(f64x2: Sleef_tanhd2_u10sse4);
+                impl_unary!(f64x4: Sleef_tanhd4_u10avx);
+            } else if #[cfg(target_feature = "sse4.2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_tanhf4_u10sse4);
+                impl_unary!(f32x16[q => f32x4]: Sleef_tanhf4_u10sse4);
+                impl_unary!(f64x8[q => f64x2]: Sleef_tanhd2_u10sse4);
+
+                impl_unary!(f32x4: Sleef_tanhf4_u10sse4);
+                impl_unary!(f32x8[h => f32x4]: Sleef_tanhf4_u10sse4);
+                impl_unary!(f64x2: Sleef_tanhd2_u10sse4);
+                impl_unary!(f64x4[h => f64x2]: Sleef_tanhd2_u10sse4);
+            } else {
+                impl_unary!(f32x2[f32; 2]: tanh_f32);
+                impl_unary!(f32x16: tanh_v16f32);
+                impl_unary!(f64x8: tanh_v8f64);
+
+                impl_unary!(f32x4: tanh_v4f32);
+                impl_unary!(f32x8: tanh_v8f32);
+                impl_unary!(f64x2: tanh_v2f64);
+                impl_unary!(f64x4: tanh_v4f64);
+            }
+        }
+    } else {
+        impl_unary!(f32x2[f32; 2]: tanh_f32);
+        impl_unary!(f32x4: tanh_v4f32);
+        impl_unary!(f32x8: tanh_v8f32);
+        impl_unary!(f32x16: tanh_v16f32);
+
+        impl_unary!(f64x2: tanh_v2f64);
+        impl_unary!(f64x4: tanh_v4f64);
+        impl_unary!(f64x8: tanh_v8f64);
+    }
+}
diff --git a/vendor/packed_simd_2/src/codegen/pointer_sized_int.rs b/vendor/packed_simd_2/src/codegen/pointer_sized_int.rs
new file mode 100644
index 000000000..39f493d3b
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/pointer_sized_int.rs
@@ -0,0 +1,28 @@
+//! Provides `isize` and `usize`
+
+use cfg_if::cfg_if;
+
+// Aliases for the fixed-width integer types whose width matches the target's
+// pointer width. `cfg_if!` uses the first branch whose predicate holds, so
+// each pointer width is listed exactly once.
+cfg_if! {
+    if #[cfg(target_pointer_width = "8")] {
+        crate type isize_ = i8;
+        crate type usize_ = u8;
+    } else if #[cfg(target_pointer_width = "16")] {
+        crate type isize_ = i16;
+        crate type usize_ = u16;
+    } else if #[cfg(target_pointer_width = "32")] {
+        crate type isize_ = i32;
+        crate type usize_ = u32;
+    } else if #[cfg(target_pointer_width = "64")] {
+        crate type isize_ = i64;
+        crate type usize_ = u64;
+    } else if #[cfg(target_pointer_width = "128")] {
+        crate type isize_ = i128;
+        crate type usize_ = u128;
+    } else {
+        // No integer type of matching width is known for this target.
+        compile_error!("unsupported target_pointer_width");
+    }
+}
diff --git a/vendor/packed_simd_2/src/codegen/reductions.rs b/vendor/packed_simd_2/src/codegen/reductions.rs
new file mode 100644
index 000000000..7be4f5fab
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/reductions.rs
@@ -0,0 +1 @@
+crate mod mask;
diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask.rs b/vendor/packed_simd_2/src/codegen/reductions/mask.rs
new file mode 100644
index 000000000..97260c6d4
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/reductions/mask.rs
@@ -0,0 +1,69 @@
+//! Code generation workaround for `all()` mask horizontal reduction.
+//!
+//! Works around [LLVM bug 36702].
+//!
+//! [LLVM bug 36702]: https://bugs.llvm.org/show_bug.cgi?id=36702
+#![allow(unused_macros)]
+
+use crate::*;
+
+/// Horizontal `all` reduction of a mask vector.
+///
+/// The implementations in this module return `true` when every lane of the
+/// mask is set.
+crate trait All: crate::marker::Sized {
+    /// Returns whether all lanes of `self` are `true`.
+    ///
+    /// NOTE(review): presumably `unsafe` because the architecture-specific
+    /// implementations are `#[target_feature]` functions - confirm.
+    unsafe fn all(self) -> bool;
+}
+
+/// Horizontal `any` reduction of a mask vector.
+///
+/// The implementations in this module return `true` when at least one lane
+/// of the mask is set.
+crate trait Any: crate::marker::Sized {
+    /// Returns whether any lane of `self` is `true`.
+    ///
+    /// NOTE(review): presumably `unsafe` because the architecture-specific
+    /// implementations are `#[target_feature]` functions - confirm.
+    unsafe fn any(self) -> bool;
+}
+
+// `fallback_impl` provides the target-independent building-block macros
+// (`fallback_impl!`, `fallback_to_other_impl!`, `recurse_half!`). It is
+// declared first so that these macros are in scope in the modules below.
+#[macro_use]
+mod fallback_impl;
+
+// Exactly one of the following modules is compiled; each of them defines the
+// `impl_mask_reductions!` macro that is invoked at the bottom of this file.
+cfg_if! {
+    if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
+        #[macro_use]
+        mod x86;
+    } else if #[cfg(all(target_arch = "arm", target_feature = "v7",
+                        target_feature = "neon",
+                        any(feature = "core_arch", libcore_neon)))] {
+        #[macro_use]
+        mod arm;
+    } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] {
+        #[macro_use]
+        mod aarch64;
+    } else {
+        // No specialized implementation for this target.
+        #[macro_use]
+        mod fallback;
+    }
+}
+
+// Implement `All` and `Any` for every mask vector type through the
+// `impl_mask_reductions!` macro of the module selected above.
+
+// Masks with 8-bit lanes:
+impl_mask_reductions!(m8x2);
+impl_mask_reductions!(m8x4);
+impl_mask_reductions!(m8x8);
+impl_mask_reductions!(m8x16);
+impl_mask_reductions!(m8x32);
+impl_mask_reductions!(m8x64);
+
+// Masks with 16-bit lanes:
+impl_mask_reductions!(m16x2);
+impl_mask_reductions!(m16x4);
+impl_mask_reductions!(m16x8);
+impl_mask_reductions!(m16x16);
+impl_mask_reductions!(m16x32);
+
+// Masks with 32-bit lanes:
+impl_mask_reductions!(m32x2);
+impl_mask_reductions!(m32x4);
+impl_mask_reductions!(m32x8);
+impl_mask_reductions!(m32x16);
+
+// Masks with 64-bit lanes:
+// FIXME: 64-bit single element vector
+// impl_mask_reductions!(m64x1);
+impl_mask_reductions!(m64x2);
+impl_mask_reductions!(m64x4);
+impl_mask_reductions!(m64x8);
+
+// Masks with 128-bit lanes:
+impl_mask_reductions!(m128x1);
+impl_mask_reductions!(m128x2);
+impl_mask_reductions!(m128x4);
+
+// Masks with pointer-sized lanes:
+impl_mask_reductions!(msizex2);
+impl_mask_reductions!(msizex4);
+impl_mask_reductions!(msizex8);
diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask/aarch64.rs b/vendor/packed_simd_2/src/codegen/reductions/mask/aarch64.rs
new file mode 100644
index 000000000..e9586eace
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/reductions/mask/aarch64.rs
@@ -0,0 +1,71 @@
+//! Mask reductions implementation for `aarch64` targets
+
+/// Implements `All` and `Any` for the 128-bit wide mask `$id`.
+///
+/// `$vmin` and `$vmax` name the `crate::arch::aarch64` intrinsics that are
+/// applied to the whole vector; their scalar result is compared against zero.
+macro_rules! aarch64_128_neon_impl {
+    ($id:ident, $vmin:ident, $vmax:ident) => {
+        impl All for $id {
+            #[inline]
+            #[target_feature(enable = "neon")]
+            unsafe fn all(self) -> bool {
+                // No lane is zero exactly when the `$vmin` result is
+                // non-zero.
+                let min_lane = crate::arch::aarch64::$vmin(crate::mem::transmute(self));
+                min_lane != 0
+            }
+        }
+        impl Any for $id {
+            #[inline]
+            #[target_feature(enable = "neon")]
+            unsafe fn any(self) -> bool {
+                // Some lane is non-zero exactly when the `$vmax` result is
+                // non-zero.
+                let max_lane = crate::arch::aarch64::$vmax(crate::mem::transmute(self));
+                max_lane != 0
+            }
+        }
+    }
+}
+
+/// Implements `All` and `Any` for the 64-bit wide mask `$id`.
+///
+/// The 64-bit vector is duplicated into both halves of the 128-bit wide mask
+/// `$vec128`, and the reduction of that wider mask is returned.
+macro_rules! aarch64_64_neon_impl {
+    ($id:ident, $vec128:ident) => {
+        impl All for $id {
+            #[inline]
+            #[target_feature(enable = "neon")]
+            unsafe fn all(self) -> bool {
+                // Reinterpret two copies of `self` as one 128-bit mask.
+                union Doubled {
+                    pair: ($id, $id),
+                    wide: $vec128,
+                }
+                let doubled = Doubled { pair: (self, self) };
+                doubled.wide.all()
+            }
+        }
+        impl Any for $id {
+            #[inline]
+            #[target_feature(enable = "neon")]
+            unsafe fn any(self) -> bool {
+                // Reinterpret two copies of `self` as one 128-bit mask.
+                union Doubled {
+                    pair: ($id, $id),
+                    wide: $vec128,
+                }
+                let doubled = Doubled { pair: (self, self) };
+                doubled.wide.any()
+            }
+        }
+    };
+}
+
+/// Mask reduction implementation for `aarch64` targets
+///
+/// Dispatches on the name of the mask type: the 64-bit and 128-bit wide masks
+/// with 8-, 16- and 32-bit lanes get a NEON implementation, every other mask
+/// type is forwarded to `fallback_impl!`.
+///
+/// The arms are tried in order, so the literal type names must stay above the
+/// catch-all `$id:ident` arm.
+macro_rules! impl_mask_reductions {
+    // 64-bit wide masks
+    (m8x8) => { aarch64_64_neon_impl!(m8x8, m8x16); };
+    (m16x4) => { aarch64_64_neon_impl!(m16x4, m16x8); };
+    (m32x2) => { aarch64_64_neon_impl!(m32x2, m32x4); };
+    // 128-bit wide masks
+    (m8x16) => { aarch64_128_neon_impl!(m8x16, vminvq_u8, vmaxvq_u8); };
+    (m16x8) => { aarch64_128_neon_impl!(m16x8, vminvq_u16, vmaxvq_u16); };
+    (m32x4) => { aarch64_128_neon_impl!(m32x4, vminvq_u32, vmaxvq_u32); };
+    // Fallback to LLVM's default code-generation:
+    ($id:ident) => { fallback_impl!($id); };
+}
diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask/arm.rs b/vendor/packed_simd_2/src/codegen/reductions/mask/arm.rs
new file mode 100644
index 000000000..1987af7a9
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/reductions/mask/arm.rs
@@ -0,0 +1,54 @@
+//! Mask reductions implementation for `arm` targets
+
+/// Implements `All` and `Any` for the mask `$id` on ARM + v7 + NEON.
+///
+/// `$id` is split into two `$half` masks, the halves are combined with the
+/// `$vpmin` (for `all`) or `$vpmax` (for `any`) intrinsic from
+/// `crate::arch::arm`, and the reduction of the combined `$half` mask is
+/// returned.
+macro_rules! arm_128_v7_neon_impl {
+    ($id:ident, $half:ident, $vpmin:ident, $vpmax:ident) => {
+        impl All for $id {
+            #[inline]
+            #[target_feature(enable = "v7,neon")]
+            unsafe fn all(self) -> bool {
+                use crate::mem::transmute;
+                // View the full vector as a pair of half-width vectors.
+                union Split {
+                    parts: ($half, $half),
+                    whole: $id,
+                }
+                let (lo, hi) = Split { whole: self }.parts;
+                let folded: $half = transmute(crate::arch::arm::$vpmin(
+                    transmute(lo),
+                    transmute(hi),
+                ));
+                folded.all()
+            }
+        }
+        impl Any for $id {
+            #[inline]
+            #[target_feature(enable = "v7,neon")]
+            unsafe fn any(self) -> bool {
+                use crate::mem::transmute;
+                // View the full vector as a pair of half-width vectors.
+                union Split {
+                    parts: ($half, $half),
+                    whole: $id,
+                }
+                let (lo, hi) = Split { whole: self }.parts;
+                let folded: $half = transmute(crate::arch::arm::$vpmax(
+                    transmute(lo),
+                    transmute(hi),
+                ));
+                folded.any()
+            }
+        }
+    };
+}
+
+/// Mask reduction implementation for `arm` targets
+///
+/// Dispatches on the name of the mask type: the 128-bit wide masks with 8-,
+/// 16- and 32-bit lanes get the v7 + NEON implementation (each paired with
+/// its 64-bit wide half type), every other mask type is forwarded to
+/// `fallback_impl!`.
+///
+/// The arms are tried in order, so the literal type names must stay above the
+/// catch-all `$id:ident` arm.
+macro_rules! impl_mask_reductions {
+    // 128-bit wide masks
+    (m8x16) => { arm_128_v7_neon_impl!(m8x16, m8x8, vpmin_u8, vpmax_u8); };
+    (m16x8) => { arm_128_v7_neon_impl!(m16x8, m16x4, vpmin_u16, vpmax_u16); };
+    (m32x4) => { arm_128_v7_neon_impl!(m32x4, m32x2, vpmin_u32, vpmax_u32); };
+    // Fallback to LLVM's default code-generation:
+    ($id:ident) => { fallback_impl!($id); };
+}
diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask/fallback.rs b/vendor/packed_simd_2/src/codegen/reductions/mask/fallback.rs
new file mode 100644
index 000000000..25e5c813a
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/reductions/mask/fallback.rs
@@ -0,0 +1,6 @@
+//! Default mask reduction implementations.
+
+/// Default mask reduction implementation
+///
+/// Forwards every mask type unchanged to `fallback_impl!`.
+macro_rules! impl_mask_reductions {
+    ($id:ident) => { fallback_impl!($id); };
+}
diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask/fallback_impl.rs b/vendor/packed_simd_2/src/codegen/reductions/mask/fallback_impl.rs
new file mode 100644
index 000000000..0d246e2fd
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/reductions/mask/fallback_impl.rs
@@ -0,0 +1,237 @@
+//! Default implementation of a mask reduction for any target.
+
+/// Implements `All` and `Any` for `$id` by reinterpreting the value as the
+/// mask type `$other` and delegating to the implementation of `$other`.
+macro_rules! fallback_to_other_impl {
+    ($id:ident, $other:ident) => {
+        impl All for $id {
+            #[inline]
+            unsafe fn all(self) -> bool {
+                crate::mem::transmute::<$id, $other>(self).all()
+            }
+        }
+        impl Any for $id {
+            #[inline]
+            unsafe fn any(self) -> bool {
+                crate::mem::transmute::<$id, $other>(self).any()
+            }
+        }
+    };
+}
+
+/// Fallback implementation.
+///
+/// Dispatches on the name of the mask type. For every vector width the mask
+/// with 8-bit lanes has a direct implementation that reinterprets the vector
+/// as an integer (or an array of `u128`): `all` compares it against the
+/// all-ones value and `any` compares it against zero. Masks with wider lanes
+/// are forwarded with `fallback_to_other_impl!` to the mask of the same width
+/// whose lanes are half as wide.
+///
+/// NOTE(review): this presumably relies on every lane of a mask being either
+/// all-ones or all-zeros - confirm against the mask type definitions.
+macro_rules! fallback_impl {
+    // 16-bit wide masks:
+    (m8x2) => {
+        impl All for m8x2 {
+            #[inline]
+            unsafe fn all(self) -> bool {
+                let i: u16 = crate::mem::transmute(self);
+                i == u16::max_value()
+            }
+        }
+        impl Any for m8x2 {
+            #[inline]
+            unsafe fn any(self) -> bool {
+                let i: u16 = crate::mem::transmute(self);
+                i != 0
+            }
+        }
+    };
+    // 32-bit wide masks
+    (m8x4) => {
+        impl All for m8x4 {
+            #[inline]
+            unsafe fn all(self) -> bool {
+                let i: u32 = crate::mem::transmute(self);
+                i == u32::max_value()
+            }
+        }
+        impl Any for m8x4 {
+            #[inline]
+            unsafe fn any(self) -> bool {
+                let i: u32 = crate::mem::transmute(self);
+                i != 0
+            }
+        }
+    };
+    (m16x2) => {
+        fallback_to_other_impl!(m16x2, m8x4);
+    };
+    // 64-bit wide masks:
+    (m8x8) => {
+        impl All for m8x8 {
+            #[inline]
+            unsafe fn all(self) -> bool {
+                let i: u64 = crate::mem::transmute(self);
+                i == u64::max_value()
+            }
+        }
+        impl Any for m8x8 {
+            #[inline]
+            unsafe fn any(self) -> bool {
+                let i: u64 = crate::mem::transmute(self);
+                i != 0
+            }
+        }
+    };
+    (m16x4) => {
+        fallback_to_other_impl!(m16x4, m8x8);
+    };
+    (m32x2) => {
+        fallback_to_other_impl!(m32x2, m16x4);
+    };
+    // FIXME: 64x1 mask
+    // 128-bit wide masks:
+    (m8x16) => {
+        impl All for m8x16 {
+            #[inline]
+            unsafe fn all(self) -> bool {
+                let i: u128 = crate::mem::transmute(self);
+                i == u128::max_value()
+            }
+        }
+        impl Any for m8x16 {
+            #[inline]
+            unsafe fn any(self) -> bool {
+                let i: u128 = crate::mem::transmute(self);
+                i != 0
+            }
+        }
+    };
+    (m16x8) => {
+        fallback_to_other_impl!(m16x8, m8x16);
+    };
+    (m32x4) => {
+        fallback_to_other_impl!(m32x4, m16x8);
+    };
+    (m64x2) => {
+        fallback_to_other_impl!(m64x2, m32x4);
+    };
+    (m128x1) => {
+        fallback_to_other_impl!(m128x1, m64x2);
+    };
+    // 256-bit wide masks
+    (m8x32) => {
+        impl All for m8x32 {
+            #[inline]
+            unsafe fn all(self) -> bool {
+                // No 256-bit integer type: compare as two `u128` halves.
+                let i: [u128; 2] = crate::mem::transmute(self);
+                let o: [u128; 2] = [u128::max_value(); 2];
+                i == o
+            }
+        }
+        impl Any for m8x32 {
+            #[inline]
+            unsafe fn any(self) -> bool {
+                let i: [u128; 2] = crate::mem::transmute(self);
+                let o: [u128; 2] = [0; 2];
+                i != o
+            }
+        }
+    };
+    (m16x16) => {
+        fallback_to_other_impl!(m16x16, m8x32);
+    };
+    (m32x8) => {
+        fallback_to_other_impl!(m32x8, m16x16);
+    };
+    (m64x4) => {
+        fallback_to_other_impl!(m64x4, m32x8);
+    };
+    (m128x2) => {
+        fallback_to_other_impl!(m128x2, m64x4);
+    };
+    // 512-bit wide masks
+    (m8x64) => {
+        impl All for m8x64 {
+            #[inline]
+            unsafe fn all(self) -> bool {
+                // No 512-bit integer type: compare as four `u128` quarters.
+                let i: [u128; 4] = crate::mem::transmute(self);
+                let o: [u128; 4] = [u128::max_value(); 4];
+                i == o
+            }
+        }
+        impl Any for m8x64 {
+            #[inline]
+            unsafe fn any(self) -> bool {
+                let i: [u128; 4] = crate::mem::transmute(self);
+                let o: [u128; 4] = [0; 4];
+                i != o
+            }
+        }
+    };
+    (m16x32) => {
+        fallback_to_other_impl!(m16x32, m8x64);
+    };
+    (m32x16) => {
+        fallback_to_other_impl!(m32x16, m16x32);
+    };
+    (m64x8) => {
+        fallback_to_other_impl!(m64x8, m32x16);
+    };
+    (m128x4) => {
+        fallback_to_other_impl!(m128x4, m64x8);
+    };
+    // Masks with pointer-sized elements: forwarded to the mask with 64-bit
+    // or 32-bit lanes and the same lane count, depending on the pointer
+    // width of the target.
+    (msizex2) => {
+        cfg_if! {
+            if #[cfg(target_pointer_width = "64")] {
+                fallback_to_other_impl!(msizex2, m64x2);
+            } else if #[cfg(target_pointer_width = "32")] {
+                fallback_to_other_impl!(msizex2, m32x2);
+            } else {
+                compile_error!("unsupported target_pointer_width");
+            }
+        }
+    };
+    (msizex4) => {
+        cfg_if! {
+            if #[cfg(target_pointer_width = "64")] {
+                fallback_to_other_impl!(msizex4, m64x4);
+            } else if #[cfg(target_pointer_width = "32")] {
+                fallback_to_other_impl!(msizex4, m32x4);
+            } else {
+                compile_error!("unsupported target_pointer_width");
+            }
+        }
+    };
+    (msizex8) => {
+        cfg_if! {
+            if #[cfg(target_pointer_width = "64")] {
+                fallback_to_other_impl!(msizex8, m64x8);
+            } else if #[cfg(target_pointer_width = "32")] {
+                fallback_to_other_impl!(msizex8, m32x8);
+            } else {
+                compile_error!("unsupported target_pointer_width");
+            }
+        }
+    };
+}
+
+/// Implements `All` and `Any` for `$vid` by splitting the vector into two
+/// `$vid_h` halves and combining the reductions of the halves: `all` requires
+/// both halves, `any` requires at least one.
+macro_rules! recurse_half {
+    ($vid:ident, $vid_h:ident) => {
+        impl All for $vid {
+            #[inline]
+            unsafe fn all(self) -> bool {
+                // View the full vector as a pair of half-width vectors.
+                union Split {
+                    parts: ($vid_h, $vid_h),
+                    whole: $vid,
+                }
+                let (lo, hi) = Split { whole: self }.parts;
+                lo.all() && hi.all()
+            }
+        }
+        impl Any for $vid {
+            #[inline]
+            unsafe fn any(self) -> bool {
+                // View the full vector as a pair of half-width vectors.
+                union Split {
+                    parts: ($vid_h, $vid_h),
+                    whole: $vid,
+                }
+                let (lo, hi) = Split { whole: self }.parts;
+                lo.any() || hi.any()
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask/x86.rs b/vendor/packed_simd_2/src/codegen/reductions/mask/x86.rs
new file mode 100644
index 000000000..bcfb1a6e1
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/reductions/mask/x86.rs
@@ -0,0 +1,188 @@
+//! Mask reductions implementation for `x86` and `x86_64` targets
+
+// Each feature-specific module is only compiled when the corresponding target
+// feature is enabled at compile time. The dispatcher macros below test the
+// same `cfg` before expanding to a macro defined in one of these modules.
+#[cfg(target_feature = "sse")]
+#[macro_use]
+mod sse;
+
+#[cfg(target_feature = "sse2")]
+#[macro_use]
+mod sse2;
+
+#[cfg(target_feature = "avx")]
+#[macro_use]
+mod avx;
+
+#[cfg(target_feature = "avx2")]
+#[macro_use]
+mod avx2;
+
+/// x86 64-bit m8x8 implementation
+///
+/// There is no x86-specific code path for 64-bit wide masks: this always
+/// expands to `fallback_impl!`.
+macro_rules! x86_m8x8_impl {
+    ($id:ident) => {
+        fallback_impl!($id);
+    };
+}
+
+/// x86 128-bit m8x16 implementation
+///
+/// Expands to the `SSE2` implementation when `sse2` is enabled at compile
+/// time and to `fallback_impl!` otherwise.
+macro_rules! x86_m8x16_impl {
+    ($id:ident) => {
+        #[cfg(target_feature = "sse2")]
+        x86_m8x16_sse2_impl!($id);
+
+        #[cfg(not(target_feature = "sse2"))]
+        fallback_impl!($id);
+    };
+}
+
+/// x86 128-bit m32x4 implementation
+///
+/// Expands to the `SSE` implementation when `sse` is enabled at compile time
+/// and to `fallback_impl!` otherwise.
+macro_rules! x86_m32x4_impl {
+    ($id:ident) => {
+        #[cfg(target_feature = "sse")]
+        x86_m32x4_sse_impl!($id);
+
+        #[cfg(not(target_feature = "sse"))]
+        fallback_impl!($id);
+    };
+}
+
+/// x86 128-bit m64x2 implementation
+///
+/// Picks the first available option: the `SSE2` m64x2 implementation, the
+/// `SSE` m32x4 implementation applied to the same 128 bits, or
+/// `fallback_impl!`.
+macro_rules! x86_m64x2_impl {
+    ($id:ident) => {
+        cfg_if! {
+            if #[cfg(target_feature = "sse2")] {
+                x86_m64x2_sse2_impl!($id);
+            } else if #[cfg(target_feature = "sse")] {
+                // NOTE(review): reusing the 32-bit lane implementation
+                // presumably relies on each 64-bit lane being all-ones or
+                // all-zeros - confirm.
+                x86_m32x4_sse_impl!($id);
+            } else {
+                fallback_impl!($id);
+            }
+        }
+    };
+}
+
+/// x86 256-bit m8x32 implementation
+///
+/// Picks the first available option: the `AVX2` implementation, the `AVX`
+/// implementation, a split into two `$half_id` halves (each of which has an
+/// `SSE2` implementation), or `fallback_impl!`.
+macro_rules! x86_m8x32_impl {
+    ($id:ident, $half_id:ident) => {
+        cfg_if! {
+            if #[cfg(target_feature = "avx2")] {
+                x86_m8x32_avx2_impl!($id);
+            } else if #[cfg(target_feature = "avx")] {
+                x86_m8x32_avx_impl!($id);
+            } else if #[cfg(target_feature = "sse2")] {
+                recurse_half!($id, $half_id);
+            } else {
+                fallback_impl!($id);
+            }
+        }
+    };
+}
+
+/// x86 256-bit m32x8 implementation
+///
+/// Picks the first available option: the `AVX` implementation, a split into
+/// two `$half_id` halves (each of which has an `SSE` implementation), or
+/// `fallback_impl!`.
+macro_rules! x86_m32x8_impl {
+    ($id:ident, $half_id:ident) => {
+        cfg_if! {
+            if #[cfg(target_feature = "avx")] {
+                x86_m32x8_avx_impl!($id);
+            } else if #[cfg(target_feature = "sse")] {
+                recurse_half!($id, $half_id);
+            } else {
+                fallback_impl!($id);
+            }
+        }
+    };
+}
+
+/// x86 256-bit m64x4 implementation
+///
+/// Picks the first available option: the `AVX` implementation, a split into
+/// two `$half_id` halves when at least `SSE` is available, or
+/// `fallback_impl!`.
+macro_rules! x86_m64x4_impl {
+    ($id:ident, $half_id:ident) => {
+        cfg_if! {
+            if #[cfg(target_feature = "avx")] {
+                x86_m64x4_avx_impl!($id);
+            } else if #[cfg(target_feature = "sse")] {
+                recurse_half!($id, $half_id);
+            } else {
+                fallback_impl!($id);
+            }
+        }
+    };
+}
+
+/// Implements `All` and `Any` for `$id` by applying the `simd_reduce_all` and
+/// `simd_reduce_any` functions from `crate::llvm` to the inner vector
+/// `self.0`.
+macro_rules! x86_intr_impl {
+    ($id:ident) => {
+        impl All for $id {
+            #[inline]
+            unsafe fn all(self) -> bool {
+                crate::llvm::simd_reduce_all(self.0)
+            }
+        }
+        impl Any for $id {
+            #[inline]
+            unsafe fn any(self) -> bool {
+                crate::llvm::simd_reduce_any(self.0)
+            }
+        }
+    };
+}
+
+/// Mask reduction implementation for `x86` and `x86_64` targets
+///
+/// Dispatches on the name of the mask type: the 64-bit, 128-bit and 256-bit
+/// wide masks are routed to the x86 dispatcher macros, every other mask type
+/// is forwarded to `fallback_impl!`.
+///
+/// The arms are tried in order, so the literal type names must stay above the
+/// catch-all `$id:ident` arm.
+macro_rules! impl_mask_reductions {
+    // 64-bit wide masks
+    (m8x8) => { x86_m8x8_impl!(m8x8); };
+    (m16x4) => { x86_m8x8_impl!(m16x4); };
+    (m32x2) => { x86_m8x8_impl!(m32x2); };
+    // 128-bit wide masks
+    (m8x16) => { x86_m8x16_impl!(m8x16); };
+    (m16x8) => { x86_m8x16_impl!(m16x8); };
+    (m32x4) => { x86_m32x4_impl!(m32x4); };
+    (m64x2) => { x86_m64x2_impl!(m64x2); };
+    (m128x1) => { x86_intr_impl!(m128x1); };
+    // 256-bit wide masks:
+    (m8x32) => { x86_m8x32_impl!(m8x32, m8x16); };
+    (m16x16) => { x86_m8x32_impl!(m16x16, m16x8); };
+    (m32x8) => { x86_m32x8_impl!(m32x8, m32x4); };
+    (m64x4) => { x86_m64x4_impl!(m64x4, m64x2); };
+    (m128x2) => { x86_intr_impl!(m128x2); };
+
+    // Fallback to LLVM's default code-generation. This also covers the
+    // pointer-sized `msizex*` masks: `fallback_impl!` forwards them to the
+    // mask with 64-bit or 32-bit lanes selected by `target_pointer_width`,
+    // so no x86-specific arms are needed for them.
+    ($id:ident) => { fallback_impl!($id); };
+}
diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask/x86/avx.rs b/vendor/packed_simd_2/src/codegen/reductions/mask/x86/avx.rs
new file mode 100644
index 000000000..d18736fb0
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/reductions/mask/x86/avx.rs
@@ -0,0 +1,101 @@
+//! Mask reductions implementation for `x86` and `x86_64` targets with `AVX`
+
+/// `x86`/`x86_64` 256-bit `AVX` implementation
+///
+/// Implements `All` with `_mm256_testc_si256` and `Any` with
+/// `_mm256_testz_si256`, both applied to the whole 256-bit vector.
+/// FIXME: it might be faster here to do two `_mm_movemask_epi8`
+#[cfg(target_feature = "avx")]
+macro_rules! x86_m8x32_avx_impl {
+    ($id:ident) => {
+        impl All for $id {
+            #[inline]
+            #[target_feature(enable = "avx")]
+            unsafe fn all(self) -> bool {
+                #[cfg(target_arch = "x86")]
+                use crate::arch::x86 as arch;
+                #[cfg(target_arch = "x86_64")]
+                use crate::arch::x86_64 as arch;
+                // Test `self` against a mask with every lane set; a non-zero
+                // result means that all lanes of `self` are set as well.
+                let carry = arch::_mm256_testc_si256(
+                    crate::mem::transmute(self),
+                    crate::mem::transmute($id::splat(true)),
+                );
+                carry != 0
+            }
+        }
+        impl Any for $id {
+            #[inline]
+            #[target_feature(enable = "avx")]
+            unsafe fn any(self) -> bool {
+                #[cfg(target_arch = "x86")]
+                use crate::arch::x86 as arch;
+                #[cfg(target_arch = "x86_64")]
+                use crate::arch::x86_64 as arch;
+                // Test `self` against itself; a zero result means that at
+                // least one bit of `self` is set.
+                let zero = arch::_mm256_testz_si256(
+                    crate::mem::transmute(self),
+                    crate::mem::transmute(self),
+                );
+                zero == 0
+            }
+        }
+    };
+}
+
+/// `x86`/`x86_64` 256-bit m32x8 `AVX` implementation
+///
+/// Implements `All` and `Any` for `$id` with the `AVX` intrinsic
+/// `_mm256_movemask_ps`. The methods enable the `avx` target feature, which
+/// is the feature that intrinsic requires.
+macro_rules! x86_m32x8_avx_impl {
+    ($id:ident) => {
+        impl All for $id {
+            #[inline]
+            #[target_feature(enable = "avx")]
+            unsafe fn all(self) -> bool {
+                #[cfg(target_arch = "x86")]
+                use crate::arch::x86::_mm256_movemask_ps;
+                #[cfg(target_arch = "x86_64")]
+                use crate::arch::x86_64::_mm256_movemask_ps;
+                // _mm256_movemask_ps(a) creates a 8bit mask containing the
+                // most significant bit of each lane of `a`. If all bits are
+                // set, then all 8 lanes of the mask are true.
+                _mm256_movemask_ps(crate::mem::transmute(self)) == 0b_1111_1111_i32
+            }
+        }
+        impl Any for $id {
+            #[inline]
+            #[target_feature(enable = "avx")]
+            unsafe fn any(self) -> bool {
+                #[cfg(target_arch = "x86")]
+                use crate::arch::x86::_mm256_movemask_ps;
+                #[cfg(target_arch = "x86_64")]
+                use crate::arch::x86_64::_mm256_movemask_ps;
+
+                // Any set bit in the mask means that at least one lane is
+                // true.
+                _mm256_movemask_ps(crate::mem::transmute(self)) != 0
+            }
+        }
+    };
+}
+
+/// `x86`/`x86_64` 256-bit m64x4 `AVX` implementation
+///
+/// Implements `All` and `Any` for `$id` with the `AVX` intrinsic
+/// `_mm256_movemask_pd`. The methods enable the `avx` target feature, which
+/// is the feature that intrinsic requires.
+macro_rules! x86_m64x4_avx_impl {
+    ($id:ident) => {
+        impl All for $id {
+            #[inline]
+            #[target_feature(enable = "avx")]
+            unsafe fn all(self) -> bool {
+                #[cfg(target_arch = "x86")]
+                use crate::arch::x86::_mm256_movemask_pd;
+                #[cfg(target_arch = "x86_64")]
+                use crate::arch::x86_64::_mm256_movemask_pd;
+                // _mm256_movemask_pd(a) creates a 4bit mask containing the
+                // most significant bit of each lane of `a`. If all bits are
+                // set, then all 4 lanes of the mask are true.
+                _mm256_movemask_pd(crate::mem::transmute(self)) == 0b_1111_i32
+            }
+        }
+        impl Any for $id {
+            #[inline]
+            #[target_feature(enable = "avx")]
+            unsafe fn any(self) -> bool {
+                #[cfg(target_arch = "x86")]
+                use crate::arch::x86::_mm256_movemask_pd;
+                #[cfg(target_arch = "x86_64")]
+                use crate::arch::x86_64::_mm256_movemask_pd;
+
+                // Any set bit in the mask means that at least one lane is
+                // true.
+                _mm256_movemask_pd(crate::mem::transmute(self)) != 0
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask/x86/avx2.rs b/vendor/packed_simd_2/src/codegen/reductions/mask/x86/avx2.rs
new file mode 100644
index 000000000..d37d02342
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/reductions/mask/x86/avx2.rs
@@ -0,0 +1,35 @@
+//! Mask reductions implementation for `x86` and `x86_64` targets with `AVX2`.
+#![allow(unused)]
+
+/// x86/x86_64 256-bit m8x32 AVX2 implementation
+///
+/// Implements `All` and `Any` for `$id` with the `AVX2` intrinsic
+/// `_mm256_movemask_epi8`. The methods enable the `avx2` target feature,
+/// which is the feature that intrinsic requires.
+macro_rules! x86_m8x32_avx2_impl {
+    ($id:ident) => {
+        impl All for $id {
+            #[inline]
+            #[target_feature(enable = "avx2")]
+            unsafe fn all(self) -> bool {
+                #[cfg(target_arch = "x86")]
+                use crate::arch::x86::_mm256_movemask_epi8;
+                #[cfg(target_arch = "x86_64")]
+                use crate::arch::x86_64::_mm256_movemask_epi8;
+                // _mm256_movemask_epi8(a) creates a 32bit mask containing the
+                // most significant bit of each byte of `a`. If all
+                // bits are set, then all 32 lanes of the mask are
+                // true.
+                _mm256_movemask_epi8(crate::mem::transmute(self)) == -1_i32
+            }
+        }
+        impl Any for $id {
+            #[inline]
+            #[target_feature(enable = "avx2")]
+            unsafe fn any(self) -> bool {
+                #[cfg(target_arch = "x86")]
+                use crate::arch::x86::_mm256_movemask_epi8;
+                #[cfg(target_arch = "x86_64")]
+                use crate::arch::x86_64::_mm256_movemask_epi8;
+
+                // Any set bit in the mask means that at least one lane is
+                // true.
+                _mm256_movemask_epi8(crate::mem::transmute(self)) != 0
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask/x86/sse.rs b/vendor/packed_simd_2/src/codegen/reductions/mask/x86/sse.rs
new file mode 100644
index 000000000..eb1ef7fac
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/reductions/mask/x86/sse.rs
@@ -0,0 +1,36 @@
+//! Mask reductions implementation for `x86` and `x86_64` targets with `SSE`.
+#![allow(unused)]
+
+/// `x86`/`x86_64` 128-bit `m32x4` `SSE` implementation
+///
+/// Implements `All` and `Any` for `$id` with the `_mm_movemask_ps` intrinsic.
+macro_rules! x86_m32x4_sse_impl {
+    ($id:ident) => {
+        impl All for $id {
+            #[inline]
+            #[target_feature(enable = "sse")]
+            unsafe fn all(self) -> bool {
+                #[cfg(target_arch = "x86")]
+                use crate::arch::x86 as arch;
+                #[cfg(target_arch = "x86_64")]
+                use crate::arch::x86_64 as arch;
+                // `_mm_movemask_ps` collects the most significant bit of each
+                // of the four lanes into a 4-bit mask, so all lanes are true
+                // exactly when all four bits are set.
+                const ALL_LANES: i32 = 0b_1111;
+                arch::_mm_movemask_ps(crate::mem::transmute(self)) == ALL_LANES
+            }
+        }
+        impl Any for $id {
+            #[inline]
+            #[target_feature(enable = "sse")]
+            unsafe fn any(self) -> bool {
+                #[cfg(target_arch = "x86")]
+                use crate::arch::x86 as arch;
+                #[cfg(target_arch = "x86_64")]
+                use crate::arch::x86_64 as arch;
+                // At least one lane is true when any bit of the mask is set.
+                arch::_mm_movemask_ps(crate::mem::transmute(self)) != 0
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask/x86/sse2.rs b/vendor/packed_simd_2/src/codegen/reductions/mask/x86/sse2.rs
new file mode 100644
index 000000000..a99c606f5
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/reductions/mask/x86/sse2.rs
@@ -0,0 +1,70 @@
+//! Mask reductions implementation for `x86` and `x86_64` targets with `SSE2`.
+#![allow(unused)]
+
+/// `x86`/`x86_64` 128-bit m64x2 `SSE2` implementation
+///
+/// Implements `All` and `Any` for `$id` with the `SSE2` intrinsic
+/// `_mm_movemask_pd`. The methods enable the `sse2` target feature, which is
+/// the feature that intrinsic requires.
+macro_rules! x86_m64x2_sse2_impl {
+    ($id:ident) => {
+        impl All for $id {
+            #[inline]
+            #[target_feature(enable = "sse2")]
+            unsafe fn all(self) -> bool {
+                #[cfg(target_arch = "x86")]
+                use crate::arch::x86::_mm_movemask_pd;
+                #[cfg(target_arch = "x86_64")]
+                use crate::arch::x86_64::_mm_movemask_pd;
+                // _mm_movemask_pd(a) creates a 2bit mask containing the
+                // most significant bit of each lane of `a`. If all
+                // bits are set, then all 2 lanes of the mask are
+                // true.
+                _mm_movemask_pd(crate::mem::transmute(self))
+                    == 0b_11_i32
+            }
+        }
+        impl Any for $id {
+            #[inline]
+            #[target_feature(enable = "sse2")]
+            unsafe fn any(self) -> bool {
+                #[cfg(target_arch = "x86")]
+                use crate::arch::x86::_mm_movemask_pd;
+                #[cfg(target_arch = "x86_64")]
+                use crate::arch::x86_64::_mm_movemask_pd;
+
+                // Any set bit in the mask means that at least one lane is
+                // true.
+                _mm_movemask_pd(crate::mem::transmute(self)) != 0
+            }
+        }
+    };
+}
+
+/// `x86`/`x86_64` 128-bit m8x16 `SSE2` implementation
+///
+/// Implements `All` and `Any` for `$id` with the `_mm_movemask_epi8`
+/// intrinsic.
+macro_rules! x86_m8x16_sse2_impl {
+    ($id:ident) => {
+        impl All for $id {
+            #[inline]
+            #[target_feature(enable = "sse2")]
+            unsafe fn all(self) -> bool {
+                #[cfg(target_arch = "x86")]
+                use crate::arch::x86 as arch;
+                #[cfg(target_arch = "x86_64")]
+                use crate::arch::x86_64 as arch;
+                // `_mm_movemask_epi8` collects the most significant bit of
+                // each of the 16 bytes into a 16-bit mask, so all lanes are
+                // true exactly when all 16 bits are set.
+                const ALL_LANES: i32 = 0xFFFF;
+                arch::_mm_movemask_epi8(crate::mem::transmute(self)) == ALL_LANES
+            }
+        }
+        impl Any for $id {
+            #[inline]
+            #[target_feature(enable = "sse2")]
+            unsafe fn any(self) -> bool {
+                #[cfg(target_arch = "x86")]
+                use crate::arch::x86 as arch;
+                #[cfg(target_arch = "x86_64")]
+                use crate::arch::x86_64 as arch;
+                // At least one lane is true when any bit of the mask is set.
+                arch::_mm_movemask_epi8(crate::mem::transmute(self)) != 0
+            }
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/codegen/shuffle.rs b/vendor/packed_simd_2/src/codegen/shuffle.rs
new file mode 100644
index 000000000..d92c9ee22
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/shuffle.rs
@@ -0,0 +1,150 @@
+//! Implementations of the `Shuffle` trait for the different numbers of
+//! lanes and vector element types.
+
+use crate::masks::*;
+use crate::sealed::{Shuffle, Seal};
+
+// Seals `$elem` for the index array type `$indices` and sets the matching
+// `Shuffle::Output` to the vector type `$vector`.
+macro_rules! impl_shuffle {
+    ($indices:ty, $elem:ty, $vector:ty) => {
+        impl Seal<$indices> for $elem {}
+        impl Shuffle<$indices> for $elem { type Output = $vector; }
+    };
+}
+
+impl_shuffle! { [u32; 2], i8, crate::codegen::i8x2 }
+impl_shuffle! { [u32; 4], i8, crate::codegen::i8x4 }
+impl_shuffle! { [u32; 8], i8, crate::codegen::i8x8 }
+impl_shuffle! { [u32; 16], i8, crate::codegen::i8x16 }
+impl_shuffle! { [u32; 32], i8, crate::codegen::i8x32 }
+impl_shuffle! { [u32; 64], i8, crate::codegen::i8x64 }
+
+impl_shuffle! { [u32; 2], u8, crate::codegen::u8x2 }
+impl_shuffle! { [u32; 4], u8, crate::codegen::u8x4 }
+impl_shuffle! { [u32; 8], u8, crate::codegen::u8x8 }
+impl_shuffle! { [u32; 16], u8, crate::codegen::u8x16 }
+impl_shuffle! { [u32; 32], u8, crate::codegen::u8x32 }
+impl_shuffle! { [u32; 64], u8, crate::codegen::u8x64 }
+
+impl_shuffle! { [u32; 2], m8, crate::codegen::m8x2 }
+impl_shuffle! { [u32; 4], m8, crate::codegen::m8x4 }
+impl_shuffle! { [u32; 8], m8, crate::codegen::m8x8 }
+impl_shuffle! { [u32; 16], m8, crate::codegen::m8x16 }
+impl_shuffle! { [u32; 32], m8, crate::codegen::m8x32 }
+impl_shuffle! { [u32; 64], m8, crate::codegen::m8x64 }
+
+impl_shuffle! { [u32; 2], i16, crate::codegen::i16x2 }
+impl_shuffle! { [u32; 4], i16, crate::codegen::i16x4 }
+impl_shuffle! { [u32; 8], i16, crate::codegen::i16x8 }
+impl_shuffle! { [u32; 16], i16, crate::codegen::i16x16 }
+impl_shuffle! { [u32; 32], i16, crate::codegen::i16x32 }
+
+impl_shuffle! { [u32; 2], u16, crate::codegen::u16x2 }
+impl_shuffle! { [u32; 4], u16, crate::codegen::u16x4 }
+impl_shuffle! { [u32; 8], u16, crate::codegen::u16x8 }
+impl_shuffle! { [u32; 16], u16, crate::codegen::u16x16 }
+impl_shuffle! { [u32; 32], u16, crate::codegen::u16x32 }
+
+impl_shuffle! { [u32; 2], m16, crate::codegen::m16x2 }
+impl_shuffle! { [u32; 4], m16, crate::codegen::m16x4 }
+impl_shuffle! { [u32; 8], m16, crate::codegen::m16x8 }
+impl_shuffle! { [u32; 16], m16, crate::codegen::m16x16 }
+
+impl_shuffle! { [u32; 2], i32, crate::codegen::i32x2 }
+impl_shuffle! { [u32; 4], i32, crate::codegen::i32x4 }
+impl_shuffle! { [u32; 8], i32, crate::codegen::i32x8 }
+impl_shuffle! { [u32; 16], i32, crate::codegen::i32x16 }
+
+impl_shuffle! { [u32; 2], u32, crate::codegen::u32x2 }
+impl_shuffle! { [u32; 4], u32, crate::codegen::u32x4 }
+impl_shuffle! { [u32; 8], u32, crate::codegen::u32x8 }
+impl_shuffle! { [u32; 16], u32, crate::codegen::u32x16 }
+
+impl_shuffle! { [u32; 2], f32, crate::codegen::f32x2 }
+impl_shuffle! { [u32; 4], f32, crate::codegen::f32x4 }
+impl_shuffle! { [u32; 8], f32, crate::codegen::f32x8 }
+impl_shuffle! { [u32; 16], f32, crate::codegen::f32x16 }
+
+impl_shuffle! { [u32; 2], m32, crate::codegen::m32x2 }
+impl_shuffle! { [u32; 4], m32, crate::codegen::m32x4 }
+impl_shuffle! { [u32; 8], m32, crate::codegen::m32x8 }
+impl_shuffle! { [u32; 16], m32, crate::codegen::m32x16 }
+
+/* FIXME: 64-bit single element vector
+impl_shuffle! { [u32; 1], i64, crate::codegen::i64x1 }
+*/
+impl_shuffle! { [u32; 2], i64, crate::codegen::i64x2 }
+impl_shuffle! { [u32; 4], i64, crate::codegen::i64x4 }
+impl_shuffle! { [u32; 8], i64, crate::codegen::i64x8 }
+
+/* FIXME: 64-bit single element vector
+impl_shuffle! { [u32; 1], u64, crate::codegen::u64x1 }
+*/
+impl_shuffle! { [u32; 2], u64, crate::codegen::u64x2 }
+impl_shuffle! { [u32; 4], u64, crate::codegen::u64x4 }
+impl_shuffle! { [u32; 8], u64, crate::codegen::u64x8 }
+
+/* FIXME: 64-bit single element vector
+impl_shuffle! { [u32; 1], f64, crate::codegen::f64x1 }
+*/
+impl_shuffle! { [u32; 2], f64, crate::codegen::f64x2 }
+impl_shuffle! { [u32; 4], f64, crate::codegen::f64x4 }
+impl_shuffle! { [u32; 8], f64, crate::codegen::f64x8 }
+
+/* FIXME: 64-bit single element vector
+impl_shuffle! { [u32; 1], m64, crate::codegen::m64x1 }
+*/
+impl_shuffle! { [u32; 2], m64, crate::codegen::m64x2 }
+impl_shuffle! { [u32; 4], m64, crate::codegen::m64x4 }
+impl_shuffle! { [u32; 8], m64, crate::codegen::m64x8 }
+
+impl_shuffle! { [u32; 2], isize, crate::codegen::isizex2 }
+impl_shuffle! { [u32; 4], isize, crate::codegen::isizex4 }
+impl_shuffle! { [u32; 8], isize, crate::codegen::isizex8 }
+
+impl_shuffle! { [u32; 2], usize, crate::codegen::usizex2 }
+impl_shuffle! { [u32; 4], usize, crate::codegen::usizex4 }
+impl_shuffle! { [u32; 8], usize, crate::codegen::usizex8 }
+
+impl_shuffle! { [u32; 2], msize, crate::codegen::msizex2 }
+impl_shuffle! { [u32; 4], msize, crate::codegen::msizex4 }
+impl_shuffle! { [u32; 8], msize, crate::codegen::msizex8 }
+
+impl<T> Seal<[u32; 2]> for *const T {}
+impl<T> Shuffle<[u32; 2]> for *const T {
+    type Output = crate::codegen::cptrx2<T>;
+}
+impl<T> Seal<[u32; 4]> for *const T {}
+impl<T> Shuffle<[u32; 4]> for *const T {
+    type Output = crate::codegen::cptrx4<T>;
+}
+impl<T> Seal<[u32; 8]> for *const T {}
+impl<T> Shuffle<[u32; 8]> for *const T {
+    type Output = crate::codegen::cptrx8<T>;
+}
+
+impl<T> Seal<[u32; 2]> for *mut T {}
+impl<T> Shuffle<[u32; 2]> for *mut T {
+    type Output = crate::codegen::mptrx2<T>;
+}
+impl<T> Seal<[u32; 4]> for *mut T {}
+impl<T> Shuffle<[u32; 4]> for *mut T {
+    type Output = crate::codegen::mptrx4<T>;
+}
+impl<T> Seal<[u32; 8]> for *mut T {}
+impl<T> Shuffle<[u32; 8]> for *mut T {
+    type Output = crate::codegen::mptrx8<T>;
+}
+
+impl_shuffle! { [u32; 1], i128, crate::codegen::i128x1 }
+impl_shuffle! { [u32; 2], i128, crate::codegen::i128x2 }
+impl_shuffle! { [u32; 4], i128, crate::codegen::i128x4 }
+
+impl_shuffle! { [u32; 1], u128, crate::codegen::u128x1 }
+impl_shuffle! { [u32; 2], u128, crate::codegen::u128x2 }
+impl_shuffle! { [u32; 4], u128, crate::codegen::u128x4 }
+
+impl_shuffle! { [u32; 1], m128, crate::codegen::m128x1 }
+impl_shuffle! { [u32; 2], m128, crate::codegen::m128x2 }
+impl_shuffle! { [u32; 4], m128, crate::codegen::m128x4 }
diff --git a/vendor/packed_simd_2/src/codegen/shuffle1_dyn.rs b/vendor/packed_simd_2/src/codegen/shuffle1_dyn.rs
new file mode 100644
index 000000000..8d9577b26
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/shuffle1_dyn.rs
@@ -0,0 +1,411 @@
+//! Shuffle vector lanes with run-time indices.
+
+use crate::*;
+
+pub trait Shuffle1Dyn {
+    type Indices;
+    fn shuffle1_dyn(self, _: Self::Indices) -> Self;
+}
+
+// Fallback implementation: a portable lane-by-lane shuffle.
+macro_rules! impl_fallback {
+    ($id:ident) => {
+        impl Shuffle1Dyn for $id {
+            type Indices = Self;
+            #[inline]
+            fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+                // Lane `i` of the output is the lane of `self` selected by
+                // lane `i` of `indices`.
+                (0..$id::lanes()).fold(Self::splat(0), |acc, lane| {
+                    let source = indices.extract(lane) as usize;
+                    acc.replace(lane, self.extract(source))
+                })
+            }
+        }
+    };
+}
+
+macro_rules! impl_shuffle1_dyn {
+    (u8x8) => {
+        cfg_if! {
+            if #[cfg(all(
+                any(
+                    all(target_arch = "aarch64", target_feature = "neon"),
+                    all(target_arch = "arm", target_feature = "v7",
+                        target_feature = "neon")
+                ),
+                any(feature = "core_arch", libcore_neon)
+            )
+            )] {
+                impl Shuffle1Dyn for u8x8 {
+                    type Indices = Self;
+                    #[inline]
+                    fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+                        #[cfg(target_arch = "aarch64")]
+                        use crate::arch::aarch64::vtbl1_u8;
+                        #[cfg(target_arch = "arm")]
+                        use crate::arch::arm::vtbl1_u8;
+
+                        // This is safe because the binary is compiled with
+                        // neon enabled at compile-time and can therefore only
+                        // run on CPUs that have it enabled.
+                        unsafe {
+                            Simd(mem::transmute(
+                                vtbl1_u8(mem::transmute(self.0),
+                                        crate::mem::transmute(indices.0))
+                            ))
+                        }
+                    }
+                }
+            } else {
+                impl_fallback!(u8x8);
+            }
+        }
+    };
+    (u8x16) => {
+        cfg_if! {
+            if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"),
+                         target_feature = "ssse3"))] {
+                impl Shuffle1Dyn for u8x16 {
+                    type Indices = Self;
+                    #[inline]
+                    fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+                        #[cfg(target_arch = "x86")]
+                        use crate::arch::x86::_mm_shuffle_epi8;
+                        #[cfg(target_arch = "x86_64")]
+                        use crate::arch::x86_64::_mm_shuffle_epi8;
+                        // This is safe because the binary is compiled with
+                        // ssse3 enabled at compile-time and can therefore only
+                        // run on CPUs that have it enabled.
+                        unsafe {
+                            Simd(mem::transmute(
+                                _mm_shuffle_epi8(mem::transmute(self.0),
+                                                crate::mem::transmute(indices))
+                            ))
+                        }
+                    }
+                }
+            } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon",
+                                any(feature = "core_arch", libcore_neon)))] {
+                impl Shuffle1Dyn for u8x16 {
+                    type Indices = Self;
+                    #[inline]
+                    fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+                        use crate::arch::aarch64::vqtbl1q_u8;
+
+                        // This is safe because the binary is compiled with
+                        // neon enabled at compile-time and can therefore only
+                        // run on CPUs that have it enabled.
+                        unsafe {
+                            Simd(mem::transmute(
+                                vqtbl1q_u8(mem::transmute(self.0),
+                                          crate::mem::transmute(indices.0))
+                            ))
+                        }
+                    }
+                }
+            } else if #[cfg(all(target_arch = "arm", target_feature = "v7",
+                                target_feature = "neon",
+                                any(feature = "core_arch", libcore_neon)))] {
+                impl Shuffle1Dyn for u8x16 {
+                    type Indices = Self;
+                    #[inline]
+                    fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+                        use crate::arch::arm::vtbl2_u8;
+
+                        // SAFETY: this is safe because the binary is compiled with
+                        // neon enabled at compile-time and can therefore only
+                        // run on CPUs that have it enabled. Every transmute below
+                        // converts between 8-byte or between 16-byte vector
+                        // types.
+                        unsafe {
+                            // `vtbl2_u8` yields only 8 lanes per call, so
+                            // the 16 indices are looked up in two halves. An
+                            // array is used for the split because, unlike a
+                            // tuple, its element order is guaranteed.
+                            let [i0, i1]: [u8x8; 2] =
+                                crate::mem::transmute(indices);
+
+                            let r0: u8x8 = crate::mem::transmute(vtbl2_u8(
+                                crate::mem::transmute(self),
+                                crate::mem::transmute(i0),
+                            ));
+                            let r1: u8x8 = crate::mem::transmute(vtbl2_u8(
+                                crate::mem::transmute(self),
+                                crate::mem::transmute(i1),
+                            ));
+
+                            crate::mem::transmute([r0, r1])
+                        }
+                    }
+                }
+            } else {
+                impl_fallback!(u8x16);
+            }
+        }
+    };
+    (u16x8) => {
+        impl Shuffle1Dyn for u16x8 {
+            type Indices = Self;
+            #[inline]
+            fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+                let indices: u8x8 = (indices * 2).cast();
+                let indices: u8x16 = shuffle!(
+                    indices, [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7]
+                );
+                let v = u8x16::new(
+                    0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
+                );
+                let indices = indices + v;
+                unsafe {
+                    let s: u8x16 =crate::mem::transmute(self);
+                   crate::mem::transmute(s.shuffle1_dyn(indices))
+                }
+            }
+        }
+    };
+    (u32x4) => {
+        cfg_if! {
+            if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"),
+                         target_feature = "avx"))] {
+                impl Shuffle1Dyn for u32x4 {
+                    type Indices = Self;
+                    #[inline]
+                    fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+                        #[cfg(target_arch = "x86")]
+                        use crate::arch::x86::{_mm_permutevar_ps};
+                        #[cfg(target_arch = "x86_64")]
+                        use crate::arch::x86_64::{_mm_permutevar_ps};
+
+                        unsafe {
+                            crate::mem::transmute(
+                                _mm_permutevar_ps(
+                                    crate::mem::transmute(self.0),
+                                    crate::mem::transmute(indices.0)
+                                )
+                            )
+                        }
+                    }
+                }
+            } else {
+                impl Shuffle1Dyn for u32x4 {
+                    type Indices = Self;
+                    #[inline]
+                    fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+                        let indices: u8x4 = (indices * 4).cast();
+                        let indices: u8x16 = shuffle!(
+                            indices,
+                            [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3]
+                        );
+                        let v = u8x16::new(
+                            0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
+                        );
+                        let indices = indices + v;
+                        unsafe {
+                            let s: u8x16 =crate::mem::transmute(self);
+                           crate::mem::transmute(s.shuffle1_dyn(indices))
+                        }
+                    }
+                }
+            }
+        }
+    };
+    (u64x2) => {
+        cfg_if! {
+            if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"),
+                         target_feature = "avx"))] {
+                impl Shuffle1Dyn for u64x2 {
+                    type Indices = Self;
+                    #[inline]
+                    fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+                        #[cfg(target_arch = "x86")]
+                        use crate::arch::x86::{_mm_permutevar_pd};
+                        #[cfg(target_arch = "x86_64")]
+                        use crate::arch::x86_64::{_mm_permutevar_pd};
+                        // _mm_permutevar_pd uses the _second_ bit of each
+                        // element to perform the selection, that is: 0b00 => 0,
+                        // 0b10 => 1:
+                        let indices = indices << 1;
+                        unsafe {
+                            crate::mem::transmute(
+                                _mm_permutevar_pd(
+                                    crate::mem::transmute(self),
+                                    crate::mem::transmute(indices)
+                                )
+                            )
+                        }
+                    }
+                }
+            } else {
+                impl Shuffle1Dyn for u64x2 {
+                    type Indices = Self;
+                    #[inline]
+                    fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+                        let indices: u8x2 = (indices * 8).cast();
+                        let indices: u8x16 = shuffle!(
+                            indices,
+                            [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+                        );
+                        let v = u8x16::new(
+                            0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7
+                        );
+                        let indices = indices + v;
+                        unsafe {
+                            let s: u8x16 =crate::mem::transmute(self);
+                           crate::mem::transmute(s.shuffle1_dyn(indices))
+                        }
+                    }
+                }
+            }
+        }
+    };
+    (u128x1) => {
+        impl Shuffle1Dyn for u128x1 {
+            type Indices = Self;
+            #[inline]
+            fn shuffle1_dyn(self, _indices: Self::Indices) -> Self {
+                self
+            }
+        }
+    };
+    ($id:ident) => { impl_fallback!($id); }
+}
+
+impl_shuffle1_dyn!(u8x2);
+impl_shuffle1_dyn!(u8x4);
+impl_shuffle1_dyn!(u8x8);
+impl_shuffle1_dyn!(u8x16);
+impl_shuffle1_dyn!(u8x32);
+impl_shuffle1_dyn!(u8x64);
+
+impl_shuffle1_dyn!(u16x2);
+impl_shuffle1_dyn!(u16x4);
+impl_shuffle1_dyn!(u16x8);
+impl_shuffle1_dyn!(u16x16);
+impl_shuffle1_dyn!(u16x32);
+
+impl_shuffle1_dyn!(u32x2);
+impl_shuffle1_dyn!(u32x4);
+impl_shuffle1_dyn!(u32x8);
+impl_shuffle1_dyn!(u32x16);
+
+impl_shuffle1_dyn!(u64x2);
+impl_shuffle1_dyn!(u64x4);
+impl_shuffle1_dyn!(u64x8);
+
+impl_shuffle1_dyn!(usizex2);
+impl_shuffle1_dyn!(usizex4);
+impl_shuffle1_dyn!(usizex8);
+
+impl_shuffle1_dyn!(u128x1);
+impl_shuffle1_dyn!(u128x2);
+impl_shuffle1_dyn!(u128x4);
+
+// Implementation for non-unsigned vector types
+macro_rules! impl_shuffle1_dyn_non_u {
+    ($id:ident, $uid:ident) => {
+        impl Shuffle1Dyn for $id {
+            type Indices = $uid;
+            #[inline]
+            fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+                // Shuffle the bits as the same-sized unsigned vector `$uid`,
+                // then reinterpret the shuffled lanes as `$id` again.
+                let bits: $uid = unsafe { crate::mem::transmute(self) };
+                unsafe { crate::mem::transmute(bits.shuffle1_dyn(indices)) }
+            }
+        }
+    };
+}
+
+impl_shuffle1_dyn_non_u!(i8x2, u8x2);
+impl_shuffle1_dyn_non_u!(i8x4, u8x4);
+impl_shuffle1_dyn_non_u!(i8x8, u8x8);
+impl_shuffle1_dyn_non_u!(i8x16, u8x16);
+impl_shuffle1_dyn_non_u!(i8x32, u8x32);
+impl_shuffle1_dyn_non_u!(i8x64, u8x64);
+
+impl_shuffle1_dyn_non_u!(i16x2, u16x2);
+impl_shuffle1_dyn_non_u!(i16x4, u16x4);
+impl_shuffle1_dyn_non_u!(i16x8, u16x8);
+impl_shuffle1_dyn_non_u!(i16x16, u16x16);
+impl_shuffle1_dyn_non_u!(i16x32, u16x32);
+
+impl_shuffle1_dyn_non_u!(i32x2, u32x2);
+impl_shuffle1_dyn_non_u!(i32x4, u32x4);
+impl_shuffle1_dyn_non_u!(i32x8, u32x8);
+impl_shuffle1_dyn_non_u!(i32x16, u32x16);
+
+impl_shuffle1_dyn_non_u!(i64x2, u64x2);
+impl_shuffle1_dyn_non_u!(i64x4, u64x4);
+impl_shuffle1_dyn_non_u!(i64x8, u64x8);
+
+impl_shuffle1_dyn_non_u!(isizex2, usizex2);
+impl_shuffle1_dyn_non_u!(isizex4, usizex4);
+impl_shuffle1_dyn_non_u!(isizex8, usizex8);
+
+impl_shuffle1_dyn_non_u!(i128x1, u128x1);
+impl_shuffle1_dyn_non_u!(i128x2, u128x2);
+impl_shuffle1_dyn_non_u!(i128x4, u128x4);
+
+impl_shuffle1_dyn_non_u!(m8x2, u8x2);
+impl_shuffle1_dyn_non_u!(m8x4, u8x4);
+impl_shuffle1_dyn_non_u!(m8x8, u8x8);
+impl_shuffle1_dyn_non_u!(m8x16, u8x16);
+impl_shuffle1_dyn_non_u!(m8x32, u8x32);
+impl_shuffle1_dyn_non_u!(m8x64, u8x64);
+
+impl_shuffle1_dyn_non_u!(m16x2, u16x2);
+impl_shuffle1_dyn_non_u!(m16x4, u16x4);
+impl_shuffle1_dyn_non_u!(m16x8, u16x8);
+impl_shuffle1_dyn_non_u!(m16x16, u16x16);
+impl_shuffle1_dyn_non_u!(m16x32, u16x32);
+
+impl_shuffle1_dyn_non_u!(m32x2, u32x2);
+impl_shuffle1_dyn_non_u!(m32x4, u32x4);
+impl_shuffle1_dyn_non_u!(m32x8, u32x8);
+impl_shuffle1_dyn_non_u!(m32x16, u32x16);
+
+impl_shuffle1_dyn_non_u!(m64x2, u64x2);
+impl_shuffle1_dyn_non_u!(m64x4, u64x4);
+impl_shuffle1_dyn_non_u!(m64x8, u64x8);
+
+impl_shuffle1_dyn_non_u!(msizex2, usizex2);
+impl_shuffle1_dyn_non_u!(msizex4, usizex4);
+impl_shuffle1_dyn_non_u!(msizex8, usizex8);
+
+impl_shuffle1_dyn_non_u!(m128x1, u128x1);
+impl_shuffle1_dyn_non_u!(m128x2, u128x2);
+impl_shuffle1_dyn_non_u!(m128x4, u128x4);
+
+impl_shuffle1_dyn_non_u!(f32x2, u32x2);
+impl_shuffle1_dyn_non_u!(f32x4, u32x4);
+impl_shuffle1_dyn_non_u!(f32x8, u32x8);
+impl_shuffle1_dyn_non_u!(f32x16, u32x16);
+
+impl_shuffle1_dyn_non_u!(f64x2, u64x2);
+impl_shuffle1_dyn_non_u!(f64x4, u64x4);
+impl_shuffle1_dyn_non_u!(f64x8, u64x8);
+
+// Implementation for pointer vector types
+macro_rules! impl_shuffle1_dyn_ptr {
+    ($id:ident, $uid:ident) => {
+        impl<T> Shuffle1Dyn for $id<T> {
+            type Indices = $uid;
+            #[inline]
+            fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+                // The lanes of `$id<T>` are shuffled as a `$uid` vector and
+                // the result is reinterpreted as `$id<T>` again.
+                let addrs: $uid = unsafe { crate::mem::transmute(self) };
+                unsafe { crate::mem::transmute(addrs.shuffle1_dyn(indices)) }
+            }
+        }
+    };
+}
+
+impl_shuffle1_dyn_ptr!(cptrx2, usizex2);
+impl_shuffle1_dyn_ptr!(cptrx4, usizex4);
+impl_shuffle1_dyn_ptr!(cptrx8, usizex8);
+
+impl_shuffle1_dyn_ptr!(mptrx2, usizex2);
+impl_shuffle1_dyn_ptr!(mptrx4, usizex4);
+impl_shuffle1_dyn_ptr!(mptrx8, usizex8);
diff --git a/vendor/packed_simd_2/src/codegen/swap_bytes.rs b/vendor/packed_simd_2/src/codegen/swap_bytes.rs
new file mode 100644
index 000000000..b435fb5da
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/swap_bytes.rs
@@ -0,0 +1,189 @@
+//! Byte-order reversal (`swap_bytes`) of whole vectors.
+
+// FIXME: investigate using `llvm.bswap`
+// https://github.com/rust-lang-nursery/packed_simd/issues/19
+
+use crate::*;
+
+crate trait SwapBytes {
+    fn swap_bytes(self) -> Self;
+}
+
+macro_rules! impl_swap_bytes {
+    (v16: $($id:ident,)+) => {
+        $(
+            impl SwapBytes for $id {
+                #[inline]
+                fn swap_bytes(self) -> Self {
+                    unsafe { shuffle!(self, [1, 0]) }
+                }
+            }
+        )+
+    };
+    (v32: $($id:ident,)+) => {
+        $(
+            impl SwapBytes for $id {
+                #[inline]
+                #[allow(clippy::useless_transmute)]
+                fn swap_bytes(self) -> Self {
+                    unsafe {
+                        let bytes: u8x4 = crate::mem::transmute(self);
+                        let result: u8x4 = shuffle!(bytes, [3, 2, 1, 0]);
+                        crate::mem::transmute(result)
+                    }
+                }
+            }
+        )+
+    };
+    (v64: $($id:ident,)+) => {
+        $(
+            impl SwapBytes for $id {
+                #[inline]
+                #[allow(clippy::useless_transmute)]
+                fn swap_bytes(self) -> Self {
+                    unsafe {
+                        let bytes: u8x8 = crate::mem::transmute(self);
+                        let result: u8x8 = shuffle!(
+                            bytes, [7, 6, 5, 4, 3, 2, 1, 0]
+                        );
+                        crate::mem::transmute(result)
+                    }
+                }
+            }
+        )+
+    };
+    (v128: $($id:ident,)+) => {
+        $(
+            impl SwapBytes for $id {
+                #[inline]
+                #[allow(clippy::useless_transmute)]
+                fn swap_bytes(self) -> Self {
+                    unsafe {
+                        let bytes: u8x16 = crate::mem::transmute(self);
+                        let result: u8x16 = shuffle!(bytes, [
+                            15, 14, 13, 12, 11, 10, 9, 8,
+                            7, 6, 5, 4, 3, 2, 1, 0
+                        ]);
+                        crate::mem::transmute(result)
+                    }
+                }
+            }
+        )+
+    };
+    (v256: $($id:ident,)+) => {
+        $(
+            impl SwapBytes for $id {
+                #[inline]
+                #[allow(clippy::useless_transmute)]
+                fn swap_bytes(self) -> Self {
+                    unsafe {
+                        let bytes: u8x32 = crate::mem::transmute(self);
+                        let result: u8x32 = shuffle!(bytes, [
+                            31, 30, 29, 28, 27, 26, 25, 24,
+                            23, 22, 21, 20, 19, 18, 17, 16,
+                            15, 14, 13, 12, 11, 10, 9,  8,
+                            7,  6,  5,  4,  3,  2,  1,  0
+                        ]);
+                        crate::mem::transmute(result)
+                    }
+                }
+            }
+        )+
+    };
+    (v512: $($id:ident,)+) => {
+        $(
+            impl SwapBytes for $id {
+                #[inline]
+                #[allow(clippy::useless_transmute)]
+                fn swap_bytes(self) -> Self {
+                    unsafe {
+                        let bytes: u8x64 = crate::mem::transmute(self);
+                        let result: u8x64 = shuffle!(bytes, [
+                            63, 62, 61, 60, 59, 58, 57, 56,
+                            55, 54, 53, 52, 51, 50, 49, 48,
+                            47, 46, 45, 44, 43, 42, 41, 40,
+                            39, 38, 37, 36, 35, 34, 33, 32,
+                            31, 30, 29, 28, 27, 26, 25, 24,
+                            23, 22, 21, 20, 19, 18, 17, 16,
+                            15, 14, 13, 12, 11, 10, 9,  8,
+                            7,  6,  5,  4,  3,  2,  1,  0
+                        ]);
+                        crate::mem::transmute(result)
+                    }
+                }
+            }
+        )+
+    };
+}
+
+impl_swap_bytes!(v16: u8x2, i8x2,);
+impl_swap_bytes!(v32: u8x4, i8x4, u16x2, i16x2,);
+// FIXME: 64-bit single element vector
+impl_swap_bytes!(
+    v64: u8x8,
+    i8x8,
+    u16x4,
+    i16x4,
+    u32x2,
+    i32x2, /* u64x1, i64x1, */
+);
+
+impl_swap_bytes!(
+    v128: u8x16,
+    i8x16,
+    u16x8,
+    i16x8,
+    u32x4,
+    i32x4,
+    u64x2,
+    i64x2,
+    u128x1,
+    i128x1,
+);
+impl_swap_bytes!(
+    v256: u8x32,
+    i8x32,
+    u16x16,
+    i16x16,
+    u32x8,
+    i32x8,
+    u64x4,
+    i64x4,
+    u128x2,
+    i128x2,
+);
+
+impl_swap_bytes!(
+    v512: u8x64,
+    i8x64,
+    u16x32,
+    i16x32,
+    u32x16,
+    i32x16,
+    u64x8,
+    i64x8,
+    u128x4,
+    i128x4,
+);
+
+cfg_if! {
+    if #[cfg(target_pointer_width = "8")] {
+        impl_swap_bytes!(v16: isizex2, usizex2,);
+        impl_swap_bytes!(v32: isizex4, usizex4,);
+        impl_swap_bytes!(v64: isizex8, usizex8,);
+    } else if #[cfg(target_pointer_width = "16")] {
+        impl_swap_bytes!(v32: isizex2, usizex2,);
+        impl_swap_bytes!(v64: isizex4, usizex4,);
+        impl_swap_bytes!(v128: isizex8, usizex8,);
+    } else if #[cfg(target_pointer_width = "32")] {
+        impl_swap_bytes!(v64: isizex2, usizex2,);
+        impl_swap_bytes!(v128: isizex4, usizex4,);
+        impl_swap_bytes!(v256: isizex8, usizex8,);
+    } else if #[cfg(target_pointer_width = "64")] {
+        impl_swap_bytes!(v128: isizex2, usizex2,);
+        impl_swap_bytes!(v256: isizex4, usizex4,);
+        impl_swap_bytes!(v512: isizex8, usizex8,);
+    } else {
+        compile_error!("unsupported target_pointer_width");
+    }
+}
diff --git a/vendor/packed_simd_2/src/codegen/v128.rs b/vendor/packed_simd_2/src/codegen/v128.rs
new file mode 100644
index 000000000..9506424fa
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/v128.rs
@@ -0,0 +1,46 @@
+//! Internal 128-bit wide vector types
+
+use crate::masks::*;
+
+#[rustfmt::skip]
+impl_simd_array!(
+    [i8; 16]: i8x16 |
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8
+);
+#[rustfmt::skip]
+impl_simd_array!(
+    [u8; 16]: u8x16 |
+    u8, u8, u8, u8,
+    u8, u8, u8, u8,
+    u8, u8, u8, u8,
+    u8, u8, u8, u8
+);
+#[rustfmt::skip]
+impl_simd_array!(
+    [m8; 16]: m8x16 |
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8
+);
+
+impl_simd_array!([i16; 8]: i16x8 | i16, i16, i16, i16, i16, i16, i16, i16);
+impl_simd_array!([u16; 8]: u16x8 | u16, u16, u16, u16, u16, u16, u16, u16);
+impl_simd_array!([m16; 8]: m16x8 | i16, i16, i16, i16, i16, i16, i16, i16);
+
+impl_simd_array!([i32; 4]: i32x4 | i32, i32, i32, i32);
+impl_simd_array!([u32; 4]: u32x4 | u32, u32, u32, u32);
+impl_simd_array!([f32; 4]: f32x4 | f32, f32, f32, f32);
+impl_simd_array!([m32; 4]: m32x4 | i32, i32, i32, i32);
+
+impl_simd_array!([i64; 2]: i64x2 | i64, i64);
+impl_simd_array!([u64; 2]: u64x2 | u64, u64);
+impl_simd_array!([f64; 2]: f64x2 | f64, f64);
+impl_simd_array!([m64; 2]: m64x2 | i64, i64);
+
+impl_simd_array!([i128; 1]: i128x1 | i128);
+impl_simd_array!([u128; 1]: u128x1 | u128);
+impl_simd_array!([m128; 1]: m128x1 | i128);
diff --git a/vendor/packed_simd_2/src/codegen/v16.rs b/vendor/packed_simd_2/src/codegen/v16.rs
new file mode 100644
index 000000000..4d55a6d89
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/v16.rs
@@ -0,0 +1,7 @@
+//! Internal 16-bit wide vector types
+
+use crate::masks::*;
+
+impl_simd_array!([i8; 2]: i8x2 | i8, i8);
+impl_simd_array!([u8; 2]: u8x2 | u8, u8);
+impl_simd_array!([m8; 2]: m8x2 | i8, i8);
diff --git a/vendor/packed_simd_2/src/codegen/v256.rs b/vendor/packed_simd_2/src/codegen/v256.rs
new file mode 100644
index 000000000..5ca4759f0
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/v256.rs
@@ -0,0 +1,78 @@
+//! Internal 256-bit wide vector types
+
+use crate::masks::*;
+
+#[rustfmt::skip]
+impl_simd_array!(
+    [i8; 32]: i8x32 |
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8
+);
+#[rustfmt::skip]
+impl_simd_array!(
+    [u8; 32]: u8x32 |
+    u8, u8, u8, u8,
+    u8, u8, u8, u8,
+    u8, u8, u8, u8,
+    u8, u8, u8, u8,
+    u8, u8, u8, u8,
+    u8, u8, u8, u8,
+    u8, u8, u8, u8,
+    u8, u8, u8, u8
+);
+#[rustfmt::skip]
+impl_simd_array!(
+    [m8; 32]: m8x32 |
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8
+);
+#[rustfmt::skip]
+impl_simd_array!(
+    [i16; 16]: i16x16 |
+    i16, i16, i16, i16,
+    i16, i16, i16, i16,
+    i16, i16, i16, i16,
+    i16, i16, i16, i16
+);
+#[rustfmt::skip]
+impl_simd_array!(
+    [u16; 16]: u16x16 |
+    u16, u16, u16, u16,
+    u16, u16, u16, u16,
+    u16, u16, u16, u16,
+    u16, u16, u16, u16
+);
+#[rustfmt::skip]
+impl_simd_array!(
+    [m16; 16]: m16x16 |
+    i16, i16, i16, i16,
+    i16, i16, i16, i16,
+    i16, i16, i16, i16,
+    i16, i16, i16, i16
+);
+
+impl_simd_array!([i32; 8]: i32x8 | i32, i32, i32, i32, i32, i32, i32, i32);
+impl_simd_array!([u32; 8]: u32x8 | u32, u32, u32, u32, u32, u32, u32, u32);
+impl_simd_array!([f32; 8]: f32x8 | f32, f32, f32, f32, f32, f32, f32, f32);
+impl_simd_array!([m32; 8]: m32x8 | i32, i32, i32, i32, i32, i32, i32, i32);
+
+impl_simd_array!([i64; 4]: i64x4 | i64, i64, i64, i64);
+impl_simd_array!([u64; 4]: u64x4 | u64, u64, u64, u64);
+impl_simd_array!([f64; 4]: f64x4 | f64, f64, f64, f64);
+impl_simd_array!([m64; 4]: m64x4 | i64, i64, i64, i64);
+
+impl_simd_array!([i128; 2]: i128x2 | i128, i128);
+impl_simd_array!([u128; 2]: u128x2 | u128, u128);
+impl_simd_array!([m128; 2]: m128x2 | i128, i128);
diff --git a/vendor/packed_simd_2/src/codegen/v32.rs b/vendor/packed_simd_2/src/codegen/v32.rs
new file mode 100644
index 000000000..ae1dabd00
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/v32.rs
@@ -0,0 +1,11 @@
+//! Internal 32-bit wide vector types
+
+use crate::masks::*;
+
+impl_simd_array!([i8; 4]: i8x4 | i8, i8, i8, i8);
+impl_simd_array!([u8; 4]: u8x4 | u8, u8, u8, u8);
+impl_simd_array!([m8; 4]: m8x4 | i8, i8, i8, i8);
+
+impl_simd_array!([i16; 2]: i16x2 | i16, i16);
+impl_simd_array!([u16; 2]: u16x2 | u16, u16);
+impl_simd_array!([m16; 2]: m16x2 | i16, i16);
diff --git a/vendor/packed_simd_2/src/codegen/v512.rs b/vendor/packed_simd_2/src/codegen/v512.rs
new file mode 100644
index 000000000..bf9511034
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/v512.rs
@@ -0,0 +1,145 @@
+//! Internal 512-bit wide vector types
+
+use crate::masks::*;
+
+#[rustfmt::skip]
+impl_simd_array!(
+    [i8; 64]: i8x64 |
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8
+);
+#[rustfmt::skip]
+impl_simd_array!(
+    [u8; 64]: u8x64 |
+    u8, u8, u8, u8,
+    u8, u8, u8, u8,
+    u8, u8, u8, u8,
+    u8, u8, u8, u8,
+    u8, u8, u8, u8,
+    u8, u8, u8, u8,
+    u8, u8, u8, u8,
+    u8, u8, u8, u8,
+
+    u8, u8, u8, u8,
+    u8, u8, u8, u8,
+    u8, u8, u8, u8,
+    u8, u8, u8, u8,
+    u8, u8, u8, u8,
+    u8, u8, u8, u8,
+    u8, u8, u8, u8,
+    u8, u8, u8, u8
+);
+#[rustfmt::skip]
+impl_simd_array!(
+    [m8; 64]: m8x64 |
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8,
+    i8, i8, i8, i8
+);
+#[rustfmt::skip]
+impl_simd_array!(
+    [i16; 32]: i16x32 |
+    i16, i16, i16, i16,
+    i16, i16, i16, i16,
+    i16, i16, i16, i16,
+    i16, i16, i16, i16,
+    i16, i16, i16, i16,
+    i16, i16, i16, i16,
+    i16, i16, i16, i16,
+    i16, i16, i16, i16
+);
+#[rustfmt::skip]
+impl_simd_array!(
+    [u16; 32]: u16x32 |
+    u16, u16, u16, u16,
+    u16, u16, u16, u16,
+    u16, u16, u16, u16,
+    u16, u16, u16, u16,
+    u16, u16, u16, u16,
+    u16, u16, u16, u16,
+    u16, u16, u16, u16,
+    u16, u16, u16, u16
+);
+#[rustfmt::skip]
+impl_simd_array!(
+    [m16; 32]: m16x32 |
+    i16, i16, i16, i16,
+    i16, i16, i16, i16,
+    i16, i16, i16, i16,
+    i16, i16, i16, i16,
+    i16, i16, i16, i16,
+    i16, i16, i16, i16,
+    i16, i16, i16, i16,
+    i16, i16, i16, i16
+);
+
+#[rustfmt::skip]
+impl_simd_array!(
+    [i32; 16]: i32x16 |
+    i32, i32, i32, i32,
+    i32, i32, i32, i32,
+    i32, i32, i32, i32,
+    i32, i32, i32, i32
+);
+#[rustfmt::skip]
+impl_simd_array!(
+    [u32; 16]: u32x16 |
+    u32, u32, u32, u32,
+    u32, u32, u32, u32,
+    u32, u32, u32, u32,
+    u32, u32, u32, u32
+);
+#[rustfmt::skip]
+impl_simd_array!(
+    [f32; 16]: f32x16 |
+    f32, f32, f32, f32,
+    f32, f32, f32, f32,
+    f32, f32, f32, f32,
+    f32, f32, f32, f32
+);
+#[rustfmt::skip]
+impl_simd_array!(
+    [m32; 16]: m32x16 |
+    i32, i32, i32, i32,
+    i32, i32, i32, i32,
+    i32, i32, i32, i32,
+    i32, i32, i32, i32
+);
+
+impl_simd_array!([i64; 8]: i64x8 | i64, i64, i64, i64, i64, i64, i64, i64);
+impl_simd_array!([u64; 8]: u64x8 | u64, u64, u64, u64, u64, u64, u64, u64);
+impl_simd_array!([f64; 8]: f64x8 | f64, f64, f64, f64, f64, f64, f64, f64);
+impl_simd_array!([m64; 8]: m64x8 | i64, i64, i64, i64, i64, i64, i64, i64);
+
+impl_simd_array!([i128; 4]: i128x4 | i128, i128, i128, i128);
+impl_simd_array!([u128; 4]: u128x4 | u128, u128, u128, u128);
+impl_simd_array!([m128; 4]: m128x4 | i128, i128, i128, i128);
diff --git a/vendor/packed_simd_2/src/codegen/v64.rs b/vendor/packed_simd_2/src/codegen/v64.rs
new file mode 100644
index 000000000..3cfb67c1a
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/v64.rs
@@ -0,0 +1,21 @@
+//! Internal 64-bit wide vector types
+
+use crate::masks::*;
+
+impl_simd_array!([i8; 8]: i8x8 | i8, i8, i8, i8, i8, i8, i8, i8);
+impl_simd_array!([u8; 8]: u8x8 | u8, u8, u8, u8, u8, u8, u8, u8);
+impl_simd_array!([m8; 8]: m8x8 | i8, i8, i8, i8, i8, i8, i8, i8);
+
+impl_simd_array!([i16; 4]: i16x4 | i16, i16, i16, i16);
+impl_simd_array!([u16; 4]: u16x4 | u16, u16, u16, u16);
+impl_simd_array!([m16; 4]: m16x4 | i16, i16, i16, i16);
+
+impl_simd_array!([i32; 2]: i32x2 | i32, i32);
+impl_simd_array!([u32; 2]: u32x2 | u32, u32);
+impl_simd_array!([f32; 2]: f32x2 | f32, f32);
+impl_simd_array!([m32; 2]: m32x2 | i32, i32);
+
+impl_simd_array!([i64; 1]: i64x1 | i64);
+impl_simd_array!([u64; 1]: u64x1 | u64);
+impl_simd_array!([f64; 1]: f64x1 | f64);
+impl_simd_array!([m64; 1]: m64x1 | i64);
diff --git a/vendor/packed_simd_2/src/codegen/vPtr.rs b/vendor/packed_simd_2/src/codegen/vPtr.rs
new file mode 100644
index 000000000..cf4765538
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/vPtr.rs
@@ -0,0 +1,35 @@
+//! Pointer vector types
+
+// Defines a `repr(simd)` pointer vector type and its sealed-trait impls.
+macro_rules! impl_simd_ptr {
+    ([$ptr:ty; $lanes:expr]: $tuple_id:ident | $param:ident
+     | $($field:ty),*) => {
+        // NOTE: this type is leaked through `SimdArray` (as `Tuple`).
+        #[derive(Copy, Clone)]
+        #[repr(simd)]
+        pub struct $tuple_id<$param>($(crate $field),*);
+
+        impl<$param> crate::sealed::Seal for [$ptr; $lanes] {}
+        impl<$param> crate::sealed::SimdArray for [$ptr; $lanes] {
+            type T = $ptr;
+            const N: usize = $lanes;
+            type NT = [u32; $lanes];
+            type Tuple = $tuple_id<$ptr>;
+        }
+
+        impl<$param> crate::sealed::Seal for $tuple_id<$ptr> {}
+        impl<$param> crate::sealed::Simd for $tuple_id<$ptr> {
+            const LANES: usize = $lanes;
+            type Element = $ptr;
+            type LanesType = [u32; $lanes];
+        }
+    }
+}
+
+impl_simd_ptr!([*const T; 2]: cptrx2 | T | T, T);
+impl_simd_ptr!([*const T; 4]: cptrx4 | T | T, T, T, T);
+impl_simd_ptr!([*const T; 8]: cptrx8 | T | T, T, T, T, T, T, T, T);
+
+impl_simd_ptr!([*mut T; 2]: mptrx2 | T | T, T);
+impl_simd_ptr!([*mut T; 4]: mptrx4 | T | T, T, T, T);
+impl_simd_ptr!([*mut T; 8]: mptrx8 | T | T, T, T, T, T, T, T, T);
diff --git a/vendor/packed_simd_2/src/codegen/vSize.rs b/vendor/packed_simd_2/src/codegen/vSize.rs
new file mode 100644
index 000000000..3911b2134
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/vSize.rs
@@ -0,0 +1,43 @@
+//! Vector types with pointer-sized elements
+
+use crate::codegen::pointer_sized_int::{isize_, usize_};
+use crate::masks::*;
+
+impl_simd_array!([isize; 2]: isizex2 | isize_, isize_);
+impl_simd_array!([usize; 2]: usizex2 | usize_, usize_);
+impl_simd_array!([msize; 2]: msizex2 | isize_, isize_);
+
+impl_simd_array!([isize; 4]: isizex4 | isize_, isize_, isize_, isize_);
+impl_simd_array!([usize; 4]: usizex4 | usize_, usize_, usize_, usize_);
+impl_simd_array!([msize; 4]: msizex4 | isize_, isize_, isize_, isize_);
+
+// The 8-lane invocations are laid out by hand, two lanes per row, and are
+// marked `#[rustfmt::skip]` (as in the fixed-width vector modules) so that
+// rustfmt keeps this layout. The mask vector `msizex8` is declared with
+// `isize_` lanes, exactly like `isizex8`.
+#[rustfmt::skip]
+impl_simd_array!(
+    [isize; 8]: isizex8 |
+    isize_, isize_,
+    isize_, isize_,
+    isize_, isize_,
+    isize_, isize_
+);
+
+#[rustfmt::skip]
+impl_simd_array!(
+    [usize; 8]: usizex8 |
+    usize_, usize_,
+    usize_, usize_,
+    usize_, usize_,
+    usize_, usize_
+);
+
+#[rustfmt::skip]
+impl_simd_array!(
+    [msize; 8]: msizex8 |
+    isize_, isize_,
+    isize_, isize_,
+    isize_, isize_,
+    isize_, isize_
+);
diff --git a/vendor/packed_simd_2/src/lib.rs b/vendor/packed_simd_2/src/lib.rs
new file mode 100644
index 000000000..4d12c9cd9
--- /dev/null
+++ b/vendor/packed_simd_2/src/lib.rs
@@ -0,0 +1,337 @@
+//! # Portable packed SIMD vectors
+//!
+//! This crate is proposed for stabilization as `std::packed_simd` in [RFC2366:
+//! `std::simd`](https://github.com/rust-lang/rfcs/pull/2366).
+//!
+//! The examples available in the
+//! [`examples/`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples)
+//! sub-directory of the crate showcase how to use the library in practice.
+//!
+//! ## Table of contents
+//!
+//! - [Introduction](#introduction)
+//! - [Vector types](#vector-types)
+//! - [Conditional operations](#conditional-operations)
+//! - [Conversions](#conversions)
+//! - [Performance
+//!   guide](https://rust-lang-nursery.github.io/packed_simd/perf-guide/)
+//!
+//! ## Introduction
+//!
+//! This crate exports [`Simd<[T; N]>`][`Simd`]: a packed vector of `N`
+//! elements of type `T` as well as many type aliases for this type: for
+//! example, [`f32x4`], which is just an alias for `Simd<[f32; 4]>`.
+//!
+//! The operations on packed vectors are, by default, "vertical", that is, they
+//! are applied to each vector lane in isolation of the others:
+//!
+//! ```
+//! # use packed_simd::*;
+//! let a = i32x4::new(1, 2, 3, 4);
+//! let b = i32x4::new(5, 6, 7, 8);
+//! assert_eq!(a + b, i32x4::new(6, 8, 10, 12));
+//! ```
+//!
+//! Many "horizontal" operations are also provided:
+//!
+//! ```
+//! # use packed_simd::*;
+//! # let a = i32x4::new(1, 2, 3, 4);
+//! assert_eq!(a.wrapping_sum(), 10);
+//! ```
+//!
+//! In virtually all architectures vertical operations are fast, while
+//! horizontal operations are, by comparison, much slower. That is, the
+//! most portably-efficient way of performing a reduction over a slice
+//! is to collect the results into a vector using vertical operations,
+//! and performing a single horizontal operation at the end:
+//!
+//! ```
+//! # use packed_simd::*;
+//! fn reduce(x: &[i32]) -> i32 {
+//!     assert!(x.len() % 4 == 0);
+//!     let mut sum = i32x4::splat(0); // [0, 0, 0, 0]
+//!     for i in (0..x.len()).step_by(4) {
+//!         sum += i32x4::from_slice_unaligned(&x[i..]);
+//!     }
+//!     sum.wrapping_sum()
+//! }
+//!
+//! let x = [0, 1, 2, 3, 4, 5, 6, 7];
+//! assert_eq!(reduce(&x), 28);
+//! ```
+//!
+//! ## Vector types
+//!
+//! The vector type aliases are named according to the following scheme:
+//!
+//! > `{element_type}x{number_of_lanes} == Simd<[element_type;
+//! number_of_lanes]>`
+//!
+//! where the following element types are supported:
+//!
+//! * `i{element_width}`: signed integer
+//! * `u{element_width}`: unsigned integer
+//! * `f{element_width}`: float
+//! * `m{element_width}`: mask (see below)
+//! * `*{const,mut} T`: `const` and `mut` pointers
+//!
+//! ## Basic operations
+//!
+//! ```
+//! # use packed_simd::*;
+//! // Sets all elements to `0`:
+//! let a = i32x4::splat(0);
+//!
+//! // Reads a vector from a slice:
+//! let mut arr = [0, 0, 0, 1, 2, 3, 4, 5];
+//! let b = i32x4::from_slice_unaligned(&arr);
+//!
+//! // Reads the 4-th element of a vector:
+//! assert_eq!(b.extract(3), 1);
+//!
+//! // Returns a new vector where the 4-th element is replaced with `1`:
+//! let a = a.replace(3, 1);
+//! assert_eq!(a, b);
+//!
+//! // Writes a vector to a slice:
+//! let a = a.replace(2, 1);
+//! a.write_to_slice_unaligned(&mut arr[4..]);
+//! assert_eq!(arr, [0, 0, 0, 1, 0, 0, 1, 1]);
+//! ```
+//!
+//! ## Conditional operations
+//!
+//! One often needs to perform an operation on some lanes of the vector. Vector
+//! masks, like `m32x4`, allow selecting on which vector lanes an operation is
+//! to be performed:
+//!
+//! ```
+//! # use packed_simd::*;
+//! let a = i32x4::new(1, 1, 2, 2);
+//!
+//! // Add `1` to the first two lanes of the vector.
+//! let m = m16x4::new(true, true, false, false);
+//! let a = m.select(a + 1, a);
+//! assert_eq!(a, i32x4::splat(2));
+//! ```
+//!
+//! The elements of a vector mask are either `true` or `false`. Here `true`
+//! means that a lane is "selected", while `false` means that a lane is not
+//! selected.
+//!
+//! All vector masks implement a `mask.select(a: T, b: T) -> T` method that
+//! works on all vectors that have the same number of lanes as the mask. The
+//! resulting vector contains the elements of `a` for those lanes for which the
+//! mask is `true`, and the elements of `b` otherwise.
+//!
+//! The example constructs a mask with the first two lanes set to `true` and
+//! the last two lanes set to `false`. This selects the first two lanes of `a +
+//! 1` and the last two lanes of `a`, producing a vector where the first two
+//! lanes have been incremented by `1`.
+//!
+//! > note: mask `select` can be used on vector types that have the same number
+//! > of lanes as the mask. The example shows this by using [`m16x4`] instead
+//! > of [`m32x4`]. It is _typically_ more performant to use a mask element
+//! > width equal to the element width of the vectors being operated upon.
+//! > This is, however, not true for 512-bit wide vectors when targeting
+//! > AVX-512, where the most efficient masks use only 1-bit per element.
+//!
+//! All vertical comparison operations return masks:
+//!
+//! ```
+//! # use packed_simd::*;
+//! let a = i32x4::new(1, 1, 3, 3);
+//! let b = i32x4::new(2, 2, 0, 0);
+//!
+//! // ge: >= (Greater or Equal; see also lt, le, gt, eq, ne).
+//! let m = a.ge(i32x4::splat(2));
+//!
+//! if m.any() {
+//!     // all / any / none allow coherent control flow
+//!     let d = m.select(a, b);
+//!     assert_eq!(d, i32x4::new(2, 2, 3, 3));
+//! }
+//! ```
+//!
+//! ## Conversions
+//!
+//! * **lossless widening conversions**: [`From`]/[`Into`] are implemented for
+//!   vectors with the same number of lanes when the conversion is value
+//!   preserving (same as in `std`).
+//!
+//! * **safe bitwise conversions**: The cargo feature `into_bits` provides the
+//!   `IntoBits/FromBits` traits (`x.into_bits()`). These perform safe bitwise
+//!   `transmute`s when all bit patterns of the source type are valid bit
+//!   patterns of the target type and are also implemented for the
+//!   architecture-specific vector types of `std::arch`. For example, `let x:
+//!   u8x8 = m8x8::splat(true).into_bits();` is provided because all `m8x8` bit
+//!   patterns are valid `u8x8` bit patterns. However, the opposite is not
+//!   true: not all `u8x8` bit patterns are valid `m8x8` bit patterns, so this
+//!   operation cannot be performed safely using `x.into_bits()`; one needs to
+//!   use `unsafe { core::mem::transmute(x) }` for that, making sure that the
+//!   value in the `u8x8` is a valid bit-pattern of `m8x8`.
+//!
+//! * **numeric casts** (`as`): are performed using [`FromCast`]/[`Cast`]
+//!   (`x.cast()`), just like `as`:
+//!
+//!   * casting integer vectors whose lane types have the same size (e.g.
+//!     `i32xN` -> `u32xN`) is a **no-op**,
+//!
+//!   * casting from a larger integer to a smaller integer (e.g. `u32xN` ->
+//!     `u8xN`) will **truncate**,
+//!
+//!   * casting from a smaller integer to a larger integer (e.g. `u8xN` ->
+//!     `u32xN`) will:
+//!        * **zero-extend** if the source is unsigned, or
+//!        * **sign-extend** if the source is signed,
+//!
+//!   * casting from a float to an integer will **round the float towards
+//!     zero**,
+//!
+//!   * casting from an integer to float will produce the floating point
+//!     representation of the integer, **rounding to nearest, ties to even**,
+//!
+//!   * casting from an `f32` to an `f64` is perfect and lossless,
+//!
+//!   * casting from an `f64` to an `f32` **rounds to nearest, ties to even**.
+//!
+//!   Numeric casts are not very "precise": sometimes lossy, sometimes value
+//!   preserving, etc.
+
+#![feature(
+    repr_simd,
+    rustc_attrs,
+    const_fn,
+    platform_intrinsics,
+    stdsimd,
+    aarch64_target_feature,
+    arm_target_feature,
+    link_llvm_intrinsics,
+    core_intrinsics,
+    stmt_expr_attributes,
+    crate_visibility_modifier,
+    custom_inner_attributes,
+    llvm_asm
+)]
+#![allow(non_camel_case_types, non_snake_case,
+        // FIXME: these types are unsound in C FFI already
+        // See https://github.com/rust-lang/rust/issues/53346
+        improper_ctypes_definitions,
+        clippy::cast_possible_truncation,
+        clippy::cast_lossless,
+        clippy::cast_possible_wrap,
+        clippy::cast_precision_loss,
+        // TODO: manually add the `#[must_use]` attribute where appropriate
+        clippy::must_use_candidate,
+        // This lint is currently broken for generic code
+        // See https://github.com/rust-lang/rust-clippy/issues/3410
+        clippy::use_self,
+        clippy::wrong_self_convention,
+)]
+#![cfg_attr(test, feature(hashmap_internals))]
+#![deny(rust_2018_idioms, clippy::missing_inline_in_public_items)]
+#![no_std]
+
+use cfg_if::cfg_if;
+
+cfg_if! {
+    if #[cfg(feature = "core_arch")] {
+        #[allow(unused_imports)]
+        use core_arch as arch;
+    } else {
+        #[allow(unused_imports)]
+        use core::arch;
+    }
+}
+
+#[cfg(all(target_arch = "wasm32", test))]
+use wasm_bindgen_test::*;
+
+#[allow(unused_imports)]
+use core::{
+    /* arch (handled above), */ cmp, f32, f64, fmt, hash, hint, i128,
+    i16, i32, i64, i8, intrinsics, isize, iter, marker, mem, ops, ptr, slice,
+    u128, u16, u32, u64, u8, usize,
+};
+
+#[macro_use]
+mod testing;
+#[macro_use]
+mod api;
+mod codegen;
+mod sealed;
+
+pub use crate::sealed::{Simd as SimdVector, Shuffle, SimdArray, Mask};
+
+/// Packed SIMD vector type, a transparent wrapper over `A::Tuple`.
+///
+/// # Examples
+///
+/// ```
+/// # use packed_simd::Simd;
+/// let v = Simd::<[i32; 4]>::new(0, 1, 2, 3);
+/// assert_eq!(v.extract(2), 2);
+/// ```
+#[repr(transparent)]
+#[derive(Copy, Clone)]
+pub struct Simd<A: sealed::SimdArray>(
+    // FIXME: this field should be private,
+    // but it currently must be public for the
+    // `shuffle!` macro to work: the macro needs
+    // access to the internal `repr(simd)` value
+    // to call the shuffle intrinsics.
+    #[doc(hidden)] pub <A as sealed::SimdArray>::Tuple,
+);
+
+impl<A: sealed::SimdArray> sealed::Seal for Simd<A> {}
+
+/// Wrapper over `T` implementing a lexicographical order via the `PartialOrd`
+/// and/or `Ord` traits.
+#[repr(transparent)]
+#[derive(Copy, Clone, Debug)]
+#[allow(clippy::missing_inline_in_public_items)]
+pub struct LexicographicallyOrdered<T>(T);
+
+mod masks;
+pub use self::masks::*;
+
+mod v16;
+pub use self::v16::*;
+
+mod v32;
+pub use self::v32::*;
+
+mod v64;
+pub use self::v64::*;
+
+mod v128;
+pub use self::v128::*;
+
+mod v256;
+pub use self::v256::*;
+
+mod v512;
+pub use self::v512::*;
+
+mod vSize;
+pub use self::vSize::*;
+
+mod vPtr;
+pub use self::vPtr::*;
+
+pub use self::api::cast::*;
+
+#[cfg(feature = "into_bits")]
+pub use self::api::into_bits::*;
+
+// Re-export the shuffle intrinsics required by the `shuffle!` macro.
+#[doc(hidden)]
+pub use self::codegen::llvm::{
+    __shuffle_vector16, __shuffle_vector2, __shuffle_vector32,
+    __shuffle_vector4, __shuffle_vector64, __shuffle_vector8,
+};
+
+crate mod llvm {
+    crate use crate::codegen::llvm::*;
+}
diff --git a/vendor/packed_simd_2/src/masks.rs b/vendor/packed_simd_2/src/masks.rs
new file mode 100644
index 000000000..aeb36d232
--- /dev/null
+++ b/vendor/packed_simd_2/src/masks.rs
@@ -0,0 +1,130 @@
+//! Mask types
+
+// Defines the scalar mask type `$id` as a newtype over the (signed) integer
+// `$elem_ty`: `false` is stored as `0` and `true` as `!0` (all bits set).
+macro_rules! impl_mask_ty {
+    ($id:ident : $elem_ty:ident | #[$doc:meta]) => {
+        #[$doc]
+        #[derive(Copy, Clone)]
+        pub struct $id($elem_ty);
+
+        impl crate::sealed::Seal for $id {}
+        impl crate::sealed::Mask for $id {
+            #[inline]
+            fn test(&self) -> bool {
+                $id::test(self)
+            }
+        }
+
+        impl $id {
+            /// Instantiate a mask from `x`: `true` sets every bit, `false`
+            /// clears every bit.
+            #[inline]
+            pub fn new(x: bool) -> Self {
+                if x {
+                    $id(!0)
+                } else {
+                    $id(0)
+                }
+            }
+            /// Test if the mask is set, i.e. if any bit is non-zero.
+            #[inline]
+            pub fn test(&self) -> bool {
+                self.0 != 0
+            }
+        }
+
+        impl Default for $id {
+            /// The default mask is `false`.
+            #[inline]
+            fn default() -> Self {
+                $id(0)
+            }
+        }
+
+        #[allow(clippy::partialeq_ne_impl)]
+        impl PartialEq<$id> for $id {
+            #[inline]
+            fn eq(&self, other: &Self) -> bool {
+                self.0 == other.0
+            }
+            #[inline]
+            fn ne(&self, other: &Self) -> bool {
+                self.0 != other.0
+            }
+        }
+
+        impl Eq for $id {}
+
+        // Masks order like `bool` (`false < true`). Because `true` is stored
+        // as `-1`, every comparison on the raw integers is reversed.
+        impl PartialOrd<$id> for $id {
+            #[inline]
+            fn partial_cmp(
+                &self, other: &Self,
+            ) -> Option<crate::cmp::Ordering> {
+                // The order is total, so delegate to `Ord::cmp`.
+                Some(crate::cmp::Ord::cmp(self, other))
+            }
+
+            #[inline]
+            fn lt(&self, other: &Self) -> bool {
+                self.0 > other.0
+            }
+            #[inline]
+            fn gt(&self, other: &Self) -> bool {
+                self.0 < other.0
+            }
+            #[inline]
+            fn le(&self, other: &Self) -> bool {
+                self.0 >= other.0
+            }
+            #[inline]
+            fn ge(&self, other: &Self) -> bool {
+                self.0 <= other.0
+            }
+        }
+
+        impl Ord for $id {
+            #[inline]
+            fn cmp(&self, other: &Self) -> crate::cmp::Ordering {
+                // Note:
+                //  * false == 0_i
+                //  * true == !0_i == -1_i
+                // so the operands are swapped to get `false < true`.
+                crate::cmp::Ord::cmp(&other.0, &self.0)
+            }
+        }
+
+        impl crate::hash::Hash for $id {
+            #[inline]
+            fn hash<H: crate::hash::Hasher>(&self, state: &mut H) {
+                // Hash the logical `bool` value, not the raw integer.
+                (self.0 != 0).hash(state);
+            }
+        }
+
+        impl crate::fmt::Debug for $id {
+            #[inline]
+            fn fmt(
+                &self, fmtter: &mut crate::fmt::Formatter<'_>,
+            ) -> Result<(), crate::fmt::Error> {
+                // Prints the type name and the logical value, e.g. `m8(true)`.
+                write!(fmtter, "{}({})", stringify!($id), self.0 != 0)
+            }
+        }
+    };
+}
+
+impl_mask_ty!(m8: i8 | /// 8-bit wide mask.
+);
+impl_mask_ty!(m16: i16 | /// 16-bit wide mask.
+);
+impl_mask_ty!(m32: i32 | /// 32-bit wide mask.
+);
+impl_mask_ty!(m64: i64 | /// 64-bit wide mask.
+);
+impl_mask_ty!(m128: i128 | /// 128-bit wide mask.
+);
+impl_mask_ty!(msize: isize | /// isize-wide mask.
+);
diff --git a/vendor/packed_simd_2/src/sealed.rs b/vendor/packed_simd_2/src/sealed.rs
new file mode 100644
index 000000000..0ec20300f
--- /dev/null
+++ b/vendor/packed_simd_2/src/sealed.rs
@@ -0,0 +1,42 @@
+//! Sealed traits
+
+/// A sealing marker trait: it is logically private to the crate, which
+/// keeps the traits bounded by it from being implemented outside of it.
+pub trait Seal<T = ()> {}
+
+/// Trait implemented by arrays that can be SIMD types.
+pub trait SimdArray: Seal {
+    /// The `#[repr(simd)]` type that stores the vector.
+    type Tuple: Copy + Clone;
+    /// The element type of the vector.
+    type T;
+    /// The number of elements in the array.
+    const N: usize;
+    /// The type `[u32; Self::N]`.
+    type NT;
+}
+
+/// This trait is used to constrain the arguments
+/// and result type of the portable shuffles.
+#[doc(hidden)]
+pub trait Shuffle<Lanes>: Seal<Lanes> {
+    // `Lanes` is a `[u32; N]` where `N` is the number of vector lanes.
+
+    /// The result type of the shuffle.
+    type Output;
+}
+
+/// This trait is implemented by all SIMD vector types.
+pub trait Simd: Seal {
+    /// Element type of the SIMD vector.
+    type Element;
+    /// The number of elements in the SIMD vector.
+    const LANES: usize;
+    /// The type `[u32; Self::LANES]`.
+    type LanesType;
+}
+
+/// Implemented by all mask types; `test` reports whether the mask is set.
+pub trait Mask: Seal {
+    fn test(&self) -> bool;
+}
diff --git a/vendor/packed_simd_2/src/testing.rs b/vendor/packed_simd_2/src/testing.rs
new file mode 100644
index 000000000..fcbcf9e2a
--- /dev/null
+++ b/vendor/packed_simd_2/src/testing.rs
@@ -0,0 +1,8 @@
+//! Testing macros and other utilities.
+
+#[macro_use]
+mod macros;
+
+#[cfg(test)]
+#[macro_use]
+crate mod utils;
diff --git a/vendor/packed_simd_2/src/testing/macros.rs b/vendor/packed_simd_2/src/testing/macros.rs
new file mode 100644
index 000000000..6008634c7
--- /dev/null
+++ b/vendor/packed_simd_2/src/testing/macros.rs
@@ -0,0 +1,44 @@
+//! Testing macros
+
+macro_rules! test_if {
+    ($cfg_tt:tt: $item:item) => {
+        #[cfg(any(
+            // Compile every test item when:
+            //
+            // * tests are enabled, and
+            // * none of the cfgs that restrict testing to specific
+            //   vector classes is set.
+            all(test, not(any(
+                test_v16,
+                test_v32,
+                test_v64,
+                test_v128,
+                test_v256,
+                test_v512,
+                test_none,  // disables all tests
+            ))),
+            // Otherwise compile this item when:
+            //
+            // * tests are enabled, and
+            // * its own cfg token tree evaluates to true.
+            all(test, $cfg_tt),
+        ))]
+        $item
+    };
+}
+
+// Expands to a shared borrow (`&`) of the single token tree
+// it is given.
+#[cfg(test)]
+#[allow(unused)]
+macro_rules! ref_ {
+    ($operand:tt) => { &$operand };
+}
+
+// Expands to a mutable borrow (`&mut`) of the single token tree
+// it is given.
+#[cfg(test)]
+#[allow(unused)]
+macro_rules! ref_mut_ {
+    ($operand:tt) => { &mut $operand };
+}
diff --git a/vendor/packed_simd_2/src/testing/utils.rs b/vendor/packed_simd_2/src/testing/utils.rs
new file mode 100644
index 000000000..21f27aae5
--- /dev/null
+++ b/vendor/packed_simd_2/src/testing/utils.rs
@@ -0,0 +1,142 @@
+//! Testing utilities
+
+#![allow(dead_code)]
+// FIXME: Or don't. But it's true this is a problematic comparison.
+#![allow(clippy::neg_cmp_op_on_partial_ord)]
+
+use crate::{cmp::PartialOrd, fmt::Debug, LexicographicallyOrdered};
+
+/// Asserts that all comparison operators agree with `a < b` being true.
+pub fn test_lt<T>(
+    a: LexicographicallyOrdered<T>, b: LexicographicallyOrdered<T>,
+) where
+    LexicographicallyOrdered<T>: Debug + PartialOrd,
+{
+    assert!(a < b, "{:?}, {:?}", a, b);
+    assert!(b > a, "{:?}, {:?}", a, b);
+
+    assert!(!(a == b), "{:?}, {:?}", a, b);
+    assert!(a != b, "{:?}, {:?}", a, b);
+
+    assert!(a <= b, "{:?}, {:?}", a, b);
+    assert!(b >= a, "{:?}, {:?}", a, b);
+
+    // Comparing a value with itself trips `clippy::eq_op`, but that is
+    // exactly what checking (ir)reflexivity requires.
+    #[allow(clippy::eq_op)]
+    {
+        // Irreflexivity of the strict comparisons `<` and `>`
+        assert!(!(a < a), "{:?}, {:?}", a, b);
+        assert!(!(b < b), "{:?}, {:?}", a, b);
+        assert!(!(a > a), "{:?}, {:?}", a, b);
+        assert!(!(b > b), "{:?}, {:?}", a, b);
+        // Reflexivity of `<=`
+        assert!(a <= a, "{:?}, {:?}", a, b);
+        assert!(b <= b, "{:?}, {:?}", a, b);
+    }
+}
+
+/// Tests PartialOrd for `a` and `b` where `a <= b` is true.
+pub fn test_le<T>(
+    a: LexicographicallyOrdered<T>, b: LexicographicallyOrdered<T>,
+) where
+    LexicographicallyOrdered<T>: Debug + PartialOrd,
+{
+    assert!(a <= b, "{:?}, {:?}", a, b);
+    assert!(b >= a, "{:?}, {:?}", a, b);
+    // `a <= b` excludes the opposite strict ordering.
+    assert!(!(a > b), "{:?}, {:?}", a, b);
+    assert!(!(b < a), "{:?}, {:?}", a, b);
+
+    if a == b {
+        assert!(!(a < b), "{:?}, {:?}", a, b);
+        assert!(!(b > a), "{:?}, {:?}", a, b);
+
+        assert!(!(a != b), "{:?}, {:?}", a, b);
+    } else {
+        assert!(a != b, "{:?}, {:?}", a, b);
+        test_lt(a, b);
+    }
+}
+
+/// Checks `PartialOrd::partial_cmp` for `a` and `b` against the expected `o`.
+pub fn test_cmp<T>(
+    a: LexicographicallyOrdered<T>, b: LexicographicallyOrdered<T>,
+    o: Option<crate::cmp::Ordering>,
+) where
+    LexicographicallyOrdered<T>: PartialOrd + Debug,
+    T: Debug + crate::sealed::Simd + Copy + Clone,
+    <T as crate::sealed::Simd>::Element: Default + Copy + Clone + PartialOrd,
+{
+    use crate::cmp::Ordering;
+    assert!(T::LANES <= 64, "array length in these two arrays needs updating");
+    let mut lanes_a: [T::Element; 64] = [Default::default(); 64];
+    let mut lanes_b: [T::Element; 64] = [Default::default(); 64];
+
+    // SAFETY: both buffers are local, writable, and (assuming `T` is laid out
+    // as `T::LANES` contiguous `Element`s — TODO confirm) large enough.
+    unsafe {
+        crate::ptr::write_unaligned(
+            lanes_a.as_mut_ptr() as *mut LexicographicallyOrdered<T>,
+            a,
+        );
+        crate::ptr::write_unaligned(
+            lanes_b.as_mut_ptr() as *mut LexicographicallyOrdered<T>,
+            b,
+        )
+    }
+    let reference = lanes_a[..T::LANES].partial_cmp(&lanes_b[..T::LANES]);
+    let actual = a.partial_cmp(&b);
+    assert_eq!(reference, actual, "{:?}, {:?}", a, b);
+    assert_eq!(o, actual, "{:?}, {:?}", a, b);
+    match o {
+        Some(Ordering::Less) => {
+            test_lt(a, b);
+            test_le(a, b);
+        }
+        Some(Ordering::Greater) => {
+            test_lt(b, a);
+            test_le(b, a);
+        }
+        Some(Ordering::Equal) => {
+            assert!(a == b, "{:?}, {:?}", a, b);
+            assert!(!(a != b), "{:?}, {:?}", a, b);
+            assert!(!(a < b), "{:?}, {:?}", a, b);
+            assert!(!(b < a), "{:?}, {:?}", a, b);
+            assert!(!(a > b), "{:?}, {:?}", a, b);
+            assert!(!(b > a), "{:?}, {:?}", a, b);
+            test_le(a, b);
+            test_le(b, a);
+        }
+        None => {
+            assert!(!(a == b), "{:?}, {:?}", a, b);
+            assert!(!(a != b), "{:?}, {:?}", a, b);
+            assert!(!(a < b), "{:?}, {:?}", a, b);
+            assert!(!(a > b), "{:?}, {:?}", a, b);
+            assert!(!(b < a), "{:?}, {:?}", a, b);
+            assert!(!(b > a), "{:?}, {:?}", a, b);
+            assert!(!(a <= b), "{:?}, {:?}", a, b);
+            assert!(!(b <= a), "{:?}, {:?}", a, b);
+            assert!(!(a >= b), "{:?}, {:?}", a, b);
+            assert!(!(b >= a), "{:?}, {:?}", a, b);
+        }
+    }
+}
+
+// Returns a tuple `(clear, set)` of two distinct pointer values of the same
+// type as the element type of the Simd vector `$id`.
+#[allow(unused)]
+macro_rules! ptr_vals {
+    ($id:ty) => {
+        // Expands to an expression. `clear` has all bits cleared and `set`
+        // has all bits set; no pointer is dereferenced here.
+        #[allow(unused_unsafe)]
+        unsafe {
+            let clear: <$id as crate::sealed::Simd>::Element =
+                crate::mem::zeroed();
+            let set: <$id as crate::sealed::Simd>::Element =
+                crate::mem::transmute(-1_isize);
+            (clear, set)
+        }
+    };
+}
diff --git a/vendor/packed_simd_2/src/v128.rs b/vendor/packed_simd_2/src/v128.rs
new file mode 100644
index 000000000..7949f6619
--- /dev/null
+++ b/vendor/packed_simd_2/src/v128.rs
@@ -0,0 +1,80 @@
+//! 128-bit wide vector types
+#[rustfmt::skip]
+
+use crate::*;
+
+impl_i!([i8; 16]: i8x16, m8x16 | i8, u16 | test_v128 |
+        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
+        From: |
+        /// A 128-bit vector with 16 `i8` lanes.
+);
+impl_u!([u8; 16]: u8x16, m8x16 | u8, u16 | test_v128 |
+        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
+        From: |
+        /// A 128-bit vector with 16 `u8` lanes.
+);
+impl_m!([m8; 16]: m8x16 | i8, u16 | test_v128 |
+        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
+        From: m16x16 |
+        /// A 128-bit vector mask with 16 `m8` lanes.
+);
+
+impl_i!([i16; 8]: i16x8, m16x8 | i16, u8 | test_v128 | x0, x1, x2, x3, x4, x5, x6, x7 |
+        From: i8x8, u8x8 |
+        /// A 128-bit vector with 8 `i16` lanes.
+);
+impl_u!([u16; 8]: u16x8, m16x8 | u16, u8 | test_v128 | x0, x1, x2, x3, x4, x5, x6, x7 |
+        From: u8x8 |
+        /// A 128-bit vector with 8 `u16` lanes.
+);
+impl_m!([m16; 8]: m16x8 | i16, u8 | test_v128 | x0, x1, x2, x3, x4, x5, x6, x7 |
+        From: m8x8, m32x8 |
+        /// A 128-bit vector mask with 8 `m16` lanes.
+);
+
+impl_i!([i32; 4]: i32x4, m32x4 | i32, u8 | test_v128 | x0, x1, x2, x3 |
+        From: i8x4, u8x4, i16x4, u16x4 |
+        /// A 128-bit vector with 4 `i32` lanes.
+);
+impl_u!([u32; 4]: u32x4, m32x4 | u32, u8 | test_v128 | x0, x1, x2, x3 |
+        From: u8x4, u16x4 |
+        /// A 128-bit vector with 4 `u32` lanes.
+);
+impl_f!([f32; 4]: f32x4, m32x4 | f32 | test_v128 | x0, x1, x2, x3 |
+        From: i8x4, u8x4, i16x4, u16x4 |
+        /// A 128-bit vector with 4 `f32` lanes.
+);
+impl_m!([m32; 4]: m32x4 | i32, u8 | test_v128 | x0, x1, x2, x3 |
+        From: m8x4, m16x4, m64x4 |
+        /// A 128-bit vector mask with 4 `m32` lanes.
+);
+
+impl_i!([i64; 2]: i64x2, m64x2 | i64, u8 | test_v128 | x0, x1 |
+        From: i8x2, u8x2, i16x2, u16x2, i32x2, u32x2 |
+        /// A 128-bit vector with 2 `i64` lanes.
+);
+impl_u!([u64; 2]: u64x2, m64x2 | u64, u8 | test_v128 | x0, x1 |
+        From: u8x2, u16x2, u32x2 |
+        /// A 128-bit vector with 2 `u64` lanes.
+);
+impl_f!([f64; 2]: f64x2, m64x2 | f64 | test_v128 | x0, x1 |
+        From: i8x2, u8x2, i16x2, u16x2, i32x2, u32x2, f32x2 |
+        /// A 128-bit vector with 2 `f64` lanes.
+);
+impl_m!([m64; 2]: m64x2 | i64, u8 | test_v128 | x0, x1 |
+        From: m8x2, m16x2, m32x2, m128x2 |
+        /// A 128-bit vector mask with 2 `m64` lanes.
+);
+
+impl_i!([i128; 1]: i128x1, m128x1 | i128, u8 | test_v128 | x0 |
+        From: /*i8x1, u8x1, i16x1, u16x1, i32x1, u32x1, i64x1, u64x1 */ | // FIXME: unary small vector types
+        /// A 128-bit vector with 1 `i128` lane.
+);
+impl_u!([u128; 1]: u128x1, m128x1 | u128, u8 | test_v128 | x0 |
+        From: /*u8x1, u16x1, u32x1, u64x1 */ | // FIXME: unary small vector types
+        /// A 128-bit vector with 1 `u128` lane.
+);
+impl_m!([m128; 1]: m128x1 | i128, u8 | test_v128 | x0 |
+        From: /*m8x1, m16x1, m32x1, m64x1 */ | // FIXME: unary small vector types
+        /// A 128-bit vector mask with 1 `m128` lane.
+);
diff --git a/vendor/packed_simd_2/src/v16.rs b/vendor/packed_simd_2/src/v16.rs
new file mode 100644
index 000000000..4ca5afb2a
--- /dev/null
+++ b/vendor/packed_simd_2/src/v16.rs
@@ -0,0 +1,16 @@
+//! 16-bit wide vector types
+
+use crate::*;
+
+impl_i!([i8; 2]: i8x2, m8x2 | i8, u8 | test_v16 | x0, x1 |
+        From: |
+        /// A 16-bit vector with 2 `i8` lanes.
+);
+impl_u!([u8; 2]: u8x2, m8x2 | u8, u8 | test_v16 | x0, x1 |
+        From: |
+        /// A 16-bit vector with 2 `u8` lanes.
+);
+impl_m!([m8; 2]: m8x2 | i8, u8 | test_v16 | x0, x1 |
+        From: m16x2, m32x2, m64x2, m128x2 |
+        /// A 16-bit vector mask with 2 `m8` lanes.
+);
diff --git a/vendor/packed_simd_2/src/v256.rs b/vendor/packed_simd_2/src/v256.rs
new file mode 100644
index 000000000..f0c3bc281
--- /dev/null
+++ b/vendor/packed_simd_2/src/v256.rs
@@ -0,0 +1,86 @@
+//! 256-bit wide vector types
+#[rustfmt::skip]
+
+use crate::*;
+
+impl_i!([i8; 32]: i8x32, m8x32 | i8, u32 | test_v256 |
+        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
+        x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 |
+        From: |
+        /// A 256-bit vector with 32 `i8` lanes.
+);
+impl_u!([u8; 32]: u8x32, m8x32 | u8, u32 | test_v256 |
+        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
+        x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 |
+        From: |
+        /// A 256-bit vector with 32 `u8` lanes.
+);
+impl_m!([m8; 32]: m8x32 | i8, u32 | test_v256 |
+        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
+        x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 |
+        From: |
+        /// A 256-bit vector mask with 32 `m8` lanes.
+);
+
+impl_i!([i16; 16]: i16x16, m16x16 | i16, u16 | test_v256 |
+        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
+        From: i8x16, u8x16 |
+        /// A 256-bit vector with 16 `i16` lanes.
+);
+impl_u!([u16; 16]: u16x16, m16x16 | u16, u16 | test_v256 |
+        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
+        From: u8x16 |
+        /// A 256-bit vector with 16 `u16` lanes.
+);
+impl_m!([m16; 16]: m16x16 | i16, u16 | test_v256 |
+        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
+        From: m8x16 |
+        /// A 256-bit vector mask with 16 `m16` lanes.
+);
+
+impl_i!([i32; 8]: i32x8, m32x8 | i32, u8 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 |
+        From: i8x8, u8x8, i16x8, u16x8 |
+        /// A 256-bit vector with 8 `i32` lanes.
+);
+impl_u!([u32; 8]: u32x8, m32x8 | u32, u8 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 |
+        From: u8x8, u16x8 |
+        /// A 256-bit vector with 8 `u32` lanes.
+);
+impl_f!([f32; 8]: f32x8, m32x8 | f32 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 |
+        From: i8x8, u8x8, i16x8, u16x8 |
+        /// A 256-bit vector with 8 `f32` lanes.
+);
+impl_m!([m32; 8]: m32x8 | i32, u8 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 |
+        From: m8x8, m16x8 |
+        /// A 256-bit vector mask with 8 `m32` lanes.
+);
+
+impl_i!([i64; 4]: i64x4, m64x4 | i64, u8 | test_v256 | x0, x1, x2, x3 |
+        From: i8x4, u8x4, i16x4, u16x4, i32x4, u32x4 |
+        /// A 256-bit vector with 4 `i64` lanes.
+);
+impl_u!([u64; 4]: u64x4, m64x4 | u64, u8 | test_v256 | x0, x1, x2, x3 |
+        From: u8x4, u16x4, u32x4 |
+        /// A 256-bit vector with 4 `u64` lanes.
+);
+impl_f!([f64; 4]: f64x4, m64x4 | f64 | test_v256 | x0, x1, x2, x3 |
+        From: i8x4, u8x4, i16x4, u16x4, i32x4, u32x4, f32x4 |
+        /// A 256-bit vector with 4 `f64` lanes.
+);
+impl_m!([m64; 4]: m64x4 | i64, u8 | test_v256 | x0, x1, x2, x3 |
+        From: m8x4, m16x4, m32x4 |
+        /// A 256-bit vector mask with 4 `m64` lanes.
+);
+
+impl_i!([i128; 2]: i128x2, m128x2 | i128, u8 | test_v256 | x0, x1 |
+        From: i8x2, u8x2, i16x2, u16x2, i32x2, u32x2, i64x2, u64x2 |
+        /// A 256-bit vector with 2 `i128` lanes.
+);
+impl_u!([u128; 2]: u128x2, m128x2 | u128, u8 | test_v256 | x0, x1 |
+        From: u8x2, u16x2, u32x2, u64x2 |
+        /// A 256-bit vector with 2 `u128` lanes.
+);
+impl_m!([m128; 2]: m128x2 | i128, u8 | test_v256 | x0, x1 |
+        From: m8x2, m16x2, m32x2, m64x2 |
+        /// A 256-bit vector mask with 2 `m128` lanes.
+);
diff --git a/vendor/packed_simd_2/src/v32.rs b/vendor/packed_simd_2/src/v32.rs
new file mode 100644
index 000000000..75a1838e5
--- /dev/null
+++ b/vendor/packed_simd_2/src/v32.rs
@@ -0,0 +1,29 @@
+//! 32-bit wide vector types
+
+use crate::*;
+
+impl_i!([i8; 4]: i8x4, m8x4 | i8, u8 | test_v32 | x0, x1, x2, x3 |
+        From: |
+        /// A 32-bit vector with 4 `i8` lanes.
+);
+impl_u!([u8; 4]: u8x4, m8x4 | u8, u8 | test_v32 | x0, x1, x2, x3 |
+        From: |
+        /// A 32-bit vector with 4 `u8` lanes.
+);
+impl_m!([m8; 4]: m8x4 | i8, u8 | test_v32 | x0, x1, x2, x3 |
+        From: m16x4, m32x4, m64x4 |
+        /// A 32-bit vector mask with 4 `m8` lanes.
+);
+
+impl_i!([i16; 2]: i16x2, m16x2 | i16, u8 | test_v32 | x0, x1 |
+        From: i8x2, u8x2 |
+        /// A 32-bit vector with 2 `i16` lanes.
+);
+impl_u!([u16; 2]: u16x2, m16x2 | u16, u8 | test_v32 | x0, x1 |
+        From: u8x2 |
+        /// A 32-bit vector with 2 `u16` lanes.
+);
+impl_m!([m16; 2]: m16x2 | i16, u8 | test_v32 | x0, x1 |
+        From: m8x2, m32x2, m64x2, m128x2 |
+        /// A 32-bit vector mask with 2 `m16` lanes.
+);
diff --git a/vendor/packed_simd_2/src/v512.rs b/vendor/packed_simd_2/src/v512.rs
new file mode 100644
index 000000000..4c8c71338
--- /dev/null
+++ b/vendor/packed_simd_2/src/v512.rs
@@ -0,0 +1,99 @@
+//! 512-bit wide vector types
+#[rustfmt::skip]
+
+use crate::*;
+
+impl_i!([i8; 64]: i8x64, m8x64 | i8, u64 | test_v512 |
+        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
+        x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31,
+        x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47,
+        x48, x49, x50, x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, x62, x63 |
+        From: |
+        /// A 512-bit vector with 64 `i8` lanes.
+);
+impl_u!([u8; 64]: u8x64, m8x64 | u8, u64 | test_v512 |
+        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
+        x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31,
+        x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47,
+        x48, x49, x50, x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, x62, x63 |
+        From: |
+        /// A 512-bit vector with 64 `u8` lanes.
+);
+impl_m!([m8; 64]: m8x64 | i8, u64 | test_v512 |
+        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
+        x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31,
+        x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47,
+        x48, x49, x50, x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, x62, x63 |
+        From: |
+        /// A 512-bit vector mask with 64 `m8` lanes.
+);
+
+impl_i!([i16; 32]: i16x32, m16x32 | i16, u32 | test_v512 |
+        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
+        x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 |
+        From: i8x32, u8x32 |
+        /// A 512-bit vector with 32 `i16` lanes.
+);
+impl_u!([u16; 32]: u16x32, m16x32 | u16, u32 | test_v512 |
+        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
+        x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 |
+        From: u8x32 |
+        /// A 512-bit vector with 32 `u16` lanes.
+);
+impl_m!([m16; 32]: m16x32 | i16, u32 | test_v512 |
+        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
+        x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 |
+        From: m8x32 |
+        /// A 512-bit vector mask with 32 `m16` lanes.
+);
+
+impl_i!([i32; 16]: i32x16, m32x16 | i32, u16 | test_v512 |
+        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
+        From: i8x16, u8x16, i16x16, u16x16 |
+        /// A 512-bit vector with 16 `i32` lanes.
+);
+impl_u!([u32; 16]: u32x16, m32x16 | u32, u16 | test_v512 |
+        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
+        From: u8x16, u16x16 |
+        /// A 512-bit vector with 16 `u32` lanes.
+);
+impl_f!([f32; 16]: f32x16, m32x16 | f32 | test_v512 |
+        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
+        From: i8x16, u8x16, i16x16, u16x16 |
+        /// A 512-bit vector with 16 `f32` lanes.
+);
+impl_m!([m32; 16]: m32x16 | i32, u16 | test_v512 |
+        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
+        From: m8x16, m16x16 |
+        /// A 512-bit vector mask with 16 `m32` lanes.
+);
+
+impl_i!([i64; 8]: i64x8, m64x8 | i64, u8 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 |
+        From: i8x8, u8x8, i16x8, u16x8, i32x8, u32x8 |
+        /// A 512-bit vector with 8 `i64` lanes.
+);
+impl_u!([u64; 8]: u64x8, m64x8 | u64, u8 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 |
+        From: u8x8, u16x8, u32x8 |
+        /// A 512-bit vector with 8 `u64` lanes.
+);
+impl_f!([f64; 8]: f64x8, m64x8 | f64 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 |
+        From: i8x8, u8x8, i16x8, u16x8, i32x8, u32x8, f32x8 |
+        /// A 512-bit vector with 8 `f64` lanes.
+);
+impl_m!([m64; 8]: m64x8 | i64, u8 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 |
+        From: m8x8, m16x8, m32x8 |
+        /// A 512-bit vector mask with 8 `m64` lanes.
+);
+
+impl_i!([i128; 4]: i128x4, m128x4 | i128, u8 | test_v512 | x0, x1, x2, x3 |
+        From: i8x4, u8x4, i16x4, u16x4, i32x4, u32x4, i64x4, u64x4 |
+        /// A 512-bit vector with 4 `i128` lanes.
+);
+impl_u!([u128; 4]: u128x4, m128x4 | u128, u8 | test_v512 | x0, x1, x2, x3 |
+        From: u8x4, u16x4, u32x4, u64x4 |
+        /// A 512-bit vector with 4 `u128` lanes.
+);
+impl_m!([m128; 4]: m128x4 | i128, u8 | test_v512 | x0, x1, x2, x3 |
+        From: m8x4, m16x4, m32x4, m64x4 |
+        /// A 512-bit vector mask with 4 `m128` lanes.
+);
diff --git a/vendor/packed_simd_2/src/v64.rs b/vendor/packed_simd_2/src/v64.rs
new file mode 100644
index 000000000..bf6b9de61
--- /dev/null
+++ b/vendor/packed_simd_2/src/v64.rs
@@ -0,0 +1,66 @@
+//! 64-bit wide vector types
+#[rustfmt::skip]
+
+use super::*;
+
+impl_i!([i8; 8]: i8x8, m8x8 | i8, u8 | test_v64 | x0, x1, x2, x3, x4, x5, x6, x7 |
+        From: |
+        /// A 64-bit vector with 8 `i8` lanes.
+);
+impl_u!([u8; 8]: u8x8, m8x8 | u8, u8 | test_v64 | x0, x1, x2, x3, x4, x5, x6, x7 |
+        From: |
+        /// A 64-bit vector with 8 `u8` lanes.
+);
+impl_m!([m8; 8]: m8x8 | i8, u8 | test_v64 | x0, x1, x2, x3, x4, x5, x6, x7 |
+        From: m16x8, m32x8 |
+        /// A 64-bit vector mask with 8 `m8` lanes.
+);
+
+impl_i!([i16; 4]: i16x4, m16x4 | i16, u8 | test_v64 | x0, x1, x2, x3 |
+        From: i8x4, u8x4 |
+        /// A 64-bit vector with 4 `i16` lanes.
+);
+impl_u!([u16; 4]: u16x4, m16x4 | u16, u8 | test_v64 | x0, x1, x2, x3 |
+        From: u8x4 |
+        /// A 64-bit vector with 4 `u16` lanes.
+);
+impl_m!([m16; 4]: m16x4 | i16, u8 | test_v64 | x0, x1, x2, x3 |
+        From: m8x4, m32x4, m64x4 |
+        /// A 64-bit vector mask with 4 `m16` lanes.
+);
+
+impl_i!([i32; 2]: i32x2, m32x2 | i32, u8 | test_v64 | x0, x1 |
+        From: i8x2, u8x2, i16x2, u16x2 |
+        /// A 64-bit vector with 2 `i32` lanes.
+);
+impl_u!([u32; 2]: u32x2, m32x2 | u32, u8 | test_v64 | x0, x1 |
+        From: u8x2, u16x2 |
+        /// A 64-bit vector with 2 `u32` lanes.
+);
+impl_m!([m32; 2]: m32x2 | i32, u8 | test_v64 | x0, x1 |
+        From: m8x2, m16x2, m64x2, m128x2 |
+        /// A 64-bit vector mask with 2 `m32` lanes.
+);
+impl_f!([f32; 2]: f32x2, m32x2 | f32 | test_v64 | x0, x1 |
+        From: i8x2, u8x2, i16x2, u16x2 |
+        /// A 64-bit vector with 2 `f32` lanes.
+);
+
+/*
+impl_i!([i64; 1]: i64x1, m64x1 | i64, u8 | test_v64 | x0 |
+        From: /*i8x1, u8x1, i16x1, u16x1, i32x1, u32x1*/ |  // FIXME: primitive to vector conversion
+        /// A 64-bit vector with 1 `i64` lane.
+);
+impl_u!([u64; 1]: u64x1, m64x1 | u64, u8 | test_v64 | x0 |
+        From: /*u8x1, u16x1, u32x1*/ | // FIXME: primitive to vector conversion
+        /// A 64-bit vector with 1 `u64` lane.
+);
+impl_m!([m64; 1]: m64x1 | i64, u8 | test_v64 | x0 |
+        From: /*m8x1, m16x1, m32x1, */ m128x1 | // FIXME: unary small vector types
+        /// A 64-bit vector mask with 1 `m64` lane.
+);
+impl_f!([f64; 1]: f64x1, m64x1 | f64 | test_v64 | x0 |
+        From: /*i8x1, u8x1, i16x1, u16x1, i32x1, u32x1, f32x1*/ | // FIXME: unary small vector types
+        /// A 64-bit vector with 1 `f64` lane.
+);
+*/
diff --git a/vendor/packed_simd_2/src/vPtr.rs b/vendor/packed_simd_2/src/vPtr.rs
new file mode 100644
index 000000000..e34cb170e
--- /dev/null
+++ b/vendor/packed_simd_2/src/vPtr.rs
@@ -0,0 +1,34 @@
+//! Vectors of pointers
+#[rustfmt::skip]
+
+use crate::*;
+
+impl_const_p!(
+    [*const T; 2]: cptrx2, msizex2, usizex2, isizex2 | test_v128 | x0, x1 | From: |
+    /// A vector with 2 `*const T` lanes.
+);
+
+impl_mut_p!(
+    [*mut T; 2]: mptrx2, msizex2, usizex2, isizex2 | test_v128 | x0, x1 | From: |
+    /// A vector with 2 `*mut T` lanes.
+);
+
+impl_const_p!(
+    [*const T; 4]: cptrx4, msizex4, usizex4, isizex4 | test_v256 | x0, x1, x2, x3 | From: |
+    /// A vector with 4 `*const T` lanes.
+);
+
+impl_mut_p!(
+    [*mut T; 4]: mptrx4, msizex4, usizex4, isizex4 | test_v256 | x0, x1, x2, x3 | From: |
+    /// A vector with 4 `*mut T` lanes.
+);
+
+impl_const_p!(
+    [*const T; 8]: cptrx8, msizex8, usizex8, isizex8 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | From: |
+    /// A vector with 8 `*const T` lanes.
+);
+
+impl_mut_p!(
+    [*mut T; 8]: mptrx8, msizex8, usizex8, isizex8 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | From: |
+    /// A vector with 8 `*mut T` lanes.
+);
diff --git a/vendor/packed_simd_2/src/vSize.rs b/vendor/packed_simd_2/src/vSize.rs
new file mode 100644
index 000000000..b5d891006
--- /dev/null
+++ b/vendor/packed_simd_2/src/vSize.rs
@@ -0,0 +1,53 @@
+//! Vectors with pointer-sized elements
+
+use crate::codegen::pointer_sized_int::{isize_, usize_};
+use crate::*;
+
+impl_i!([isize; 2]: isizex2, msizex2 | isize_, u8 | test_v128 |
+        x0, x1 |
+        From: |
+        /// A vector with 2 `isize` lanes.
+);
+
+impl_u!([usize; 2]: usizex2, msizex2 | usize_, u8 | test_v128 |
+        x0, x1 |
+        From: |
+        /// A vector with 2 `usize` lanes.
+);
+impl_m!([msize; 2]: msizex2 | isize_, u8 | test_v128 |
+        x0, x1 |
+        From: |
+        /// A vector mask with 2 `msize` lanes.
+);
+
+impl_i!([isize; 4]: isizex4, msizex4 | isize_, u8 | test_v256 |
+        x0, x1, x2, x3 |
+        From: |
+        /// A vector with 4 `isize` lanes.
+);
+impl_u!([usize; 4]: usizex4, msizex4 | usize_, u8 | test_v256 |
+        x0, x1, x2, x3 |
+        From: |
+        /// A vector with 4 `usize` lanes.
+);
+impl_m!([msize; 4]: msizex4 | isize_, u8 | test_v256 |
+        x0, x1, x2, x3 |
+        From: |
+        /// A vector mask with 4 `msize` lanes.
+);
+
+impl_i!([isize; 8]: isizex8, msizex8 | isize_, u8 | test_v512 |
+        x0, x1, x2, x3, x4, x5, x6, x7 |
+        From: |
+        /// A vector with 8 `isize` lanes.
+);
+impl_u!([usize; 8]: usizex8, msizex8 | usize_, u8 | test_v512 |
+        x0, x1, x2, x3, x4, x5, x6, x7 |
+        From: |
+        /// A vector with 8 `usize` lanes.
+);
+impl_m!([msize; 8]: msizex8 | isize_, u8 | test_v512 |
+        x0, x1, x2, x3, x4, x5, x6, x7 |
+        From: |
+        /// A vector mask with 8 `msize` lanes.
+);
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-17 12:02:58 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-17 12:02:58 +0000
commit	698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch)
tree	173a775858bd501c378080a10dca74132f05bc50 /vendor/packed_simd_2/src
parent	Initial commit. (diff)
download	rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip