path: root/library/portable-simd/crates/core_simd
Diffstat (limited to '')
-rw-r--r--  library/portable-simd/crates/core_simd/Cargo.toml | 32
-rw-r--r--  library/portable-simd/crates/core_simd/LICENSE-APACHE | 202
-rw-r--r--  library/portable-simd/crates/core_simd/LICENSE-MIT | 19
-rw-r--r--  library/portable-simd/crates/core_simd/examples/matrix_inversion.rs | 316
-rw-r--r--  library/portable-simd/crates/core_simd/examples/nbody.rs | 193
-rw-r--r--  library/portable-simd/crates/core_simd/examples/spectral_norm.rs | 77
-rw-r--r--  library/portable-simd/crates/core_simd/src/core_simd_docs.md | 4
-rw-r--r--  library/portable-simd/crates/core_simd/src/elements.rs | 11
-rw-r--r--  library/portable-simd/crates/core_simd/src/elements/float.rs | 357
-rw-r--r--  library/portable-simd/crates/core_simd/src/elements/int.rs | 298
-rw-r--r--  library/portable-simd/crates/core_simd/src/elements/uint.rs | 139
-rw-r--r--  library/portable-simd/crates/core_simd/src/eq.rs | 73
-rw-r--r--  library/portable-simd/crates/core_simd/src/fmt.rs | 39
-rw-r--r--  library/portable-simd/crates/core_simd/src/intrinsics.rs | 153
-rw-r--r--  library/portable-simd/crates/core_simd/src/iter.rs | 58
-rw-r--r--  library/portable-simd/crates/core_simd/src/lane_count.rs | 46
-rw-r--r--  library/portable-simd/crates/core_simd/src/lib.rs | 22
-rw-r--r--  library/portable-simd/crates/core_simd/src/masks.rs | 595
-rw-r--r--  library/portable-simd/crates/core_simd/src/masks/bitmask.rs | 246
-rw-r--r--  library/portable-simd/crates/core_simd/src/masks/full_masks.rs | 323
-rw-r--r--  library/portable-simd/crates/core_simd/src/masks/to_bitmask.rs | 93
-rw-r--r--  library/portable-simd/crates/core_simd/src/mod.rs | 32
-rw-r--r--  library/portable-simd/crates/core_simd/src/ops.rs | 254
-rw-r--r--  library/portable-simd/crates/core_simd/src/ops/assign.rs | 124
-rw-r--r--  library/portable-simd/crates/core_simd/src/ops/deref.rs | 124
-rw-r--r--  library/portable-simd/crates/core_simd/src/ops/unary.rs | 78
-rw-r--r--  library/portable-simd/crates/core_simd/src/ord.rs | 213
-rw-r--r--  library/portable-simd/crates/core_simd/src/select.rs | 59
-rw-r--r--  library/portable-simd/crates/core_simd/src/swizzle.rs | 385
-rw-r--r--  library/portable-simd/crates/core_simd/src/to_bytes.rs | 41
-rw-r--r--  library/portable-simd/crates/core_simd/src/vector.rs | 742
-rw-r--r--  library/portable-simd/crates/core_simd/src/vector/float.rs | 24
-rw-r--r--  library/portable-simd/crates/core_simd/src/vector/int.rs | 63
-rw-r--r--  library/portable-simd/crates/core_simd/src/vector/ptr.rs | 51
-rw-r--r--  library/portable-simd/crates/core_simd/src/vector/uint.rs | 63
-rw-r--r--  library/portable-simd/crates/core_simd/src/vendor.rs | 31
-rw-r--r--  library/portable-simd/crates/core_simd/src/vendor/arm.rs | 76
-rw-r--r--  library/portable-simd/crates/core_simd/src/vendor/powerpc.rs | 11
-rw-r--r--  library/portable-simd/crates/core_simd/src/vendor/wasm32.rs | 30
-rw-r--r--  library/portable-simd/crates/core_simd/src/vendor/x86.rs | 63
-rw-r--r--  library/portable-simd/crates/core_simd/tests/autoderef.rs | 22
-rw-r--r--  library/portable-simd/crates/core_simd/tests/cast.rs | 37
-rw-r--r--  library/portable-simd/crates/core_simd/tests/f32_ops.rs | 5
-rw-r--r--  library/portable-simd/crates/core_simd/tests/f64_ops.rs | 5
-rw-r--r--  library/portable-simd/crates/core_simd/tests/i16_ops.rs | 5
-rw-r--r--  library/portable-simd/crates/core_simd/tests/i32_ops.rs | 5
-rw-r--r--  library/portable-simd/crates/core_simd/tests/i64_ops.rs | 5
-rw-r--r--  library/portable-simd/crates/core_simd/tests/i8_ops.rs | 5
-rw-r--r--  library/portable-simd/crates/core_simd/tests/isize_ops.rs | 5
-rw-r--r--  library/portable-simd/crates/core_simd/tests/mask_ops.rs | 3
-rw-r--r--  library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask16.rs | 4
-rw-r--r--  library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask32.rs | 4
-rw-r--r--  library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask64.rs | 3
-rw-r--r--  library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask8.rs | 3
-rw-r--r--  library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask_macros.rs | 225
-rw-r--r--  library/portable-simd/crates/core_simd/tests/mask_ops_impl/masksize.rs | 3
-rw-r--r--  library/portable-simd/crates/core_simd/tests/mask_ops_impl/mod.rs | 9
-rw-r--r--  library/portable-simd/crates/core_simd/tests/masks.rs | 158
-rw-r--r--  library/portable-simd/crates/core_simd/tests/ops_macros.rs | 607
-rw-r--r--  library/portable-simd/crates/core_simd/tests/round.rs | 85
-rw-r--r--  library/portable-simd/crates/core_simd/tests/swizzle.rs | 62
-rw-r--r--  library/portable-simd/crates/core_simd/tests/to_bytes.rs | 14
-rw-r--r--  library/portable-simd/crates/core_simd/tests/u16_ops.rs | 5
-rw-r--r--  library/portable-simd/crates/core_simd/tests/u32_ops.rs | 5
-rw-r--r--  library/portable-simd/crates/core_simd/tests/u64_ops.rs | 5
-rw-r--r--  library/portable-simd/crates/core_simd/tests/u8_ops.rs | 5
-rw-r--r--  library/portable-simd/crates/core_simd/tests/usize_ops.rs | 5
-rw-r--r--  library/portable-simd/crates/core_simd/webdriver.json | 7
68 files changed, 7066 insertions, 0 deletions
diff --git a/library/portable-simd/crates/core_simd/Cargo.toml b/library/portable-simd/crates/core_simd/Cargo.toml
new file mode 100644
index 000000000..8a29cf156
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/Cargo.toml
@@ -0,0 +1,32 @@
+[package]
+name = "core_simd"
+version = "0.1.0"
+edition = "2021"
+homepage = "https://github.com/rust-lang/portable-simd"
+repository = "https://github.com/rust-lang/portable-simd"
+keywords = ["core", "simd", "intrinsics"]
+categories = ["hardware-support", "no-std"]
+license = "MIT OR Apache-2.0"
+
+[features]
+default = ["as_crate"]
+as_crate = []
+std = []
+generic_const_exprs = []
+
+[target.'cfg(target_arch = "wasm32")'.dev-dependencies.wasm-bindgen]
+version = "0.2"
+
+[dev-dependencies.wasm-bindgen-test]
+version = "0.3"
+
+[dev-dependencies.proptest]
+version = "0.10"
+default-features = false
+features = ["alloc"]
+
+[dev-dependencies.test_helpers]
+path = "../test_helpers"
+
+[dev-dependencies]
+std_float = { path = "../std_float/", features = ["as_crate"] }
diff --git a/library/portable-simd/crates/core_simd/LICENSE-APACHE b/library/portable-simd/crates/core_simd/LICENSE-APACHE
new file mode 100644
index 000000000..d64569567
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/LICENSE-APACHE
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/library/portable-simd/crates/core_simd/LICENSE-MIT b/library/portable-simd/crates/core_simd/LICENSE-MIT
new file mode 100644
index 000000000..0e9d2f43a
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/LICENSE-MIT
@@ -0,0 +1,19 @@
+Copyright (c) 2020 The Rust Project Developers
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/library/portable-simd/crates/core_simd/examples/matrix_inversion.rs b/library/portable-simd/crates/core_simd/examples/matrix_inversion.rs
new file mode 100644
index 000000000..39f530f68
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/examples/matrix_inversion.rs
@@ -0,0 +1,316 @@
+//! 4x4 matrix inverse
+// Code ported from the `packed_simd` crate
+// Run this code with `cargo test --example matrix_inversion`
+#![feature(array_chunks, portable_simd)]
+use core_simd::simd::*;
+use Which::*;
+
+// Gotta define our own 4x4 matrix since Rust doesn't ship multidim arrays yet :^)
+#[derive(Copy, Clone, Debug, PartialEq, PartialOrd)]
+pub struct Matrix4x4([[f32; 4]; 4]);
+
+#[allow(clippy::too_many_lines)]
+pub fn scalar_inv4x4(m: Matrix4x4) -> Option<Matrix4x4> {
+ let m = m.0;
+
+ #[rustfmt::skip]
+ let mut inv = [
+ // row 0:
+ [
+ // 0,0:
+ m[1][1] * m[2][2] * m[3][3] -
+ m[1][1] * m[2][3] * m[3][2] -
+ m[2][1] * m[1][2] * m[3][3] +
+ m[2][1] * m[1][3] * m[3][2] +
+ m[3][1] * m[1][2] * m[2][3] -
+ m[3][1] * m[1][3] * m[2][2],
+ // 0,1:
+ -m[0][1] * m[2][2] * m[3][3] +
+ m[0][1] * m[2][3] * m[3][2] +
+ m[2][1] * m[0][2] * m[3][3] -
+ m[2][1] * m[0][3] * m[3][2] -
+ m[3][1] * m[0][2] * m[2][3] +
+ m[3][1] * m[0][3] * m[2][2],
+ // 0,2:
+ m[0][1] * m[1][2] * m[3][3] -
+ m[0][1] * m[1][3] * m[3][2] -
+ m[1][1] * m[0][2] * m[3][3] +
+ m[1][1] * m[0][3] * m[3][2] +
+ m[3][1] * m[0][2] * m[1][3] -
+ m[3][1] * m[0][3] * m[1][2],
+ // 0,3:
+ -m[0][1] * m[1][2] * m[2][3] +
+ m[0][1] * m[1][3] * m[2][2] +
+ m[1][1] * m[0][2] * m[2][3] -
+ m[1][1] * m[0][3] * m[2][2] -
+ m[2][1] * m[0][2] * m[1][3] +
+ m[2][1] * m[0][3] * m[1][2],
+ ],
+ // row 1
+ [
+ // 1,0:
+ -m[1][0] * m[2][2] * m[3][3] +
+ m[1][0] * m[2][3] * m[3][2] +
+ m[2][0] * m[1][2] * m[3][3] -
+ m[2][0] * m[1][3] * m[3][2] -
+ m[3][0] * m[1][2] * m[2][3] +
+ m[3][0] * m[1][3] * m[2][2],
+ // 1,1:
+ m[0][0] * m[2][2] * m[3][3] -
+ m[0][0] * m[2][3] * m[3][2] -
+ m[2][0] * m[0][2] * m[3][3] +
+ m[2][0] * m[0][3] * m[3][2] +
+ m[3][0] * m[0][2] * m[2][3] -
+ m[3][0] * m[0][3] * m[2][2],
+ // 1,2:
+ -m[0][0] * m[1][2] * m[3][3] +
+ m[0][0] * m[1][3] * m[3][2] +
+ m[1][0] * m[0][2] * m[3][3] -
+ m[1][0] * m[0][3] * m[3][2] -
+ m[3][0] * m[0][2] * m[1][3] +
+ m[3][0] * m[0][3] * m[1][2],
+ // 1,3:
+ m[0][0] * m[1][2] * m[2][3] -
+ m[0][0] * m[1][3] * m[2][2] -
+ m[1][0] * m[0][2] * m[2][3] +
+ m[1][0] * m[0][3] * m[2][2] +
+ m[2][0] * m[0][2] * m[1][3] -
+ m[2][0] * m[0][3] * m[1][2],
+ ],
+ // row 2
+ [
+ // 2,0:
+ m[1][0] * m[2][1] * m[3][3] -
+ m[1][0] * m[2][3] * m[3][1] -
+ m[2][0] * m[1][1] * m[3][3] +
+ m[2][0] * m[1][3] * m[3][1] +
+ m[3][0] * m[1][1] * m[2][3] -
+ m[3][0] * m[1][3] * m[2][1],
+ // 2,1:
+ -m[0][0] * m[2][1] * m[3][3] +
+ m[0][0] * m[2][3] * m[3][1] +
+ m[2][0] * m[0][1] * m[3][3] -
+ m[2][0] * m[0][3] * m[3][1] -
+ m[3][0] * m[0][1] * m[2][3] +
+ m[3][0] * m[0][3] * m[2][1],
+ // 2,2:
+ m[0][0] * m[1][1] * m[3][3] -
+ m[0][0] * m[1][3] * m[3][1] -
+ m[1][0] * m[0][1] * m[3][3] +
+ m[1][0] * m[0][3] * m[3][1] +
+ m[3][0] * m[0][1] * m[1][3] -
+ m[3][0] * m[0][3] * m[1][1],
+ // 2,3:
+ -m[0][0] * m[1][1] * m[2][3] +
+ m[0][0] * m[1][3] * m[2][1] +
+ m[1][0] * m[0][1] * m[2][3] -
+ m[1][0] * m[0][3] * m[2][1] -
+ m[2][0] * m[0][1] * m[1][3] +
+ m[2][0] * m[0][3] * m[1][1],
+ ],
+ // row 3
+ [
+ // 3,0:
+ -m[1][0] * m[2][1] * m[3][2] +
+ m[1][0] * m[2][2] * m[3][1] +
+ m[2][0] * m[1][1] * m[3][2] -
+ m[2][0] * m[1][2] * m[3][1] -
+ m[3][0] * m[1][1] * m[2][2] +
+ m[3][0] * m[1][2] * m[2][1],
+ // 3,1:
+ m[0][0] * m[2][1] * m[3][2] -
+ m[0][0] * m[2][2] * m[3][1] -
+ m[2][0] * m[0][1] * m[3][2] +
+ m[2][0] * m[0][2] * m[3][1] +
+ m[3][0] * m[0][1] * m[2][2] -
+ m[3][0] * m[0][2] * m[2][1],
+ // 3,2:
+ -m[0][0] * m[1][1] * m[3][2] +
+ m[0][0] * m[1][2] * m[3][1] +
+ m[1][0] * m[0][1] * m[3][2] -
+ m[1][0] * m[0][2] * m[3][1] -
+ m[3][0] * m[0][1] * m[1][2] +
+ m[3][0] * m[0][2] * m[1][1],
+ // 3,3:
+ m[0][0] * m[1][1] * m[2][2] -
+ m[0][0] * m[1][2] * m[2][1] -
+ m[1][0] * m[0][1] * m[2][2] +
+ m[1][0] * m[0][2] * m[2][1] +
+ m[2][0] * m[0][1] * m[1][2] -
+ m[2][0] * m[0][2] * m[1][1],
+ ],
+ ];
+
+ let det = m[0][0] * inv[0][0] + m[0][1] * inv[1][0] + m[0][2] * inv[2][0] + m[0][3] * inv[3][0];
+ if det == 0. {
+ return None;
+ }
+
+ let det_inv = 1. / det;
+
+ for row in &mut inv {
+ for elem in row.iter_mut() {
+ *elem *= det_inv;
+ }
+ }
+
+ Some(Matrix4x4(inv))
+}
+
+pub fn simd_inv4x4(m: Matrix4x4) -> Option<Matrix4x4> {
+ let m = m.0;
+ let m_0 = f32x4::from_array(m[0]);
+ let m_1 = f32x4::from_array(m[1]);
+ let m_2 = f32x4::from_array(m[2]);
+ let m_3 = f32x4::from_array(m[3]);
+
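+ // `simd_swizzle!` builds a new vector from two inputs: `First(i)` takes lane i of the first
+ // argument and `Second(i)` takes lane i of the second. The shuffle tables below interleave
+ // pairs of rows into columns (a transpose-style rearrangement, as in the classic
+ // Cramer's-rule SSE inverse) before the cofactor math starts.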
+ const SHUFFLE01: [Which; 4] = [First(0), First(1), Second(0), Second(1)];
+ const SHUFFLE02: [Which; 4] = [First(0), First(2), Second(0), Second(2)];
+ const SHUFFLE13: [Which; 4] = [First(1), First(3), Second(1), Second(3)];
+ const SHUFFLE23: [Which; 4] = [First(2), First(3), Second(2), Second(3)];
+
+ let tmp = simd_swizzle!(m_0, m_1, SHUFFLE01);
+ let row1 = simd_swizzle!(m_2, m_3, SHUFFLE01);
+
+ let row0 = simd_swizzle!(tmp, row1, SHUFFLE02);
+ let row1 = simd_swizzle!(row1, tmp, SHUFFLE13);
+
+ let tmp = simd_swizzle!(m_0, m_1, SHUFFLE23);
+ let row3 = simd_swizzle!(m_2, m_3, SHUFFLE23);
+ let row2 = simd_swizzle!(tmp, row3, SHUFFLE02);
+ let row3 = simd_swizzle!(row3, tmp, SHUFFLE13);
+
+ let tmp = (row2 * row3).reverse().rotate_lanes_right::<2>();
+ let minor0 = row1 * tmp;
+ let minor1 = row0 * tmp;
+ let tmp = tmp.rotate_lanes_right::<2>();
+ let minor0 = (row1 * tmp) - minor0;
+ let minor1 = (row0 * tmp) - minor1;
+ let minor1 = minor1.rotate_lanes_right::<2>();
+
+ let tmp = (row1 * row2).reverse().rotate_lanes_right::<2>();
+ let minor0 = (row3 * tmp) + minor0;
+ let minor3 = row0 * tmp;
+ let tmp = tmp.rotate_lanes_right::<2>();
+
+ let minor0 = minor0 - row3 * tmp;
+ let minor3 = row0 * tmp - minor3;
+ let minor3 = minor3.rotate_lanes_right::<2>();
+
+ let tmp = (row3 * row1.rotate_lanes_right::<2>())
+ .reverse()
+ .rotate_lanes_right::<2>();
+ let row2 = row2.rotate_lanes_right::<2>();
+ let minor0 = row2 * tmp + minor0;
+ let minor2 = row0 * tmp;
+ let tmp = tmp.rotate_lanes_right::<2>();
+ let minor0 = minor0 - row2 * tmp;
+ let minor2 = row0 * tmp - minor2;
+ let minor2 = minor2.rotate_lanes_right::<2>();
+
+ let tmp = (row0 * row1).reverse().rotate_lanes_right::<2>();
+ let minor2 = minor2 + row3 * tmp;
+ let minor3 = row2 * tmp - minor3;
+ let tmp = tmp.rotate_lanes_right::<2>();
+ let minor2 = row3 * tmp - minor2;
+ let minor3 = minor3 - row2 * tmp;
+
+ let tmp = (row0 * row3).reverse().rotate_lanes_right::<2>();
+ let minor1 = minor1 - row2 * tmp;
+ let minor2 = row1 * tmp + minor2;
+ let tmp = tmp.rotate_lanes_right::<2>();
+ let minor1 = row2 * tmp + minor1;
+ let minor2 = minor2 - row1 * tmp;
+
+ let tmp = (row0 * row2).reverse().rotate_lanes_right::<2>();
+ let minor1 = row3 * tmp + minor1;
+ let minor3 = minor3 - row1 * tmp;
+ let tmp = tmp.rotate_lanes_right::<2>();
+ let minor1 = minor1 - row3 * tmp;
+ let minor3 = row1 * tmp + minor3;
+
+ let det = row0 * minor0;
+ let det = det.rotate_lanes_right::<2>() + det;
+ let det = det.reverse().rotate_lanes_right::<2>() + det;
+
+ if det.reduce_sum() == 0. {
+ return None;
+ }
+ // calculate the reciprocal
+ let tmp = f32x4::splat(1.0) / det;
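+ // `tmp + tmp - det * tmp * tmp` is one Newton-Raphson step, x * (2 - det * x), refining the reciprocal.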
+ let det = tmp + tmp - det * tmp * tmp;
+
+ let res0 = minor0 * det;
+ let res1 = minor1 * det;
+ let res2 = minor2 * det;
+ let res3 = minor3 * det;
+
+ let mut m = m;
+
+ m[0] = res0.to_array();
+ m[1] = res1.to_array();
+ m[2] = res2.to_array();
+ m[3] = res3.to_array();
+
+ Some(Matrix4x4(m))
+}
+
+#[cfg(test)]
+#[rustfmt::skip]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test() {
+ let tests: &[(Matrix4x4, Option<Matrix4x4>)] = &[
+ // Identity:
+ (Matrix4x4([
+ [1., 0., 0., 0.],
+ [0., 1., 0., 0.],
+ [0., 0., 1., 0.],
+ [0., 0., 0., 1.],
+ ]),
+ Some(Matrix4x4([
+ [1., 0., 0., 0.],
+ [0., 1., 0., 0.],
+ [0., 0., 1., 0.],
+ [0., 0., 0., 1.],
+ ]))
+ ),
+ // None:
+ (Matrix4x4([
+ [1., 2., 3., 4.],
+ [12., 11., 10., 9.],
+ [5., 6., 7., 8.],
+ [16., 15., 14., 13.],
+ ]),
+ None
+ ),
+ // Other:
+ (Matrix4x4([
+ [1., 1., 1., 0.],
+ [0., 3., 1., 2.],
+ [2., 3., 1., 0.],
+ [1., 0., 2., 1.],
+ ]),
+ Some(Matrix4x4([
+ [-3., -0.5, 1.5, 1.0],
+ [ 1., 0.25, -0.25, -0.5],
+ [ 3., 0.25, -1.25, -0.5],
+ [-3., 0.0, 1.0, 1.0],
+ ]))
+ ),
+
+
+ ];
+
+ for &(input, output) in tests {
+ assert_eq!(scalar_inv4x4(input), output);
+ assert_eq!(simd_inv4x4(input), output);
+ }
+ }
+}
+
+fn main() {
+ // Empty main to make cargo happy
+}
diff --git a/library/portable-simd/crates/core_simd/examples/nbody.rs b/library/portable-simd/crates/core_simd/examples/nbody.rs
new file mode 100644
index 000000000..df38a0096
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/examples/nbody.rs
@@ -0,0 +1,193 @@
+#![feature(portable_simd)]
+extern crate std_float;
+
+/// The n-body benchmark from the Computer Language Benchmarks Game
+/// Taken from the `packed_simd` crate
+/// Run this benchmark with `cargo test --example nbody`
+mod nbody {
+ use core_simd::simd::*;
+ #[allow(unused)] // False positive?
+ use std_float::StdFloat;
+
+ use std::f64::consts::PI;
+ const SOLAR_MASS: f64 = 4.0 * PI * PI;
+ const DAYS_PER_YEAR: f64 = 365.24;
+
+ #[derive(Debug, Clone, Copy)]
+ struct Body {
+ pub x: f64x4,
+ pub v: f64x4,
+ pub mass: f64,
+ }
+
+ const N_BODIES: usize = 5;
+ const BODIES: [Body; N_BODIES] = [
+ // sun:
+ Body {
+ x: f64x4::from_array([0., 0., 0., 0.]),
+ v: f64x4::from_array([0., 0., 0., 0.]),
+ mass: SOLAR_MASS,
+ },
+ // jupiter:
+ Body {
+ x: f64x4::from_array([
+ 4.84143144246472090e+00,
+ -1.16032004402742839e+00,
+ -1.03622044471123109e-01,
+ 0.,
+ ]),
+ v: f64x4::from_array([
+ 1.66007664274403694e-03 * DAYS_PER_YEAR,
+ 7.69901118419740425e-03 * DAYS_PER_YEAR,
+ -6.90460016972063023e-05 * DAYS_PER_YEAR,
+ 0.,
+ ]),
+ mass: 9.54791938424326609e-04 * SOLAR_MASS,
+ },
+ // saturn:
+ Body {
+ x: f64x4::from_array([
+ 8.34336671824457987e+00,
+ 4.12479856412430479e+00,
+ -4.03523417114321381e-01,
+ 0.,
+ ]),
+ v: f64x4::from_array([
+ -2.76742510726862411e-03 * DAYS_PER_YEAR,
+ 4.99852801234917238e-03 * DAYS_PER_YEAR,
+ 2.30417297573763929e-05 * DAYS_PER_YEAR,
+ 0.,
+ ]),
+ mass: 2.85885980666130812e-04 * SOLAR_MASS,
+ },
+ // uranus:
+ Body {
+ x: f64x4::from_array([
+ 1.28943695621391310e+01,
+ -1.51111514016986312e+01,
+ -2.23307578892655734e-01,
+ 0.,
+ ]),
+ v: f64x4::from_array([
+ 2.96460137564761618e-03 * DAYS_PER_YEAR,
+ 2.37847173959480950e-03 * DAYS_PER_YEAR,
+ -2.96589568540237556e-05 * DAYS_PER_YEAR,
+ 0.,
+ ]),
+ mass: 4.36624404335156298e-05 * SOLAR_MASS,
+ },
+ // neptune:
+ Body {
+ x: f64x4::from_array([
+ 1.53796971148509165e+01,
+ -2.59193146099879641e+01,
+ 1.79258772950371181e-01,
+ 0.,
+ ]),
+ v: f64x4::from_array([
+ 2.68067772490389322e-03 * DAYS_PER_YEAR,
+ 1.62824170038242295e-03 * DAYS_PER_YEAR,
+ -9.51592254519715870e-05 * DAYS_PER_YEAR,
+ 0.,
+ ]),
+ mass: 5.15138902046611451e-05 * SOLAR_MASS,
+ },
+ ];
+
+ fn offset_momentum(bodies: &mut [Body; N_BODIES]) {
+ let (sun, rest) = bodies.split_at_mut(1);
+ let sun = &mut sun[0];
+ for body in rest {
+ let m_ratio = body.mass / SOLAR_MASS;
+ sun.v -= body.v * Simd::splat(m_ratio);
+ }
+ }
+
+ fn energy(bodies: &[Body; N_BODIES]) -> f64 {
+ let mut e = 0.;
+ for i in 0..N_BODIES {
+ let bi = &bodies[i];
+ e += bi.mass * (bi.v * bi.v).reduce_sum() * 0.5;
+ for bj in bodies.iter().take(N_BODIES).skip(i + 1) {
+ let dx = bi.x - bj.x;
+ e -= bi.mass * bj.mass / (dx * dx).reduce_sum().sqrt()
+ }
+ }
+ e
+ }
+
+ fn advance(bodies: &mut [Body; N_BODIES], dt: f64) {
+ const N: usize = N_BODIES * (N_BODIES - 1) / 2;
+
+ // compute distance between bodies:
+ let mut r = [f64x4::splat(0.); N];
+ {
+ let mut i = 0;
+ for j in 0..N_BODIES {
+ for k in j + 1..N_BODIES {
+ r[i] = bodies[j].x - bodies[k].x;
+ i += 1;
+ }
+ }
+ }
+
+ let mut mag = [0.0; N];
+ for i in (0..N).step_by(2) {
+ let d2s = f64x2::from_array([
+ (r[i] * r[i]).reduce_sum(),
+ (r[i + 1] * r[i + 1]).reduce_sum(),
+ ]);
+ let dmags = f64x2::splat(dt) / (d2s * d2s.sqrt());
+ mag[i] = dmags[0];
+ mag[i + 1] = dmags[1];
+ }
+
+ let mut i = 0;
+ for j in 0..N_BODIES {
+ for k in j + 1..N_BODIES {
+ let f = r[i] * Simd::splat(mag[i]);
+ bodies[j].v -= f * Simd::splat(bodies[k].mass);
+ bodies[k].v += f * Simd::splat(bodies[j].mass);
+ i += 1
+ }
+ }
+ for body in bodies {
+ body.x += Simd::splat(dt) * body.v
+ }
+ }
+
+ pub fn run(n: usize) -> (f64, f64) {
+ let mut bodies = BODIES;
+ offset_momentum(&mut bodies);
+ let energy_before = energy(&bodies);
+ for _ in 0..n {
+ advance(&mut bodies, 0.01);
+ }
+ let energy_after = energy(&bodies);
+
+ (energy_before, energy_after)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ // Good enough for demonstration purposes, not going for strictness here.
+ fn approx_eq_f64(a: f64, b: f64) -> bool {
+ (a - b).abs() < 0.00001
+ }
+ #[test]
+ fn test() {
+ const OUTPUT: [f64; 2] = [-0.169075164, -0.169087605];
+ let (energy_before, energy_after) = super::nbody::run(1000);
+ assert!(approx_eq_f64(energy_before, OUTPUT[0]));
+ assert!(approx_eq_f64(energy_after, OUTPUT[1]));
+ }
+}
+
+fn main() {
+ {
+ let (energy_before, energy_after) = nbody::run(1000);
+ println!("Energy before: {energy_before}");
+ println!("Energy after: {energy_after}");
+ }
+}
diff --git a/library/portable-simd/crates/core_simd/examples/spectral_norm.rs b/library/portable-simd/crates/core_simd/examples/spectral_norm.rs
new file mode 100644
index 000000000..012182e09
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/examples/spectral_norm.rs
@@ -0,0 +1,77 @@
+#![feature(portable_simd)]
+
+use core_simd::simd::*;
+
+fn a(i: usize, j: usize) -> f64 {
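+ // Denominator of entry (i, j) of the spectral-norm matrix: A[i][j] = 1 / ((i+j)(i+j+1)/2 + i + 1).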
+ ((i + j) * (i + j + 1) / 2 + i + 1) as f64
+}
+
+fn mult_av(v: &[f64], out: &mut [f64]) {
+ assert!(v.len() == out.len());
+ assert!(v.len() % 2 == 0);
+
+ for (i, out) in out.iter_mut().enumerate() {
+ let mut sum = f64x2::splat(0.0);
+
+ let mut j = 0;
+ while j < v.len() {
+ let b = f64x2::from_slice(&v[j..]);
+ let a = f64x2::from_array([a(i, j), a(i, j + 1)]);
+ sum += b / a;
+ j += 2
+ }
+ *out = sum.reduce_sum();
+ }
+}
+
+fn mult_atv(v: &[f64], out: &mut [f64]) {
+ assert!(v.len() == out.len());
+ assert!(v.len() % 2 == 0);
+
+ for (i, out) in out.iter_mut().enumerate() {
+ let mut sum = f64x2::splat(0.0);
+
+ let mut j = 0;
+ while j < v.len() {
+ let b = f64x2::from_slice(&v[j..]);
+ let a = f64x2::from_array([a(j, i), a(j + 1, i)]);
+ sum += b / a;
+ j += 2
+ }
+ *out = sum.reduce_sum();
+ }
+}
+
+fn mult_atav(v: &[f64], out: &mut [f64], tmp: &mut [f64]) {
+ mult_av(v, tmp);
+ mult_atv(tmp, out);
+}
+
+pub fn spectral_norm(n: usize) -> f64 {
+ assert!(n % 2 == 0, "only even lengths are accepted");
+
+ let mut u = vec![1.0; n];
+ let mut v = u.clone();
+ let mut tmp = u.clone();
+
+ for _ in 0..10 {
+ mult_atav(&u, &mut v, &mut tmp);
+ mult_atav(&v, &mut u, &mut tmp);
+ }
+ (dot(&u, &v) / dot(&v, &v)).sqrt()
+}
+
+fn dot(x: &[f64], y: &[f64]) -> f64 {
+ // This is auto-vectorized:
+ x.iter().zip(y).map(|(&x, &y)| x * y).sum()
+}
+
+#[cfg(test)]
+#[test]
+fn test() {
+ assert_eq!(&format!("{:.9}", spectral_norm(100)), "1.274219991");
+}
+
+fn main() {
+ // Empty main to make cargo happy
+}
diff --git a/library/portable-simd/crates/core_simd/src/core_simd_docs.md b/library/portable-simd/crates/core_simd/src/core_simd_docs.md
new file mode 100644
index 000000000..15e8ed025
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/core_simd_docs.md
@@ -0,0 +1,4 @@
+Portable SIMD module.
+
+This module offers a portable abstraction for SIMD operations
+that is not bound to any particular hardware architecture.
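For readers skimming this diff, here is a minimal sketch of what the abstraction looks like in use. It assumes a nightly toolchain with `portable_simd` enabled and the types re-exported from `core::simd`, as they are at this point in the tree; the lane-wise operators and mask-based `select` shown here are the same APIs exercised by the examples and doctests below.

```rust
#![feature(portable_simd)]
use core::simd::{f32x4, SimdPartialOrd};

fn main() {
    // Lane-wise arithmetic on four f32 values at once.
    let a = f32x4::from_array([1.0, 2.0, 3.0, 4.0]);
    let b = f32x4::splat(10.0);
    assert_eq!((a + b).to_array(), [11.0, 12.0, 13.0, 14.0]);

    // Comparisons produce a mask; `select` picks lanes from either operand.
    let too_big = a.simd_gt(f32x4::splat(2.0));
    let capped = too_big.select(f32x4::splat(2.0), a);
    assert_eq!(capped.to_array(), [1.0, 2.0, 2.0, 2.0]);
}
```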
diff --git a/library/portable-simd/crates/core_simd/src/elements.rs b/library/portable-simd/crates/core_simd/src/elements.rs
new file mode 100644
index 000000000..701eb66b2
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/elements.rs
@@ -0,0 +1,11 @@
+mod float;
+mod int;
+mod uint;
+
+mod sealed {
+ pub trait Sealed {}
+}
+
+pub use float::*;
+pub use int::*;
+pub use uint::*;
diff --git a/library/portable-simd/crates/core_simd/src/elements/float.rs b/library/portable-simd/crates/core_simd/src/elements/float.rs
new file mode 100644
index 000000000..d60223270
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/elements/float.rs
@@ -0,0 +1,357 @@
+use super::sealed::Sealed;
+use crate::simd::{
+ intrinsics, LaneCount, Mask, Simd, SimdElement, SimdPartialEq, SimdPartialOrd,
+ SupportedLaneCount,
+};
+
+/// Operations on SIMD vectors of floats.
+pub trait SimdFloat: Copy + Sealed {
+ /// Mask type used for manipulating this SIMD vector type.
+ type Mask;
+
+ /// Scalar type contained by this SIMD vector type.
+ type Scalar;
+
+ /// Bit representation of this SIMD vector type.
+ type Bits;
+
+ /// Raw transmutation to an unsigned integer vector type with the
+ /// same size and number of lanes.
+ #[must_use = "method returns a new vector and does not mutate the original value"]
+ fn to_bits(self) -> Self::Bits;
+
+ /// Raw transmutation from an unsigned integer vector type with the
+ /// same size and number of lanes.
+ #[must_use = "method returns a new vector and does not mutate the original value"]
+ fn from_bits(bits: Self::Bits) -> Self;
+
+ /// Produces a vector where every lane has the absolute value of the
+ /// equivalently-indexed lane in `self`.
+ #[must_use = "method returns a new vector and does not mutate the original value"]
+ fn abs(self) -> Self;
+
+ /// Takes the reciprocal (inverse) of each lane, `1/x`.
+ #[must_use = "method returns a new vector and does not mutate the original value"]
+ fn recip(self) -> Self;
+
+ /// Converts each lane from radians to degrees.
+ #[must_use = "method returns a new vector and does not mutate the original value"]
+ fn to_degrees(self) -> Self;
+
+ /// Converts each lane from degrees to radians.
+ #[must_use = "method returns a new vector and does not mutate the original value"]
+ fn to_radians(self) -> Self;
+
+ /// Returns true for each lane if it has a positive sign, including
+ /// `+0.0`, `NaN`s with positive sign bit and positive infinity.
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn is_sign_positive(self) -> Self::Mask;
+
+ /// Returns true for each lane if it has a negative sign, including
+ /// `-0.0`, `NaN`s with negative sign bit and negative infinity.
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn is_sign_negative(self) -> Self::Mask;
+
+ /// Returns true for each lane if its value is `NaN`.
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn is_nan(self) -> Self::Mask;
+
+ /// Returns true for each lane if its value is positive infinity or negative infinity.
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn is_infinite(self) -> Self::Mask;
+
+ /// Returns true for each lane if its value is neither infinite nor `NaN`.
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn is_finite(self) -> Self::Mask;
+
+ /// Returns true for each lane if its value is subnormal.
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn is_subnormal(self) -> Self::Mask;
+
+ /// Returns true for each lane if its value is neither zero, infinite,
+ /// subnormal, nor `NaN`.
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn is_normal(self) -> Self::Mask;
+
+ /// Replaces each lane with a number that represents its sign.
+ ///
+ /// * `1.0` if the number is positive, `+0.0`, or `INFINITY`
+ /// * `-1.0` if the number is negative, `-0.0`, or `NEG_INFINITY`
+ /// * `NAN` if the number is `NAN`
+ #[must_use = "method returns a new vector and does not mutate the original value"]
+ fn signum(self) -> Self;
+
+ /// Returns each lane with the magnitude of `self` and the sign of `sign`.
+ ///
+ /// For any lane containing a `NAN`, a `NAN` with the sign of `sign` is returned.
+ #[must_use = "method returns a new vector and does not mutate the original value"]
+ fn copysign(self, sign: Self) -> Self;
+
+ /// Returns the minimum of each lane.
+ ///
+ /// If one of the values is `NAN`, then the other value is returned.
+ #[must_use = "method returns a new vector and does not mutate the original value"]
+ fn simd_min(self, other: Self) -> Self;
+
+ /// Returns the maximum of each lane.
+ ///
+ /// If one of the values is `NAN`, then the other value is returned.
+ #[must_use = "method returns a new vector and does not mutate the original value"]
+ fn simd_max(self, other: Self) -> Self;
+
+ /// Restrict each lane to a certain interval unless it is NaN.
+ ///
+ /// For each lane in `self`, returns the corresponding lane in `max` if the lane is
+ /// greater than `max`, and the corresponding lane in `min` if the lane is less
+ /// than `min`. Otherwise returns the lane in `self`.
+ #[must_use = "method returns a new vector and does not mutate the original value"]
+ fn simd_clamp(self, min: Self, max: Self) -> Self;
+
+ /// Returns the sum of the lanes of the vector.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+ /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+ /// # use simd::{f32x2, SimdFloat};
+ /// let v = f32x2::from_array([1., 2.]);
+ /// assert_eq!(v.reduce_sum(), 3.);
+ /// ```
+ fn reduce_sum(self) -> Self::Scalar;
+
+ /// Reducing multiply. Returns the product of the lanes of the vector.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+ /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+ /// # use simd::{f32x2, SimdFloat};
+ /// let v = f32x2::from_array([3., 4.]);
+ /// assert_eq!(v.reduce_product(), 12.);
+ /// ```
+ fn reduce_product(self) -> Self::Scalar;
+
+ /// Returns the maximum lane in the vector.
+ ///
+ /// Returns values based on equality, so a vector containing both `0.` and `-0.` may
+ /// return either.
+ ///
+ /// This function will not return `NaN` unless all lanes are `NaN`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+ /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+ /// # use simd::{f32x2, SimdFloat};
+ /// let v = f32x2::from_array([1., 2.]);
+ /// assert_eq!(v.reduce_max(), 2.);
+ ///
+ /// // NaN values are skipped...
+ /// let v = f32x2::from_array([1., f32::NAN]);
+ /// assert_eq!(v.reduce_max(), 1.);
+ ///
+ /// // ...unless all values are NaN
+ /// let v = f32x2::from_array([f32::NAN, f32::NAN]);
+ /// assert!(v.reduce_max().is_nan());
+ /// ```
+ fn reduce_max(self) -> Self::Scalar;
+
+ /// Returns the minimum lane in the vector.
+ ///
+ /// Returns values based on equality, so a vector containing both `0.` and `-0.` may
+ /// return either.
+ ///
+ /// This function will not return `NaN` unless all lanes are `NaN`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+ /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+ /// # use simd::{f32x2, SimdFloat};
+ /// let v = f32x2::from_array([3., 7.]);
+ /// assert_eq!(v.reduce_min(), 3.);
+ ///
+ /// // NaN values are skipped...
+ /// let v = f32x2::from_array([1., f32::NAN]);
+ /// assert_eq!(v.reduce_min(), 1.);
+ ///
+ /// // ...unless all values are NaN
+ /// let v = f32x2::from_array([f32::NAN, f32::NAN]);
+ /// assert!(v.reduce_min().is_nan());
+ /// ```
+ fn reduce_min(self) -> Self::Scalar;
+}
+
+macro_rules! impl_trait {
+ { $($ty:ty { bits: $bits_ty:ty, mask: $mask_ty:ty }),* } => {
+ $(
+ impl<const LANES: usize> Sealed for Simd<$ty, LANES>
+ where
+ LaneCount<LANES>: SupportedLaneCount,
+ {
+ }
+
+ impl<const LANES: usize> SimdFloat for Simd<$ty, LANES>
+ where
+ LaneCount<LANES>: SupportedLaneCount,
+ {
+ type Mask = Mask<<$mask_ty as SimdElement>::Mask, LANES>;
+ type Scalar = $ty;
+ type Bits = Simd<$bits_ty, LANES>;
+
+ #[inline]
+ fn to_bits(self) -> Simd<$bits_ty, LANES> {
+ assert_eq!(core::mem::size_of::<Self>(), core::mem::size_of::<Self::Bits>());
+ // Safety: transmuting between vector types is safe
+ unsafe { core::mem::transmute_copy(&self) }
+ }
+
+ #[inline]
+ fn from_bits(bits: Simd<$bits_ty, LANES>) -> Self {
+ assert_eq!(core::mem::size_of::<Self>(), core::mem::size_of::<Self::Bits>());
+ // Safety: transmuting between vector types is safe
+ unsafe { core::mem::transmute_copy(&bits) }
+ }
+
+ #[inline]
+ fn abs(self) -> Self {
+ // Safety: `self` is a float vector
+ unsafe { intrinsics::simd_fabs(self) }
+ }
+
+ #[inline]
+ fn recip(self) -> Self {
+ Self::splat(1.0) / self
+ }
+
+ #[inline]
+ fn to_degrees(self) -> Self {
+ // to_degrees uses a special constant for better precision, so extract that constant
+ self * Self::splat(Self::Scalar::to_degrees(1.))
+ }
+
+ #[inline]
+ fn to_radians(self) -> Self {
+ self * Self::splat(Self::Scalar::to_radians(1.))
+ }
+
+ #[inline]
+ fn is_sign_positive(self) -> Self::Mask {
+ !self.is_sign_negative()
+ }
+
+ #[inline]
+ fn is_sign_negative(self) -> Self::Mask {
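+ // `(!0 >> 1) + 1` is the unsigned pattern with only the sign (most significant) bit set.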
+ let sign_bits = self.to_bits() & Simd::splat((!0 >> 1) + 1);
+ sign_bits.simd_gt(Simd::splat(0))
+ }
+
+ #[inline]
+ fn is_nan(self) -> Self::Mask {
+ self.simd_ne(self)
+ }
+
+ #[inline]
+ fn is_infinite(self) -> Self::Mask {
+ self.abs().simd_eq(Self::splat(Self::Scalar::INFINITY))
+ }
+
+ #[inline]
+ fn is_finite(self) -> Self::Mask {
+ self.abs().simd_lt(Self::splat(Self::Scalar::INFINITY))
+ }
+
+ #[inline]
+ fn is_subnormal(self) -> Self::Mask {
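+ // A lane is subnormal when it is nonzero but its exponent bits (exactly the bits set in INFINITY) are all zero.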
+ self.abs().simd_ne(Self::splat(0.0)) & (self.to_bits() & Self::splat(Self::Scalar::INFINITY).to_bits()).simd_eq(Simd::splat(0))
+ }
+
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn is_normal(self) -> Self::Mask {
+ !(self.abs().simd_eq(Self::splat(0.0)) | self.is_nan() | self.is_subnormal() | self.is_infinite())
+ }
+
+ #[inline]
+ fn signum(self) -> Self {
+ self.is_nan().select(Self::splat(Self::Scalar::NAN), Self::splat(1.0).copysign(self))
+ }
+
+ #[inline]
+ fn copysign(self, sign: Self) -> Self {
+ let sign_bit = sign.to_bits() & Self::splat(-0.).to_bits();
+ let magnitude = self.to_bits() & !Self::splat(-0.).to_bits();
+ Self::from_bits(sign_bit | magnitude)
+ }
+
+ #[inline]
+ fn simd_min(self, other: Self) -> Self {
+ // Safety: `self` and `other` are float vectors
+ unsafe { intrinsics::simd_fmin(self, other) }
+ }
+
+ #[inline]
+ fn simd_max(self, other: Self) -> Self {
+ // Safety: `self` and `other` are floating point vectors
+ unsafe { intrinsics::simd_fmax(self, other) }
+ }
+
+ #[inline]
+ fn simd_clamp(self, min: Self, max: Self) -> Self {
+ assert!(
+ min.simd_le(max).all(),
+ "each lane in `min` must be less than or equal to the corresponding lane in `max`",
+ );
+ let mut x = self;
+ x = x.simd_lt(min).select(min, x);
+ x = x.simd_gt(max).select(max, x);
+ x
+ }
+
+ #[inline]
+ fn reduce_sum(self) -> Self::Scalar {
+ // LLVM sum is inaccurate on i586
+ if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
+ self.as_array().iter().sum()
+ } else {
+ // Safety: `self` is a float vector
+ unsafe { intrinsics::simd_reduce_add_ordered(self, 0.) }
+ }
+ }
+
+ #[inline]
+ fn reduce_product(self) -> Self::Scalar {
+ // LLVM product is inaccurate on i586
+ if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
+ self.as_array().iter().product()
+ } else {
+ // Safety: `self` is a float vector
+ unsafe { intrinsics::simd_reduce_mul_ordered(self, 1.) }
+ }
+ }
+
+ #[inline]
+ fn reduce_max(self) -> Self::Scalar {
+ // Safety: `self` is a float vector
+ unsafe { intrinsics::simd_reduce_max(self) }
+ }
+
+ #[inline]
+ fn reduce_min(self) -> Self::Scalar {
+ // Safety: `self` is a float vector
+ unsafe { intrinsics::simd_reduce_min(self) }
+ }
+ }
+ )*
+ }
+}
+
+impl_trait! { f32 { bits: u32, mask: i32 }, f64 { bits: u64, mask: i64 } }
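A short sketch of the `SimdFloat` surface defined above, under the same nightly `core::simd` assumptions as the doctests in this file: classification returns masks, `abs`/`copysign` only touch the sign bit, and `simd_clamp` leaves NaN lanes alone.

```rust
#![feature(portable_simd)]
use core::simd::{f32x4, SimdFloat};

fn main() {
    let v = f32x4::from_array([-1.5, f32::NAN, 0.25, 8.0]);

    // Lane-wise classification yields a mask, not a single bool.
    assert_eq!(v.is_nan().to_array(), [false, true, false, false]);

    // abs/copysign are pure sign-bit manipulation.
    assert_eq!(v.abs().to_array()[0], 1.5);
    assert_eq!(f32x4::splat(3.0).copysign(v).to_array()[0], -3.0);

    // simd_clamp restricts finite lanes to [0, 1] and keeps NaN lanes as NaN.
    let clamped = v.simd_clamp(f32x4::splat(0.0), f32x4::splat(1.0));
    assert_eq!(clamped.to_array()[3], 1.0);
    assert!(clamped.to_array()[1].is_nan());
}
```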
diff --git a/library/portable-simd/crates/core_simd/src/elements/int.rs b/library/portable-simd/crates/core_simd/src/elements/int.rs
new file mode 100644
index 000000000..9b8c37ed4
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/elements/int.rs
@@ -0,0 +1,298 @@
+use super::sealed::Sealed;
+use crate::simd::{
+ intrinsics, LaneCount, Mask, Simd, SimdElement, SimdPartialOrd, SupportedLaneCount,
+};
+
+/// Operations on SIMD vectors of signed integers.
+pub trait SimdInt: Copy + Sealed {
+ /// Mask type used for manipulating this SIMD vector type.
+ type Mask;
+
+ /// Scalar type contained by this SIMD vector type.
+ type Scalar;
+
+ /// Lanewise saturating add.
+ ///
+ /// # Examples
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+ /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+ /// # use simd::{Simd, SimdInt};
+ /// use core::i32::{MIN, MAX};
+ /// let x = Simd::from_array([MIN, 0, 1, MAX]);
+ /// let max = Simd::splat(MAX);
+ /// let unsat = x + max;
+ /// let sat = x.saturating_add(max);
+ /// assert_eq!(unsat, Simd::from_array([-1, MAX, MIN, -2]));
+ /// assert_eq!(sat, Simd::from_array([-1, MAX, MAX, MAX]));
+ /// ```
+ fn saturating_add(self, second: Self) -> Self;
+
+ /// Lanewise saturating subtract.
+ ///
+ /// # Examples
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+ /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+ /// # use simd::{Simd, SimdInt};
+ /// use core::i32::{MIN, MAX};
+ /// let x = Simd::from_array([MIN, -2, -1, MAX]);
+ /// let max = Simd::splat(MAX);
+ /// let unsat = x - max;
+ /// let sat = x.saturating_sub(max);
+ /// assert_eq!(unsat, Simd::from_array([1, MAX, MIN, 0]));
+ /// assert_eq!(sat, Simd::from_array([MIN, MIN, MIN, 0]));
+ /// ```
+ fn saturating_sub(self, second: Self) -> Self;
+
+ /// Lanewise absolute value, implemented in Rust.
+ /// Every lane becomes its absolute value.
+ ///
+ /// # Examples
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+ /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+ /// # use simd::{Simd, SimdInt};
+ /// use core::i32::{MIN, MAX};
+ /// let xs = Simd::from_array([MIN, MIN + 1, -5, 0]);
+ /// assert_eq!(xs.abs(), Simd::from_array([MIN, MAX, 5, 0]));
+ /// ```
+ fn abs(self) -> Self;
+
+ /// Lanewise saturating absolute value, implemented in Rust.
+ /// As abs(), except the MIN value becomes MAX instead of itself.
+ ///
+ /// # Examples
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+ /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+ /// # use simd::{Simd, SimdInt};
+ /// use core::i32::{MIN, MAX};
+ /// let xs = Simd::from_array([MIN, -2, 0, 3]);
+ /// let unsat = xs.abs();
+ /// let sat = xs.saturating_abs();
+ /// assert_eq!(unsat, Simd::from_array([MIN, 2, 0, 3]));
+ /// assert_eq!(sat, Simd::from_array([MAX, 2, 0, 3]));
+ /// ```
+ fn saturating_abs(self) -> Self;
+
+ /// Lanewise saturating negation, implemented in Rust.
+ /// As neg(), except the MIN value becomes MAX instead of itself.
+ ///
+ /// # Examples
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+ /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+ /// # use simd::{Simd, SimdInt};
+ /// use core::i32::{MIN, MAX};
+ /// let x = Simd::from_array([MIN, -2, 3, MAX]);
+ /// let unsat = -x;
+ /// let sat = x.saturating_neg();
+ /// assert_eq!(unsat, Simd::from_array([MIN, 2, -3, MIN + 1]));
+ /// assert_eq!(sat, Simd::from_array([MAX, 2, -3, MIN + 1]));
+ /// ```
+ fn saturating_neg(self) -> Self;
+
+ /// Returns true for each positive lane and false if it is zero or negative.
+ fn is_positive(self) -> Self::Mask;
+
+ /// Returns true for each negative lane and false if it is zero or positive.
+ fn is_negative(self) -> Self::Mask;
+
+ /// Returns numbers representing the sign of each lane.
+ /// * `0` if the number is zero
+ /// * `1` if the number is positive
+ /// * `-1` if the number is negative
+ fn signum(self) -> Self;
+
+ /// Returns the sum of the lanes of the vector, with wrapping addition.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+ /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+ /// # use simd::{i32x4, SimdInt};
+ /// let v = i32x4::from_array([1, 2, 3, 4]);
+ /// assert_eq!(v.reduce_sum(), 10);
+ ///
+ /// // SIMD integer addition is always wrapping
+ /// let v = i32x4::from_array([i32::MAX, 1, 0, 0]);
+ /// assert_eq!(v.reduce_sum(), i32::MIN);
+ /// ```
+ fn reduce_sum(self) -> Self::Scalar;
+
+ /// Returns the product of the lanes of the vector, with wrapping multiplication.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+ /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+ /// # use simd::{i32x4, SimdInt};
+ /// let v = i32x4::from_array([1, 2, 3, 4]);
+ /// assert_eq!(v.reduce_product(), 24);
+ ///
+ /// // SIMD integer multiplication is always wrapping
+ /// let v = i32x4::from_array([i32::MAX, 2, 1, 1]);
+ /// assert!(v.reduce_product() < i32::MAX);
+ /// ```
+ fn reduce_product(self) -> Self::Scalar;
+
+ /// Returns the maximum lane in the vector.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+ /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+ /// # use simd::{i32x4, SimdInt};
+ /// let v = i32x4::from_array([1, 2, 3, 4]);
+ /// assert_eq!(v.reduce_max(), 4);
+ /// ```
+ fn reduce_max(self) -> Self::Scalar;
+
+ /// Returns the minimum lane in the vector.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+ /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+ /// # use simd::{i32x4, SimdInt};
+ /// let v = i32x4::from_array([1, 2, 3, 4]);
+ /// assert_eq!(v.reduce_min(), 1);
+ /// ```
+ fn reduce_min(self) -> Self::Scalar;
+
+ /// Returns the cumulative bitwise "and" across the lanes of the vector.
+ fn reduce_and(self) -> Self::Scalar;
+
+ /// Returns the cumulative bitwise "or" across the lanes of the vector.
+ fn reduce_or(self) -> Self::Scalar;
+
+ /// Returns the cumulative bitwise "xor" across the lanes of the vector.
+ fn reduce_xor(self) -> Self::Scalar;
+}
+
+macro_rules! impl_trait {
+ { $($ty:ty),* } => {
+ $(
+ impl<const LANES: usize> Sealed for Simd<$ty, LANES>
+ where
+ LaneCount<LANES>: SupportedLaneCount,
+ {
+ }
+
+ impl<const LANES: usize> SimdInt for Simd<$ty, LANES>
+ where
+ LaneCount<LANES>: SupportedLaneCount,
+ {
+ type Mask = Mask<<$ty as SimdElement>::Mask, LANES>;
+ type Scalar = $ty;
+
+ #[inline]
+ fn saturating_add(self, second: Self) -> Self {
+ // Safety: `self` is a vector
+ unsafe { intrinsics::simd_saturating_add(self, second) }
+ }
+
+ #[inline]
+ fn saturating_sub(self, second: Self) -> Self {
+ // Safety: `self` is a vector
+ unsafe { intrinsics::simd_saturating_sub(self, second) }
+ }
+
+ #[inline]
+ fn abs(self) -> Self {
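+ // Arithmetic shift gives an all-ones mask for negative lanes; `(x ^ m) - m` then negates exactly those lanes.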
+ const SHR: $ty = <$ty>::BITS as $ty - 1;
+ let m = self >> Simd::splat(SHR);
+ (self^m) - m
+ }
+
+ #[inline]
+ fn saturating_abs(self) -> Self {
+ // arith shift for -1 or 0 mask based on sign bit, giving 2s complement
+ const SHR: $ty = <$ty>::BITS as $ty - 1;
+ let m = self >> Simd::splat(SHR);
+ (self^m).saturating_sub(m)
+ }
+
+ #[inline]
+ fn saturating_neg(self) -> Self {
+ Self::splat(0).saturating_sub(self)
+ }
+
+ #[inline]
+ fn is_positive(self) -> Self::Mask {
+ self.simd_gt(Self::splat(0))
+ }
+
+ #[inline]
+ fn is_negative(self) -> Self::Mask {
+ self.simd_lt(Self::splat(0))
+ }
+
+ #[inline]
+ fn signum(self) -> Self {
+ self.is_positive().select(
+ Self::splat(1),
+ self.is_negative().select(Self::splat(-1), Self::splat(0))
+ )
+ }
+
+ #[inline]
+ fn reduce_sum(self) -> Self::Scalar {
+ // Safety: `self` is an integer vector
+ unsafe { intrinsics::simd_reduce_add_ordered(self, 0) }
+ }
+
+ #[inline]
+ fn reduce_product(self) -> Self::Scalar {
+ // Safety: `self` is an integer vector
+ unsafe { intrinsics::simd_reduce_mul_ordered(self, 1) }
+ }
+
+ #[inline]
+ fn reduce_max(self) -> Self::Scalar {
+ // Safety: `self` is an integer vector
+ unsafe { intrinsics::simd_reduce_max(self) }
+ }
+
+ #[inline]
+ fn reduce_min(self) -> Self::Scalar {
+ // Safety: `self` is an integer vector
+ unsafe { intrinsics::simd_reduce_min(self) }
+ }
+
+ #[inline]
+ fn reduce_and(self) -> Self::Scalar {
+ // Safety: `self` is an integer vector
+ unsafe { intrinsics::simd_reduce_and(self) }
+ }
+
+ #[inline]
+ fn reduce_or(self) -> Self::Scalar {
+ // Safety: `self` is an integer vector
+ unsafe { intrinsics::simd_reduce_or(self) }
+ }
+
+ #[inline]
+ fn reduce_xor(self) -> Self::Scalar {
+ // Safety: `self` is an integer vector
+ unsafe { intrinsics::simd_reduce_xor(self) }
+ }
+ }
+ )*
+ }
+}
+
+impl_trait! { i8, i16, i32, i64, isize }
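Likewise, a small sketch of `SimdInt` under the same nightly `core::simd` assumptions, contrasting wrapping `abs` with the saturating variants described above.

```rust
#![feature(portable_simd)]
use core::simd::{i32x4, SimdInt};

fn main() {
    let v = i32x4::from_array([i32::MIN, -7, 0, i32::MAX]);

    // Saturating addition clamps at the type bounds instead of wrapping.
    assert_eq!(
        v.saturating_add(i32x4::splat(1)).to_array(),
        [i32::MIN + 1, -6, 1, i32::MAX]
    );

    // abs() wraps for MIN; saturating_abs() clamps it to MAX.
    assert_eq!(v.abs().to_array(), [i32::MIN, 7, 0, i32::MAX]);
    assert_eq!(v.saturating_abs().to_array(), [i32::MAX, 7, 0, i32::MAX]);

    // signum maps each lane to -1, 0, or 1.
    assert_eq!(v.signum().to_array(), [-1, -1, 0, 1]);
}
```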
diff --git a/library/portable-simd/crates/core_simd/src/elements/uint.rs b/library/portable-simd/crates/core_simd/src/elements/uint.rs
new file mode 100644
index 000000000..21e7e76eb
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/elements/uint.rs
@@ -0,0 +1,139 @@
+use super::sealed::Sealed;
+use crate::simd::{intrinsics, LaneCount, Simd, SupportedLaneCount};
+
+/// Operations on SIMD vectors of unsigned integers.
+pub trait SimdUint: Copy + Sealed {
+ /// Scalar type contained by this SIMD vector type.
+ type Scalar;
+
+ /// Lanewise saturating add.
+ ///
+ /// # Examples
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+ /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+ /// # use simd::{Simd, SimdUint};
+ /// use core::u32::MAX;
+ /// let x = Simd::from_array([2, 1, 0, MAX]);
+ /// let max = Simd::splat(MAX);
+ /// let unsat = x + max;
+ /// let sat = x.saturating_add(max);
+ /// assert_eq!(unsat, Simd::from_array([1, 0, MAX, MAX - 1]));
+ /// assert_eq!(sat, max);
+ /// ```
+ fn saturating_add(self, second: Self) -> Self;
+
+ /// Lanewise saturating subtract.
+ ///
+ /// # Examples
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+ /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+ /// # use simd::{Simd, SimdUint};
+ /// use core::u32::MAX;
+ /// let x = Simd::from_array([2, 1, 0, MAX]);
+ /// let max = Simd::splat(MAX);
+ /// let unsat = x - max;
+ /// let sat = x.saturating_sub(max);
+ /// assert_eq!(unsat, Simd::from_array([3, 2, 1, 0]));
+ /// assert_eq!(sat, Simd::splat(0));
+ /// ```
+ fn saturating_sub(self, second: Self) -> Self;
+
+ /// Returns the sum of the lanes of the vector, with wrapping addition.
+ fn reduce_sum(self) -> Self::Scalar;
+
+ /// Returns the product of the lanes of the vector, with wrapping multiplication.
+ fn reduce_product(self) -> Self::Scalar;
+
+ /// Returns the maximum lane in the vector.
+ fn reduce_max(self) -> Self::Scalar;
+
+ /// Returns the minimum lane in the vector.
+ fn reduce_min(self) -> Self::Scalar;
+
+ /// Returns the cumulative bitwise "and" across the lanes of the vector.
+ fn reduce_and(self) -> Self::Scalar;
+
+ /// Returns the cumulative bitwise "or" across the lanes of the vector.
+ fn reduce_or(self) -> Self::Scalar;
+
+ /// Returns the cumulative bitwise "xor" across the lanes of the vector.
+ fn reduce_xor(self) -> Self::Scalar;
+}
+
+macro_rules! impl_trait {
+ { $($ty:ty),* } => {
+ $(
+ impl<const LANES: usize> Sealed for Simd<$ty, LANES>
+ where
+ LaneCount<LANES>: SupportedLaneCount,
+ {
+ }
+
+ impl<const LANES: usize> SimdUint for Simd<$ty, LANES>
+ where
+ LaneCount<LANES>: SupportedLaneCount,
+ {
+ type Scalar = $ty;
+
+ #[inline]
+ fn saturating_add(self, second: Self) -> Self {
+ // Safety: `self` is a vector
+ unsafe { intrinsics::simd_saturating_add(self, second) }
+ }
+
+ #[inline]
+ fn saturating_sub(self, second: Self) -> Self {
+ // Safety: `self` is a vector
+ unsafe { intrinsics::simd_saturating_sub(self, second) }
+ }
+
+ #[inline]
+ fn reduce_sum(self) -> Self::Scalar {
+ // Safety: `self` is an integer vector
+ unsafe { intrinsics::simd_reduce_add_ordered(self, 0) }
+ }
+
+ #[inline]
+ fn reduce_product(self) -> Self::Scalar {
+ // Safety: `self` is an integer vector
+ unsafe { intrinsics::simd_reduce_mul_ordered(self, 1) }
+ }
+
+ #[inline]
+ fn reduce_max(self) -> Self::Scalar {
+ // Safety: `self` is an integer vector
+ unsafe { intrinsics::simd_reduce_max(self) }
+ }
+
+ #[inline]
+ fn reduce_min(self) -> Self::Scalar {
+ // Safety: `self` is an integer vector
+ unsafe { intrinsics::simd_reduce_min(self) }
+ }
+
+ #[inline]
+ fn reduce_and(self) -> Self::Scalar {
+ // Safety: `self` is an integer vector
+ unsafe { intrinsics::simd_reduce_and(self) }
+ }
+
+ #[inline]
+ fn reduce_or(self) -> Self::Scalar {
+ // Safety: `self` is an integer vector
+ unsafe { intrinsics::simd_reduce_or(self) }
+ }
+
+ #[inline]
+ fn reduce_xor(self) -> Self::Scalar {
+ // Safety: `self` is an integer vector
+ unsafe { intrinsics::simd_reduce_xor(self) }
+ }
+ }
+ )*
+ }
+}
+
+impl_trait! { u8, u16, u32, u64, usize }
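[note] A quick sketch of the `SimdUint` reductions declared above (not part of the diff; assumes the nightly `portable_simd` feature and the `core::simd` paths used in the doc examples):

    #![feature(portable_simd)]
    use core::simd::{Simd, SimdUint};

    fn main() {
        let v = Simd::from_array([1u32, 2, 3, 4]);
        assert_eq!(v.reduce_sum(), 10);     // wrapping addition across lanes
        assert_eq!(v.reduce_product(), 24); // wrapping multiplication across lanes
        assert_eq!(v.reduce_max(), 4);
        assert_eq!(v.reduce_min(), 1);
        assert_eq!(v.reduce_and(), 0);      // 1 & 2 & 3 & 4
        assert_eq!(v.reduce_or(), 7);       // 1 | 2 | 3 | 4
        assert_eq!(v.reduce_xor(), 4);      // 1 ^ 2 ^ 3 ^ 4
    }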
diff --git a/library/portable-simd/crates/core_simd/src/eq.rs b/library/portable-simd/crates/core_simd/src/eq.rs
new file mode 100644
index 000000000..c7111f720
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/eq.rs
@@ -0,0 +1,73 @@
+use crate::simd::{intrinsics, LaneCount, Mask, Simd, SimdElement, SupportedLaneCount};
+
+/// Parallel `PartialEq`.
+pub trait SimdPartialEq {
+ /// The mask type returned by each comparison.
+ type Mask;
+
+ /// Test if each lane is equal to the corresponding lane in `other`.
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn simd_eq(self, other: Self) -> Self::Mask;
+
+ /// Test if each lane is equal to the corresponding lane in `other`.
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn simd_ne(self, other: Self) -> Self::Mask;
+}
+
+macro_rules! impl_number {
+ { $($number:ty),* } => {
+ $(
+ impl<const LANES: usize> SimdPartialEq for Simd<$number, LANES>
+ where
+ LaneCount<LANES>: SupportedLaneCount,
+ {
+ type Mask = Mask<<$number as SimdElement>::Mask, LANES>;
+
+ #[inline]
+ fn simd_eq(self, other: Self) -> Self::Mask {
+ // Safety: `self` is a vector, and the result of the comparison
+ // is always a valid mask.
+ unsafe { Mask::from_int_unchecked(intrinsics::simd_eq(self, other)) }
+ }
+
+ #[inline]
+ fn simd_ne(self, other: Self) -> Self::Mask {
+ // Safety: `self` is a vector, and the result of the comparison
+ // is always a valid mask.
+ unsafe { Mask::from_int_unchecked(intrinsics::simd_ne(self, other)) }
+ }
+ }
+ )*
+ }
+}
+
+impl_number! { f32, f64, u8, u16, u32, u64, usize, i8, i16, i32, i64, isize }
+
+macro_rules! impl_mask {
+ { $($integer:ty),* } => {
+ $(
+ impl<const LANES: usize> SimdPartialEq for Mask<$integer, LANES>
+ where
+ LaneCount<LANES>: SupportedLaneCount,
+ {
+ type Mask = Self;
+
+ #[inline]
+ fn simd_eq(self, other: Self) -> Self::Mask {
+ // Safety: `self` is a vector, and the result of the comparison
+ // is always a valid mask.
+ unsafe { Self::from_int_unchecked(intrinsics::simd_eq(self.to_int(), other.to_int())) }
+ }
+
+ #[inline]
+ fn simd_ne(self, other: Self) -> Self::Mask {
+ // Safety: `self` is a vector, and the result of the comparison
+ // is always a valid mask.
+ unsafe { Self::from_int_unchecked(intrinsics::simd_ne(self.to_int(), other.to_int())) }
+ }
+ }
+ )*
+ }
+}
+
+impl_mask! { i8, i16, i32, i64, isize }
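[note] The lanewise comparisons return the `Mask` types defined later in `masks.rs`. A minimal sketch (not part of the diff; nightly `portable_simd` assumed):

    #![feature(portable_simd)]
    use core::simd::{Simd, SimdPartialEq};

    fn main() {
        let a = Simd::from_array([1i32, 2, 3, 4]);
        let b = Simd::from_array([1i32, 0, 3, 0]);
        // Each lane is compared independently; the result is a Mask<i32, 4>.
        assert_eq!(a.simd_eq(b).to_array(), [true, false, true, false]);
        assert_eq!(a.simd_ne(b).to_array(), [false, true, false, true]);
    }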
diff --git a/library/portable-simd/crates/core_simd/src/fmt.rs b/library/portable-simd/crates/core_simd/src/fmt.rs
new file mode 100644
index 000000000..dbd9839c4
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/fmt.rs
@@ -0,0 +1,39 @@
+use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
+use core::fmt;
+
+macro_rules! impl_fmt_trait {
+ { $($trait:ident,)* } => {
+ $(
+ impl<T, const LANES: usize> fmt::$trait for Simd<T, LANES>
+ where
+ LaneCount<LANES>: SupportedLaneCount,
+ T: SimdElement + fmt::$trait,
+ {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ #[repr(transparent)]
+ struct Wrapper<'a, T: fmt::$trait>(&'a T);
+
+ impl<T: fmt::$trait> fmt::Debug for Wrapper<'_, T> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ self.0.fmt(f)
+ }
+ }
+
+ f.debug_list()
+ .entries(self.as_array().iter().map(|x| Wrapper(x)))
+ .finish()
+ }
+ }
+ )*
+ }
+}
+
+impl_fmt_trait! {
+ Debug,
+ Binary,
+ LowerExp,
+ UpperExp,
+ Octal,
+ LowerHex,
+ UpperHex,
+}
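[note] The blanket impls above make vectors print like arrays, with each lane deferring to the scalar type's own formatter through the `Wrapper` newtype. An illustrative sketch (not part of the diff; nightly `portable_simd` assumed):

    #![feature(portable_simd)]
    use core::simd::Simd;

    fn main() {
        let v = Simd::from_array([1u8, 2, 3, 4]);
        // Debug renders the lanes as a list.
        assert_eq!(format!("{v:?}"), "[1, 2, 3, 4]");
        // Binary, hex, octal, etc. forward each lane's own formatter impl,
        // e.g. format!("{v:b}") renders each lane in binary.
    }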
diff --git a/library/portable-simd/crates/core_simd/src/intrinsics.rs b/library/portable-simd/crates/core_simd/src/intrinsics.rs
new file mode 100644
index 000000000..962c83a78
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/intrinsics.rs
@@ -0,0 +1,153 @@
+//! This module contains the LLVM intrinsics bindings that provide the functionality for this
+//! crate.
+//!
+//! The LLVM assembly language is documented here: <https://llvm.org/docs/LangRef.html>
+//!
+//! A quick glossary of jargon that may appear in this module, mostly paraphrasing LLVM's LangRef:
+//! - poison: "undefined behavior as a value". specifically, it is like uninit memory (such as padding bytes). it is "safe" to create poison, BUT
+//! poison MUST NOT be observed from safe code, as operations on poison return poison, like NaN. unlike NaN, which has defined comparisons,
+//! poison is neither true nor false, and LLVM may also convert it to undef (at which point it is both). so, it can't be conditioned on, either.
+//! - undef: "a value that is every value". functionally like poison, insofar as Rust is concerned. poison may become this. note:
+//! this means that division by poison or undef is like division by zero, which means it inflicts...
+//! - "UB": poison and undef cover most of what people call "UB". "UB" means this operation immediately invalidates the program:
+//! LLVM is allowed to lower it to `ud2` or other opcodes that may cause an illegal instruction exception, and this is the "good end".
+//! The "bad end" is that LLVM may reverse time to the moment control flow diverged on a path towards undefined behavior,
+//! and destroy the other branch, potentially deleting safe code and violating Rust's `unsafe` contract.
+//!
+//! Note that according to LLVM, vectors are not arrays, but they are equivalent when stored to and loaded from memory.
+//!
+//! Unless stated otherwise, all intrinsics for binary operations require SIMD vectors of equal types and lengths.
+
+// These intrinsics aren't linked directly from LLVM and are mostly undocumented; however, they
+// are mostly lowered to the matching LLVM instructions by the compiler in a fairly straightforward manner.
+// The associated LLVM instruction or intrinsic is documented alongside each Rust intrinsic function.
+extern "platform-intrinsic" {
+ /// add/fadd
+ pub(crate) fn simd_add<T>(x: T, y: T) -> T;
+
+ /// sub/fsub
+ pub(crate) fn simd_sub<T>(lhs: T, rhs: T) -> T;
+
+ /// mul/fmul
+ pub(crate) fn simd_mul<T>(x: T, y: T) -> T;
+
+ /// udiv/sdiv/fdiv
+ /// ints and uints: {s,u}div incur UB if division by zero occurs.
+ /// ints: sdiv is UB for int::MIN / -1.
+ /// floats: fdiv is never UB, but may create NaNs or infinities.
+ pub(crate) fn simd_div<T>(lhs: T, rhs: T) -> T;
+
+ /// urem/srem/frem
+ /// ints and uints: {s,u}rem incur UB if division by zero occurs.
+ /// ints: srem is UB for int::MIN / -1.
+ /// floats: frem is equivalent to libm::fmod in the "default" floating point environment, sans errno.
+ pub(crate) fn simd_rem<T>(lhs: T, rhs: T) -> T;
+
+ /// shl
+ /// for (u)ints. poison if rhs >= lhs::BITS
+ pub(crate) fn simd_shl<T>(lhs: T, rhs: T) -> T;
+
+ /// ints: ashr
+ /// uints: lshr
+ /// poison if rhs >= lhs::BITS
+ pub(crate) fn simd_shr<T>(lhs: T, rhs: T) -> T;
+
+ /// and
+ pub(crate) fn simd_and<T>(x: T, y: T) -> T;
+
+ /// or
+ pub(crate) fn simd_or<T>(x: T, y: T) -> T;
+
+ /// xor
+ pub(crate) fn simd_xor<T>(x: T, y: T) -> T;
+
+ /// getelementptr (without inbounds)
+ pub(crate) fn simd_arith_offset<T, U>(ptrs: T, offsets: U) -> T;
+
+ /// fptoui/fptosi/uitofp/sitofp
+ /// casting floats to integers is truncating, so it is safe to convert values such as 1.5,
+ /// but the truncated value must fit in the target type or the result is poison.
+ /// use `simd_as` instead for a cast that performs a saturating conversion.
+ pub(crate) fn simd_cast<T, U>(x: T) -> U;
+ /// follows Rust's `T as U` semantics, including saturating float casts
+ /// which amounts to the same as `simd_cast` for many cases
+ pub(crate) fn simd_as<T, U>(x: T) -> U;
+
+ /// neg/fneg
+ /// ints: ultimately becomes a call to cg_ssa's BuilderMethods::neg. cg_llvm equates this to `simd_sub(Simd::splat(0), x)`.
+ /// floats: LLVM's fneg, which changes the floating point sign bit. Some arches have instructions for it.
+ /// Rust panics for Neg::neg(int::MIN) due to overflow, but it is not UB in LLVM without `nsw`.
+ pub(crate) fn simd_neg<T>(x: T) -> T;
+
+ /// fabs
+ pub(crate) fn simd_fabs<T>(x: T) -> T;
+
+ // minnum/maxnum
+ pub(crate) fn simd_fmin<T>(x: T, y: T) -> T;
+ pub(crate) fn simd_fmax<T>(x: T, y: T) -> T;
+
+ // these return Simd<int, N> with the same BITS size as the inputs
+ pub(crate) fn simd_eq<T, U>(x: T, y: T) -> U;
+ pub(crate) fn simd_ne<T, U>(x: T, y: T) -> U;
+ pub(crate) fn simd_lt<T, U>(x: T, y: T) -> U;
+ pub(crate) fn simd_le<T, U>(x: T, y: T) -> U;
+ pub(crate) fn simd_gt<T, U>(x: T, y: T) -> U;
+ pub(crate) fn simd_ge<T, U>(x: T, y: T) -> U;
+
+ // shufflevector
+ // idx: LLVM calls it a "shuffle mask vector constant", a vector of i32s
+ pub(crate) fn simd_shuffle<T, U, V>(x: T, y: T, idx: U) -> V;
+
+ /// llvm.masked.gather
+ /// like a loop of pointer reads
+ /// val: vector of values to select if a lane is masked
+ /// ptr: vector of pointers to read from
+ /// mask: a "wide" mask of integers, selects as if simd_select(mask, read(ptr), val)
+ /// note, the LLVM intrinsic accepts a mask vector of <N x i1>
+ /// FIXME: review this if/when we fix up our mask story in general?
+ pub(crate) fn simd_gather<T, U, V>(val: T, ptr: U, mask: V) -> T;
+ /// llvm.masked.scatter
+ /// like gather, but more spicy, as it writes instead of reads
+ pub(crate) fn simd_scatter<T, U, V>(val: T, ptr: U, mask: V);
+
+ // {s,u}add.sat
+ pub(crate) fn simd_saturating_add<T>(x: T, y: T) -> T;
+
+ // {s,u}sub.sat
+ pub(crate) fn simd_saturating_sub<T>(lhs: T, rhs: T) -> T;
+
+ // reductions
+ // llvm.vector.reduce.{add,fadd}
+ pub(crate) fn simd_reduce_add_ordered<T, U>(x: T, y: U) -> U;
+ // llvm.vector.reduce.{mul,fmul}
+ pub(crate) fn simd_reduce_mul_ordered<T, U>(x: T, y: U) -> U;
+ #[allow(unused)]
+ pub(crate) fn simd_reduce_all<T>(x: T) -> bool;
+ #[allow(unused)]
+ pub(crate) fn simd_reduce_any<T>(x: T) -> bool;
+ pub(crate) fn simd_reduce_max<T, U>(x: T) -> U;
+ pub(crate) fn simd_reduce_min<T, U>(x: T) -> U;
+ pub(crate) fn simd_reduce_and<T, U>(x: T) -> U;
+ pub(crate) fn simd_reduce_or<T, U>(x: T) -> U;
+ pub(crate) fn simd_reduce_xor<T, U>(x: T) -> U;
+
+ // truncate integer vector to bitmask
+ // `fn simd_bitmask(vector) -> unsigned integer` takes a vector of integers and
+ // returns either an unsigned integer or an array of `u8`.
+ // Every element in the vector becomes a single bit in the returned bitmask.
+ // If the vector has fewer than 8 lanes, a u8 is returned with zeroed trailing bits.
+ // The bit order of the result depends on the byte endianness. LSB-first for little
+ // endian and MSB-first for big endian.
+ //
+ // UB if called on a vector with values other than 0 and -1.
+ #[allow(unused)]
+ pub(crate) fn simd_bitmask<T, U>(x: T) -> U;
+
+ // select
+ // first argument is a vector of integers, -1 (all bits 1) is "true"
+ // logically equivalent to (yes & m) | (no & (m ^ -1)),
+ // but you can use it on floats.
+ pub(crate) fn simd_select<M, T>(m: M, yes: T, no: T) -> T;
+ #[allow(unused)]
+ pub(crate) fn simd_select_bitmask<M, T>(m: M, yes: T, no: T) -> T;
+}
diff --git a/library/portable-simd/crates/core_simd/src/iter.rs b/library/portable-simd/crates/core_simd/src/iter.rs
new file mode 100644
index 000000000..3275b4db8
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/iter.rs
@@ -0,0 +1,58 @@
+use crate::simd::{LaneCount, Simd, SupportedLaneCount};
+use core::{
+ iter::{Product, Sum},
+ ops::{Add, Mul},
+};
+
+macro_rules! impl_traits {
+ { $type:ty } => {
+ impl<const LANES: usize> Sum<Self> for Simd<$type, LANES>
+ where
+ LaneCount<LANES>: SupportedLaneCount,
+ {
+ fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
+ iter.fold(Simd::splat(0 as $type), Add::add)
+ }
+ }
+
+ impl<const LANES: usize> Product<Self> for Simd<$type, LANES>
+ where
+ LaneCount<LANES>: SupportedLaneCount,
+ {
+ fn product<I: Iterator<Item = Self>>(iter: I) -> Self {
+ iter.fold(Simd::splat(1 as $type), Mul::mul)
+ }
+ }
+
+ impl<'a, const LANES: usize> Sum<&'a Self> for Simd<$type, LANES>
+ where
+ LaneCount<LANES>: SupportedLaneCount,
+ {
+ fn sum<I: Iterator<Item = &'a Self>>(iter: I) -> Self {
+ iter.fold(Simd::splat(0 as $type), Add::add)
+ }
+ }
+
+ impl<'a, const LANES: usize> Product<&'a Self> for Simd<$type, LANES>
+ where
+ LaneCount<LANES>: SupportedLaneCount,
+ {
+ fn product<I: Iterator<Item = &'a Self>>(iter: I) -> Self {
+ iter.fold(Simd::splat(1 as $type), Mul::mul)
+ }
+ }
+ }
+}
+
+impl_traits! { f32 }
+impl_traits! { f64 }
+impl_traits! { u8 }
+impl_traits! { u16 }
+impl_traits! { u32 }
+impl_traits! { u64 }
+impl_traits! { usize }
+impl_traits! { i8 }
+impl_traits! { i16 }
+impl_traits! { i32 }
+impl_traits! { i64 }
+impl_traits! { isize }
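[note] These impls let an iterator of vectors be folded lanewise, which is the usual way to finish a chunked loop. An illustrative sketch (not part of the diff; nightly `portable_simd` assumed):

    #![feature(portable_simd)]
    use core::simd::Simd;

    fn main() {
        let chunks = [
            Simd::from_array([1.0f32, 2.0, 3.0, 4.0]),
            Simd::from_array([10.0f32, 20.0, 30.0, 40.0]),
        ];
        // Lanewise sum: folds from splat(0.0) with Add::add.
        let total: Simd<f32, 4> = chunks.iter().copied().sum();
        assert_eq!(total.to_array(), [11.0, 22.0, 33.0, 44.0]);
        // The Sum<&Self> impl allows summing by reference as well.
        let by_ref: Simd<f32, 4> = chunks.iter().sum();
        assert_eq!(by_ref, total);
    }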
diff --git a/library/portable-simd/crates/core_simd/src/lane_count.rs b/library/portable-simd/crates/core_simd/src/lane_count.rs
new file mode 100644
index 000000000..63723e2ec
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/lane_count.rs
@@ -0,0 +1,46 @@
+mod sealed {
+ pub trait Sealed {}
+}
+use sealed::Sealed;
+
+/// Specifies the number of lanes in a SIMD vector as a type.
+pub struct LaneCount<const LANES: usize>;
+
+impl<const LANES: usize> LaneCount<LANES> {
+ /// The number of bytes in a bitmask with this many lanes.
+ pub const BITMASK_LEN: usize = (LANES + 7) / 8;
+}
+
+/// Statically guarantees that a lane count is marked as supported.
+///
+/// This trait is *sealed*: the list of implementors below is total.
+/// Users do not have the ability to mark additional `LaneCount<N>` values as supported.
+/// Only SIMD vectors with supported lane counts are constructible.
+pub trait SupportedLaneCount: Sealed {
+ #[doc(hidden)]
+ type BitMask: Copy + Default + AsRef<[u8]> + AsMut<[u8]>;
+}
+
+impl<const LANES: usize> Sealed for LaneCount<LANES> {}
+
+impl SupportedLaneCount for LaneCount<1> {
+ type BitMask = [u8; 1];
+}
+impl SupportedLaneCount for LaneCount<2> {
+ type BitMask = [u8; 1];
+}
+impl SupportedLaneCount for LaneCount<4> {
+ type BitMask = [u8; 1];
+}
+impl SupportedLaneCount for LaneCount<8> {
+ type BitMask = [u8; 1];
+}
+impl SupportedLaneCount for LaneCount<16> {
+ type BitMask = [u8; 2];
+}
+impl SupportedLaneCount for LaneCount<32> {
+ type BitMask = [u8; 4];
+}
+impl SupportedLaneCount for LaneCount<64> {
+ type BitMask = [u8; 8];
+}
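[note] Because `SupportedLaneCount` is sealed, generic SIMD code bounds on it to restrict `LANES` to the powers of two listed above. A sketch of that pattern (not part of the diff; nightly `portable_simd` assumed):

    #![feature(portable_simd)]
    use core::simd::{LaneCount, Simd, SupportedLaneCount};

    // Compiles for N in {1, 2, 4, 8, 16, 32, 64}; Simd<u32, 3> is rejected
    // because LaneCount<3> does not implement SupportedLaneCount.
    fn double<const N: usize>(v: Simd<u32, N>) -> Simd<u32, N>
    where
        LaneCount<N>: SupportedLaneCount,
    {
        v + v
    }

    fn main() {
        assert_eq!(double(Simd::from_array([1u32, 2, 3, 4])).to_array(), [2, 4, 6, 8]);
    }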
diff --git a/library/portable-simd/crates/core_simd/src/lib.rs b/library/portable-simd/crates/core_simd/src/lib.rs
new file mode 100644
index 000000000..715f258f6
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/lib.rs
@@ -0,0 +1,22 @@
+#![no_std]
+#![feature(
+ convert_float_to_int,
+ decl_macro,
+ intra_doc_pointers,
+ platform_intrinsics,
+ repr_simd,
+ simd_ffi,
+ staged_api,
+ stdsimd
+)]
+#![cfg_attr(feature = "generic_const_exprs", feature(generic_const_exprs))]
+#![cfg_attr(feature = "generic_const_exprs", allow(incomplete_features))]
+#![warn(missing_docs)]
+#![deny(unsafe_op_in_unsafe_fn, clippy::undocumented_unsafe_blocks)]
+#![unstable(feature = "portable_simd", issue = "86656")]
+//! Portable SIMD module.
+
+#[path = "mod.rs"]
+mod core_simd;
+pub use self::core_simd::simd;
+pub use simd::*;
diff --git a/library/portable-simd/crates/core_simd/src/masks.rs b/library/portable-simd/crates/core_simd/src/masks.rs
new file mode 100644
index 000000000..c36c336d8
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/masks.rs
@@ -0,0 +1,595 @@
+//! Types and traits associated with masking lanes of vectors.
+#![allow(non_camel_case_types)]
+
+#[cfg_attr(
+ not(all(target_arch = "x86_64", target_feature = "avx512f")),
+ path = "masks/full_masks.rs"
+)]
+#[cfg_attr(
+ all(target_arch = "x86_64", target_feature = "avx512f"),
+ path = "masks/bitmask.rs"
+)]
+mod mask_impl;
+
+mod to_bitmask;
+pub use to_bitmask::ToBitMask;
+
+#[cfg(feature = "generic_const_exprs")]
+pub use to_bitmask::{bitmask_len, ToBitMaskArray};
+
+use crate::simd::{intrinsics, LaneCount, Simd, SimdElement, SimdPartialEq, SupportedLaneCount};
+use core::cmp::Ordering;
+use core::{fmt, mem};
+
+mod sealed {
+ use super::*;
+
+ /// Not only does this seal the `MaskElement` trait, but these functions prevent other traits
+ /// from bleeding into the parent bounds.
+ ///
+ /// For example, `eq` could be provided by requiring `MaskElement: PartialEq`, but that would
+ /// prevent us from ever removing that bound, or from implementing `MaskElement` on
+ /// non-`PartialEq` types in the future.
+ pub trait Sealed {
+ fn valid<const LANES: usize>(values: Simd<Self, LANES>) -> bool
+ where
+ LaneCount<LANES>: SupportedLaneCount,
+ Self: SimdElement;
+
+ fn eq(self, other: Self) -> bool;
+
+ const TRUE: Self;
+
+ const FALSE: Self;
+ }
+}
+use sealed::Sealed;
+
+/// Marker trait for types that may be used as SIMD mask elements.
+///
+/// # Safety
+/// Type must be a signed integer.
+pub unsafe trait MaskElement: SimdElement + Sealed {}
+
+macro_rules! impl_element {
+ { $ty:ty } => {
+ impl Sealed for $ty {
+ fn valid<const LANES: usize>(value: Simd<Self, LANES>) -> bool
+ where
+ LaneCount<LANES>: SupportedLaneCount,
+ {
+ (value.simd_eq(Simd::splat(0 as _)) | value.simd_eq(Simd::splat(-1 as _))).all()
+ }
+
+ fn eq(self, other: Self) -> bool { self == other }
+
+ const TRUE: Self = -1;
+ const FALSE: Self = 0;
+ }
+
+ // Safety: this is a valid mask element type
+ unsafe impl MaskElement for $ty {}
+ }
+}
+
+impl_element! { i8 }
+impl_element! { i16 }
+impl_element! { i32 }
+impl_element! { i64 }
+impl_element! { isize }
+
+/// A SIMD vector mask for `LANES` elements of width specified by `T`.
+///
+/// Masks represent boolean inclusion/exclusion on a per-lane basis.
+///
+/// The layout of this type is unspecified.
+#[repr(transparent)]
+pub struct Mask<T, const LANES: usize>(mask_impl::Mask<T, LANES>)
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount;
+
+impl<T, const LANES: usize> Copy for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+}
+
+impl<T, const LANES: usize> Clone for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ fn clone(&self) -> Self {
+ *self
+ }
+}
+
+impl<T, const LANES: usize> Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ /// Construct a mask by setting all lanes to the given value.
+ pub fn splat(value: bool) -> Self {
+ Self(mask_impl::Mask::splat(value))
+ }
+
+ /// Converts an array of bools to a SIMD mask.
+ pub fn from_array(array: [bool; LANES]) -> Self {
+ // SAFETY: Rust's bool has a layout of 1 byte (u8) with a value of
+ // true: 0b_0000_0001
+ // false: 0b_0000_0000
+ // Thus, an array of bools is also a valid array of bytes: [u8; N]
+ // This would be hypothetically valid as an "in-place" transmute,
+ // but these are "dependently-sized" types, so copy elision it is!
+ unsafe {
+ let bytes: [u8; LANES] = mem::transmute_copy(&array);
+ let bools: Simd<i8, LANES> =
+ intrinsics::simd_ne(Simd::from_array(bytes), Simd::splat(0u8));
+ Mask::from_int_unchecked(intrinsics::simd_cast(bools))
+ }
+ }
+
+ /// Converts a SIMD mask to an array of bools.
+ pub fn to_array(self) -> [bool; LANES] {
+ // This follows mostly the same logic as from_array.
+ // SAFETY: Rust's bool has a layout of 1 byte (u8) with a value of
+ // true: 0b_0000_0001
+ // false: 0b_0000_0000
+ // Thus, an array of bools is also a valid array of bytes: [u8; N]
+ // Since our masks are equal to integers where all bits are set,
+ // we can simply convert them to i8s, and then bitand them by the
+ // bitpattern for Rust's "true" bool.
+ // This would be hypothetically valid as an "in-place" transmute,
+ // but these are "dependently-sized" types, so copy elision it is!
+ unsafe {
+ let mut bytes: Simd<i8, LANES> = intrinsics::simd_cast(self.to_int());
+ bytes &= Simd::splat(1i8);
+ mem::transmute_copy(&bytes)
+ }
+ }
+
+ /// Converts a vector of integers to a mask, where 0 represents `false` and -1
+ /// represents `true`.
+ ///
+ /// # Safety
+ /// All lanes must be either 0 or -1.
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
+ // Safety: the caller must confirm this invariant
+ unsafe { Self(mask_impl::Mask::from_int_unchecked(value)) }
+ }
+
+ /// Converts a vector of integers to a mask, where 0 represents `false` and -1
+ /// represents `true`.
+ ///
+ /// # Panics
+ /// Panics if any lane is not 0 or -1.
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ pub fn from_int(value: Simd<T, LANES>) -> Self {
+ assert!(T::valid(value), "all values must be either 0 or -1",);
+ // Safety: the validity has been checked
+ unsafe { Self::from_int_unchecked(value) }
+ }
+
+ /// Converts the mask to a vector of integers, where 0 represents `false` and -1
+ /// represents `true`.
+ #[inline]
+ #[must_use = "method returns a new vector and does not mutate the original value"]
+ pub fn to_int(self) -> Simd<T, LANES> {
+ self.0.to_int()
+ }
+
+ /// Converts the mask to a mask of any other element width, keeping the lane count.
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ pub fn cast<U: MaskElement>(self) -> Mask<U, LANES> {
+ Mask(self.0.convert())
+ }
+
+ /// Tests the value of the specified lane.
+ ///
+ /// # Safety
+ /// `lane` must be less than `LANES`.
+ #[inline]
+ #[must_use = "method returns a new bool and does not mutate the original value"]
+ pub unsafe fn test_unchecked(&self, lane: usize) -> bool {
+ // Safety: the caller must confirm this invariant
+ unsafe { self.0.test_unchecked(lane) }
+ }
+
+ /// Tests the value of the specified lane.
+ ///
+ /// # Panics
+ /// Panics if `lane` is greater than or equal to the number of lanes in the vector.
+ #[inline]
+ #[must_use = "method returns a new bool and does not mutate the original value"]
+ pub fn test(&self, lane: usize) -> bool {
+ assert!(lane < LANES, "lane index out of range");
+ // Safety: the lane index has been checked
+ unsafe { self.test_unchecked(lane) }
+ }
+
+ /// Sets the value of the specified lane.
+ ///
+ /// # Safety
+ /// `lane` must be less than `LANES`.
+ #[inline]
+ pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
+ // Safety: the caller must confirm this invariant
+ unsafe {
+ self.0.set_unchecked(lane, value);
+ }
+ }
+
+ /// Sets the value of the specified lane.
+ ///
+ /// # Panics
+ /// Panics if `lane` is greater than or equal to the number of lanes in the vector.
+ #[inline]
+ pub fn set(&mut self, lane: usize, value: bool) {
+ assert!(lane < LANES, "lane index out of range");
+ // Safety: the lane index has been checked
+ unsafe {
+ self.set_unchecked(lane, value);
+ }
+ }
+
+ /// Returns true if any lane is set, or false otherwise.
+ #[inline]
+ #[must_use = "method returns a new bool and does not mutate the original value"]
+ pub fn any(self) -> bool {
+ self.0.any()
+ }
+
+ /// Returns true if all lanes are set, or false otherwise.
+ #[inline]
+ #[must_use = "method returns a new bool and does not mutate the original value"]
+ pub fn all(self) -> bool {
+ self.0.all()
+ }
+}
+
+// vector/array conversion
+impl<T, const LANES: usize> From<[bool; LANES]> for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ fn from(array: [bool; LANES]) -> Self {
+ Self::from_array(array)
+ }
+}
+
+impl<T, const LANES: usize> From<Mask<T, LANES>> for [bool; LANES]
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ fn from(vector: Mask<T, LANES>) -> Self {
+ vector.to_array()
+ }
+}
+
+impl<T, const LANES: usize> Default for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ #[inline]
+ #[must_use = "method returns a defaulted mask with all lanes set to false (0)"]
+ fn default() -> Self {
+ Self::splat(false)
+ }
+}
+
+impl<T, const LANES: usize> PartialEq for Mask<T, LANES>
+where
+ T: MaskElement + PartialEq,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ #[inline]
+ #[must_use = "method returns a new bool and does not mutate the original value"]
+ fn eq(&self, other: &Self) -> bool {
+ self.0 == other.0
+ }
+}
+
+impl<T, const LANES: usize> PartialOrd for Mask<T, LANES>
+where
+ T: MaskElement + PartialOrd,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ #[inline]
+ #[must_use = "method returns a new Ordering and does not mutate the original value"]
+ fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+ self.0.partial_cmp(&other.0)
+ }
+}
+
+impl<T, const LANES: usize> fmt::Debug for Mask<T, LANES>
+where
+ T: MaskElement + fmt::Debug,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ f.debug_list()
+ .entries((0..LANES).map(|lane| self.test(lane)))
+ .finish()
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitAnd for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ type Output = Self;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn bitand(self, rhs: Self) -> Self {
+ Self(self.0 & rhs.0)
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitAnd<bool> for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ type Output = Self;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn bitand(self, rhs: bool) -> Self {
+ self & Self::splat(rhs)
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitAnd<Mask<T, LANES>> for bool
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ type Output = Mask<T, LANES>;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn bitand(self, rhs: Mask<T, LANES>) -> Mask<T, LANES> {
+ Mask::splat(self) & rhs
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitOr for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ type Output = Self;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn bitor(self, rhs: Self) -> Self {
+ Self(self.0 | rhs.0)
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitOr<bool> for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ type Output = Self;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn bitor(self, rhs: bool) -> Self {
+ self | Self::splat(rhs)
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitOr<Mask<T, LANES>> for bool
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ type Output = Mask<T, LANES>;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn bitor(self, rhs: Mask<T, LANES>) -> Mask<T, LANES> {
+ Mask::splat(self) | rhs
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitXor for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ type Output = Self;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn bitxor(self, rhs: Self) -> Self::Output {
+ Self(self.0 ^ rhs.0)
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitXor<bool> for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ type Output = Self;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn bitxor(self, rhs: bool) -> Self::Output {
+ self ^ Self::splat(rhs)
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitXor<Mask<T, LANES>> for bool
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ type Output = Mask<T, LANES>;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn bitxor(self, rhs: Mask<T, LANES>) -> Self::Output {
+ Mask::splat(self) ^ rhs
+ }
+}
+
+impl<T, const LANES: usize> core::ops::Not for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ type Output = Mask<T, LANES>;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn not(self) -> Self::Output {
+ Self(!self.0)
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitAndAssign for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ #[inline]
+ fn bitand_assign(&mut self, rhs: Self) {
+ self.0 = self.0 & rhs.0;
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitAndAssign<bool> for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ #[inline]
+ fn bitand_assign(&mut self, rhs: bool) {
+ *self &= Self::splat(rhs);
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitOrAssign for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ #[inline]
+ fn bitor_assign(&mut self, rhs: Self) {
+ self.0 = self.0 | rhs.0;
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitOrAssign<bool> for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ #[inline]
+ fn bitor_assign(&mut self, rhs: bool) {
+ *self |= Self::splat(rhs);
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitXorAssign for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ #[inline]
+ fn bitxor_assign(&mut self, rhs: Self) {
+ self.0 = self.0 ^ rhs.0;
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitXorAssign<bool> for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ #[inline]
+ fn bitxor_assign(&mut self, rhs: bool) {
+ *self ^= Self::splat(rhs);
+ }
+}
+
+/// A mask for SIMD vectors with eight elements of 8 bits.
+pub type mask8x8 = Mask<i8, 8>;
+
+/// A mask for SIMD vectors with 16 elements of 8 bits.
+pub type mask8x16 = Mask<i8, 16>;
+
+/// A mask for SIMD vectors with 32 elements of 8 bits.
+pub type mask8x32 = Mask<i8, 32>;
+
+/// A mask for SIMD vectors with 64 elements of 8 bits.
+pub type mask8x64 = Mask<i8, 64>;
+
+/// A mask for SIMD vectors with four elements of 16 bits.
+pub type mask16x4 = Mask<i16, 4>;
+
+/// A mask for SIMD vectors with eight elements of 16 bits.
+pub type mask16x8 = Mask<i16, 8>;
+
+/// A mask for SIMD vectors with 16 elements of 16 bits.
+pub type mask16x16 = Mask<i16, 16>;
+
+/// A mask for SIMD vectors with 32 elements of 16 bits.
+pub type mask16x32 = Mask<i16, 32>;
+
+/// A mask for SIMD vectors with two elements of 32 bits.
+pub type mask32x2 = Mask<i32, 2>;
+
+/// A mask for SIMD vectors with four elements of 32 bits.
+pub type mask32x4 = Mask<i32, 4>;
+
+/// A mask for SIMD vectors with eight elements of 32 bits.
+pub type mask32x8 = Mask<i32, 8>;
+
+/// A mask for SIMD vectors with 16 elements of 32 bits.
+pub type mask32x16 = Mask<i32, 16>;
+
+/// A mask for SIMD vectors with two elements of 64 bits.
+pub type mask64x2 = Mask<i64, 2>;
+
+/// A mask for SIMD vectors with four elements of 64 bits.
+pub type mask64x4 = Mask<i64, 4>;
+
+/// A mask for SIMD vectors with eight elements of 64 bits.
+pub type mask64x8 = Mask<i64, 8>;
+
+/// A mask for SIMD vectors with two elements of pointer width.
+pub type masksizex2 = Mask<isize, 2>;
+
+/// A mask for SIMD vectors with four elements of pointer width.
+pub type masksizex4 = Mask<isize, 4>;
+
+/// A mask for SIMD vectors with eight elements of pointer width.
+pub type masksizex8 = Mask<isize, 8>;
+
+macro_rules! impl_from {
+ { $from:ty => $($to:ty),* } => {
+ $(
+ impl<const LANES: usize> From<Mask<$from, LANES>> for Mask<$to, LANES>
+ where
+ LaneCount<LANES>: SupportedLaneCount,
+ {
+ fn from(value: Mask<$from, LANES>) -> Self {
+ value.cast()
+ }
+ }
+ )*
+ }
+}
+impl_from! { i8 => i16, i32, i64, isize }
+impl_from! { i16 => i32, i64, isize, i8 }
+impl_from! { i32 => i64, isize, i8, i16 }
+impl_from! { i64 => isize, i8, i16, i32 }
+impl_from! { isize => i8, i16, i32, i64 }
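[note] A brief sketch of the public `Mask` API defined above (not part of the diff; nightly `portable_simd` assumed). Masks come from bool arrays or from lanewise comparisons, and the bitwise operators work lanewise:

    #![feature(portable_simd)]
    use core::simd::{mask32x4, Simd, SimdPartialEq};

    fn main() {
        let m = mask32x4::from_array([true, false, true, false]);
        assert!(m.any());
        assert!(!m.all());
        // The integer representation uses -1 for true and 0 for false.
        assert_eq!(m.to_int().to_array(), [-1, 0, -1, 0]);

        // Comparisons on Simd<i32, 4> produce the same mask type.
        let cmp = Simd::from_array([1i32, 2, 3, 4]).simd_eq(Simd::splat(3));
        assert_eq!((m & cmp).to_array(), [false, false, true, false]);
        // bool operands are splatted, per the BitAnd/BitOr/BitXor<bool> impls.
        assert_eq!((m | true).to_array(), [true, true, true, true]);
    }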
diff --git a/library/portable-simd/crates/core_simd/src/masks/bitmask.rs b/library/portable-simd/crates/core_simd/src/masks/bitmask.rs
new file mode 100644
index 000000000..365ecc0a3
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/masks/bitmask.rs
@@ -0,0 +1,246 @@
+#![allow(unused_imports)]
+use super::MaskElement;
+use crate::simd::intrinsics;
+use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask};
+use core::marker::PhantomData;
+
+/// A mask where each lane is represented by a single bit.
+#[repr(transparent)]
+pub struct Mask<T, const LANES: usize>(
+ <LaneCount<LANES> as SupportedLaneCount>::BitMask,
+ PhantomData<T>,
+)
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount;
+
+impl<T, const LANES: usize> Copy for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+}
+
+impl<T, const LANES: usize> Clone for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ fn clone(&self) -> Self {
+ *self
+ }
+}
+
+impl<T, const LANES: usize> PartialEq for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ fn eq(&self, other: &Self) -> bool {
+ self.0.as_ref() == other.0.as_ref()
+ }
+}
+
+impl<T, const LANES: usize> PartialOrd for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
+ self.0.as_ref().partial_cmp(other.0.as_ref())
+ }
+}
+
+impl<T, const LANES: usize> Eq for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+}
+
+impl<T, const LANES: usize> Ord for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ fn cmp(&self, other: &Self) -> core::cmp::Ordering {
+ self.0.as_ref().cmp(other.0.as_ref())
+ }
+}
+
+impl<T, const LANES: usize> Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ pub fn splat(value: bool) -> Self {
+ let mut mask = <LaneCount<LANES> as SupportedLaneCount>::BitMask::default();
+ if value {
+ mask.as_mut().fill(u8::MAX)
+ } else {
+ mask.as_mut().fill(u8::MIN)
+ }
+ if LANES % 8 > 0 {
+ *mask.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - LANES % 8);
+ }
+ Self(mask, PhantomData)
+ }
+
+ #[inline]
+ #[must_use = "method returns a new bool and does not mutate the original value"]
+ pub unsafe fn test_unchecked(&self, lane: usize) -> bool {
+ (self.0.as_ref()[lane / 8] >> (lane % 8)) & 0x1 > 0
+ }
+
+ #[inline]
+ pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
+ unsafe {
+ self.0.as_mut()[lane / 8] ^= ((value ^ self.test_unchecked(lane)) as u8) << (lane % 8)
+ }
+ }
+
+ #[inline]
+ #[must_use = "method returns a new vector and does not mutate the original value"]
+ pub fn to_int(self) -> Simd<T, LANES> {
+ unsafe {
+ intrinsics::simd_select_bitmask(self.0, Simd::splat(T::TRUE), Simd::splat(T::FALSE))
+ }
+ }
+
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
+ unsafe { Self(intrinsics::simd_bitmask(value), PhantomData) }
+ }
+
+ #[cfg(feature = "generic_const_exprs")]
+ #[inline]
+ #[must_use = "method returns a new array and does not mutate the original value"]
+ pub fn to_bitmask_array<const N: usize>(self) -> [u8; N] {
+ assert!(core::mem::size_of::<Self>() == N);
+
+ // Safety: converting an integer to an array of bytes of the same size is safe
+ unsafe { core::mem::transmute_copy(&self.0) }
+ }
+
+ #[cfg(feature = "generic_const_exprs")]
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ pub fn from_bitmask_array<const N: usize>(bitmask: [u8; N]) -> Self {
+ assert!(core::mem::size_of::<Self>() == N);
+
+ // Safety: converting an array of bytes to an integer of the same size is safe
+ Self(unsafe { core::mem::transmute_copy(&bitmask) }, PhantomData)
+ }
+
+ #[inline]
+ pub fn to_bitmask_integer<U>(self) -> U
+ where
+ super::Mask<T, LANES>: ToBitMask<BitMask = U>,
+ {
+ // Safety: these are the same types
+ unsafe { core::mem::transmute_copy(&self.0) }
+ }
+
+ #[inline]
+ pub fn from_bitmask_integer<U>(bitmask: U) -> Self
+ where
+ super::Mask<T, LANES>: ToBitMask<BitMask = U>,
+ {
+ // Safety: these are the same types
+ unsafe { Self(core::mem::transmute_copy(&bitmask), PhantomData) }
+ }
+
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ pub fn convert<U>(self) -> Mask<U, LANES>
+ where
+ U: MaskElement,
+ {
+ // Safety: bitmask layout does not depend on the element width
+ unsafe { core::mem::transmute_copy(&self) }
+ }
+
+ #[inline]
+ #[must_use = "method returns a new bool and does not mutate the original value"]
+ pub fn any(self) -> bool {
+ self != Self::splat(false)
+ }
+
+ #[inline]
+ #[must_use = "method returns a new bool and does not mutate the original value"]
+ pub fn all(self) -> bool {
+ self == Self::splat(true)
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitAnd for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+ <LaneCount<LANES> as SupportedLaneCount>::BitMask: AsRef<[u8]> + AsMut<[u8]>,
+{
+ type Output = Self;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn bitand(mut self, rhs: Self) -> Self {
+ for (l, r) in self.0.as_mut().iter_mut().zip(rhs.0.as_ref().iter()) {
+ *l &= r;
+ }
+ self
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitOr for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+ <LaneCount<LANES> as SupportedLaneCount>::BitMask: AsRef<[u8]> + AsMut<[u8]>,
+{
+ type Output = Self;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn bitor(mut self, rhs: Self) -> Self {
+ for (l, r) in self.0.as_mut().iter_mut().zip(rhs.0.as_ref().iter()) {
+ *l |= r;
+ }
+ self
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitXor for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ type Output = Self;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn bitxor(mut self, rhs: Self) -> Self::Output {
+ for (l, r) in self.0.as_mut().iter_mut().zip(rhs.0.as_ref().iter()) {
+ *l ^= r;
+ }
+ self
+ }
+}
+
+impl<T, const LANES: usize> core::ops::Not for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ type Output = Self;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn not(mut self) -> Self::Output {
+ for x in self.0.as_mut() {
+ *x = !*x;
+ }
+ if LANES % 8 > 0 {
+ *self.0.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - LANES % 8);
+ }
+ self
+ }
+}
diff --git a/library/portable-simd/crates/core_simd/src/masks/full_masks.rs b/library/portable-simd/crates/core_simd/src/masks/full_masks.rs
new file mode 100644
index 000000000..adf0fcbea
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/masks/full_masks.rs
@@ -0,0 +1,323 @@
+//! Masks that take up full SIMD vector registers.
+
+use super::MaskElement;
+use crate::simd::intrinsics;
+use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask};
+
+#[cfg(feature = "generic_const_exprs")]
+use crate::simd::ToBitMaskArray;
+
+#[repr(transparent)]
+pub struct Mask<T, const LANES: usize>(Simd<T, LANES>)
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount;
+
+impl<T, const LANES: usize> Copy for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+}
+
+impl<T, const LANES: usize> Clone for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn clone(&self) -> Self {
+ *self
+ }
+}
+
+impl<T, const LANES: usize> PartialEq for Mask<T, LANES>
+where
+ T: MaskElement + PartialEq,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ fn eq(&self, other: &Self) -> bool {
+ self.0.eq(&other.0)
+ }
+}
+
+impl<T, const LANES: usize> PartialOrd for Mask<T, LANES>
+where
+ T: MaskElement + PartialOrd,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
+ self.0.partial_cmp(&other.0)
+ }
+}
+
+impl<T, const LANES: usize> Eq for Mask<T, LANES>
+where
+ T: MaskElement + Eq,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+}
+
+impl<T, const LANES: usize> Ord for Mask<T, LANES>
+where
+ T: MaskElement + Ord,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ fn cmp(&self, other: &Self) -> core::cmp::Ordering {
+ self.0.cmp(&other.0)
+ }
+}
+
+// Used for bitmask bit order workaround
+pub(crate) trait ReverseBits {
+ // Reverse the least significant `n` bits of `self`.
+ // (Remaining bits must be 0.)
+ fn reverse_bits(self, n: usize) -> Self;
+}
+
+macro_rules! impl_reverse_bits {
+ { $($int:ty),* } => {
+ $(
+ impl ReverseBits for $int {
+ #[inline(always)]
+ fn reverse_bits(self, n: usize) -> Self {
+ let rev = <$int>::reverse_bits(self);
+ let bitsize = core::mem::size_of::<$int>() * 8;
+ if n < bitsize {
+ // Shift things back to the right
+ rev >> (bitsize - n)
+ } else {
+ rev
+ }
+ }
+ }
+ )*
+ }
+}
+
+impl_reverse_bits! { u8, u16, u32, u64 }
+
+impl<T, const LANES: usize> Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ pub fn splat(value: bool) -> Self {
+ Self(Simd::splat(if value { T::TRUE } else { T::FALSE }))
+ }
+
+ #[inline]
+ #[must_use = "method returns a new bool and does not mutate the original value"]
+ pub unsafe fn test_unchecked(&self, lane: usize) -> bool {
+ T::eq(self.0[lane], T::TRUE)
+ }
+
+ #[inline]
+ pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
+ self.0[lane] = if value { T::TRUE } else { T::FALSE }
+ }
+
+ #[inline]
+ #[must_use = "method returns a new vector and does not mutate the original value"]
+ pub fn to_int(self) -> Simd<T, LANES> {
+ self.0
+ }
+
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
+ Self(value)
+ }
+
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ pub fn convert<U>(self) -> Mask<U, LANES>
+ where
+ U: MaskElement,
+ {
+ // Safety: masks are simply integer vectors of 0 and -1, and we can cast the element type.
+ unsafe { Mask(intrinsics::simd_cast(self.0)) }
+ }
+
+ #[cfg(feature = "generic_const_exprs")]
+ #[inline]
+ #[must_use = "method returns a new array and does not mutate the original value"]
+ pub fn to_bitmask_array<const N: usize>(self) -> [u8; N]
+ where
+ super::Mask<T, LANES>: ToBitMaskArray,
+ [(); <super::Mask<T, LANES> as ToBitMaskArray>::BYTES]: Sized,
+ {
+ assert_eq!(<super::Mask<T, LANES> as ToBitMaskArray>::BYTES, N);
+
+ // Safety: N is the correct bitmask size
+ unsafe {
+ // Compute the bitmask
+ let bitmask: [u8; <super::Mask<T, LANES> as ToBitMaskArray>::BYTES] =
+ intrinsics::simd_bitmask(self.0);
+
+ // Transmute to the return type, previously asserted to be the same size
+ let mut bitmask: [u8; N] = core::mem::transmute_copy(&bitmask);
+
+ // LLVM assumes bit order should match endianness
+ if cfg!(target_endian = "big") {
+ for x in bitmask.as_mut() {
+ *x = x.reverse_bits();
+ }
+ };
+
+ bitmask
+ }
+ }
+
+ #[cfg(feature = "generic_const_exprs")]
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ pub fn from_bitmask_array<const N: usize>(mut bitmask: [u8; N]) -> Self
+ where
+ super::Mask<T, LANES>: ToBitMaskArray,
+ [(); <super::Mask<T, LANES> as ToBitMaskArray>::BYTES]: Sized,
+ {
+ assert_eq!(<super::Mask<T, LANES> as ToBitMaskArray>::BYTES, N);
+
+ // Safety: N is the correct bitmask size
+ unsafe {
+ // LLVM assumes bit order should match endianness
+ if cfg!(target_endian = "big") {
+ for x in bitmask.as_mut() {
+ *x = x.reverse_bits();
+ }
+ }
+
+ // Transmute to the bitmask type, previously asserted to be the same size
+ let bitmask: [u8; <super::Mask<T, LANES> as ToBitMaskArray>::BYTES] =
+ core::mem::transmute_copy(&bitmask);
+
+ // Compute the regular mask
+ Self::from_int_unchecked(intrinsics::simd_select_bitmask(
+ bitmask,
+ Self::splat(true).to_int(),
+ Self::splat(false).to_int(),
+ ))
+ }
+ }
+
+ #[inline]
+ pub(crate) fn to_bitmask_integer<U: ReverseBits>(self) -> U
+ where
+ super::Mask<T, LANES>: ToBitMask<BitMask = U>,
+ {
+ // Safety: U is required to be the appropriate bitmask type
+ let bitmask: U = unsafe { intrinsics::simd_bitmask(self.0) };
+
+ // LLVM assumes bit order should match endianness
+ if cfg!(target_endian = "big") {
+ bitmask.reverse_bits(LANES)
+ } else {
+ bitmask
+ }
+ }
+
+ #[inline]
+ pub(crate) fn from_bitmask_integer<U: ReverseBits>(bitmask: U) -> Self
+ where
+ super::Mask<T, LANES>: ToBitMask<BitMask = U>,
+ {
+ // LLVM assumes bit order should match endianness
+ let bitmask = if cfg!(target_endian = "big") {
+ bitmask.reverse_bits(LANES)
+ } else {
+ bitmask
+ };
+
+ // Safety: U is required to be the appropriate bitmask type
+ unsafe {
+ Self::from_int_unchecked(intrinsics::simd_select_bitmask(
+ bitmask,
+ Self::splat(true).to_int(),
+ Self::splat(false).to_int(),
+ ))
+ }
+ }
+
+ #[inline]
+ #[must_use = "method returns a new bool and does not mutate the original value"]
+ pub fn any(self) -> bool {
+ // Safety: use `self` as an integer vector
+ unsafe { intrinsics::simd_reduce_any(self.to_int()) }
+ }
+
+ #[inline]
+ #[must_use = "method returns a new vector and does not mutate the original value"]
+ pub fn all(self) -> bool {
+ // Safety: use `self` as an integer vector
+ unsafe { intrinsics::simd_reduce_all(self.to_int()) }
+ }
+}
+
+impl<T, const LANES: usize> core::convert::From<Mask<T, LANES>> for Simd<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ fn from(value: Mask<T, LANES>) -> Self {
+ value.0
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitAnd for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ type Output = Self;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn bitand(self, rhs: Self) -> Self {
+ // Safety: `self` is an integer vector
+ unsafe { Self(intrinsics::simd_and(self.0, rhs.0)) }
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitOr for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ type Output = Self;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn bitor(self, rhs: Self) -> Self {
+ // Safety: `self` is an integer vector
+ unsafe { Self(intrinsics::simd_or(self.0, rhs.0)) }
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitXor for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ type Output = Self;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn bitxor(self, rhs: Self) -> Self {
+ // Safety: `self` is an integer vector
+ unsafe { Self(intrinsics::simd_xor(self.0, rhs.0)) }
+ }
+}
+
+impl<T, const LANES: usize> core::ops::Not for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ type Output = Self;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn not(self) -> Self::Output {
+ Self::splat(true) ^ self
+ }
+}
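[note] The `ReverseBits` helper only reverses the low `n` bits of a value whose remaining bits are zero, which is all the big-endian bitmask fixup needs. An illustrative scalar sketch on `u8` (not part of the diff):

    // Mirrors ReverseBits::reverse_bits for u8 (illustrative only).
    fn reverse_low_bits(x: u8, n: usize) -> u8 {
        let rev = x.reverse_bits();
        if n < 8 { rev >> (8 - n) } else { rev }
    }

    fn main() {
        // Four lanes: 0b0101 becomes 0b1010.
        assert_eq!(reverse_low_bits(0b0101, 4), 0b1010);
        // Eight lanes: the whole byte is reversed.
        assert_eq!(reverse_low_bits(0b0000_0001, 8), 0b1000_0000);
    }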
diff --git a/library/portable-simd/crates/core_simd/src/masks/to_bitmask.rs b/library/portable-simd/crates/core_simd/src/masks/to_bitmask.rs
new file mode 100644
index 000000000..65d3ce9be
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/masks/to_bitmask.rs
@@ -0,0 +1,93 @@
+use super::{mask_impl, Mask, MaskElement};
+use crate::simd::{LaneCount, SupportedLaneCount};
+
+mod sealed {
+ pub trait Sealed {}
+}
+pub use sealed::Sealed;
+
+impl<T, const LANES: usize> Sealed for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+}
+
+/// Converts masks to and from integer bitmasks.
+///
+/// Each bit of the bitmask corresponds to a mask lane, starting with the LSB.
+pub trait ToBitMask: Sealed {
+ /// The integer bitmask type.
+ type BitMask;
+
+ /// Converts a mask to a bitmask.
+ fn to_bitmask(self) -> Self::BitMask;
+
+ /// Converts a bitmask to a mask.
+ fn from_bitmask(bitmask: Self::BitMask) -> Self;
+}
+
+/// Converts masks to and from byte array bitmasks.
+///
+/// Each bit of the bitmask corresponds to a mask lane, starting with the LSB of the first byte.
+#[cfg(feature = "generic_const_exprs")]
+pub trait ToBitMaskArray: Sealed {
+ /// The length of the bitmask array.
+ const BYTES: usize;
+
+ /// Converts a mask to a bitmask.
+ fn to_bitmask_array(self) -> [u8; Self::BYTES];
+
+ /// Converts a bitmask to a mask.
+ fn from_bitmask_array(bitmask: [u8; Self::BYTES]) -> Self;
+}
+
+macro_rules! impl_integer_intrinsic {
+ { $(impl ToBitMask<BitMask=$int:ty> for Mask<_, $lanes:literal>)* } => {
+ $(
+ impl<T: MaskElement> ToBitMask for Mask<T, $lanes> {
+ type BitMask = $int;
+
+ fn to_bitmask(self) -> $int {
+ self.0.to_bitmask_integer()
+ }
+
+ fn from_bitmask(bitmask: $int) -> Self {
+ Self(mask_impl::Mask::from_bitmask_integer(bitmask))
+ }
+ }
+ )*
+ }
+}
+
+impl_integer_intrinsic! {
+ impl ToBitMask<BitMask=u8> for Mask<_, 1>
+ impl ToBitMask<BitMask=u8> for Mask<_, 2>
+ impl ToBitMask<BitMask=u8> for Mask<_, 4>
+ impl ToBitMask<BitMask=u8> for Mask<_, 8>
+ impl ToBitMask<BitMask=u16> for Mask<_, 16>
+ impl ToBitMask<BitMask=u32> for Mask<_, 32>
+ impl ToBitMask<BitMask=u64> for Mask<_, 64>
+}
+
+/// Returns the minimum number of bytes in a bitmask with `lanes` lanes.
+#[cfg(feature = "generic_const_exprs")]
+pub const fn bitmask_len(lanes: usize) -> usize {
+ (lanes + 7) / 8
+}
+
+#[cfg(feature = "generic_const_exprs")]
+impl<T: MaskElement, const LANES: usize> ToBitMaskArray for Mask<T, LANES>
+where
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ const BYTES: usize = bitmask_len(LANES);
+
+ fn to_bitmask_array(self) -> [u8; Self::BYTES] {
+ self.0.to_bitmask_array()
+ }
+
+ fn from_bitmask_array(bitmask: [u8; Self::BYTES]) -> Self {
+ Mask(mask_impl::Mask::from_bitmask_array(bitmask))
+ }
+}
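[note] A round-trip sketch of `ToBitMask` (not part of the diff; nightly `portable_simd` assumed); lane 0 maps to the least significant bit, per the trait docs:

    #![feature(portable_simd)]
    use core::simd::{mask8x8, ToBitMask};

    fn main() {
        let m = mask8x8::from_array([true, false, false, true, false, false, false, true]);
        let bits: u8 = m.to_bitmask(); // lanes 0, 3 and 7 are set
        assert_eq!(bits, 0b1000_1001);
        assert_eq!(mask8x8::from_bitmask(bits), m);
    }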
diff --git a/library/portable-simd/crates/core_simd/src/mod.rs b/library/portable-simd/crates/core_simd/src/mod.rs
new file mode 100644
index 000000000..b472aa3ab
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/mod.rs
@@ -0,0 +1,32 @@
+#[macro_use]
+mod swizzle;
+
+pub(crate) mod intrinsics;
+
+#[cfg(feature = "generic_const_exprs")]
+mod to_bytes;
+
+mod elements;
+mod eq;
+mod fmt;
+mod iter;
+mod lane_count;
+mod masks;
+mod ops;
+mod ord;
+mod select;
+mod vector;
+mod vendor;
+
+#[doc = include_str!("core_simd_docs.md")]
+pub mod simd {
+ pub(crate) use crate::core_simd::intrinsics;
+
+ pub use crate::core_simd::elements::*;
+ pub use crate::core_simd::eq::*;
+ pub use crate::core_simd::lane_count::{LaneCount, SupportedLaneCount};
+ pub use crate::core_simd::masks::*;
+ pub use crate::core_simd::ord::*;
+ pub use crate::core_simd::swizzle::*;
+ pub use crate::core_simd::vector::*;
+}
diff --git a/library/portable-simd/crates/core_simd/src/ops.rs b/library/portable-simd/crates/core_simd/src/ops.rs
new file mode 100644
index 000000000..5a077a469
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/ops.rs
@@ -0,0 +1,254 @@
+use crate::simd::{LaneCount, Simd, SimdElement, SimdPartialEq, SupportedLaneCount};
+use core::ops::{Add, Mul};
+use core::ops::{BitAnd, BitOr, BitXor};
+use core::ops::{Div, Rem, Sub};
+use core::ops::{Shl, Shr};
+
+mod assign;
+mod deref;
+mod unary;
+
+impl<I, T, const LANES: usize> core::ops::Index<I> for Simd<T, LANES>
+where
+ T: SimdElement,
+ LaneCount<LANES>: SupportedLaneCount,
+ I: core::slice::SliceIndex<[T]>,
+{
+ type Output = I::Output;
+ fn index(&self, index: I) -> &Self::Output {
+ &self.as_array()[index]
+ }
+}
+
+impl<I, T, const LANES: usize> core::ops::IndexMut<I> for Simd<T, LANES>
+where
+ T: SimdElement,
+ LaneCount<LANES>: SupportedLaneCount,
+ I: core::slice::SliceIndex<[T]>,
+{
+ fn index_mut(&mut self, index: I) -> &mut Self::Output {
+ &mut self.as_mut_array()[index]
+ }
+}
+
+macro_rules! unsafe_base {
+ ($lhs:ident, $rhs:ident, {$simd_call:ident}, $($_:tt)*) => {
+ // Safety: $lhs and $rhs are vectors
+ unsafe { $crate::simd::intrinsics::$simd_call($lhs, $rhs) }
+ };
+}
+
+/// SAFETY: This macro should not be used for anything except Shl or Shr, and passed the appropriate shift intrinsic.
+/// It handles performing a bitand in addition to calling the shift operator, so that the result
+/// is well-defined: LLVM can return a poison value from shl, lshr, or ashr when rhs >= <Int>::BITS.
+/// At worst, this will maybe add another instruction and cycle,
+/// at best, it may open up more optimization opportunities,
+/// or simply be elided entirely, especially for SIMD ISAs which default to this.
+///
+// FIXME: Consider implementing this in cg_llvm instead?
+// cg_clif defaults to this, and scalar MIR shifts also default to wrapping
+macro_rules! wrap_bitshift {
+ ($lhs:ident, $rhs:ident, {$simd_call:ident}, $int:ident) => {
+ #[allow(clippy::suspicious_arithmetic_impl)]
+ // Safety: $lhs and the bitand result are vectors
+ unsafe {
+ $crate::simd::intrinsics::$simd_call(
+ $lhs,
+ $rhs.bitand(Simd::splat(<$int>::BITS as $int - 1)),
+ )
+ }
+ };
+}
+
+/// SAFETY: This macro must only be used to impl Div or Rem and given the matching intrinsic.
+/// It guards against LLVM's UB conditions for integer div or rem using masks and selects,
+/// thus guaranteeing a Rust value returns instead.
+///
+/// | | LLVM | Rust
+/// | :--------------: | :--- | :----------
+/// | N {/,%} 0 | UB | panic!()
+/// | <$int>::MIN / -1 | UB | <$int>::MIN
+/// | <$int>::MIN % -1 | UB | 0
+///
+macro_rules! int_divrem_guard {
+ ( $lhs:ident,
+ $rhs:ident,
+ { const PANIC_ZERO: &'static str = $zero:literal;
+ $simd_call:ident
+ },
+ $int:ident ) => {
+ if $rhs.simd_eq(Simd::splat(0 as _)).any() {
+ panic!($zero);
+ } else {
+ // Prevent otherwise-UB overflow on the MIN / -1 case.
+ let rhs = if <$int>::MIN != 0 {
+ // This should, at worst, optimize to a few branchless logical ops
+ // Ideally, this entire conditional should evaporate
+ // Fire LLVM and implement those manually if it doesn't get the hint
+ ($lhs.simd_eq(Simd::splat(<$int>::MIN))
+ // type inference can break here, so cut an SInt to size
+ & $rhs.simd_eq(Simd::splat(-1i64 as _)))
+ .select(Simd::splat(1 as _), $rhs)
+ } else {
+ // Nice base case to make it easy to const-fold away the other branch.
+ $rhs
+ };
+ // Safety: $lhs and rhs are vectors
+ unsafe { $crate::simd::intrinsics::$simd_call($lhs, rhs) }
+ }
+ };
+}
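+
+// A rough sketch of the guarantees in the table above (illustrative only,
+// assuming 4-lane `i32` vectors): the MIN / -1 lane yields MIN instead of UB,
+// while a divisor with any zero lane panics.
+//
+//     let lhs = Simd::from_array([i32::MIN, 7, -7, 1]);
+//     let rhs = Simd::from_array([-1, 2, 2, 1]);
+//     assert_eq!(lhs / rhs, Simd::from_array([i32::MIN, 3, -3, 1]));
+//     assert_eq!(lhs % rhs, Simd::from_array([0, 1, -1, 0]));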
+
+macro_rules! for_base_types {
+ ( T = ($($scalar:ident),*);
+ type Lhs = Simd<T, N>;
+ type Rhs = Simd<T, N>;
+ type Output = $out:ty;
+
+ impl $op:ident::$call:ident {
+ $macro_impl:ident $inner:tt
+ }) => {
+ $(
+ impl<const N: usize> $op<Self> for Simd<$scalar, N>
+ where
+ $scalar: SimdElement,
+ LaneCount<N>: SupportedLaneCount,
+ {
+ type Output = $out;
+
+ #[inline]
+ #[must_use = "operator returns a new vector without mutating the inputs"]
+ fn $call(self, rhs: Self) -> Self::Output {
+ $macro_impl!(self, rhs, $inner, $scalar)
+ }
+ })*
+ }
+}
+
+// A "TokenTree muncher": takes a set of scalar types `T = {};`
+// type parameters for the ops it implements, `Op::fn` names,
+// and a macro that expands into an expr, substituting in an intrinsic.
+// It passes that to for_base_types, which expands an impl for the types,
+// using the expanded expr in the function, and recurses with itself.
+//
+// tl;dr impls a set of ops::{Traits} for a set of types
+macro_rules! for_base_ops {
+ (
+ T = $types:tt;
+ type Lhs = Simd<T, N>;
+ type Rhs = Simd<T, N>;
+ type Output = $out:ident;
+ impl $op:ident::$call:ident
+ $inner:tt
+ $($rest:tt)*
+ ) => {
+ for_base_types! {
+ T = $types;
+ type Lhs = Simd<T, N>;
+ type Rhs = Simd<T, N>;
+ type Output = $out;
+ impl $op::$call
+ $inner
+ }
+ for_base_ops! {
+ T = $types;
+ type Lhs = Simd<T, N>;
+ type Rhs = Simd<T, N>;
+ type Output = $out;
+ $($rest)*
+ }
+ };
+ ($($done:tt)*) => {
+ // Done.
+ }
+}
+
+// Integers can always accept add, mul, sub, bitand, bitor, and bitxor.
+// For all of these operations, simd_* intrinsics apply wrapping logic.
+for_base_ops! {
+ T = (i8, i16, i32, i64, isize, u8, u16, u32, u64, usize);
+ type Lhs = Simd<T, N>;
+ type Rhs = Simd<T, N>;
+ type Output = Self;
+
+ impl Add::add {
+ unsafe_base { simd_add }
+ }
+
+ impl Mul::mul {
+ unsafe_base { simd_mul }
+ }
+
+ impl Sub::sub {
+ unsafe_base { simd_sub }
+ }
+
+ impl BitAnd::bitand {
+ unsafe_base { simd_and }
+ }
+
+ impl BitOr::bitor {
+ unsafe_base { simd_or }
+ }
+
+ impl BitXor::bitxor {
+ unsafe_base { simd_xor }
+ }
+
+ impl Div::div {
+ int_divrem_guard {
+ const PANIC_ZERO: &'static str = "attempt to divide by zero";
+ simd_div
+ }
+ }
+
+ impl Rem::rem {
+ int_divrem_guard {
+ const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero";
+ simd_rem
+ }
+ }
+
+ // The only question is how to handle shifts >= <Int>::BITS?
+ // Our current solution uses wrapping logic.
+ impl Shl::shl {
+ wrap_bitshift { simd_shl }
+ }
+
+ impl Shr::shr {
+ wrap_bitshift {
+ // This automatically monomorphizes to lshr or ashr, depending,
+ // so it's fine to use it for both UInts and SInts.
+ simd_shr
+ }
+ }
+}
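+
+// The simd_* intrinsics wrap on overflow, so, as a rough sketch (illustrative
+// only, assuming 4-lane `u8` vectors), addition past `MAX` wraps around even
+// in debug builds:
+//
+//     let max = Simd::<u8, 4>::splat(u8::MAX);
+//     assert_eq!(max + Simd::splat(1), Simd::splat(0));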
+
+// We don't need any special precautions here:
+// Floats always accept arithmetic ops, but may become NaN.
+for_base_ops! {
+ T = (f32, f64);
+ type Lhs = Simd<T, N>;
+ type Rhs = Simd<T, N>;
+ type Output = Self;
+
+ impl Add::add {
+ unsafe_base { simd_add }
+ }
+
+ impl Mul::mul {
+ unsafe_base { simd_mul }
+ }
+
+ impl Sub::sub {
+ unsafe_base { simd_sub }
+ }
+
+ impl Div::div {
+ unsafe_base { simd_div }
+ }
+
+ impl Rem::rem {
+ unsafe_base { simd_rem }
+ }
+}
diff --git a/library/portable-simd/crates/core_simd/src/ops/assign.rs b/library/portable-simd/crates/core_simd/src/ops/assign.rs
new file mode 100644
index 000000000..d2b48614f
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/ops/assign.rs
@@ -0,0 +1,124 @@
+//! Assignment operators
+use super::*;
+use core::ops::{AddAssign, MulAssign}; // commutative binary op-assignment
+use core::ops::{BitAndAssign, BitOrAssign, BitXorAssign}; // commutative bit binary op-assignment
+use core::ops::{DivAssign, RemAssign, SubAssign}; // non-commutative binary op-assignment
+use core::ops::{ShlAssign, ShrAssign}; // non-commutative bit binary op-assignment
+
+// Arithmetic
+
+macro_rules! assign_ops {
+ ($(impl<T, U, const LANES: usize> $assignTrait:ident<U> for Simd<T, LANES>
+ where
+ Self: $trait:ident,
+ {
+ fn $assign_call:ident(rhs: U) {
+ $call:ident
+ }
+ })*) => {
+ $(impl<T, U, const LANES: usize> $assignTrait<U> for Simd<T, LANES>
+ where
+ Self: $trait<U, Output = Self>,
+ T: SimdElement,
+ LaneCount<LANES>: SupportedLaneCount,
+ {
+ #[inline]
+ fn $assign_call(&mut self, rhs: U) {
+ *self = self.$call(rhs);
+ }
+ })*
+ }
+}
+
+assign_ops! {
+ // Arithmetic
+ impl<T, U, const LANES: usize> AddAssign<U> for Simd<T, LANES>
+ where
+ Self: Add,
+ {
+ fn add_assign(rhs: U) {
+ add
+ }
+ }
+
+ impl<T, U, const LANES: usize> MulAssign<U> for Simd<T, LANES>
+ where
+ Self: Mul,
+ {
+ fn mul_assign(rhs: U) {
+ mul
+ }
+ }
+
+ impl<T, U, const LANES: usize> SubAssign<U> for Simd<T, LANES>
+ where
+ Self: Sub,
+ {
+ fn sub_assign(rhs: U) {
+ sub
+ }
+ }
+
+ impl<T, U, const LANES: usize> DivAssign<U> for Simd<T, LANES>
+ where
+ Self: Div,
+ {
+ fn div_assign(rhs: U) {
+ div
+ }
+ }
+ impl<T, U, const LANES: usize> RemAssign<U> for Simd<T, LANES>
+ where
+ Self: Rem,
+ {
+ fn rem_assign(rhs: U) {
+ rem
+ }
+ }
+
+ // Bitops
+ impl<T, U, const LANES: usize> BitAndAssign<U> for Simd<T, LANES>
+ where
+ Self: BitAnd,
+ {
+ fn bitand_assign(rhs: U) {
+ bitand
+ }
+ }
+
+ impl<T, U, const LANES: usize> BitOrAssign<U> for Simd<T, LANES>
+ where
+ Self: BitOr,
+ {
+ fn bitor_assign(rhs: U) {
+ bitor
+ }
+ }
+
+ impl<T, U, const LANES: usize> BitXorAssign<U> for Simd<T, LANES>
+ where
+ Self: BitXor,
+ {
+ fn bitxor_assign(rhs: U) {
+ bitxor
+ }
+ }
+
+ impl<T, U, const LANES: usize> ShlAssign<U> for Simd<T, LANES>
+ where
+ Self: Shl,
+ {
+ fn shl_assign(rhs: U) {
+ shl
+ }
+ }
+
+ impl<T, U, const LANES: usize> ShrAssign<U> for Simd<T, LANES>
+ where
+ Self: Shr,
+ {
+ fn shr_assign(rhs: U) {
+ shr
+ }
+ }
+}
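+
+// A minimal sketch of the resulting op-assign behavior (illustrative only,
+// assuming 4-lane `i32` vectors): each `OpAssign` above simply rewrites
+// `a op= b` as `a = a op b`.
+//
+//     let mut a = Simd::from_array([1i32, 2, 3, 4]);
+//     a += Simd::splat(10);
+//     a <<= Simd::splat(1);
+//     assert_eq!(a, Simd::from_array([22, 24, 26, 28]));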
diff --git a/library/portable-simd/crates/core_simd/src/ops/deref.rs b/library/portable-simd/crates/core_simd/src/ops/deref.rs
new file mode 100644
index 000000000..9883a74c9
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/ops/deref.rs
@@ -0,0 +1,124 @@
+//! This module hacks in "implicit deref" for Simd's operators.
+//! Ideally, Rust would take care of this itself,
+//! and method calls usually handle the LHS implicitly.
+//! But this is not the case with arithmetic ops.
+use super::*;
+
+macro_rules! deref_lhs {
+ (impl<T, const LANES: usize> $trait:ident for $simd:ty {
+ fn $call:ident
+ }) => {
+ impl<T, const LANES: usize> $trait<$simd> for &$simd
+ where
+ T: SimdElement,
+ $simd: $trait<$simd, Output = $simd>,
+ LaneCount<LANES>: SupportedLaneCount,
+ {
+ type Output = Simd<T, LANES>;
+
+ #[inline]
+ #[must_use = "operator returns a new vector without mutating the inputs"]
+ fn $call(self, rhs: $simd) -> Self::Output {
+ (*self).$call(rhs)
+ }
+ }
+ };
+}
+
+macro_rules! deref_rhs {
+ (impl<T, const LANES: usize> $trait:ident for $simd:ty {
+ fn $call:ident
+ }) => {
+ impl<T, const LANES: usize> $trait<&$simd> for $simd
+ where
+ T: SimdElement,
+ $simd: $trait<$simd, Output = $simd>,
+ LaneCount<LANES>: SupportedLaneCount,
+ {
+ type Output = Simd<T, LANES>;
+
+ #[inline]
+ #[must_use = "operator returns a new vector without mutating the inputs"]
+ fn $call(self, rhs: &$simd) -> Self::Output {
+ self.$call(*rhs)
+ }
+ }
+ };
+}
+
+macro_rules! deref_ops {
+ ($(impl<T, const LANES: usize> $trait:ident for $simd:ty {
+ fn $call:ident
+ })*) => {
+ $(
+ deref_rhs! {
+ impl<T, const LANES: usize> $trait for $simd {
+ fn $call
+ }
+ }
+ deref_lhs! {
+ impl<T, const LANES: usize> $trait for $simd {
+ fn $call
+ }
+ }
+ impl<'lhs, 'rhs, T, const LANES: usize> $trait<&'rhs $simd> for &'lhs $simd
+ where
+ T: SimdElement,
+ $simd: $trait<$simd, Output = $simd>,
+ LaneCount<LANES>: SupportedLaneCount,
+ {
+ type Output = $simd;
+
+ #[inline]
+ #[must_use = "operator returns a new vector without mutating the inputs"]
+ fn $call(self, rhs: &$simd) -> Self::Output {
+ (*self).$call(*rhs)
+ }
+ }
+ )*
+ }
+}
+
+deref_ops! {
+ // Arithmetic
+ impl<T, const LANES: usize> Add for Simd<T, LANES> {
+ fn add
+ }
+
+ impl<T, const LANES: usize> Mul for Simd<T, LANES> {
+ fn mul
+ }
+
+ impl<T, const LANES: usize> Sub for Simd<T, LANES> {
+ fn sub
+ }
+
+ impl<T, const LANES: usize> Div for Simd<T, LANES> {
+ fn div
+ }
+
+ impl<T, const LANES: usize> Rem for Simd<T, LANES> {
+ fn rem
+ }
+
+ // Bitops
+ impl<T, const LANES: usize> BitAnd for Simd<T, LANES> {
+ fn bitand
+ }
+
+ impl<T, const LANES: usize> BitOr for Simd<T, LANES> {
+ fn bitor
+ }
+
+ impl<T, const LANES: usize> BitXor for Simd<T, LANES> {
+ fn bitxor
+ }
+
+ impl<T, const LANES: usize> Shl for Simd<T, LANES> {
+ fn shl
+ }
+
+ impl<T, const LANES: usize> Shr for Simd<T, LANES> {
+ fn shr
+ }
+}
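+
+// A minimal sketch of the "implicit deref" these impls buy (illustrative only,
+// assuming 4-lane `f32` vectors): a reference on either side, or both, now
+// works with the binary operators.
+//
+//     let a = Simd::from_array([1.0f32, 2.0, 3.0, 4.0]);
+//     let b = Simd::splat(2.0);
+//     assert_eq!(&a + b, a + b);
+//     assert_eq!(a + &b, a + b);
+//     assert_eq!(&a + &b, a + b);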
diff --git a/library/portable-simd/crates/core_simd/src/ops/unary.rs b/library/portable-simd/crates/core_simd/src/ops/unary.rs
new file mode 100644
index 000000000..4ad022150
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/ops/unary.rs
@@ -0,0 +1,78 @@
+use crate::simd::intrinsics;
+use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
+use core::ops::{Neg, Not}; // unary ops
+
+macro_rules! neg {
+ ($(impl<const LANES: usize> Neg for Simd<$scalar:ty, LANES>)*) => {
+ $(impl<const LANES: usize> Neg for Simd<$scalar, LANES>
+ where
+ $scalar: SimdElement,
+ LaneCount<LANES>: SupportedLaneCount,
+ {
+ type Output = Self;
+
+ #[inline]
+ #[must_use = "operator returns a new vector without mutating the input"]
+ fn neg(self) -> Self::Output {
+ // Safety: `self` is a signed vector
+ unsafe { intrinsics::simd_neg(self) }
+ }
+ })*
+ }
+}
+
+neg! {
+ impl<const LANES: usize> Neg for Simd<f32, LANES>
+
+ impl<const LANES: usize> Neg for Simd<f64, LANES>
+
+ impl<const LANES: usize> Neg for Simd<i8, LANES>
+
+ impl<const LANES: usize> Neg for Simd<i16, LANES>
+
+ impl<const LANES: usize> Neg for Simd<i32, LANES>
+
+ impl<const LANES: usize> Neg for Simd<i64, LANES>
+
+ impl<const LANES: usize> Neg for Simd<isize, LANES>
+}
+
+macro_rules! not {
+ ($(impl<const LANES: usize> Not for Simd<$scalar:ty, LANES>)*) => {
+ $(impl<const LANES: usize> Not for Simd<$scalar, LANES>
+ where
+ $scalar: SimdElement,
+ LaneCount<LANES>: SupportedLaneCount,
+ {
+ type Output = Self;
+
+ #[inline]
+ #[must_use = "operator returns a new vector without mutating the input"]
+ fn not(self) -> Self::Output {
+ self ^ (Simd::splat(!(0 as $scalar)))
+ }
+ })*
+ }
+}
+
+not! {
+ impl<const LANES: usize> Not for Simd<i8, LANES>
+
+ impl<const LANES: usize> Not for Simd<i16, LANES>
+
+ impl<const LANES: usize> Not for Simd<i32, LANES>
+
+ impl<const LANES: usize> Not for Simd<i64, LANES>
+
+ impl<const LANES: usize> Not for Simd<isize, LANES>
+
+ impl<const LANES: usize> Not for Simd<u8, LANES>
+
+ impl<const LANES: usize> Not for Simd<u16, LANES>
+
+ impl<const LANES: usize> Not for Simd<u32, LANES>
+
+ impl<const LANES: usize> Not for Simd<u64, LANES>
+
+ impl<const LANES: usize> Not for Simd<usize, LANES>
+}
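+
+// A minimal sketch of the unary ops above (illustrative only, assuming 4-lane
+// vectors): `Neg` lowers to the `simd_neg` intrinsic, while `Not` is XOR with
+// an all-ones vector.
+//
+//     let x = Simd::from_array([1i32, -2, 3, -4]);
+//     assert_eq!(-x, Simd::from_array([-1, 2, -3, 4]));
+//
+//     let y = Simd::from_array([0u8, 1, 2, 255]);
+//     assert_eq!(!y, Simd::from_array([255, 254, 253, 0]));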
diff --git a/library/portable-simd/crates/core_simd/src/ord.rs b/library/portable-simd/crates/core_simd/src/ord.rs
new file mode 100644
index 000000000..9a87bc2e3
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/ord.rs
@@ -0,0 +1,213 @@
+use crate::simd::{intrinsics, LaneCount, Mask, Simd, SimdPartialEq, SupportedLaneCount};
+
+/// Parallel `PartialOrd`.
+pub trait SimdPartialOrd: SimdPartialEq {
+ /// Test if each lane is less than the corresponding lane in `other`.
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn simd_lt(self, other: Self) -> Self::Mask;
+
+ /// Test if each lane is less than or equal to the corresponding lane in `other`.
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn simd_le(self, other: Self) -> Self::Mask;
+
+ /// Test if each lane is greater than the corresponding lane in `other`.
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn simd_gt(self, other: Self) -> Self::Mask;
+
+ /// Test if each lane is greater than or equal to the corresponding lane in `other`.
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn simd_ge(self, other: Self) -> Self::Mask;
+}
+
+/// Parallel `Ord`.
+pub trait SimdOrd: SimdPartialOrd {
+ /// Returns the lane-wise maximum with `other`.
+ #[must_use = "method returns a new vector and does not mutate the original value"]
+ fn simd_max(self, other: Self) -> Self;
+
+ /// Returns the lane-wise minimum with `other`.
+ #[must_use = "method returns a new vector and does not mutate the original value"]
+ fn simd_min(self, other: Self) -> Self;
+
+ /// Restrict each lane to a certain interval.
+ ///
+ /// For each lane, returns `max` if `self` is greater than `max`, and `min` if `self` is
+ /// less than `min`. Otherwise returns `self`.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `min > max` on any lane.
+ #[must_use = "method returns a new vector and does not mutate the original value"]
+ fn simd_clamp(self, min: Self, max: Self) -> Self;
+}
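+
+// A minimal sketch of the trait surface above (illustrative only, assuming
+// 4-lane `i32` vectors with `SimdPartialOrd`/`SimdOrd` in scope): comparisons
+// yield masks, while max/min/clamp yield vectors.
+//
+//     let a = Simd::from_array([1i32, 5, -3, 0]);
+//     let b = Simd::from_array([2i32, 4, -3, -1]);
+//     assert_eq!(a.simd_lt(b).to_array(), [true, false, false, false]);
+//     assert_eq!(a.simd_max(b), Simd::from_array([2, 5, -3, 0]));
+//     assert_eq!(a.simd_clamp(Simd::splat(-1), Simd::splat(1)), Simd::from_array([1, 1, -1, 0]));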
+
+macro_rules! impl_integer {
+ { $($integer:ty),* } => {
+ $(
+ impl<const LANES: usize> SimdPartialOrd for Simd<$integer, LANES>
+ where
+ LaneCount<LANES>: SupportedLaneCount,
+ {
+ #[inline]
+ fn simd_lt(self, other: Self) -> Self::Mask {
+ // Safety: `self` is a vector, and the result of the comparison
+ // is always a valid mask.
+ unsafe { Mask::from_int_unchecked(intrinsics::simd_lt(self, other)) }
+ }
+
+ #[inline]
+ fn simd_le(self, other: Self) -> Self::Mask {
+ // Safety: `self` is a vector, and the result of the comparison
+ // is always a valid mask.
+ unsafe { Mask::from_int_unchecked(intrinsics::simd_le(self, other)) }
+ }
+
+ #[inline]
+ fn simd_gt(self, other: Self) -> Self::Mask {
+ // Safety: `self` is a vector, and the result of the comparison
+ // is always a valid mask.
+ unsafe { Mask::from_int_unchecked(intrinsics::simd_gt(self, other)) }
+ }
+
+ #[inline]
+ fn simd_ge(self, other: Self) -> Self::Mask {
+ // Safety: `self` is a vector, and the result of the comparison
+ // is always a valid mask.
+ unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) }
+ }
+ }
+
+ impl<const LANES: usize> SimdOrd for Simd<$integer, LANES>
+ where
+ LaneCount<LANES>: SupportedLaneCount,
+ {
+ #[inline]
+ fn simd_max(self, other: Self) -> Self {
+ self.simd_lt(other).select(other, self)
+ }
+
+ #[inline]
+ fn simd_min(self, other: Self) -> Self {
+ self.simd_gt(other).select(other, self)
+ }
+
+ #[inline]
+ fn simd_clamp(self, min: Self, max: Self) -> Self {
+ assert!(
+ min.simd_le(max).all(),
+ "each lane in `min` must be less than or equal to the corresponding lane in `max`",
+ );
+ self.simd_max(min).simd_min(max)
+ }
+ }
+ )*
+ }
+}
+
+impl_integer! { u8, u16, u32, u64, usize, i8, i16, i32, i64, isize }
+
+macro_rules! impl_float {
+ { $($float:ty),* } => {
+ $(
+ impl<const LANES: usize> SimdPartialOrd for Simd<$float, LANES>
+ where
+ LaneCount<LANES>: SupportedLaneCount,
+ {
+ #[inline]
+ fn simd_lt(self, other: Self) -> Self::Mask {
+ // Safety: `self` is a vector, and the result of the comparison
+ // is always a valid mask.
+ unsafe { Mask::from_int_unchecked(intrinsics::simd_lt(self, other)) }
+ }
+
+ #[inline]
+ fn simd_le(self, other: Self) -> Self::Mask {
+ // Safety: `self` is a vector, and the result of the comparison
+ // is always a valid mask.
+ unsafe { Mask::from_int_unchecked(intrinsics::simd_le(self, other)) }
+ }
+
+ #[inline]
+ fn simd_gt(self, other: Self) -> Self::Mask {
+ // Safety: `self` is a vector, and the result of the comparison
+ // is always a valid mask.
+ unsafe { Mask::from_int_unchecked(intrinsics::simd_gt(self, other)) }
+ }
+
+ #[inline]
+ fn simd_ge(self, other: Self) -> Self::Mask {
+ // Safety: `self` is a vector, and the result of the comparison
+ // is always a valid mask.
+ unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) }
+ }
+ }
+ )*
+ }
+}
+
+impl_float! { f32, f64 }
+
+macro_rules! impl_mask {
+ { $($integer:ty),* } => {
+ $(
+ impl<const LANES: usize> SimdPartialOrd for Mask<$integer, LANES>
+ where
+ LaneCount<LANES>: SupportedLaneCount,
+ {
+ #[inline]
+ fn simd_lt(self, other: Self) -> Self::Mask {
+ // Safety: `self` is a vector, and the result of the comparison
+ // is always a valid mask.
+ unsafe { Self::from_int_unchecked(intrinsics::simd_lt(self.to_int(), other.to_int())) }
+ }
+
+ #[inline]
+ fn simd_le(self, other: Self) -> Self::Mask {
+ // Safety: `self` is a vector, and the result of the comparison
+ // is always a valid mask.
+ unsafe { Self::from_int_unchecked(intrinsics::simd_le(self.to_int(), other.to_int())) }
+ }
+
+ #[inline]
+ fn simd_gt(self, other: Self) -> Self::Mask {
+ // Safety: `self` is a vector, and the result of the comparison
+ // is always a valid mask.
+ unsafe { Self::from_int_unchecked(intrinsics::simd_gt(self.to_int(), other.to_int())) }
+ }
+
+ #[inline]
+ fn simd_ge(self, other: Self) -> Self::Mask {
+ // Safety: `self` is a vector, and the result of the comparison
+ // is always a valid mask.
+ unsafe { Self::from_int_unchecked(intrinsics::simd_ge(self.to_int(), other.to_int())) }
+ }
+ }
+
+ impl<const LANES: usize> SimdOrd for Mask<$integer, LANES>
+ where
+ LaneCount<LANES>: SupportedLaneCount,
+ {
+ #[inline]
+ fn simd_max(self, other: Self) -> Self {
+ self.simd_gt(other).select_mask(other, self)
+ }
+
+ #[inline]
+ fn simd_min(self, other: Self) -> Self {
+ self.simd_lt(other).select_mask(other, self)
+ }
+
+ #[inline]
+ fn simd_clamp(self, min: Self, max: Self) -> Self {
+ assert!(
+ min.simd_le(max).all(),
+ "each lane in `min` must be less than or equal to the corresponding lane in `max`",
+ );
+ self.simd_max(min).simd_min(max)
+ }
+ }
+ )*
+ }
+}
+
+impl_mask! { i8, i16, i32, i64, isize }
diff --git a/library/portable-simd/crates/core_simd/src/select.rs b/library/portable-simd/crates/core_simd/src/select.rs
new file mode 100644
index 000000000..065c5987d
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/select.rs
@@ -0,0 +1,59 @@
+use crate::simd::intrinsics;
+use crate::simd::{LaneCount, Mask, MaskElement, Simd, SimdElement, SupportedLaneCount};
+
+impl<T, const LANES: usize> Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ /// Choose lanes from two vectors.
+ ///
+ /// For each lane in the mask, choose the corresponding lane from `true_values` if
+ /// that lane mask is true, and `false_values` if that lane mask is false.
+ ///
+ /// # Examples
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # use core::simd::{Simd, Mask};
+ /// let a = Simd::from_array([0, 1, 2, 3]);
+ /// let b = Simd::from_array([4, 5, 6, 7]);
+ /// let mask = Mask::from_array([true, false, false, true]);
+ /// let c = mask.select(a, b);
+ /// assert_eq!(c.to_array(), [0, 5, 6, 3]);
+ /// ```
+ #[inline]
+ #[must_use = "method returns a new vector and does not mutate the original inputs"]
+ pub fn select<U>(
+ self,
+ true_values: Simd<U, LANES>,
+ false_values: Simd<U, LANES>,
+ ) -> Simd<U, LANES>
+ where
+ U: SimdElement<Mask = T>,
+ {
+ // Safety: The mask has been cast to a vector of integers,
+ // and the operands to select between are vectors of the same type and length.
+ unsafe { intrinsics::simd_select(self.to_int(), true_values, false_values) }
+ }
+
+ /// Choose lanes from two masks.
+ ///
+ /// For each lane in the mask, choose the corresponding lane from `true_values` if
+ /// that lane mask is true, and `false_values` if that lane mask is false.
+ ///
+ /// # Examples
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # use core::simd::Mask;
+ /// let a = Mask::<i32, 4>::from_array([true, true, false, false]);
+ /// let b = Mask::<i32, 4>::from_array([false, false, true, true]);
+ /// let mask = Mask::<i32, 4>::from_array([true, false, false, true]);
+ /// let c = mask.select_mask(a, b);
+ /// assert_eq!(c.to_array(), [true, false, true, false]);
+ /// ```
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original inputs"]
+ pub fn select_mask(self, true_values: Self, false_values: Self) -> Self {
+ self & true_values | !self & false_values
+ }
+}
diff --git a/library/portable-simd/crates/core_simd/src/swizzle.rs b/library/portable-simd/crates/core_simd/src/swizzle.rs
new file mode 100644
index 000000000..22999d249
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/swizzle.rs
@@ -0,0 +1,385 @@
+use crate::simd::intrinsics;
+use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
+
+/// Constructs a new SIMD vector by copying elements from selected lanes in other vectors.
+///
+/// When swizzling one vector, lanes are selected by a `const` array of `usize`,
+/// like [`Swizzle`].
+///
+/// When swizzling two vectors, lanes are selected by a `const` array of [`Which`],
+/// like [`Swizzle2`].
+///
+/// # Examples
+///
+/// With a single SIMD vector, the const array specifies lane indices in that vector:
+/// ```
+/// # #![feature(portable_simd)]
+/// # use core::simd::{u32x2, u32x4, simd_swizzle};
+/// let v = u32x4::from_array([10, 11, 12, 13]);
+///
+/// // Keeping the same size
+/// let r: u32x4 = simd_swizzle!(v, [3, 0, 1, 2]);
+/// assert_eq!(r.to_array(), [13, 10, 11, 12]);
+///
+/// // Changing the number of lanes
+/// let r: u32x2 = simd_swizzle!(v, [3, 1]);
+/// assert_eq!(r.to_array(), [13, 11]);
+/// ```
+///
+/// With two input SIMD vectors, the const array uses `Which` to specify the source of each index:
+/// ```
+/// # #![feature(portable_simd)]
+/// # use core::simd::{u32x2, u32x4, simd_swizzle, Which};
+/// use Which::{First, Second};
+/// let a = u32x4::from_array([0, 1, 2, 3]);
+/// let b = u32x4::from_array([4, 5, 6, 7]);
+///
+/// // Keeping the same size
+/// let r: u32x4 = simd_swizzle!(a, b, [First(0), First(1), Second(2), Second(3)]);
+/// assert_eq!(r.to_array(), [0, 1, 6, 7]);
+///
+/// // Changing the number of lanes
+/// let r: u32x2 = simd_swizzle!(a, b, [First(0), Second(0)]);
+/// assert_eq!(r.to_array(), [0, 4]);
+/// ```
+#[allow(unused_macros)]
+pub macro simd_swizzle {
+ (
+ $vector:expr, $index:expr $(,)?
+ ) => {
+ {
+ use $crate::simd::Swizzle;
+ struct Impl;
+ impl<const LANES: usize> Swizzle<LANES, {$index.len()}> for Impl {
+ const INDEX: [usize; {$index.len()}] = $index;
+ }
+ Impl::swizzle($vector)
+ }
+ },
+ (
+ $first:expr, $second:expr, $index:expr $(,)?
+ ) => {
+ {
+ use $crate::simd::{Which, Swizzle2};
+ struct Impl;
+ impl<const LANES: usize> Swizzle2<LANES, {$index.len()}> for Impl {
+ const INDEX: [Which; {$index.len()}] = $index;
+ }
+ Impl::swizzle2($first, $second)
+ }
+ }
+}
+
+/// Specifies a lane index into one of two SIMD vectors.
+///
+/// This is an input type for [Swizzle2] and helper macros like [simd_swizzle].
+#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum Which {
+ /// Index of a lane in the first input SIMD vector.
+ First(usize),
+ /// Index of a lane in the second input SIMD vector.
+ Second(usize),
+}
+
+/// Create a vector from the elements of another vector.
+pub trait Swizzle<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
+ /// Map from the lanes of the input vector to the output vector.
+ const INDEX: [usize; OUTPUT_LANES];
+
+ /// Create a new vector from the lanes of `vector`.
+ ///
+ /// Lane `i` of the output is `vector[Self::INDEX[i]]`.
+ #[inline]
+ #[must_use = "method returns a new vector and does not mutate the original inputs"]
+ fn swizzle<T>(vector: Simd<T, INPUT_LANES>) -> Simd<T, OUTPUT_LANES>
+ where
+ T: SimdElement,
+ LaneCount<INPUT_LANES>: SupportedLaneCount,
+ LaneCount<OUTPUT_LANES>: SupportedLaneCount,
+ {
+ // Safety: `vector` is a vector, and `INDEX_IMPL` is a const array of u32.
+ unsafe { intrinsics::simd_shuffle(vector, vector, Self::INDEX_IMPL) }
+ }
+}
+
+/// Create a vector from the elements of two other vectors.
+pub trait Swizzle2<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
+ /// Map from the lanes of the input vectors to the output vector
+ const INDEX: [Which; OUTPUT_LANES];
+
+ /// Create a new vector from the lanes of `first` and `second`.
+ ///
+ /// Lane `i` is `first[j]` when `Self::INDEX[i]` is `First(j)`, or `second[j]` when it is
+ /// `Second(j)`.
+ #[inline]
+ #[must_use = "method returns a new vector and does not mutate the original inputs"]
+ fn swizzle2<T>(
+ first: Simd<T, INPUT_LANES>,
+ second: Simd<T, INPUT_LANES>,
+ ) -> Simd<T, OUTPUT_LANES>
+ where
+ T: SimdElement,
+ LaneCount<INPUT_LANES>: SupportedLaneCount,
+ LaneCount<OUTPUT_LANES>: SupportedLaneCount,
+ {
+ // Safety: `first` and `second` are vectors, and `INDEX_IMPL` is a const array of u32.
+ unsafe { intrinsics::simd_shuffle(first, second, Self::INDEX_IMPL) }
+ }
+}
+
+/// The `simd_shuffle` intrinsic expects `u32`, so do error checking and conversion here.
+/// This trait hides `INDEX_IMPL` from the public API.
+trait SwizzleImpl<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
+ const INDEX_IMPL: [u32; OUTPUT_LANES];
+}
+
+impl<T, const INPUT_LANES: usize, const OUTPUT_LANES: usize> SwizzleImpl<INPUT_LANES, OUTPUT_LANES>
+ for T
+where
+ T: Swizzle<INPUT_LANES, OUTPUT_LANES> + ?Sized,
+{
+ const INDEX_IMPL: [u32; OUTPUT_LANES] = {
+ let mut output = [0; OUTPUT_LANES];
+ let mut i = 0;
+ while i < OUTPUT_LANES {
+ let index = Self::INDEX[i];
+ assert!(index as u32 as usize == index);
+ assert!(index < INPUT_LANES, "source lane exceeds input lane count",);
+ output[i] = index as u32;
+ i += 1;
+ }
+ output
+ };
+}
+
+/// The `simd_shuffle` intrinsic expects `u32`, so do error checking and conversion here.
+/// This trait hides `INDEX_IMPL` from the public API.
+trait Swizzle2Impl<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
+ const INDEX_IMPL: [u32; OUTPUT_LANES];
+}
+
+impl<T, const INPUT_LANES: usize, const OUTPUT_LANES: usize> Swizzle2Impl<INPUT_LANES, OUTPUT_LANES>
+ for T
+where
+ T: Swizzle2<INPUT_LANES, OUTPUT_LANES> + ?Sized,
+{
+ const INDEX_IMPL: [u32; OUTPUT_LANES] = {
+ let mut output = [0; OUTPUT_LANES];
+ let mut i = 0;
+ while i < OUTPUT_LANES {
+ let (offset, index) = match Self::INDEX[i] {
+ Which::First(index) => (false, index),
+ Which::Second(index) => (true, index),
+ };
+ assert!(index < INPUT_LANES, "source lane exceeds input lane count",);
+
+ // lanes are indexed by the first vector, then second vector
+ let index = if offset { index + INPUT_LANES } else { index };
+ assert!(index as u32 as usize == index);
+ output[i] = index as u32;
+ i += 1;
+ }
+ output
+ };
+}
+
+impl<T, const LANES: usize> Simd<T, LANES>
+where
+ T: SimdElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ /// Reverse the order of the lanes in the vector.
+ #[inline]
+ #[must_use = "method returns a new vector and does not mutate the original inputs"]
+ pub fn reverse(self) -> Self {
+ const fn reverse_index<const LANES: usize>() -> [usize; LANES] {
+ let mut index = [0; LANES];
+ let mut i = 0;
+ while i < LANES {
+ index[i] = LANES - i - 1;
+ i += 1;
+ }
+ index
+ }
+
+ struct Reverse;
+
+ impl<const LANES: usize> Swizzle<LANES, LANES> for Reverse {
+ const INDEX: [usize; LANES] = reverse_index::<LANES>();
+ }
+
+ Reverse::swizzle(self)
+ }
+
+ /// Rotates the vector such that the first `OFFSET` elements of the vector move to the end
+ /// while the last `LANES - OFFSET` elements move to the front. After calling `rotate_lanes_left`,
+ /// the element previously in lane `OFFSET` will become the first element in the vector.
+ #[inline]
+ #[must_use = "method returns a new vector and does not mutate the original inputs"]
+ pub fn rotate_lanes_left<const OFFSET: usize>(self) -> Self {
+ const fn rotate_index<const OFFSET: usize, const LANES: usize>() -> [usize; LANES] {
+ let offset = OFFSET % LANES;
+ let mut index = [0; LANES];
+ let mut i = 0;
+ while i < LANES {
+ index[i] = (i + offset) % LANES;
+ i += 1;
+ }
+ index
+ }
+
+ struct Rotate<const OFFSET: usize>;
+
+ impl<const OFFSET: usize, const LANES: usize> Swizzle<LANES, LANES> for Rotate<OFFSET> {
+ const INDEX: [usize; LANES] = rotate_index::<OFFSET, LANES>();
+ }
+
+ Rotate::<OFFSET>::swizzle(self)
+ }
+
+ /// Rotates the vector such that the first `LANES - OFFSET` elements of the vector move to
+ /// the end while the last `OFFSET` elements move to the front. After calling `rotate_lanes_right`,
+ /// the element previously at index `LANES - OFFSET` will become the first element in the vector.
+ #[inline]
+ #[must_use = "method returns a new vector and does not mutate the original inputs"]
+ pub fn rotate_lanes_right<const OFFSET: usize>(self) -> Self {
+ const fn rotate_index<const OFFSET: usize, const LANES: usize>() -> [usize; LANES] {
+ let offset = LANES - OFFSET % LANES;
+ let mut index = [0; LANES];
+ let mut i = 0;
+ while i < LANES {
+ index[i] = (i + offset) % LANES;
+ i += 1;
+ }
+ index
+ }
+
+ struct Rotate<const OFFSET: usize>;
+
+ impl<const OFFSET: usize, const LANES: usize> Swizzle<LANES, LANES> for Rotate<OFFSET> {
+ const INDEX: [usize; LANES] = rotate_index::<OFFSET, LANES>();
+ }
+
+ Rotate::<OFFSET>::swizzle(self)
+ }
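+
+ // A minimal sketch of reverse/rotate (illustrative only, assuming 4-lane
+ // `u32` vectors):
+ //
+ //     let v = Simd::from_array([0u32, 1, 2, 3]);
+ //     assert_eq!(v.reverse(), Simd::from_array([3, 2, 1, 0]));
+ //     assert_eq!(v.rotate_lanes_left::<1>(), Simd::from_array([1, 2, 3, 0]));
+ //     assert_eq!(v.rotate_lanes_right::<1>(), Simd::from_array([3, 0, 1, 2]));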
+
+ /// Interleave two vectors.
+ ///
+ /// Produces two vectors with lanes taken alternately from `self` and `other`.
+ ///
+ /// The first result contains the first `LANES / 2` lanes from `self` and `other`,
+ /// alternating, starting with the first lane of `self`.
+ ///
+ /// The second result contains the last `LANES / 2` lanes from `self` and `other`,
+ /// alternating, starting with the lane `LANES / 2` from the start of `self`.
+ ///
+ /// ```
+ /// #![feature(portable_simd)]
+ /// # use core::simd::Simd;
+ /// let a = Simd::from_array([0, 1, 2, 3]);
+ /// let b = Simd::from_array([4, 5, 6, 7]);
+ /// let (x, y) = a.interleave(b);
+ /// assert_eq!(x.to_array(), [0, 4, 1, 5]);
+ /// assert_eq!(y.to_array(), [2, 6, 3, 7]);
+ /// ```
+ #[inline]
+ #[must_use = "method returns a new vector and does not mutate the original inputs"]
+ pub fn interleave(self, other: Self) -> (Self, Self) {
+ const fn lo<const LANES: usize>() -> [Which; LANES] {
+ let mut idx = [Which::First(0); LANES];
+ let mut i = 0;
+ while i < LANES {
+ let offset = i / 2;
+ idx[i] = if i % 2 == 0 {
+ Which::First(offset)
+ } else {
+ Which::Second(offset)
+ };
+ i += 1;
+ }
+ idx
+ }
+ const fn hi<const LANES: usize>() -> [Which; LANES] {
+ let mut idx = [Which::First(0); LANES];
+ let mut i = 0;
+ while i < LANES {
+ let offset = (LANES + i) / 2;
+ idx[i] = if i % 2 == 0 {
+ Which::First(offset)
+ } else {
+ Which::Second(offset)
+ };
+ i += 1;
+ }
+ idx
+ }
+
+ struct Lo;
+ struct Hi;
+
+ impl<const LANES: usize> Swizzle2<LANES, LANES> for Lo {
+ const INDEX: [Which; LANES] = lo::<LANES>();
+ }
+
+ impl<const LANES: usize> Swizzle2<LANES, LANES> for Hi {
+ const INDEX: [Which; LANES] = hi::<LANES>();
+ }
+
+ (Lo::swizzle2(self, other), Hi::swizzle2(self, other))
+ }
+
+ /// Deinterleave two vectors.
+ ///
+ /// The first result takes every other lane of `self` and then `other`, starting with
+ /// the first lane.
+ ///
+ /// The second result takes every other lane of `self` and then `other`, starting with
+ /// the second lane.
+ ///
+ /// ```
+ /// #![feature(portable_simd)]
+ /// # use core::simd::Simd;
+ /// let a = Simd::from_array([0, 4, 1, 5]);
+ /// let b = Simd::from_array([2, 6, 3, 7]);
+ /// let (x, y) = a.deinterleave(b);
+ /// assert_eq!(x.to_array(), [0, 1, 2, 3]);
+ /// assert_eq!(y.to_array(), [4, 5, 6, 7]);
+ /// ```
+ #[inline]
+ #[must_use = "method returns a new vector and does not mutate the original inputs"]
+ pub fn deinterleave(self, other: Self) -> (Self, Self) {
+ const fn even<const LANES: usize>() -> [Which; LANES] {
+ let mut idx = [Which::First(0); LANES];
+ let mut i = 0;
+ while i < LANES / 2 {
+ idx[i] = Which::First(2 * i);
+ idx[i + LANES / 2] = Which::Second(2 * i);
+ i += 1;
+ }
+ idx
+ }
+ const fn odd<const LANES: usize>() -> [Which; LANES] {
+ let mut idx = [Which::First(0); LANES];
+ let mut i = 0;
+ while i < LANES / 2 {
+ idx[i] = Which::First(2 * i + 1);
+ idx[i + LANES / 2] = Which::Second(2 * i + 1);
+ i += 1;
+ }
+ idx
+ }
+
+ struct Even;
+ struct Odd;
+
+ impl<const LANES: usize> Swizzle2<LANES, LANES> for Even {
+ const INDEX: [Which; LANES] = even::<LANES>();
+ }
+
+ impl<const LANES: usize> Swizzle2<LANES, LANES> for Odd {
+ const INDEX: [Which; LANES] = odd::<LANES>();
+ }
+
+ (Even::swizzle2(self, other), Odd::swizzle2(self, other))
+ }
+}
diff --git a/library/portable-simd/crates/core_simd/src/to_bytes.rs b/library/portable-simd/crates/core_simd/src/to_bytes.rs
new file mode 100644
index 000000000..b36b1a347
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/to_bytes.rs
@@ -0,0 +1,41 @@
+macro_rules! impl_to_bytes {
+ { $ty:ty, $size:literal } => {
+ impl<const LANES: usize> crate::simd::Simd<$ty, LANES>
+ where
+ crate::simd::LaneCount<LANES>: crate::simd::SupportedLaneCount,
+ crate::simd::LaneCount<{{ $size * LANES }}>: crate::simd::SupportedLaneCount,
+ {
+ /// Return the memory representation of this vector as a byte array in native byte
+ /// order.
+ pub fn to_ne_bytes(self) -> crate::simd::Simd<u8, {{ $size * LANES }}> {
+ // Safety: transmuting between vectors is safe
+ unsafe { core::mem::transmute_copy(&self) }
+ }
+
+ /// Create a vector from its memory representation as a byte array in native
+ /// endianness.
+ pub fn from_ne_bytes(bytes: crate::simd::Simd<u8, {{ $size * LANES }}>) -> Self {
+ // Safety: transmuting between vectors is safe
+ unsafe { core::mem::transmute_copy(&bytes) }
+ }
+ }
+ }
+}
+
+impl_to_bytes! { u8, 1 }
+impl_to_bytes! { u16, 2 }
+impl_to_bytes! { u32, 4 }
+impl_to_bytes! { u64, 8 }
+#[cfg(target_pointer_width = "32")]
+impl_to_bytes! { usize, 4 }
+#[cfg(target_pointer_width = "64")]
+impl_to_bytes! { usize, 8 }
+
+impl_to_bytes! { i8, 1 }
+impl_to_bytes! { i16, 2 }
+impl_to_bytes! { i32, 4 }
+impl_to_bytes! { i64, 8 }
+#[cfg(target_pointer_width = "32")]
+impl_to_bytes! { isize, 4 }
+#[cfg(target_pointer_width = "64")]
+impl_to_bytes! { isize, 8 }
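+
+// A minimal sketch of the byte conversions above (illustrative only, assuming
+// a 2-lane `u16` vector on a little-endian target):
+//
+//     let v = Simd::from_array([0x0102u16, 0x0304]);
+//     let bytes = v.to_ne_bytes();
+//     assert_eq!(bytes.to_array(), [0x02, 0x01, 0x04, 0x03]);
+//     assert_eq!(Simd::<u16, 2>::from_ne_bytes(bytes), v);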
diff --git a/library/portable-simd/crates/core_simd/src/vector.rs b/library/portable-simd/crates/core_simd/src/vector.rs
new file mode 100644
index 000000000..78f56402e
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/vector.rs
@@ -0,0 +1,742 @@
+mod float;
+mod int;
+mod uint;
+
+pub use float::*;
+pub use int::*;
+pub use uint::*;
+
+// Vectors of pointers are not for public use at the current time.
+pub(crate) mod ptr;
+
+use crate::simd::{
+ intrinsics, LaneCount, Mask, MaskElement, SimdPartialOrd, SupportedLaneCount, Swizzle,
+};
+
+/// A SIMD vector of `LANES` elements of type `T`. `Simd<T, N>` has the same shape as [`[T; N]`](array), but operates like `T`.
+///
+/// Two vectors of the same type and length will, by convention, support the operators (+, *, etc.) that `T` does.
+/// These take the lanes at each index on the left-hand side and right-hand side, perform the operation,
+/// and return the result in the same lane in a vector of equal size. For a given operator, this is equivalent to zipping
+/// the two arrays together and mapping the operator over each lane.
+///
+/// ```rust
+/// # #![feature(array_zip, portable_simd)]
+/// # use core::simd::{Simd};
+/// let a0: [i32; 4] = [-2, 0, 2, 4];
+/// let a1 = [10, 9, 8, 7];
+/// let zm_add = a0.zip(a1).map(|(lhs, rhs)| lhs + rhs);
+/// let zm_mul = a0.zip(a1).map(|(lhs, rhs)| lhs * rhs);
+///
+/// // `Simd<T, N>` implements `From<[T; N]>`.
+/// let (v0, v1) = (Simd::from(a0), Simd::from(a1));
+/// // Which means arrays implement `Into<Simd<T, N>>`.
+/// assert_eq!(v0 + v1, zm_add.into());
+/// assert_eq!(v0 * v1, zm_mul.into());
+/// ```
+///
+/// `Simd` with integers has the quirk that these operations are also inherently wrapping, as if `T` was [`Wrapping<T>`].
+/// Thus, `Simd` does not implement `wrapping_add`, because that is the default behavior.
+/// This means there is no warning on overflows, even in "debug" builds.
+/// For most applications where `Simd` is appropriate, it is "not a bug" to wrap,
+/// and even "debug builds" are unlikely to tolerate the loss of performance.
+/// You may want to consider using explicitly checked arithmetic if such is required.
+/// Division by zero still causes a panic, so you may want to consider using floating point numbers if that is unacceptable.
+///
+/// [`Wrapping<T>`]: core::num::Wrapping
+///
+/// # Layout
+/// `Simd<T, N>` has a layout similar to `[T; N]` (identical "shapes"), but with a greater alignment.
+/// `[T; N]` is aligned to `T`, but `Simd<T, N>` will have an alignment based on both `T` and `N`.
+/// It is thus sound to [`transmute`] `Simd<T, N>` to `[T; N]`, and will typically optimize to zero cost,
+/// but the reverse transmutation is more likely to require a copy the compiler cannot simply elide.
+///
+/// # ABI "Features"
+/// Due to Rust's safety guarantees, `Simd<T, N>` is currently passed to and from functions via memory, not SIMD registers,
+/// except as an optimization. `#[inline]` hints are recommended on functions that accept `Simd<T, N>` or return it.
+/// The need for this may be corrected in the future.
+///
+/// # Safe SIMD with Unsafe Rust
+///
+/// Operations with `Simd` are typically safe, but there are many reasons to want to combine SIMD with `unsafe` code.
+/// Care must be taken to respect differences between `Simd` and other types it may be transformed into or derived from.
+/// In particular, the layout of `Simd<T, N>` may be similar to `[T; N]`, and may allow some transmutations,
+/// but references to `[T; N]` are not interchangeable with those to `Simd<T, N>`.
+/// Thus, when using `unsafe` Rust to read and write `Simd<T, N>` through [raw pointers], it is a good idea to first try with
+/// [`read_unaligned`] and [`write_unaligned`]. This is because:
+/// - [`read`] and [`write`] require full alignment (in this case, `Simd<T, N>`'s alignment)
+/// - the likely source for reading or destination for writing `Simd<T, N>` is [`[T]`](slice) and similar types, aligned to `T`
+/// - combining these actions would violate the `unsafe` contract and explode the program into a puff of **undefined behavior**
+/// - the compiler can implicitly adjust layouts to make unaligned reads or writes fully aligned if it sees the optimization
+/// - most contemporary processors suffer no performance penalty for "unaligned" reads and writes that are aligned at runtime
+///
+/// By imposing fewer obligations, unaligned functions are less likely to make the program unsound,
+/// and may be just as fast as stricter alternatives.
+/// When trying to guarantee alignment, [`[T]::as_simd`][as_simd] is an option for converting `[T]` to `[Simd<T, N>]`,
+/// and allows soundly operating on an aligned SIMD body, but it may cost more time when handling the scalar head and tail.
+/// If these are not sufficient, then it is most ideal to design data structures to be already aligned
+/// to the `Simd<T, N>` you wish to use before using `unsafe` Rust to read or write.
+/// More conventional ways to compensate for these facts, like materializing `Simd` to or from an array first,
+/// are handled by safe methods like [`Simd::from_array`] and [`Simd::from_slice`].
+///
+/// [`transmute`]: core::mem::transmute
+/// [raw pointers]: pointer
+/// [`read_unaligned`]: pointer::read_unaligned
+/// [`write_unaligned`]: pointer::write_unaligned
+/// [`read`]: pointer::read
+/// [`write`]: pointer::write
+/// [as_simd]: slice::as_simd
+#[repr(simd)]
+pub struct Simd<T, const LANES: usize>([T; LANES])
+where
+ T: SimdElement,
+ LaneCount<LANES>: SupportedLaneCount;
+
+impl<T, const LANES: usize> Simd<T, LANES>
+where
+ LaneCount<LANES>: SupportedLaneCount,
+ T: SimdElement,
+{
+ /// Number of lanes in this vector.
+ pub const LANES: usize = LANES;
+
+ /// Returns the number of lanes in this SIMD vector.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # use core::simd::u32x4;
+ /// let v = u32x4::splat(0);
+ /// assert_eq!(v.lanes(), 4);
+ /// ```
+ pub const fn lanes(&self) -> usize {
+ LANES
+ }
+
+ /// Constructs a new SIMD vector with all lanes set to the given value.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # use core::simd::u32x4;
+ /// let v = u32x4::splat(8);
+ /// assert_eq!(v.as_array(), &[8, 8, 8, 8]);
+ /// ```
+ pub fn splat(value: T) -> Self {
+ // This is preferred over `[value; LANES]`, since it's explicitly a splat:
+ // https://github.com/rust-lang/rust/issues/97804
+ struct Splat;
+ impl<const LANES: usize> Swizzle<1, LANES> for Splat {
+ const INDEX: [usize; LANES] = [0; LANES];
+ }
+ Splat::swizzle(Simd::<T, 1>::from([value]))
+ }
+
+ /// Returns an array reference containing the entire SIMD vector.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # use core::simd::{Simd, u64x4};
+ /// let v: u64x4 = Simd::from_array([0, 1, 2, 3]);
+ /// assert_eq!(v.as_array(), &[0, 1, 2, 3]);
+ /// ```
+ pub const fn as_array(&self) -> &[T; LANES] {
+ &self.0
+ }
+
+ /// Returns a mutable array reference containing the entire SIMD vector.
+ pub fn as_mut_array(&mut self) -> &mut [T; LANES] {
+ &mut self.0
+ }
+
+ /// Converts an array to a SIMD vector.
+ pub const fn from_array(array: [T; LANES]) -> Self {
+ Self(array)
+ }
+
+ /// Converts a SIMD vector to an array.
+ pub const fn to_array(self) -> [T; LANES] {
+ self.0
+ }
+
+ /// Converts a slice to a SIMD vector containing `slice[..LANES]`.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the slice's length is less than the vector's `Simd::LANES`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # use core::simd::u32x4;
+ /// let source = vec![1, 2, 3, 4, 5, 6];
+ /// let v = u32x4::from_slice(&source);
+ /// assert_eq!(v.as_array(), &[1, 2, 3, 4]);
+ /// ```
+ #[must_use]
+ pub const fn from_slice(slice: &[T]) -> Self {
+ assert!(slice.len() >= LANES, "slice length must be at least the number of lanes");
+ let mut array = [slice[0]; LANES];
+ let mut i = 0;
+ while i < LANES {
+ array[i] = slice[i];
+ i += 1;
+ }
+ Self(array)
+ }
+
+ /// Performs lanewise conversion of a SIMD vector's elements to another SIMD-valid type.
+ ///
+ /// This follows the semantics of Rust's `as` conversion for casting
+ /// integers to unsigned integers (interpreting as the other type, so `-1` to `MAX`),
+ /// and from floats to integers (truncating, or saturating at the limits) for each lane,
+ /// or vice versa.
+ ///
+ /// # Examples
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # use core::simd::Simd;
+ /// let floats: Simd<f32, 4> = Simd::from_array([1.9, -4.5, f32::INFINITY, f32::NAN]);
+ /// let ints = floats.cast::<i32>();
+ /// assert_eq!(ints, Simd::from_array([1, -4, i32::MAX, 0]));
+ ///
+ /// // Formally equivalent, but `Simd::cast` can optimize better.
+ /// assert_eq!(ints, Simd::from_array(floats.to_array().map(|x| x as i32)));
+ ///
+ /// // The float conversion does not round-trip.
+ /// let floats_again = ints.cast();
+ /// assert_ne!(floats, floats_again);
+ /// assert_eq!(floats_again, Simd::from_array([1.0, -4.0, 2147483647.0, 0.0]));
+ /// ```
+ #[must_use]
+ #[inline]
+ pub fn cast<U: SimdElement>(self) -> Simd<U, LANES> {
+ // Safety: The input argument is a vector of a valid SIMD element type.
+ unsafe { intrinsics::simd_as(self) }
+ }
+
+ /// Rounds toward zero and converts to the same-width integer type, assuming that
+ /// the value is finite and fits in that type.
+ ///
+ /// # Safety
+ /// The value must:
+ ///
+ /// * Not be NaN
+ /// * Not be infinite
+ /// * Be representable in the return type, after truncating off its fractional part
+ ///
+ /// If these requirements are infeasible or costly, consider using the safe function [cast],
+ /// which saturates on conversion.
+ ///
+ /// [cast]: Simd::cast
+ #[inline]
+ pub unsafe fn to_int_unchecked<I>(self) -> Simd<I, LANES>
+ where
+ T: core::convert::FloatToInt<I>,
+ I: SimdElement,
+ {
+ // Safety: `self` is a vector, and `FloatToInt` ensures the type can be cast to
+ // an integer.
+ unsafe { intrinsics::simd_cast(self) }
+ }
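+
+ // A sound call looks roughly like this (illustrative sketch only): every lane
+ // below is finite and its truncated value fits in `i32`.
+ //
+ //     let v = Simd::from_array([1.25f32, -2.5, 3.0, 4.75]);
+ //     // Safety: all lanes are finite and representable as i32 after truncation.
+ //     let t: Simd<i32, 4> = unsafe { v.to_int_unchecked() };
+ //     assert_eq!(t.to_array(), [1, -2, 3, 4]);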
+
+ /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector.
+ /// If an index is out-of-bounds, the lane is instead selected from the `or` vector.
+ ///
+ /// # Examples
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # use core::simd::Simd;
+ /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+ /// let idxs = Simd::from_array([9, 3, 0, 5]);
+ /// let alt = Simd::from_array([-5, -4, -3, -2]);
+ ///
+ /// let result = Simd::gather_or(&vec, idxs, alt); // Note the lane that is out-of-bounds.
+ /// assert_eq!(result, Simd::from_array([-5, 13, 10, 15]));
+ /// ```
+ #[must_use]
+ #[inline]
+ pub fn gather_or(slice: &[T], idxs: Simd<usize, LANES>, or: Self) -> Self {
+ Self::gather_select(slice, Mask::splat(true), idxs, or)
+ }
+
+ /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector.
+ /// If an index is out-of-bounds, the lane is set to the default value for the type.
+ ///
+ /// # Examples
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # use core::simd::Simd;
+ /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+ /// let idxs = Simd::from_array([9, 3, 0, 5]);
+ ///
+ /// let result = Simd::gather_or_default(&vec, idxs); // Note the lane that is out-of-bounds.
+ /// assert_eq!(result, Simd::from_array([0, 13, 10, 15]));
+ /// ```
+ #[must_use]
+ #[inline]
+ pub fn gather_or_default(slice: &[T], idxs: Simd<usize, LANES>) -> Self
+ where
+ T: Default,
+ {
+ Self::gather_or(slice, idxs, Self::splat(T::default()))
+ }
+
+ /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector.
+ /// The mask `enable`s all `true` lanes and disables all `false` lanes.
+ /// If an index is disabled or is out-of-bounds, the lane is selected from the `or` vector.
+ ///
+ /// # Examples
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # use core::simd::{Simd, Mask};
+ /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+ /// let idxs = Simd::from_array([9, 3, 0, 5]);
+ /// let alt = Simd::from_array([-5, -4, -3, -2]);
+ /// let enable = Mask::from_array([true, true, true, false]); // Note the mask of the last lane.
+ ///
+ /// let result = Simd::gather_select(&vec, enable, idxs, alt); // Note the lane that is out-of-bounds.
+ /// assert_eq!(result, Simd::from_array([-5, 13, 10, -2]));
+ /// ```
+ #[must_use]
+ #[inline]
+ pub fn gather_select(
+ slice: &[T],
+ enable: Mask<isize, LANES>,
+ idxs: Simd<usize, LANES>,
+ or: Self,
+ ) -> Self {
+ let enable: Mask<isize, LANES> = enable & idxs.simd_lt(Simd::splat(slice.len()));
+ // Safety: We have masked-off out-of-bounds lanes.
+ unsafe { Self::gather_select_unchecked(slice, enable, idxs, or) }
+ }
+
+ /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector.
+ /// The mask `enable`s all `true` lanes and disables all `false` lanes.
+ /// If an index is disabled, the lane is selected from the `or` vector.
+ ///
+ /// # Safety
+ ///
+ /// Calling this function with an `enable`d out-of-bounds index is *[undefined behavior]*
+ /// even if the resulting value is not used.
+ ///
+ /// # Examples
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+ /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+ /// # use simd::{Simd, SimdPartialOrd, Mask};
+ /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+ /// let idxs = Simd::from_array([9, 3, 0, 5]);
+ /// let alt = Simd::from_array([-5, -4, -3, -2]);
+ /// let enable = Mask::from_array([true, true, true, false]); // Note the final mask lane.
+ /// // If this mask was used to gather, it would be unsound. Let's fix that.
+ /// let enable = enable & idxs.simd_lt(Simd::splat(vec.len()));
+ ///
+ /// // We have masked the OOB lane, so it's safe to gather now.
+ /// let result = unsafe { Simd::gather_select_unchecked(&vec, enable, idxs, alt) };
+ /// assert_eq!(result, Simd::from_array([-5, 13, 10, -2]));
+ /// ```
+ /// [undefined behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
+ #[must_use]
+ #[inline]
+ pub unsafe fn gather_select_unchecked(
+ slice: &[T],
+ enable: Mask<isize, LANES>,
+ idxs: Simd<usize, LANES>,
+ or: Self,
+ ) -> Self {
+ let base_ptr = crate::simd::ptr::SimdConstPtr::splat(slice.as_ptr());
+ // Ferris forgive me, I have done pointer arithmetic here.
+ let ptrs = base_ptr.wrapping_add(idxs);
+ // Safety: The ptrs have been bounds-masked to prevent memory-unsafe reads insha'allah
+ unsafe { intrinsics::simd_gather(or, ptrs, enable.to_int()) }
+ }
+
+ /// Writes the values in a SIMD vector to potentially discontiguous indices in `slice`.
+ /// If two lanes in the scattered vector would write to the same index,
+ /// only the last lane is guaranteed to actually be written.
+ ///
+ /// # Examples
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # use core::simd::Simd;
+ /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+ /// let idxs = Simd::from_array([9, 3, 0, 0]);
+ /// let vals = Simd::from_array([-27, 82, -41, 124]);
+ ///
+ /// vals.scatter(&mut vec, idxs); // index 0 receives two writes.
+ /// assert_eq!(vec, vec![124, 11, 12, 82, 14, 15, 16, 17, 18]);
+ /// ```
+ #[inline]
+ pub fn scatter(self, slice: &mut [T], idxs: Simd<usize, LANES>) {
+ self.scatter_select(slice, Mask::splat(true), idxs)
+ }
+
+ /// Writes the values in a SIMD vector to multiple potentially discontiguous indices in `slice`.
+ /// The mask `enable`s all `true` lanes and disables all `false` lanes.
+ /// If an enabled index is out-of-bounds, the lane is not written.
+ /// If two enabled lanes in the scattered vector would write to the same index,
+ /// only the last lane is guaranteed to actually be written.
+ ///
+ /// # Examples
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+ /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+ /// # use simd::{Simd, Mask};
+ /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+ /// let idxs = Simd::from_array([9, 3, 0, 0]);
+ /// let vals = Simd::from_array([-27, 82, -41, 124]);
+ /// let enable = Mask::from_array([true, true, true, false]); // Note the mask of the last lane.
+ ///
+ /// vals.scatter_select(&mut vec, enable, idxs); // index 0's second write is masked, thus omitted.
+ /// assert_eq!(vec, vec![-41, 11, 12, 82, 14, 15, 16, 17, 18]);
+ /// ```
+ #[inline]
+ pub fn scatter_select(
+ self,
+ slice: &mut [T],
+ enable: Mask<isize, LANES>,
+ idxs: Simd<usize, LANES>,
+ ) {
+ let enable: Mask<isize, LANES> = enable & idxs.simd_lt(Simd::splat(slice.len()));
+ // Safety: We have masked-off out-of-bounds lanes.
+ unsafe { self.scatter_select_unchecked(slice, enable, idxs) }
+ }
+
+ /// Writes the values in a SIMD vector to multiple potentially discontiguous indices in `slice`.
+ /// The mask `enable`s all `true` lanes and disables all `false` lanes.
+ /// If two enabled lanes in the scattered vector would write to the same index,
+ /// only the last lane is guaranteed to actually be written.
+ ///
+ /// # Safety
+ ///
+ /// Calling this function with an enabled out-of-bounds index is *[undefined behavior]*,
+ /// and may lead to memory corruption.
+ ///
+ /// # Examples
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+ /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+ /// # use simd::{Simd, SimdPartialOrd, Mask};
+ /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+ /// let idxs = Simd::from_array([9, 3, 0, 0]);
+ /// let vals = Simd::from_array([-27, 82, -41, 124]);
+ /// let enable = Mask::from_array([true, true, true, false]); // Note the mask of the last lane.
+ /// // If this mask was used to scatter, it would be unsound. Let's fix that.
+ /// let enable = enable & idxs.simd_lt(Simd::splat(vec.len()));
+ ///
+ /// // We have masked the OOB lane, so it's safe to scatter now.
+ /// unsafe { vals.scatter_select_unchecked(&mut vec, enable, idxs); }
+ /// // index 0's second write is masked, thus was omitted.
+ /// assert_eq!(vec, vec![-41, 11, 12, 82, 14, 15, 16, 17, 18]);
+ /// ```
+ /// [undefined behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
+ #[inline]
+ pub unsafe fn scatter_select_unchecked(
+ self,
+ slice: &mut [T],
+ enable: Mask<isize, LANES>,
+ idxs: Simd<usize, LANES>,
+ ) {
+ // Safety: This block works with *mut T derived from &mut 'a [T],
+ // which means it is delicate in Rust's borrowing model, circa 2021:
+ // &mut 'a [T] asserts uniqueness, so deriving &'a [T] invalidates live *mut Ts!
+ // Even though this block is largely safe methods, it must be exactly this way
+ // to prevent invalidating the raw ptrs while they're live.
+ // Thus, entering this block requires all values we intend to use to already be ready:
+ // 0. idxs we want to write to, which are used to construct the mask.
+ // 1. enable, which depends on an initial &'a [T] and the idxs.
+ // 2. actual values to scatter (self).
+ // 3. &mut [T] which will become our base ptr.
+ unsafe {
+ // Now Entering ☢️ *mut T Zone
+ let base_ptr = crate::simd::ptr::SimdMutPtr::splat(slice.as_mut_ptr());
+ // Ferris forgive me, I have done pointer arithmetic here.
+ let ptrs = base_ptr.wrapping_add(idxs);
+ // The ptrs have been bounds-masked to prevent memory-unsafe writes insha'allah
+ intrinsics::simd_scatter(self, ptrs, enable.to_int())
+ // Cleared ☢️ *mut T Zone
+ }
+ }
+}
+
+impl<T, const LANES: usize> Copy for Simd<T, LANES>
+where
+ T: SimdElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+}
+
+impl<T, const LANES: usize> Clone for Simd<T, LANES>
+where
+ T: SimdElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ fn clone(&self) -> Self {
+ *self
+ }
+}
+
+impl<T, const LANES: usize> Default for Simd<T, LANES>
+where
+ LaneCount<LANES>: SupportedLaneCount,
+ T: SimdElement + Default,
+{
+ #[inline]
+ fn default() -> Self {
+ Self::splat(T::default())
+ }
+}
+
+impl<T, const LANES: usize> PartialEq for Simd<T, LANES>
+where
+ LaneCount<LANES>: SupportedLaneCount,
+ T: SimdElement + PartialEq,
+{
+ #[inline]
+ fn eq(&self, other: &Self) -> bool {
+ // Safety: All SIMD vectors are SimdPartialEq, and the comparison produces a valid mask.
+ let mask = unsafe {
+ let tfvec: Simd<<T as SimdElement>::Mask, LANES> = intrinsics::simd_eq(*self, *other);
+ Mask::from_int_unchecked(tfvec)
+ };
+
+ // Two vectors are equal if all lanes tested true for vertical equality.
+ mask.all()
+ }
+
+ #[allow(clippy::partialeq_ne_impl)]
+ #[inline]
+ fn ne(&self, other: &Self) -> bool {
+ // Safety: All SIMD vectors are SimdPartialEq, and the comparison produces a valid mask.
+ let mask = unsafe {
+ let tfvec: Simd<<T as SimdElement>::Mask, LANES> = intrinsics::simd_ne(*self, *other);
+ Mask::from_int_unchecked(tfvec)
+ };
+
+ // Two vectors are non-equal if any lane tested true for vertical non-equality.
+ mask.any()
+ }
+}
+
+impl<T, const LANES: usize> PartialOrd for Simd<T, LANES>
+where
+ LaneCount<LANES>: SupportedLaneCount,
+ T: SimdElement + PartialOrd,
+{
+ #[inline]
+ fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
+ // TODO use SIMD comparison
+ self.to_array().partial_cmp(other.as_ref())
+ }
+}
+
+impl<T, const LANES: usize> Eq for Simd<T, LANES>
+where
+ LaneCount<LANES>: SupportedLaneCount,
+ T: SimdElement + Eq,
+{
+}
+
+impl<T, const LANES: usize> Ord for Simd<T, LANES>
+where
+ LaneCount<LANES>: SupportedLaneCount,
+ T: SimdElement + Ord,
+{
+ #[inline]
+ fn cmp(&self, other: &Self) -> core::cmp::Ordering {
+ // TODO use SIMD comparison
+ self.to_array().cmp(other.as_ref())
+ }
+}
+
+impl<T, const LANES: usize> core::hash::Hash for Simd<T, LANES>
+where
+ LaneCount<LANES>: SupportedLaneCount,
+ T: SimdElement + core::hash::Hash,
+{
+ #[inline]
+ fn hash<H>(&self, state: &mut H)
+ where
+ H: core::hash::Hasher,
+ {
+ self.as_array().hash(state)
+ }
+}
+
+// array references
+impl<T, const LANES: usize> AsRef<[T; LANES]> for Simd<T, LANES>
+where
+ LaneCount<LANES>: SupportedLaneCount,
+ T: SimdElement,
+{
+ #[inline]
+ fn as_ref(&self) -> &[T; LANES] {
+ &self.0
+ }
+}
+
+impl<T, const LANES: usize> AsMut<[T; LANES]> for Simd<T, LANES>
+where
+ LaneCount<LANES>: SupportedLaneCount,
+ T: SimdElement,
+{
+ #[inline]
+ fn as_mut(&mut self) -> &mut [T; LANES] {
+ &mut self.0
+ }
+}
+
+// slice references
+impl<T, const LANES: usize> AsRef<[T]> for Simd<T, LANES>
+where
+ LaneCount<LANES>: SupportedLaneCount,
+ T: SimdElement,
+{
+ #[inline]
+ fn as_ref(&self) -> &[T] {
+ &self.0
+ }
+}
+
+impl<T, const LANES: usize> AsMut<[T]> for Simd<T, LANES>
+where
+ LaneCount<LANES>: SupportedLaneCount,
+ T: SimdElement,
+{
+ #[inline]
+ fn as_mut(&mut self) -> &mut [T] {
+ &mut self.0
+ }
+}
+
+// vector/array conversion
+impl<T, const LANES: usize> From<[T; LANES]> for Simd<T, LANES>
+where
+ LaneCount<LANES>: SupportedLaneCount,
+ T: SimdElement,
+{
+ fn from(array: [T; LANES]) -> Self {
+ Self(array)
+ }
+}
+
+impl<T, const LANES: usize> From<Simd<T, LANES>> for [T; LANES]
+where
+ LaneCount<LANES>: SupportedLaneCount,
+ T: SimdElement,
+{
+ fn from(vector: Simd<T, LANES>) -> Self {
+ vector.to_array()
+ }
+}
+
+mod sealed {
+ pub trait Sealed {}
+}
+use sealed::Sealed;
+
+/// Marker trait for types that may be used as SIMD vector elements.
+///
+/// # Safety
+/// This trait, when implemented, asserts the compiler can monomorphize
+/// `#[repr(simd)]` structs with the marked type as an element.
+/// Strictly, it is valid to impl this trait for any element type whose vectors will not be miscompiled.
+/// Practically, it is user-unfriendly to impl it for element types whose vectors will not compile at all,
+/// even though allowing the user to try breaks no soundness guarantee.
+pub unsafe trait SimdElement: Sealed + Copy {
+ /// The mask element type corresponding to this element type.
+ type Mask: MaskElement;
+}
+
+impl Sealed for u8 {}
+
+// Safety: u8 is a valid SIMD element type, and is supported by this API
+unsafe impl SimdElement for u8 {
+ type Mask = i8;
+}
+
+impl Sealed for u16 {}
+
+// Safety: u16 is a valid SIMD element type, and is supported by this API
+unsafe impl SimdElement for u16 {
+ type Mask = i16;
+}
+
+impl Sealed for u32 {}
+
+// Safety: u32 is a valid SIMD element type, and is supported by this API
+unsafe impl SimdElement for u32 {
+ type Mask = i32;
+}
+
+impl Sealed for u64 {}
+
+// Safety: u64 is a valid SIMD element type, and is supported by this API
+unsafe impl SimdElement for u64 {
+ type Mask = i64;
+}
+
+impl Sealed for usize {}
+
+// Safety: usize is a valid SIMD element type, and is supported by this API
+unsafe impl SimdElement for usize {
+ type Mask = isize;
+}
+
+impl Sealed for i8 {}
+
+// Safety: i8 is a valid SIMD element type, and is supported by this API
+unsafe impl SimdElement for i8 {
+ type Mask = i8;
+}
+
+impl Sealed for i16 {}
+
+// Safety: i16 is a valid SIMD element type, and is supported by this API
+unsafe impl SimdElement for i16 {
+ type Mask = i16;
+}
+
+impl Sealed for i32 {}
+
+// Safety: i32 is a valid SIMD element type, and is supported by this API
+unsafe impl SimdElement for i32 {
+ type Mask = i32;
+}
+
+impl Sealed for i64 {}
+
+// Safety: i64 is a valid SIMD element type, and is supported by this API
+unsafe impl SimdElement for i64 {
+ type Mask = i64;
+}
+
+impl Sealed for isize {}
+
+// Safety: isize is a valid SIMD element type, and is supported by this API
+unsafe impl SimdElement for isize {
+ type Mask = isize;
+}
+
+impl Sealed for f32 {}
+
+// Safety: f32 is a valid SIMD element type, and is supported by this API
+unsafe impl SimdElement for f32 {
+ type Mask = i32;
+}
+
+impl Sealed for f64 {}
+
+// Safety: f64 is a valid SIMD element type, and is supported by this API
+unsafe impl SimdElement for f64 {
+ type Mask = i64;
+}
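As a rough usage sketch of the gather/scatter surface implemented above (assuming a nightly toolchain with `#![feature(portable_simd)]` and the `gather_or_default`/`scatter` methods defined earlier in this file), out-of-bounds lanes are skipped on write and filled with `T::default()` on read:

#![feature(portable_simd)]
use core_simd::Simd;

fn gather_scatter_demo() {
    let mut out = [0_i32; 8];
    let values = Simd::from_array([10, 20, 30, 40]);
    let idxs = Simd::from_array([7_usize, 2, 100, 0]); // lane 2 (index 100) is out of bounds and is skipped
    values.scatter(&mut out, idxs);
    assert_eq!(out, [40, 0, 20, 0, 0, 0, 0, 10]);

    // Reading back through the same indices: the out-of-bounds lane becomes 0 (the default).
    let back = Simd::<i32, 4>::gather_or_default(&out, idxs);
    assert_eq!(back.to_array(), [10, 20, 0, 40]);
}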
diff --git a/library/portable-simd/crates/core_simd/src/vector/float.rs b/library/portable-simd/crates/core_simd/src/vector/float.rs
new file mode 100644
index 000000000..f836c99b1
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/vector/float.rs
@@ -0,0 +1,24 @@
+#![allow(non_camel_case_types)]
+
+use crate::simd::Simd;
+
+/// A 64-bit SIMD vector with two elements of type `f32`.
+pub type f32x2 = Simd<f32, 2>;
+
+/// A 128-bit SIMD vector with four elements of type `f32`.
+pub type f32x4 = Simd<f32, 4>;
+
+/// A 256-bit SIMD vector with eight elements of type `f32`.
+pub type f32x8 = Simd<f32, 8>;
+
+/// A 512-bit SIMD vector with 16 elements of type `f32`.
+pub type f32x16 = Simd<f32, 16>;
+
+/// A 128-bit SIMD vector with two elements of type `f64`.
+pub type f64x2 = Simd<f64, 2>;
+
+/// A 256-bit SIMD vector with four elements of type `f64`.
+pub type f64x4 = Simd<f64, 4>;
+
+/// A 512-bit SIMD vector with eight elements of type `f64`.
+pub type f64x8 = Simd<f64, 8>;
diff --git a/library/portable-simd/crates/core_simd/src/vector/int.rs b/library/portable-simd/crates/core_simd/src/vector/int.rs
new file mode 100644
index 000000000..20e56c7dc
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/vector/int.rs
@@ -0,0 +1,63 @@
+#![allow(non_camel_case_types)]
+
+use crate::simd::Simd;
+
+/// A SIMD vector with two elements of type `isize`.
+pub type isizex2 = Simd<isize, 2>;
+
+/// A SIMD vector with four elements of type `isize`.
+pub type isizex4 = Simd<isize, 4>;
+
+/// A SIMD vector with eight elements of type `isize`.
+pub type isizex8 = Simd<isize, 8>;
+
+/// A 32-bit SIMD vector with two elements of type `i16`.
+pub type i16x2 = Simd<i16, 2>;
+
+/// A 64-bit SIMD vector with four elements of type `i16`.
+pub type i16x4 = Simd<i16, 4>;
+
+/// A 128-bit SIMD vector with eight elements of type `i16`.
+pub type i16x8 = Simd<i16, 8>;
+
+/// A 256-bit SIMD vector with 16 elements of type `i16`.
+pub type i16x16 = Simd<i16, 16>;
+
+/// A 512-bit SIMD vector with 32 elements of type `i16`.
+pub type i16x32 = Simd<i16, 32>;
+
+/// A 64-bit SIMD vector with two elements of type `i32`.
+pub type i32x2 = Simd<i32, 2>;
+
+/// A 128-bit SIMD vector with four elements of type `i32`.
+pub type i32x4 = Simd<i32, 4>;
+
+/// A 256-bit SIMD vector with eight elements of type `i32`.
+pub type i32x8 = Simd<i32, 8>;
+
+/// A 512-bit SIMD vector with 16 elements of type `i32`.
+pub type i32x16 = Simd<i32, 16>;
+
+/// A 128-bit SIMD vector with two elements of type `i64`.
+pub type i64x2 = Simd<i64, 2>;
+
+/// A 256-bit SIMD vector with four elements of type `i64`.
+pub type i64x4 = Simd<i64, 4>;
+
+/// A 512-bit SIMD vector with eight elements of type `i64`.
+pub type i64x8 = Simd<i64, 8>;
+
+/// A 32-bit SIMD vector with four elements of type `i8`.
+pub type i8x4 = Simd<i8, 4>;
+
+/// A 64-bit SIMD vector with eight elements of type `i8`.
+pub type i8x8 = Simd<i8, 8>;
+
+/// A 128-bit SIMD vector with 16 elements of type `i8`.
+pub type i8x16 = Simd<i8, 16>;
+
+/// A 256-bit SIMD vector with 32 elements of type `i8`.
+pub type i8x32 = Simd<i8, 32>;
+
+/// A 512-bit SIMD vector with 64 elements of type `i8`.
+pub type i8x64 = Simd<i8, 64>;
diff --git a/library/portable-simd/crates/core_simd/src/vector/ptr.rs b/library/portable-simd/crates/core_simd/src/vector/ptr.rs
new file mode 100644
index 000000000..fa756344d
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/vector/ptr.rs
@@ -0,0 +1,51 @@
+//! Private implementation details of public gather/scatter APIs.
+use crate::simd::intrinsics;
+use crate::simd::{LaneCount, Simd, SupportedLaneCount};
+
+/// A vector of *const T.
+#[derive(Debug, Copy, Clone)]
+#[repr(simd)]
+pub(crate) struct SimdConstPtr<T, const LANES: usize>([*const T; LANES]);
+
+impl<T, const LANES: usize> SimdConstPtr<T, LANES>
+where
+ LaneCount<LANES>: SupportedLaneCount,
+ T: Sized,
+{
+ #[inline]
+ #[must_use]
+ pub fn splat(ptr: *const T) -> Self {
+ Self([ptr; LANES])
+ }
+
+ #[inline]
+ #[must_use]
+ pub fn wrapping_add(self, addend: Simd<usize, LANES>) -> Self {
+ // Safety: this intrinsic doesn't have a precondition
+ unsafe { intrinsics::simd_arith_offset(self, addend) }
+ }
+}
+
+/// A vector of *mut T. Be very careful around potential aliasing.
+#[derive(Debug, Copy, Clone)]
+#[repr(simd)]
+pub(crate) struct SimdMutPtr<T, const LANES: usize>([*mut T; LANES]);
+
+impl<T, const LANES: usize> SimdMutPtr<T, LANES>
+where
+ LaneCount<LANES>: SupportedLaneCount,
+ T: Sized,
+{
+ #[inline]
+ #[must_use]
+ pub fn splat(ptr: *mut T) -> Self {
+ Self([ptr; LANES])
+ }
+
+ #[inline]
+ #[must_use]
+ pub fn wrapping_add(self, addend: Simd<usize, LANES>) -> Self {
+ // Safety: this intrinsic doesn't have a precondition
+ unsafe { intrinsics::simd_arith_offset(self, addend) }
+ }
+}
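The two pointer vectors above exist only so the gather/scatter implementations can form one address per lane; per lane, `splat(base).wrapping_add(idxs)` computes the same thing as ordinary raw-pointer offsetting. A scalar model of that step (an illustration, not crate code) might look like:

fn lanewise_ptr_model<T>(base: *const T, idxs: &[usize]) -> Vec<*const T> {
    // Mirrors SimdConstPtr::splat(base).wrapping_add(idxs): each lane holds
    // base.wrapping_add(idx), i.e. base plus idx * size_of::<T>() bytes.
    idxs.iter().map(|&i| base.wrapping_add(i)).collect()
}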
diff --git a/library/portable-simd/crates/core_simd/src/vector/uint.rs b/library/portable-simd/crates/core_simd/src/vector/uint.rs
new file mode 100644
index 000000000..b4a69c443
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/vector/uint.rs
@@ -0,0 +1,63 @@
+#![allow(non_camel_case_types)]
+
+use crate::simd::Simd;
+
+/// A SIMD vector with two elements of type `usize`.
+pub type usizex2 = Simd<usize, 2>;
+
+/// A SIMD vector with four elements of type `usize`.
+pub type usizex4 = Simd<usize, 4>;
+
+/// A SIMD vector with eight elements of type `usize`.
+pub type usizex8 = Simd<usize, 8>;
+
+/// A 32-bit SIMD vector with two elements of type `u16`.
+pub type u16x2 = Simd<u16, 2>;
+
+/// A 64-bit SIMD vector with four elements of type `u16`.
+pub type u16x4 = Simd<u16, 4>;
+
+/// A 128-bit SIMD vector with eight elements of type `u16`.
+pub type u16x8 = Simd<u16, 8>;
+
+/// A 256-bit SIMD vector with 16 elements of type `u16`.
+pub type u16x16 = Simd<u16, 16>;
+
+/// A 512-bit SIMD vector with 32 elements of type `u16`.
+pub type u16x32 = Simd<u16, 32>;
+
+/// A 64-bit SIMD vector with two elements of type `u32`.
+pub type u32x2 = Simd<u32, 2>;
+
+/// A 128-bit SIMD vector with four elements of type `u32`.
+pub type u32x4 = Simd<u32, 4>;
+
+/// A 256-bit SIMD vector with eight elements of type `u32`.
+pub type u32x8 = Simd<u32, 8>;
+
+/// A 512-bit SIMD vector with 16 elements of type `u32`.
+pub type u32x16 = Simd<u32, 16>;
+
+/// A 128-bit SIMD vector with two elements of type `u64`.
+pub type u64x2 = Simd<u64, 2>;
+
+/// A 256-bit SIMD vector with four elements of type `u64`.
+pub type u64x4 = Simd<u64, 4>;
+
+/// A 512-bit SIMD vector with eight elements of type `u64`.
+pub type u64x8 = Simd<u64, 8>;
+
+/// A 32-bit SIMD vector with four elements of type `u8`.
+pub type u8x4 = Simd<u8, 4>;
+
+/// A 64-bit SIMD vector with eight elements of type `u8`.
+pub type u8x8 = Simd<u8, 8>;
+
+/// A 128-bit SIMD vector with 16 elements of type `u8`.
+pub type u8x16 = Simd<u8, 16>;
+
+/// A 256-bit SIMD vector with 32 elements of type `u8`.
+pub type u8x32 = Simd<u8, 32>;
+
+/// A 512-bit SIMD vector with 64 elements of type `u8`.
+pub type u8x64 = Simd<u8, 64>;
diff --git a/library/portable-simd/crates/core_simd/src/vendor.rs b/library/portable-simd/crates/core_simd/src/vendor.rs
new file mode 100644
index 000000000..9fb70218c
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/vendor.rs
@@ -0,0 +1,31 @@
+/// Provides implementations of `From<$a> for $b` and `From<$b> for $a` that transmute the value.
+#[allow(unused)]
+macro_rules! from_transmute {
+ { unsafe $a:ty => $b:ty } => {
+ from_transmute!{ @impl $a => $b }
+ from_transmute!{ @impl $b => $a }
+ };
+ { @impl $from:ty => $to:ty } => {
+ impl core::convert::From<$from> for $to {
+ #[inline]
+ fn from(value: $from) -> $to {
+ // Safety: transmuting between vectors is safe, but the caller of this macro
+ // checks the invariants
+ unsafe { core::mem::transmute(value) }
+ }
+ }
+ };
+}
+
+/// Conversions to x86's SIMD types.
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+mod x86;
+
+#[cfg(target_arch = "wasm32")]
+mod wasm32;
+
+#[cfg(any(target_arch = "aarch64", target_arch = "arm",))]
+mod arm;
+
+#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
+mod powerpc;
diff --git a/library/portable-simd/crates/core_simd/src/vendor/arm.rs b/library/portable-simd/crates/core_simd/src/vendor/arm.rs
new file mode 100644
index 000000000..ff3b69ccf
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/vendor/arm.rs
@@ -0,0 +1,76 @@
+#![allow(unused)]
+use crate::simd::*;
+
+#[cfg(target_arch = "arm")]
+use core::arch::arm::*;
+
+#[cfg(target_arch = "aarch64")]
+use core::arch::aarch64::*;
+
+#[cfg(any(
+ target_arch = "aarch64",
+ all(target_arch = "arm", target_feature = "v7"),
+))]
+mod neon {
+ use super::*;
+
+ from_transmute! { unsafe f32x2 => float32x2_t }
+ from_transmute! { unsafe f32x4 => float32x4_t }
+
+ from_transmute! { unsafe u8x8 => uint8x8_t }
+ from_transmute! { unsafe u8x16 => uint8x16_t }
+ from_transmute! { unsafe i8x8 => int8x8_t }
+ from_transmute! { unsafe i8x16 => int8x16_t }
+ from_transmute! { unsafe u8x8 => poly8x8_t }
+ from_transmute! { unsafe u8x16 => poly8x16_t }
+
+ from_transmute! { unsafe u16x4 => uint16x4_t }
+ from_transmute! { unsafe u16x8 => uint16x8_t }
+ from_transmute! { unsafe i16x4 => int16x4_t }
+ from_transmute! { unsafe i16x8 => int16x8_t }
+ from_transmute! { unsafe u16x4 => poly16x4_t }
+ from_transmute! { unsafe u16x8 => poly16x8_t }
+
+ from_transmute! { unsafe u32x2 => uint32x2_t }
+ from_transmute! { unsafe u32x4 => uint32x4_t }
+ from_transmute! { unsafe i32x2 => int32x2_t }
+ from_transmute! { unsafe i32x4 => int32x4_t }
+
+ from_transmute! { unsafe Simd<u64, 1> => uint64x1_t }
+ from_transmute! { unsafe u64x2 => uint64x2_t }
+ from_transmute! { unsafe Simd<i64, 1> => int64x1_t }
+ from_transmute! { unsafe i64x2 => int64x2_t }
+ from_transmute! { unsafe Simd<u64, 1> => poly64x1_t }
+ from_transmute! { unsafe u64x2 => poly64x2_t }
+}
+
+#[cfg(any(
+ all(target_feature = "v5te", not(target_feature = "mclass")),
+ all(target_feature = "mclass", target_feature = "dsp"),
+))]
+mod dsp {
+ use super::*;
+
+ from_transmute! { unsafe Simd<u16, 2> => uint16x2_t }
+ from_transmute! { unsafe Simd<i16, 2> => int16x2_t }
+}
+
+#[cfg(any(
+ all(target_feature = "v6", not(target_feature = "mclass")),
+ all(target_feature = "mclass", target_feature = "dsp"),
+))]
+mod simd32 {
+ use super::*;
+
+ from_transmute! { unsafe Simd<u8, 4> => uint8x4_t }
+ from_transmute! { unsafe Simd<i8, 4> => int8x4_t }
+}
+
+#[cfg(target_arch = "aarch64")]
+mod aarch64 {
+ use super::neon::*;
+ use super::*;
+
+ from_transmute! { unsafe Simd<f64, 1> => float64x1_t }
+ from_transmute! { unsafe f64x2 => float64x2_t }
+}
diff --git a/library/portable-simd/crates/core_simd/src/vendor/powerpc.rs b/library/portable-simd/crates/core_simd/src/vendor/powerpc.rs
new file mode 100644
index 000000000..92f97d471
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/vendor/powerpc.rs
@@ -0,0 +1,11 @@
+use crate::simd::*;
+
+#[cfg(target_arch = "powerpc")]
+use core::arch::powerpc::*;
+
+#[cfg(target_arch = "powerpc64")]
+use core::arch::powerpc64::*;
+
+from_transmute! { unsafe f64x2 => vector_double }
+from_transmute! { unsafe i64x2 => vector_signed_long }
+from_transmute! { unsafe u64x2 => vector_unsigned_long }
diff --git a/library/portable-simd/crates/core_simd/src/vendor/wasm32.rs b/library/portable-simd/crates/core_simd/src/vendor/wasm32.rs
new file mode 100644
index 000000000..ef3baf885
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/vendor/wasm32.rs
@@ -0,0 +1,30 @@
+use crate::simd::*;
+use core::arch::wasm32::v128;
+
+from_transmute! { unsafe u8x16 => v128 }
+from_transmute! { unsafe i8x16 => v128 }
+
+from_transmute! { unsafe u16x8 => v128 }
+from_transmute! { unsafe i16x8 => v128 }
+
+from_transmute! { unsafe u32x4 => v128 }
+from_transmute! { unsafe i32x4 => v128 }
+from_transmute! { unsafe f32x4 => v128 }
+
+from_transmute! { unsafe u64x2 => v128 }
+from_transmute! { unsafe i64x2 => v128 }
+from_transmute! { unsafe f64x2 => v128 }
+
+#[cfg(target_pointer_width = "32")]
+mod p32 {
+ use super::*;
+ from_transmute! { unsafe usizex4 => v128 }
+ from_transmute! { unsafe isizex4 => v128 }
+}
+
+#[cfg(target_pointer_width = "64")]
+mod p64 {
+ use super::*;
+ from_transmute! { unsafe usizex2 => v128 }
+ from_transmute! { unsafe isizex2 => v128 }
+}
diff --git a/library/portable-simd/crates/core_simd/src/vendor/x86.rs b/library/portable-simd/crates/core_simd/src/vendor/x86.rs
new file mode 100644
index 000000000..0dd47015e
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/vendor/x86.rs
@@ -0,0 +1,63 @@
+use crate::simd::*;
+
+#[cfg(target_arch = "x86")]
+use core::arch::x86::*;
+
+#[cfg(target_arch = "x86_64")]
+use core::arch::x86_64::*;
+
+from_transmute! { unsafe u8x16 => __m128i }
+from_transmute! { unsafe u8x32 => __m256i }
+from_transmute! { unsafe u8x64 => __m512i }
+from_transmute! { unsafe i8x16 => __m128i }
+from_transmute! { unsafe i8x32 => __m256i }
+from_transmute! { unsafe i8x64 => __m512i }
+
+from_transmute! { unsafe u16x8 => __m128i }
+from_transmute! { unsafe u16x16 => __m256i }
+from_transmute! { unsafe u16x32 => __m512i }
+from_transmute! { unsafe i16x8 => __m128i }
+from_transmute! { unsafe i16x16 => __m256i }
+from_transmute! { unsafe i16x32 => __m512i }
+
+from_transmute! { unsafe u32x4 => __m128i }
+from_transmute! { unsafe u32x8 => __m256i }
+from_transmute! { unsafe u32x16 => __m512i }
+from_transmute! { unsafe i32x4 => __m128i }
+from_transmute! { unsafe i32x8 => __m256i }
+from_transmute! { unsafe i32x16 => __m512i }
+from_transmute! { unsafe f32x4 => __m128 }
+from_transmute! { unsafe f32x8 => __m256 }
+from_transmute! { unsafe f32x16 => __m512 }
+
+from_transmute! { unsafe u64x2 => __m128i }
+from_transmute! { unsafe u64x4 => __m256i }
+from_transmute! { unsafe u64x8 => __m512i }
+from_transmute! { unsafe i64x2 => __m128i }
+from_transmute! { unsafe i64x4 => __m256i }
+from_transmute! { unsafe i64x8 => __m512i }
+from_transmute! { unsafe f64x2 => __m128d }
+from_transmute! { unsafe f64x4 => __m256d }
+from_transmute! { unsafe f64x8 => __m512d }
+
+#[cfg(target_pointer_width = "32")]
+mod p32 {
+ use super::*;
+ from_transmute! { unsafe usizex4 => __m128i }
+ from_transmute! { unsafe usizex8 => __m256i }
+ from_transmute! { unsafe Simd<usize, 16> => __m512i }
+ from_transmute! { unsafe isizex4 => __m128i }
+ from_transmute! { unsafe isizex8 => __m256i }
+ from_transmute! { unsafe Simd<isize, 16> => __m512i }
+}
+
+#[cfg(target_pointer_width = "64")]
+mod p64 {
+ use super::*;
+ from_transmute! { unsafe usizex2 => __m128i }
+ from_transmute! { unsafe usizex4 => __m256i }
+ from_transmute! { unsafe usizex8 => __m512i }
+ from_transmute! { unsafe isizex2 => __m128i }
+ from_transmute! { unsafe isizex4 => __m256i }
+ from_transmute! { unsafe isizex8 => __m512i }
+}
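Because `from_transmute!` generates `From` impls in both directions, a portable vector converts to and from the matching architecture-specific type with plain `.into()`. A small sketch (assuming an x86_64 target and the `f32x4 => __m128` impl generated above):

#[cfg(target_arch = "x86_64")]
fn roundtrip_m128() {
    use core::arch::x86_64::__m128;
    use core_simd::f32x4;

    let v = f32x4::from_array([1.0, 2.0, 3.0, 4.0]);
    let raw: __m128 = v.into(); // transmute-based From generated by from_transmute!
    let back: f32x4 = raw.into();
    assert_eq!(back.to_array(), [1.0, 2.0, 3.0, 4.0]);
}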
diff --git a/library/portable-simd/crates/core_simd/tests/autoderef.rs b/library/portable-simd/crates/core_simd/tests/autoderef.rs
new file mode 100644
index 000000000..9359da16e
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/tests/autoderef.rs
@@ -0,0 +1,22 @@
+// Test that we handle all our "auto-deref" cases correctly.
+#![feature(portable_simd)]
+use core_simd::f32x4;
+
+#[cfg(target_arch = "wasm32")]
+use wasm_bindgen_test::*;
+
+#[cfg(target_arch = "wasm32")]
+wasm_bindgen_test_configure!(run_in_browser);
+
+#[test]
+#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+fn deref() {
+ let x = f32x4::splat(1.0);
+ let y = f32x4::splat(2.0);
+ let a = &x;
+ let b = &y;
+ assert_eq!(f32x4::splat(3.0), x + y);
+ assert_eq!(f32x4::splat(3.0), x + b);
+ assert_eq!(f32x4::splat(3.0), a + y);
+ assert_eq!(f32x4::splat(3.0), a + b);
+}
diff --git a/library/portable-simd/crates/core_simd/tests/cast.rs b/library/portable-simd/crates/core_simd/tests/cast.rs
new file mode 100644
index 000000000..ab5650f07
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/tests/cast.rs
@@ -0,0 +1,37 @@
+#![feature(portable_simd)]
+macro_rules! cast_types {
+ ($start:ident, $($target:ident),*) => {
+ mod $start {
+ use core_simd::simd::Simd;
+ type Vector<const N: usize> = Simd<$start, N>;
+ $(
+ mod $target {
+ use super::*;
+ test_helpers::test_lanes! {
+ fn cast_as<const N: usize>() {
+ test_helpers::test_unary_elementwise(
+ &Vector::<N>::cast::<$target>,
+ &|x| x as $target,
+ &|_| true,
+ )
+ }
+ }
+ }
+ )*
+ }
+ };
+}
+
+// The hypothesis is that widening conversions aren't terribly interesting.
+cast_types!(f32, f64, i8, u8, usize, isize);
+cast_types!(f64, f32, i8, u8, usize, isize);
+cast_types!(i8, u8, f32);
+cast_types!(u8, i8, f32);
+cast_types!(i16, u16, i8, u8, f32);
+cast_types!(u16, i16, i8, u8, f32);
+cast_types!(i32, u32, i8, u8, f32, f64);
+cast_types!(u32, i32, i8, u8, f32, f64);
+cast_types!(i64, u64, i8, u8, isize, usize, f32, f64);
+cast_types!(u64, i64, i8, u8, isize, usize, f32, f64);
+cast_types!(isize, usize, i8, u8, f32, f64);
+cast_types!(usize, isize, i8, u8, f32, f64);
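The property being tested is that `Simd::cast` matches scalar `as` casts lane by lane. A concrete sketch of what that means for a float-to-int cast (assuming the same `as` semantics the closure above compares against: truncation toward zero, saturation at the target range, NaN mapping to 0):

#![feature(portable_simd)]
use core_simd::simd::Simd;

fn cast_matches_as() {
    let v = Simd::<f32, 4>::from_array([1.9, -3.7, 300.0, f32::NAN]);
    let c = v.cast::<i8>();
    assert_eq!(c.to_array(), [1, -3, 127, 0]);
}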
diff --git a/library/portable-simd/crates/core_simd/tests/f32_ops.rs b/library/portable-simd/crates/core_simd/tests/f32_ops.rs
new file mode 100644
index 000000000..414a832b1
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/tests/f32_ops.rs
@@ -0,0 +1,5 @@
+#![feature(portable_simd)]
+
+#[macro_use]
+mod ops_macros;
+impl_float_tests! { f32, i32 }
diff --git a/library/portable-simd/crates/core_simd/tests/f64_ops.rs b/library/portable-simd/crates/core_simd/tests/f64_ops.rs
new file mode 100644
index 000000000..e0a1fa33f
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/tests/f64_ops.rs
@@ -0,0 +1,5 @@
+#![feature(portable_simd)]
+
+#[macro_use]
+mod ops_macros;
+impl_float_tests! { f64, i64 }
diff --git a/library/portable-simd/crates/core_simd/tests/i16_ops.rs b/library/portable-simd/crates/core_simd/tests/i16_ops.rs
new file mode 100644
index 000000000..f6c5d74fb
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/tests/i16_ops.rs
@@ -0,0 +1,5 @@
+#![feature(portable_simd)]
+
+#[macro_use]
+mod ops_macros;
+impl_signed_tests! { i16 }
diff --git a/library/portable-simd/crates/core_simd/tests/i32_ops.rs b/library/portable-simd/crates/core_simd/tests/i32_ops.rs
new file mode 100644
index 000000000..69a831c52
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/tests/i32_ops.rs
@@ -0,0 +1,5 @@
+#![feature(portable_simd)]
+
+#[macro_use]
+mod ops_macros;
+impl_signed_tests! { i32 }
diff --git a/library/portable-simd/crates/core_simd/tests/i64_ops.rs b/library/portable-simd/crates/core_simd/tests/i64_ops.rs
new file mode 100644
index 000000000..37ac08117
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/tests/i64_ops.rs
@@ -0,0 +1,5 @@
+#![feature(portable_simd)]
+
+#[macro_use]
+mod ops_macros;
+impl_signed_tests! { i64 }
diff --git a/library/portable-simd/crates/core_simd/tests/i8_ops.rs b/library/portable-simd/crates/core_simd/tests/i8_ops.rs
new file mode 100644
index 000000000..11e4a5cd6
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/tests/i8_ops.rs
@@ -0,0 +1,5 @@
+#![feature(portable_simd)]
+
+#[macro_use]
+mod ops_macros;
+impl_signed_tests! { i8 }
diff --git a/library/portable-simd/crates/core_simd/tests/isize_ops.rs b/library/portable-simd/crates/core_simd/tests/isize_ops.rs
new file mode 100644
index 000000000..5cc9de2b7
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/tests/isize_ops.rs
@@ -0,0 +1,5 @@
+#![feature(portable_simd)]
+
+#[macro_use]
+mod ops_macros;
+impl_signed_tests! { isize }
diff --git a/library/portable-simd/crates/core_simd/tests/mask_ops.rs b/library/portable-simd/crates/core_simd/tests/mask_ops.rs
new file mode 100644
index 000000000..f113b50cb
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/tests/mask_ops.rs
@@ -0,0 +1,3 @@
+#![feature(portable_simd)]
+
+mod mask_ops_impl;
diff --git a/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask16.rs b/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask16.rs
new file mode 100644
index 000000000..0fe82fa68
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask16.rs
@@ -0,0 +1,4 @@
+mask_tests! { mask16x4, 4 }
+mask_tests! { mask16x8, 8 }
+mask_tests! { mask16x16, 16 }
+mask_tests! { mask16x32, 32 }
diff --git a/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask32.rs b/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask32.rs
new file mode 100644
index 000000000..66d987a43
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask32.rs
@@ -0,0 +1,4 @@
+mask_tests! { mask32x2, 2 }
+mask_tests! { mask32x4, 4 }
+mask_tests! { mask32x8, 8 }
+mask_tests! { mask32x16, 16 }
diff --git a/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask64.rs b/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask64.rs
new file mode 100644
index 000000000..a1f1f67b2
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask64.rs
@@ -0,0 +1,3 @@
+mask_tests! { mask64x2, 2 }
+mask_tests! { mask64x4, 4 }
+mask_tests! { mask64x8, 8 }
diff --git a/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask8.rs b/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask8.rs
new file mode 100644
index 000000000..9c06fbc04
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask8.rs
@@ -0,0 +1,3 @@
+mask_tests! { mask8x8, 8 }
+mask_tests! { mask8x16, 16 }
+mask_tests! { mask8x32, 32 }
diff --git a/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask_macros.rs b/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask_macros.rs
new file mode 100644
index 000000000..795f9e27c
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask_macros.rs
@@ -0,0 +1,225 @@
+macro_rules! mask_tests {
+ { $vector:ident, $lanes:literal } => {
+ #[cfg(test)]
+ mod $vector {
+ use core_simd::$vector as Vector;
+ const LANES: usize = $lanes;
+
+ #[cfg(target_arch = "wasm32")]
+ use wasm_bindgen_test::*;
+
+ #[cfg(target_arch = "wasm32")]
+ wasm_bindgen_test_configure!(run_in_browser);
+
+ fn from_slice(slice: &[bool]) -> Vector {
+ let mut value = Vector::default();
+ for (i, b) in slice.iter().take(LANES).enumerate() {
+ value.set(i, *b);
+ }
+ value
+ }
+
+ fn apply_unary_lanewise(x: Vector, f: impl Fn(bool) -> bool) -> Vector {
+ let mut value = Vector::default();
+ for i in 0..LANES {
+ value.set(i, f(x.test(i)));
+ }
+ value
+ }
+
+ fn apply_binary_lanewise(x: Vector, y: Vector, f: impl Fn(bool, bool) -> bool) -> Vector {
+ let mut value = Vector::default();
+ for i in 0..LANES {
+ value.set(i, f(x.test(i), y.test(i)));
+ }
+ value
+ }
+
+ fn apply_binary_scalar_lhs_lanewise(x: bool, mut y: Vector, f: impl Fn(bool, bool) -> bool) -> Vector {
+ for i in 0..LANES {
+ y.set(i, f(x, y.test(i)));
+ }
+ y
+ }
+
+ fn apply_binary_scalar_rhs_lanewise(mut x: Vector, y: bool, f: impl Fn(bool, bool) -> bool) -> Vector {
+ for i in 0..LANES {
+ x.set(i, f(x.test(i), y));
+ }
+ x
+ }
+
+ const A: [bool; 64] = [
+ false, true, false, true, false, false, true, true,
+ false, true, false, true, false, false, true, true,
+ false, true, false, true, false, false, true, true,
+ false, true, false, true, false, false, true, true,
+ false, true, false, true, false, false, true, true,
+ false, true, false, true, false, false, true, true,
+ false, true, false, true, false, false, true, true,
+ false, true, false, true, false, false, true, true,
+ ];
+ const B: [bool; 64] = [
+ false, false, true, true, false, true, false, true,
+ false, false, true, true, false, true, false, true,
+ false, false, true, true, false, true, false, true,
+ false, false, true, true, false, true, false, true,
+ false, false, true, true, false, true, false, true,
+ false, false, true, true, false, true, false, true,
+ false, false, true, true, false, true, false, true,
+ false, false, true, true, false, true, false, true,
+ ];
+
+ #[test]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn bitand() {
+ let a = from_slice(&A);
+ let b = from_slice(&B);
+ let expected = apply_binary_lanewise(a, b, core::ops::BitAnd::bitand);
+ assert_eq!(a & b, expected);
+ }
+
+ #[test]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn bitand_assign() {
+ let mut a = from_slice(&A);
+ let b = from_slice(&B);
+ let expected = apply_binary_lanewise(a, b, core::ops::BitAnd::bitand);
+ a &= b;
+ assert_eq!(a, expected);
+ }
+
+ #[test]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn bitand_scalar_rhs() {
+ let a = from_slice(&A);
+ let expected = a;
+ assert_eq!(a & true, expected);
+ assert_eq!(a & false, Vector::splat(false));
+ }
+
+ #[test]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn bitand_scalar_lhs() {
+ let a = from_slice(&A);
+ let expected = a;
+ assert_eq!(true & a, expected);
+ assert_eq!(false & a, Vector::splat(false));
+ }
+
+ #[test]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn bitand_assign_scalar() {
+ let mut a = from_slice(&A);
+ let expected = a;
+ a &= true;
+ assert_eq!(a, expected);
+ a &= false;
+ assert_eq!(a, Vector::splat(false));
+ }
+
+ #[test]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn bitor() {
+ let a = from_slice(&A);
+ let b = from_slice(&B);
+ let expected = apply_binary_lanewise(a, b, core::ops::BitOr::bitor);
+ assert_eq!(a | b, expected);
+ }
+
+ #[test]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn bitor_assign() {
+ let mut a = from_slice(&A);
+ let b = from_slice(&B);
+ let expected = apply_binary_lanewise(a, b, core::ops::BitOr::bitor);
+ a |= b;
+ assert_eq!(a, expected);
+ }
+
+ #[test]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn bitor_scalar_rhs() {
+ let a = from_slice(&A);
+ assert_eq!(a | false, a);
+ assert_eq!(a | true, Vector::splat(true));
+ }
+
+ #[test]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn bitor_scalar_lhs() {
+ let a = from_slice(&A);
+ assert_eq!(false | a, a);
+ assert_eq!(true | a, Vector::splat(true));
+ }
+
+ #[test]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn bitor_assign_scalar() {
+ let mut a = from_slice(&A);
+ let expected = a;
+ a |= false;
+ assert_eq!(a, expected);
+ a |= true;
+ assert_eq!(a, Vector::splat(true));
+ }
+
+ #[test]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn bitxor() {
+ let a = from_slice(&A);
+ let b = from_slice(&B);
+ let expected = apply_binary_lanewise(a, b, core::ops::BitXor::bitxor);
+ assert_eq!(a ^ b, expected);
+ }
+
+ #[test]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn bitxor_assign() {
+ let mut a = from_slice(&A);
+ let b = from_slice(&B);
+ let expected = apply_binary_lanewise(a, b, core::ops::BitXor::bitxor);
+ a ^= b;
+ assert_eq!(a, expected);
+ }
+
+ #[test]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn bitxor_scalar_rhs() {
+ let a = from_slice(&A);
+ let expected = apply_binary_scalar_rhs_lanewise(a, true, core::ops::BitXor::bitxor);
+ assert_eq!(a ^ false, a);
+ assert_eq!(a ^ true, expected);
+ }
+
+ #[test]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn bitxor_scalar_lhs() {
+ let a = from_slice(&A);
+ let expected = apply_binary_scalar_lhs_lanewise(true, a, core::ops::BitXor::bitxor);
+ assert_eq!(false ^ a, a);
+ assert_eq!(true ^ a, expected);
+ }
+
+ #[test]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn bitxor_assign_scalar() {
+ let mut a = from_slice(&A);
+ let expected_unset = a;
+ let expected_set = apply_binary_scalar_rhs_lanewise(a, true, core::ops::BitXor::bitxor);
+ a ^= false;
+ assert_eq!(a, expected_unset);
+ a ^= true;
+ assert_eq!(a, expected_set);
+ }
+
+ #[test]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn not() {
+ let v = from_slice(&A);
+ let expected = apply_unary_lanewise(v, core::ops::Not::not);
+ assert_eq!(!v, expected);
+ }
+ }
+ }
+}
diff --git a/library/portable-simd/crates/core_simd/tests/mask_ops_impl/masksize.rs b/library/portable-simd/crates/core_simd/tests/mask_ops_impl/masksize.rs
new file mode 100644
index 000000000..e0a44d870
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/tests/mask_ops_impl/masksize.rs
@@ -0,0 +1,3 @@
+mask_tests! { masksizex2, 2 }
+mask_tests! { masksizex4, 4 }
+mask_tests! { masksizex8, 8 }
diff --git a/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mod.rs b/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mod.rs
new file mode 100644
index 000000000..b9ec8462a
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mod.rs
@@ -0,0 +1,9 @@
+#[macro_use]
+mod mask_macros;
+
+#[rustfmt::skip]
+mod mask8;
+mod mask16;
+mod mask32;
+mod mask64;
+mod masksize;
diff --git a/library/portable-simd/crates/core_simd/tests/masks.rs b/library/portable-simd/crates/core_simd/tests/masks.rs
new file mode 100644
index 000000000..673d0db93
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/tests/masks.rs
@@ -0,0 +1,158 @@
+#![feature(portable_simd)]
+
+#[cfg(target_arch = "wasm32")]
+use wasm_bindgen_test::*;
+
+#[cfg(target_arch = "wasm32")]
+wasm_bindgen_test_configure!(run_in_browser);
+
+macro_rules! test_mask_api {
+ { $type:ident } => {
+ #[allow(non_snake_case)]
+ mod $type {
+ #[cfg(target_arch = "wasm32")]
+ use wasm_bindgen_test::*;
+
+ #[test]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn set_and_test() {
+ let values = [true, false, false, true, false, false, true, false];
+ let mut mask = core_simd::Mask::<$type, 8>::splat(false);
+ for (lane, value) in values.iter().copied().enumerate() {
+ mask.set(lane, value);
+ }
+ for (lane, value) in values.iter().copied().enumerate() {
+ assert_eq!(mask.test(lane), value);
+ }
+ }
+
+ #[test]
+ #[should_panic]
+ fn set_invalid_lane() {
+ let mut mask = core_simd::Mask::<$type, 8>::splat(false);
+ mask.set(8, true);
+ let _ = mask;
+ }
+
+ #[test]
+ #[should_panic]
+ fn test_invalid_lane() {
+ let mask = core_simd::Mask::<$type, 8>::splat(false);
+ let _ = mask.test(8);
+ }
+
+ #[test]
+ fn any() {
+ assert!(!core_simd::Mask::<$type, 8>::splat(false).any());
+ assert!(core_simd::Mask::<$type, 8>::splat(true).any());
+ let mut v = core_simd::Mask::<$type, 8>::splat(false);
+ v.set(2, true);
+ assert!(v.any());
+ }
+
+ #[test]
+ fn all() {
+ assert!(!core_simd::Mask::<$type, 8>::splat(false).all());
+ assert!(core_simd::Mask::<$type, 8>::splat(true).all());
+ let mut v = core_simd::Mask::<$type, 8>::splat(false);
+ v.set(2, true);
+ assert!(!v.all());
+ }
+
+ #[test]
+ fn roundtrip_int_conversion() {
+ let values = [true, false, false, true, false, false, true, false];
+ let mask = core_simd::Mask::<$type, 8>::from_array(values);
+ let int = mask.to_int();
+ assert_eq!(int.to_array(), [-1, 0, 0, -1, 0, 0, -1, 0]);
+ assert_eq!(core_simd::Mask::<$type, 8>::from_int(int), mask);
+ }
+
+ #[test]
+ fn roundtrip_bitmask_conversion() {
+ use core_simd::ToBitMask;
+ let values = [
+ true, false, false, true, false, false, true, false,
+ true, true, false, false, false, false, false, true,
+ ];
+ let mask = core_simd::Mask::<$type, 16>::from_array(values);
+ let bitmask = mask.to_bitmask();
+ assert_eq!(bitmask, 0b1000001101001001);
+ assert_eq!(core_simd::Mask::<$type, 16>::from_bitmask(bitmask), mask);
+ }
+
+ #[test]
+ fn roundtrip_bitmask_conversion_short() {
+ use core_simd::ToBitMask;
+
+ let values = [
+ false, false, false, true,
+ ];
+ let mask = core_simd::Mask::<$type, 4>::from_array(values);
+ let bitmask = mask.to_bitmask();
+ assert_eq!(bitmask, 0b1000);
+ assert_eq!(core_simd::Mask::<$type, 4>::from_bitmask(bitmask), mask);
+
+ let values = [true, false];
+ let mask = core_simd::Mask::<$type, 2>::from_array(values);
+ let bitmask = mask.to_bitmask();
+ assert_eq!(bitmask, 0b01);
+ assert_eq!(core_simd::Mask::<$type, 2>::from_bitmask(bitmask), mask);
+ }
+
+ #[test]
+ fn cast() {
+ fn cast_impl<T: core_simd::MaskElement>()
+ where
+ core_simd::Mask<$type, 8>: Into<core_simd::Mask<T, 8>>,
+ {
+ let values = [true, false, false, true, false, false, true, false];
+ let mask = core_simd::Mask::<$type, 8>::from_array(values);
+
+ let cast_mask = mask.cast::<T>();
+ assert_eq!(values, cast_mask.to_array());
+
+ let into_mask: core_simd::Mask<T, 8> = mask.into();
+ assert_eq!(values, into_mask.to_array());
+ }
+
+ cast_impl::<i8>();
+ cast_impl::<i16>();
+ cast_impl::<i32>();
+ cast_impl::<i64>();
+ cast_impl::<isize>();
+ }
+
+ #[cfg(feature = "generic_const_exprs")]
+ #[test]
+ fn roundtrip_bitmask_array_conversion() {
+ use core_simd::ToBitMaskArray;
+ let values = [
+ true, false, false, true, false, false, true, false,
+ true, true, false, false, false, false, false, true,
+ ];
+ let mask = core_simd::Mask::<$type, 16>::from_array(values);
+ let bitmask = mask.to_bitmask_array();
+ assert_eq!(bitmask, [0b01001001, 0b10000011]);
+ assert_eq!(core_simd::Mask::<$type, 16>::from_bitmask_array(bitmask), mask);
+ }
+ }
+ }
+}
+
+mod mask_api {
+ test_mask_api! { i8 }
+ test_mask_api! { i16 }
+ test_mask_api! { i32 }
+ test_mask_api! { i64 }
+ test_mask_api! { isize }
+}
+
+#[test]
+fn convert() {
+ let values = [true, false, false, true, false, false, true, false];
+ assert_eq!(
+ core_simd::Mask::<i8, 8>::from_array(values),
+ core_simd::Mask::<i32, 8>::from_array(values).into()
+ );
+}
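The bitmask round-trip expectations above encode the lane ordering: lane `i` of the mask becomes bit `i` of the integer, so lane 0 is the least significant bit. A minimal sketch of that convention (assuming the `ToBitMask` trait exercised above):

#![feature(portable_simd)]
use core_simd::{Mask, ToBitMask};

fn bitmask_lane_order() {
    let mask = Mask::<i32, 4>::from_array([true, false, false, true]);
    // Lanes 0 and 3 are set, so bits 0 and 3 are set.
    assert_eq!(mask.to_bitmask(), 0b1001);
    assert_eq!(Mask::<i32, 4>::from_bitmask(0b1001), mask);
}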
diff --git a/library/portable-simd/crates/core_simd/tests/ops_macros.rs b/library/portable-simd/crates/core_simd/tests/ops_macros.rs
new file mode 100644
index 000000000..f759394d0
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/tests/ops_macros.rs
@@ -0,0 +1,607 @@
+/// Implements a test on a unary operation using proptest.
+///
+/// Compares the vector operation to the equivalent scalar operation.
+#[macro_export]
+macro_rules! impl_unary_op_test {
+ { $scalar:ty, $trait:ident :: $fn:ident, $scalar_fn:expr } => {
+ test_helpers::test_lanes! {
+ fn $fn<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &<core_simd::Simd<$scalar, LANES> as core::ops::$trait>::$fn,
+ &$scalar_fn,
+ &|_| true,
+ );
+ }
+ }
+ };
+ { $scalar:ty, $trait:ident :: $fn:ident } => {
+ impl_unary_op_test! { $scalar, $trait::$fn, <$scalar as core::ops::$trait>::$fn }
+ };
+}
+
+/// Implements a test on a binary operation using proptest.
+///
+/// Compares the vector operation to the equivalent scalar operation.
+#[macro_export]
+macro_rules! impl_binary_op_test {
+ { $scalar:ty, $trait:ident :: $fn:ident, $trait_assign:ident :: $fn_assign:ident, $scalar_fn:expr } => {
+ mod $fn {
+ use super::*;
+ use core_simd::Simd;
+
+ test_helpers::test_lanes! {
+ fn normal<const LANES: usize>() {
+ test_helpers::test_binary_elementwise(
+ &<Simd<$scalar, LANES> as core::ops::$trait>::$fn,
+ &$scalar_fn,
+ &|_, _| true,
+ );
+ }
+
+ fn assign<const LANES: usize>() {
+ test_helpers::test_binary_elementwise(
+ &|mut a, b| { <Simd<$scalar, LANES> as core::ops::$trait_assign>::$fn_assign(&mut a, b); a },
+ &$scalar_fn,
+ &|_, _| true,
+ );
+ }
+ }
+ }
+ };
+ { $scalar:ty, $trait:ident :: $fn:ident, $trait_assign:ident :: $fn_assign:ident } => {
+ impl_binary_op_test! { $scalar, $trait::$fn, $trait_assign::$fn_assign, <$scalar as core::ops::$trait>::$fn }
+ };
+}
+
+/// Implements a test on a binary operation using proptest.
+///
+/// Like `impl_binary_op_test`, but allows providing a function for rejecting particular inputs
+/// (like the `proptest_assume` macro).
+///
+/// Compares the vector operation to the equivalent scalar operation.
+#[macro_export]
+macro_rules! impl_binary_checked_op_test {
+ { $scalar:ty, $trait:ident :: $fn:ident, $trait_assign:ident :: $fn_assign:ident, $scalar_fn:expr, $check_fn:expr } => {
+ mod $fn {
+ use super::*;
+ use core_simd::Simd;
+
+ test_helpers::test_lanes! {
+ fn normal<const LANES: usize>() {
+ test_helpers::test_binary_elementwise(
+ &<Simd<$scalar, LANES> as core::ops::$trait>::$fn,
+ &$scalar_fn,
+ &|x, y| x.iter().zip(y.iter()).all(|(x, y)| $check_fn(*x, *y)),
+ );
+ }
+
+ fn assign<const LANES: usize>() {
+ test_helpers::test_binary_elementwise(
+ &|mut a, b| { <Simd<$scalar, LANES> as core::ops::$trait_assign>::$fn_assign(&mut a, b); a },
+ &$scalar_fn,
+ &|x, y| x.iter().zip(y.iter()).all(|(x, y)| $check_fn(*x, *y)),
+ )
+ }
+ }
+ }
+ };
+ { $scalar:ty, $trait:ident :: $fn:ident, $trait_assign:ident :: $fn_assign:ident, $check_fn:expr } => {
+ impl_binary_checked_op_test! { $scalar, $trait::$fn, $trait_assign::$fn_assign, <$scalar as core::ops::$trait>::$fn, $check_fn }
+ };
+}
+
+#[macro_export]
+macro_rules! impl_common_integer_tests {
+ { $vector:ident, $scalar:ident } => {
+ test_helpers::test_lanes! {
+ fn reduce_sum<const LANES: usize>() {
+ test_helpers::test_1(&|x| {
+ test_helpers::prop_assert_biteq! (
+ $vector::<LANES>::from_array(x).reduce_sum(),
+ x.iter().copied().fold(0 as $scalar, $scalar::wrapping_add),
+ );
+ Ok(())
+ });
+ }
+
+ fn reduce_product<const LANES: usize>() {
+ test_helpers::test_1(&|x| {
+ test_helpers::prop_assert_biteq! (
+ $vector::<LANES>::from_array(x).reduce_product(),
+ x.iter().copied().fold(1 as $scalar, $scalar::wrapping_mul),
+ );
+ Ok(())
+ });
+ }
+
+ fn reduce_and<const LANES: usize>() {
+ test_helpers::test_1(&|x| {
+ test_helpers::prop_assert_biteq! (
+ $vector::<LANES>::from_array(x).reduce_and(),
+ x.iter().copied().fold(-1i8 as $scalar, <$scalar as core::ops::BitAnd>::bitand),
+ );
+ Ok(())
+ });
+ }
+
+ fn reduce_or<const LANES: usize>() {
+ test_helpers::test_1(&|x| {
+ test_helpers::prop_assert_biteq! (
+ $vector::<LANES>::from_array(x).reduce_or(),
+ x.iter().copied().fold(0 as $scalar, <$scalar as core::ops::BitOr>::bitor),
+ );
+ Ok(())
+ });
+ }
+
+ fn reduce_xor<const LANES: usize>() {
+ test_helpers::test_1(&|x| {
+ test_helpers::prop_assert_biteq! (
+ $vector::<LANES>::from_array(x).reduce_xor(),
+ x.iter().copied().fold(0 as $scalar, <$scalar as core::ops::BitXor>::bitxor),
+ );
+ Ok(())
+ });
+ }
+
+ fn reduce_max<const LANES: usize>() {
+ test_helpers::test_1(&|x| {
+ test_helpers::prop_assert_biteq! (
+ $vector::<LANES>::from_array(x).reduce_max(),
+ x.iter().copied().max().unwrap(),
+ );
+ Ok(())
+ });
+ }
+
+ fn reduce_min<const LANES: usize>() {
+ test_helpers::test_1(&|x| {
+ test_helpers::prop_assert_biteq! (
+ $vector::<LANES>::from_array(x).reduce_min(),
+ x.iter().copied().min().unwrap(),
+ );
+ Ok(())
+ });
+ }
+ }
+ }
+}
+
+/// Implement tests for signed integers.
+#[macro_export]
+macro_rules! impl_signed_tests {
+ { $scalar:tt } => {
+ mod $scalar {
+ use core_simd::simd::SimdInt;
+ type Vector<const LANES: usize> = core_simd::Simd<Scalar, LANES>;
+ type Scalar = $scalar;
+
+ impl_common_integer_tests! { Vector, Scalar }
+
+ test_helpers::test_lanes! {
+ fn neg<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &<Vector::<LANES> as core::ops::Neg>::neg,
+ &<Scalar as core::ops::Neg>::neg,
+ &|x| !x.contains(&Scalar::MIN),
+ );
+ }
+
+ fn is_positive<const LANES: usize>() {
+ test_helpers::test_unary_mask_elementwise(
+ &Vector::<LANES>::is_positive,
+ &Scalar::is_positive,
+ &|_| true,
+ );
+ }
+
+ fn is_negative<const LANES: usize>() {
+ test_helpers::test_unary_mask_elementwise(
+ &Vector::<LANES>::is_negative,
+ &Scalar::is_negative,
+ &|_| true,
+ );
+ }
+
+ fn signum<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &Vector::<LANES>::signum,
+ &Scalar::signum,
+ &|_| true,
+ )
+ }
+
+ fn div_min_may_overflow<const LANES: usize>() {
+ let a = Vector::<LANES>::splat(Scalar::MIN);
+ let b = Vector::<LANES>::splat(-1);
+ assert_eq!(a / b, a);
+ }
+
+ fn rem_min_may_overflow<const LANES: usize>() {
+ let a = Vector::<LANES>::splat(Scalar::MIN);
+ let b = Vector::<LANES>::splat(-1);
+ assert_eq!(a % b, Vector::<LANES>::splat(0));
+ }
+
+ fn simd_min<const LANES: usize>() {
+ use core_simd::simd::SimdOrd;
+ let a = Vector::<LANES>::splat(Scalar::MIN);
+ let b = Vector::<LANES>::splat(0);
+ assert_eq!(a.simd_min(b), a);
+ let a = Vector::<LANES>::splat(Scalar::MAX);
+ let b = Vector::<LANES>::splat(0);
+ assert_eq!(a.simd_min(b), b);
+ }
+
+ fn simd_max<const LANES: usize>() {
+ use core_simd::simd::SimdOrd;
+ let a = Vector::<LANES>::splat(Scalar::MIN);
+ let b = Vector::<LANES>::splat(0);
+ assert_eq!(a.simd_max(b), b);
+ let a = Vector::<LANES>::splat(Scalar::MAX);
+ let b = Vector::<LANES>::splat(0);
+ assert_eq!(a.simd_max(b), a);
+ }
+
+ fn simd_clamp<const LANES: usize>() {
+ use core_simd::simd::SimdOrd;
+ let min = Vector::<LANES>::splat(Scalar::MIN);
+ let max = Vector::<LANES>::splat(Scalar::MAX);
+ let zero = Vector::<LANES>::splat(0);
+ let one = Vector::<LANES>::splat(1);
+ let negone = Vector::<LANES>::splat(-1);
+ assert_eq!(zero.simd_clamp(min, max), zero);
+ assert_eq!(zero.simd_clamp(min, one), zero);
+ assert_eq!(zero.simd_clamp(one, max), one);
+ assert_eq!(zero.simd_clamp(min, negone), negone);
+ }
+ }
+
+ test_helpers::test_lanes_panic! {
+ fn div_by_all_zeros_panics<const LANES: usize>() {
+ let a = Vector::<LANES>::splat(42);
+ let b = Vector::<LANES>::splat(0);
+ let _ = a / b;
+ }
+
+ fn div_by_one_zero_panics<const LANES: usize>() {
+ let a = Vector::<LANES>::splat(42);
+ let mut b = Vector::<LANES>::splat(21);
+ b[0] = 0 as _;
+ let _ = a / b;
+ }
+
+ fn rem_zero_panic<const LANES: usize>() {
+ let a = Vector::<LANES>::splat(42);
+ let b = Vector::<LANES>::splat(0);
+ let _ = a % b;
+ }
+ }
+
+ test_helpers::test_lanes! {
+ fn div_neg_one_no_panic<const LANES: usize>() {
+ let a = Vector::<LANES>::splat(42);
+ let b = Vector::<LANES>::splat(-1);
+ let _ = a / b;
+ }
+
+ fn rem_neg_one_no_panic<const LANES: usize>() {
+ let a = Vector::<LANES>::splat(42);
+ let b = Vector::<LANES>::splat(-1);
+ let _ = a % b;
+ }
+ }
+
+ impl_binary_op_test!(Scalar, Add::add, AddAssign::add_assign, Scalar::wrapping_add);
+ impl_binary_op_test!(Scalar, Sub::sub, SubAssign::sub_assign, Scalar::wrapping_sub);
+ impl_binary_op_test!(Scalar, Mul::mul, MulAssign::mul_assign, Scalar::wrapping_mul);
+
+ // Exclude Div and Rem panicking cases
+ impl_binary_checked_op_test!(Scalar, Div::div, DivAssign::div_assign, Scalar::wrapping_div, |x, y| y != 0 && !(x == Scalar::MIN && y == -1));
+ impl_binary_checked_op_test!(Scalar, Rem::rem, RemAssign::rem_assign, Scalar::wrapping_rem, |x, y| y != 0 && !(x == Scalar::MIN && y == -1));
+
+ impl_unary_op_test!(Scalar, Not::not);
+ impl_binary_op_test!(Scalar, BitAnd::bitand, BitAndAssign::bitand_assign);
+ impl_binary_op_test!(Scalar, BitOr::bitor, BitOrAssign::bitor_assign);
+ impl_binary_op_test!(Scalar, BitXor::bitxor, BitXorAssign::bitxor_assign);
+ }
+ }
+}
+
+/// Implement tests for unsigned integers.
+#[macro_export]
+macro_rules! impl_unsigned_tests {
+ { $scalar:tt } => {
+ mod $scalar {
+ use core_simd::simd::SimdUint;
+ type Vector<const LANES: usize> = core_simd::Simd<Scalar, LANES>;
+ type Scalar = $scalar;
+
+ impl_common_integer_tests! { Vector, Scalar }
+
+ test_helpers::test_lanes_panic! {
+ fn rem_zero_panic<const LANES: usize>() {
+ let a = Vector::<LANES>::splat(42);
+ let b = Vector::<LANES>::splat(0);
+ let _ = a % b;
+ }
+ }
+
+ impl_binary_op_test!(Scalar, Add::add, AddAssign::add_assign, Scalar::wrapping_add);
+ impl_binary_op_test!(Scalar, Sub::sub, SubAssign::sub_assign, Scalar::wrapping_sub);
+ impl_binary_op_test!(Scalar, Mul::mul, MulAssign::mul_assign, Scalar::wrapping_mul);
+
+ // Exclude Div and Rem panicking cases
+ impl_binary_checked_op_test!(Scalar, Div::div, DivAssign::div_assign, Scalar::wrapping_div, |_, y| y != 0);
+ impl_binary_checked_op_test!(Scalar, Rem::rem, RemAssign::rem_assign, Scalar::wrapping_rem, |_, y| y != 0);
+
+ impl_unary_op_test!(Scalar, Not::not);
+ impl_binary_op_test!(Scalar, BitAnd::bitand, BitAndAssign::bitand_assign);
+ impl_binary_op_test!(Scalar, BitOr::bitor, BitOrAssign::bitor_assign);
+ impl_binary_op_test!(Scalar, BitXor::bitxor, BitXorAssign::bitxor_assign);
+ }
+ }
+}
+
+/// Implement tests for floating point numbers.
+#[macro_export]
+macro_rules! impl_float_tests {
+ { $scalar:tt, $int_scalar:tt } => {
+ mod $scalar {
+ use core_simd::SimdFloat;
+ type Vector<const LANES: usize> = core_simd::Simd<Scalar, LANES>;
+ type Scalar = $scalar;
+
+ impl_unary_op_test!(Scalar, Neg::neg);
+ impl_binary_op_test!(Scalar, Add::add, AddAssign::add_assign);
+ impl_binary_op_test!(Scalar, Sub::sub, SubAssign::sub_assign);
+ impl_binary_op_test!(Scalar, Mul::mul, MulAssign::mul_assign);
+ impl_binary_op_test!(Scalar, Div::div, DivAssign::div_assign);
+ impl_binary_op_test!(Scalar, Rem::rem, RemAssign::rem_assign);
+
+ test_helpers::test_lanes! {
+ fn is_sign_positive<const LANES: usize>() {
+ test_helpers::test_unary_mask_elementwise(
+ &Vector::<LANES>::is_sign_positive,
+ &Scalar::is_sign_positive,
+ &|_| true,
+ );
+ }
+
+ fn is_sign_negative<const LANES: usize>() {
+ test_helpers::test_unary_mask_elementwise(
+ &Vector::<LANES>::is_sign_negative,
+ &Scalar::is_sign_negative,
+ &|_| true,
+ );
+ }
+
+ fn is_finite<const LANES: usize>() {
+ test_helpers::test_unary_mask_elementwise(
+ &Vector::<LANES>::is_finite,
+ &Scalar::is_finite,
+ &|_| true,
+ );
+ }
+
+ fn is_infinite<const LANES: usize>() {
+ test_helpers::test_unary_mask_elementwise(
+ &Vector::<LANES>::is_infinite,
+ &Scalar::is_infinite,
+ &|_| true,
+ );
+ }
+
+ fn is_nan<const LANES: usize>() {
+ test_helpers::test_unary_mask_elementwise(
+ &Vector::<LANES>::is_nan,
+ &Scalar::is_nan,
+ &|_| true,
+ );
+ }
+
+ fn is_normal<const LANES: usize>() {
+ test_helpers::test_unary_mask_elementwise(
+ &Vector::<LANES>::is_normal,
+ &Scalar::is_normal,
+ &|_| true,
+ );
+ }
+
+ fn is_subnormal<const LANES: usize>() {
+ test_helpers::test_unary_mask_elementwise(
+ &Vector::<LANES>::is_subnormal,
+ &Scalar::is_subnormal,
+ &|_| true,
+ );
+ }
+
+ fn abs<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &Vector::<LANES>::abs,
+ &Scalar::abs,
+ &|_| true,
+ )
+ }
+
+ fn recip<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &Vector::<LANES>::recip,
+ &Scalar::recip,
+ &|_| true,
+ )
+ }
+
+ fn to_degrees<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &Vector::<LANES>::to_degrees,
+ &Scalar::to_degrees,
+ &|_| true,
+ )
+ }
+
+ fn to_radians<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &Vector::<LANES>::to_radians,
+ &Scalar::to_radians,
+ &|_| true,
+ )
+ }
+
+ fn signum<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &Vector::<LANES>::signum,
+ &Scalar::signum,
+ &|_| true,
+ )
+ }
+
+ fn copysign<const LANES: usize>() {
+ test_helpers::test_binary_elementwise(
+ &Vector::<LANES>::copysign,
+ &Scalar::copysign,
+ &|_, _| true,
+ )
+ }
+
+ fn simd_min<const LANES: usize>() {
+ // Regular case (the two values are not both zero with opposite signs)
+ test_helpers::test_binary_elementwise(
+ &Vector::<LANES>::simd_min,
+ &Scalar::min,
+ // Reject the case where both values are zero with different signs
+ &|a, b| {
+ for (a, b) in a.iter().zip(b.iter()) {
+ if *a == 0. && *b == 0. && a.signum() != b.signum() {
+ return false;
+ }
+ }
+ true
+ }
+ );
+
+ // Special case where both values are zero
+ let p_zero = Vector::<LANES>::splat(0.);
+ let n_zero = Vector::<LANES>::splat(-0.);
+ assert!(p_zero.simd_min(n_zero).to_array().iter().all(|x| *x == 0.));
+ assert!(n_zero.simd_min(p_zero).to_array().iter().all(|x| *x == 0.));
+ }
+
+ fn simd_max<const LANES: usize>() {
+ // Regular case (the two values are not both zero with opposite signs)
+ test_helpers::test_binary_elementwise(
+ &Vector::<LANES>::simd_max,
+ &Scalar::max,
+ // Reject the case where both values are zero with different signs
+ &|a, b| {
+ for (a, b) in a.iter().zip(b.iter()) {
+ if *a == 0. && *b == 0. && a.signum() != b.signum() {
+ return false;
+ }
+ }
+ true
+ }
+ );
+
+ // Special case where both values are zero
+ let p_zero = Vector::<LANES>::splat(0.);
+ let n_zero = Vector::<LANES>::splat(-0.);
+ assert!(p_zero.simd_max(n_zero).to_array().iter().all(|x| *x == 0.));
+ assert!(n_zero.simd_max(p_zero).to_array().iter().all(|x| *x == 0.));
+ }
+
+ fn simd_clamp<const LANES: usize>() {
+ test_helpers::test_3(&|value: [Scalar; LANES], mut min: [Scalar; LANES], mut max: [Scalar; LANES]| {
+ for (min, max) in min.iter_mut().zip(max.iter_mut()) {
+ if max < min {
+ core::mem::swap(min, max);
+ }
+ if min.is_nan() {
+ *min = Scalar::NEG_INFINITY;
+ }
+ if max.is_nan() {
+ *max = Scalar::INFINITY;
+ }
+ }
+
+ let mut result_scalar = [Scalar::default(); LANES];
+ for i in 0..LANES {
+ result_scalar[i] = value[i].clamp(min[i], max[i]);
+ }
+ let result_vector = Vector::from_array(value).simd_clamp(min.into(), max.into()).to_array();
+ test_helpers::prop_assert_biteq!(result_scalar, result_vector);
+ Ok(())
+ })
+ }
+
+ fn reduce_sum<const LANES: usize>() {
+ test_helpers::test_1(&|x| {
+ test_helpers::prop_assert_biteq! (
+ Vector::<LANES>::from_array(x).reduce_sum(),
+ x.iter().sum(),
+ );
+ Ok(())
+ });
+ }
+
+ fn reduce_product<const LANES: usize>() {
+ test_helpers::test_1(&|x| {
+ test_helpers::prop_assert_biteq! (
+ Vector::<LANES>::from_array(x).reduce_product(),
+ x.iter().product(),
+ );
+ Ok(())
+ });
+ }
+
+ fn reduce_max<const LANES: usize>() {
+ test_helpers::test_1(&|x| {
+ let vmax = Vector::<LANES>::from_array(x).reduce_max();
+ let smax = x.iter().copied().fold(Scalar::NAN, Scalar::max);
+ // 0 and -0 are treated the same
+ if !(x.contains(&0.) && x.contains(&-0.) && vmax.abs() == 0. && smax.abs() == 0.) {
+ test_helpers::prop_assert_biteq!(vmax, smax);
+ }
+ Ok(())
+ });
+ }
+
+ fn reduce_min<const LANES: usize>() {
+ test_helpers::test_1(&|x| {
+ let vmin = Vector::<LANES>::from_array(x).reduce_min();
+ let smin = x.iter().copied().fold(Scalar::NAN, Scalar::min);
+ // 0 and -0 are treated the same
+ if !(x.contains(&0.) && x.contains(&-0.) && vmin.abs() == 0. && smin.abs() == 0.) {
+ test_helpers::prop_assert_biteq!(vmin, smin);
+ }
+ Ok(())
+ });
+ }
+ }
+
+ #[cfg(feature = "std")]
+ mod std {
+ use std_float::StdFloat;
+
+ use super::*;
+ test_helpers::test_lanes! {
+ fn sqrt<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &Vector::<LANES>::sqrt,
+ &Scalar::sqrt,
+ &|_| true,
+ )
+ }
+
+ fn mul_add<const LANES: usize>() {
+ test_helpers::test_ternary_elementwise(
+ &Vector::<LANES>::mul_add,
+ &Scalar::mul_add,
+ &|_, _, _| true,
+ )
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/library/portable-simd/crates/core_simd/tests/round.rs b/library/portable-simd/crates/core_simd/tests/round.rs
new file mode 100644
index 000000000..484fd5bf4
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/tests/round.rs
@@ -0,0 +1,85 @@
+#![feature(portable_simd)]
+
+macro_rules! float_rounding_test {
+ { $scalar:tt, $int_scalar:tt } => {
+ mod $scalar {
+ use std_float::StdFloat;
+
+ type Vector<const LANES: usize> = core_simd::Simd<$scalar, LANES>;
+ type Scalar = $scalar;
+ type IntScalar = $int_scalar;
+
+ test_helpers::test_lanes! {
+ fn ceil<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &Vector::<LANES>::ceil,
+ &Scalar::ceil,
+ &|_| true,
+ )
+ }
+
+ fn floor<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &Vector::<LANES>::floor,
+ &Scalar::floor,
+ &|_| true,
+ )
+ }
+
+ fn round<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &Vector::<LANES>::round,
+ &Scalar::round,
+ &|_| true,
+ )
+ }
+
+ fn trunc<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &Vector::<LANES>::trunc,
+ &Scalar::trunc,
+ &|_| true,
+ )
+ }
+
+ fn fract<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &Vector::<LANES>::fract,
+ &Scalar::fract,
+ &|_| true,
+ )
+ }
+ }
+
+ test_helpers::test_lanes! {
+ fn to_int_unchecked<const LANES: usize>() {
+ // The maximum integer that can be represented by the equivalently sized float has
+ // all of the mantissa digits set to 1, pushed up to the MSB.
+ const ALL_MANTISSA_BITS: IntScalar = ((1 << <Scalar>::MANTISSA_DIGITS) - 1);
+ const MAX_REPRESENTABLE_VALUE: Scalar =
+ (ALL_MANTISSA_BITS << (core::mem::size_of::<Scalar>() * 8 - <Scalar>::MANTISSA_DIGITS as usize - 1)) as Scalar;
+
+ let mut runner = test_helpers::make_runner();
+ runner.run(
+ &test_helpers::array::UniformArrayStrategy::new(-MAX_REPRESENTABLE_VALUE..MAX_REPRESENTABLE_VALUE),
+ |x| {
+ let result_1 = unsafe { Vector::from_array(x).to_int_unchecked::<IntScalar>().to_array() };
+ let result_2 = {
+ let mut result: [IntScalar; LANES] = [0; LANES];
+ for (i, o) in x.iter().zip(result.iter_mut()) {
+ *o = unsafe { i.to_int_unchecked::<IntScalar>() };
+ }
+ result
+ };
+ test_helpers::prop_assert_biteq!(result_1, result_2);
+ Ok(())
+ },
+ ).unwrap();
+ }
+ }
+ }
+ }
+}
+
+float_rounding_test! { f32, i32 }
+float_rounding_test! { f64, i64 }
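For concreteness, here is the f32/i32 instantiation of the constants in `to_int_unchecked` worked out (a worked example relying only on `f32::MANTISSA_DIGITS == 24`): all 24 mantissa bits set gives 2^24 - 1 = 16_777_215, and shifting that left by 32 - 24 - 1 = 7 yields 2_147_483_520, the largest i32 value exactly representable as an f32.

const ALL_MANTISSA_BITS_F32: i32 = (1 << 24) - 1;                          // 16_777_215
const MAX_REPRESENTABLE_F32: i32 = ALL_MANTISSA_BITS_F32 << (32 - 24 - 1); // 2_147_483_520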
diff --git a/library/portable-simd/crates/core_simd/tests/swizzle.rs b/library/portable-simd/crates/core_simd/tests/swizzle.rs
new file mode 100644
index 000000000..51c63611a
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/tests/swizzle.rs
@@ -0,0 +1,62 @@
+#![feature(portable_simd)]
+use core_simd::{Simd, Swizzle};
+
+#[cfg(target_arch = "wasm32")]
+use wasm_bindgen_test::*;
+
+#[cfg(target_arch = "wasm32")]
+wasm_bindgen_test_configure!(run_in_browser);
+
+#[test]
+#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+fn swizzle() {
+ struct Index;
+ impl Swizzle<4, 4> for Index {
+ const INDEX: [usize; 4] = [2, 1, 3, 0];
+ }
+ impl Swizzle<4, 2> for Index {
+ const INDEX: [usize; 2] = [1, 1];
+ }
+
+ let vector = Simd::from_array([2, 4, 1, 9]);
+ assert_eq!(Index::swizzle(vector).to_array(), [1, 4, 9, 2]);
+ assert_eq!(Index::swizzle(vector).to_array(), [4, 4]);
+}
+
+#[test]
+#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+fn reverse() {
+ let a = Simd::from_array([1, 2, 3, 4]);
+ assert_eq!(a.reverse().to_array(), [4, 3, 2, 1]);
+}
+
+#[test]
+#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+fn rotate() {
+ let a = Simd::from_array([1, 2, 3, 4]);
+ assert_eq!(a.rotate_lanes_left::<0>().to_array(), [1, 2, 3, 4]);
+ assert_eq!(a.rotate_lanes_left::<1>().to_array(), [2, 3, 4, 1]);
+ assert_eq!(a.rotate_lanes_left::<2>().to_array(), [3, 4, 1, 2]);
+ assert_eq!(a.rotate_lanes_left::<3>().to_array(), [4, 1, 2, 3]);
+ assert_eq!(a.rotate_lanes_left::<4>().to_array(), [1, 2, 3, 4]);
+ assert_eq!(a.rotate_lanes_left::<5>().to_array(), [2, 3, 4, 1]);
+ assert_eq!(a.rotate_lanes_right::<0>().to_array(), [1, 2, 3, 4]);
+ assert_eq!(a.rotate_lanes_right::<1>().to_array(), [4, 1, 2, 3]);
+ assert_eq!(a.rotate_lanes_right::<2>().to_array(), [3, 4, 1, 2]);
+ assert_eq!(a.rotate_lanes_right::<3>().to_array(), [2, 3, 4, 1]);
+ assert_eq!(a.rotate_lanes_right::<4>().to_array(), [1, 2, 3, 4]);
+ assert_eq!(a.rotate_lanes_right::<5>().to_array(), [4, 1, 2, 3]);
+}
+
+#[test]
+#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+fn interleave() {
+ let a = Simd::from_array([0, 1, 2, 3, 4, 5, 6, 7]);
+ let b = Simd::from_array([8, 9, 10, 11, 12, 13, 14, 15]);
+ let (lo, hi) = a.interleave(b);
+ assert_eq!(lo.to_array(), [0, 8, 1, 9, 2, 10, 3, 11]);
+ assert_eq!(hi.to_array(), [4, 12, 5, 13, 6, 14, 7, 15]);
+ let (even, odd) = lo.deinterleave(hi);
+ assert_eq!(even, a);
+ assert_eq!(odd, b);
+}
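
A note on the swizzle test above: the two Index::swizzle calls resolve to different Swizzle impls purely from the length of the expected array, and the mapping itself is simply output[i] = input[INDEX[i]]. A minimal scalar sketch of that mapping (illustrative only, not part of the patch):

// Scalar model of the Swizzle index mapping used in the test above:
// each output lane i takes input lane INDEX[i].
fn swizzle_model<const N: usize, const M: usize>(input: [i32; N], index: [usize; M]) -> [i32; M] {
    let mut out = [0; M];
    for (o, &idx) in out.iter_mut().zip(index.iter()) {
        *o = input[idx];
    }
    out
}

fn main() {
    let v = [2, 4, 1, 9];
    assert_eq!(swizzle_model(v, [2, 1, 3, 0]), [1, 4, 9, 2]); // matches the 4-lane assert
    assert_eq!(swizzle_model(v, [1, 1]), [4, 4]);             // matches the 2-lane assert
}
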
diff --git a/library/portable-simd/crates/core_simd/tests/to_bytes.rs b/library/portable-simd/crates/core_simd/tests/to_bytes.rs
new file mode 100644
index 000000000..debb4335e
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/tests/to_bytes.rs
@@ -0,0 +1,14 @@
+#![feature(portable_simd, generic_const_exprs, adt_const_params)]
+#![allow(incomplete_features)]
+#![cfg(feature = "generic_const_exprs")]
+
+use core_simd::Simd;
+
+#[test]
+fn byte_convert() {
+ let int = Simd::<u32, 2>::from_array([0xdeadbeef, 0x8badf00d]);
+ let bytes = int.to_ne_bytes();
+ assert_eq!(int[0].to_ne_bytes(), bytes[..4]);
+ assert_eq!(int[1].to_ne_bytes(), bytes[4..]);
+ assert_eq!(Simd::<u32, 2>::from_ne_bytes(bytes), int);
+}
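
For reference, the byte layout the byte_convert test sees is the target's native endianness. A small sketch of the expected bytes, assuming a little-endian target (illustrative only; big-endian targets reverse each 4-byte group):

fn main() {
    // to_ne_bytes follows the target's endianness; on little-endian it matches to_le_bytes,
    // so the 8-byte vector in the test is [0xef, 0xbe, 0xad, 0xde, 0x0d, 0xf0, 0xad, 0x8b].
    assert_eq!(0xdeadbeef_u32.to_le_bytes(), [0xef, 0xbe, 0xad, 0xde]);
    assert_eq!(0x8badf00d_u32.to_le_bytes(), [0x0d, 0xf0, 0xad, 0x8b]);
}
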
diff --git a/library/portable-simd/crates/core_simd/tests/u16_ops.rs b/library/portable-simd/crates/core_simd/tests/u16_ops.rs
new file mode 100644
index 000000000..9ae3bd6a4
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/tests/u16_ops.rs
@@ -0,0 +1,5 @@
+#![feature(portable_simd)]
+
+#[macro_use]
+mod ops_macros;
+impl_unsigned_tests! { u16 }
diff --git a/library/portable-simd/crates/core_simd/tests/u32_ops.rs b/library/portable-simd/crates/core_simd/tests/u32_ops.rs
new file mode 100644
index 000000000..de34b73d6
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/tests/u32_ops.rs
@@ -0,0 +1,5 @@
+#![feature(portable_simd)]
+
+#[macro_use]
+mod ops_macros;
+impl_unsigned_tests! { u32 }
diff --git a/library/portable-simd/crates/core_simd/tests/u64_ops.rs b/library/portable-simd/crates/core_simd/tests/u64_ops.rs
new file mode 100644
index 000000000..8ee5a318c
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/tests/u64_ops.rs
@@ -0,0 +1,5 @@
+#![feature(portable_simd)]
+
+#[macro_use]
+mod ops_macros;
+impl_unsigned_tests! { u64 }
diff --git a/library/portable-simd/crates/core_simd/tests/u8_ops.rs b/library/portable-simd/crates/core_simd/tests/u8_ops.rs
new file mode 100644
index 000000000..6d7211121
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/tests/u8_ops.rs
@@ -0,0 +1,5 @@
+#![feature(portable_simd)]
+
+#[macro_use]
+mod ops_macros;
+impl_unsigned_tests! { u8 }
diff --git a/library/portable-simd/crates/core_simd/tests/usize_ops.rs b/library/portable-simd/crates/core_simd/tests/usize_ops.rs
new file mode 100644
index 000000000..9c7b1687a
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/tests/usize_ops.rs
@@ -0,0 +1,5 @@
+#![feature(portable_simd)]
+
+#[macro_use]
+mod ops_macros;
+impl_unsigned_tests! { usize }
diff --git a/library/portable-simd/crates/core_simd/webdriver.json b/library/portable-simd/crates/core_simd/webdriver.json
new file mode 100644
index 000000000..f1d5734f1
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/webdriver.json
@@ -0,0 +1,7 @@
+{
+ "goog:chromeOptions": {
+ "args": [
+ "--enable-features=WebAssemblySimd"
+ ]
+ }
+}