Diffstat (limited to 'library/portable-simd/crates')
75 files changed, 7971 insertions, 0 deletions
diff --git a/library/portable-simd/crates/core_simd/Cargo.toml b/library/portable-simd/crates/core_simd/Cargo.toml new file mode 100644 index 000000000..8a29cf156 --- /dev/null +++ b/library/portable-simd/crates/core_simd/Cargo.toml @@ -0,0 +1,32 @@ +[package] +name = "core_simd" +version = "0.1.0" +edition = "2021" +homepage = "https://github.com/rust-lang/portable-simd" +repository = "https://github.com/rust-lang/portable-simd" +keywords = ["core", "simd", "intrinsics"] +categories = ["hardware-support", "no-std"] +license = "MIT OR Apache-2.0" + +[features] +default = ["as_crate"] +as_crate = [] +std = [] +generic_const_exprs = [] + +[target.'cfg(target_arch = "wasm32")'.dev-dependencies.wasm-bindgen] +version = "0.2" + +[dev-dependencies.wasm-bindgen-test] +version = "0.3" + +[dev-dependencies.proptest] +version = "0.10" +default-features = false +features = ["alloc"] + +[dev-dependencies.test_helpers] +path = "../test_helpers" + +[dev-dependencies] +std_float = { path = "../std_float/", features = ["as_crate"] } diff --git a/library/portable-simd/crates/core_simd/LICENSE-APACHE b/library/portable-simd/crates/core_simd/LICENSE-APACHE new file mode 100644 index 000000000..d64569567 --- /dev/null +++ b/library/portable-simd/crates/core_simd/LICENSE-APACHE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/library/portable-simd/crates/core_simd/LICENSE-MIT b/library/portable-simd/crates/core_simd/LICENSE-MIT new file mode 100644 index 000000000..0e9d2f43a --- /dev/null +++ b/library/portable-simd/crates/core_simd/LICENSE-MIT @@ -0,0 +1,19 @@ +Copyright (c) 2020 The Rust Project Developers + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/library/portable-simd/crates/core_simd/examples/matrix_inversion.rs b/library/portable-simd/crates/core_simd/examples/matrix_inversion.rs new file mode 100644 index 000000000..39f530f68 --- /dev/null +++ b/library/portable-simd/crates/core_simd/examples/matrix_inversion.rs @@ -0,0 +1,316 @@ +//! 4x4 matrix inverse +// Code ported from the `packed_simd` crate +// Run this code with `cargo test --example matrix_inversion` +#![feature(array_chunks, portable_simd)] +use core_simd::simd::*; +use Which::*; + +// Gotta define our own 4x4 matrix since Rust doesn't ship multidim arrays yet :^) +#[derive(Copy, Clone, Debug, PartialEq, PartialOrd)] +pub struct Matrix4x4([[f32; 4]; 4]); + +#[allow(clippy::too_many_lines)] +pub fn scalar_inv4x4(m: Matrix4x4) -> Option<Matrix4x4> { + let m = m.0; + + #[rustfmt::skip] + let mut inv = [ + // row 0: + [ + // 0,0: + m[1][1] * m[2][2] * m[3][3] - + m[1][1] * m[2][3] * m[3][2] - + m[2][1] * m[1][2] * m[3][3] + + m[2][1] * m[1][3] * m[3][2] + + m[3][1] * m[1][2] * m[2][3] - + m[3][1] * m[1][3] * m[2][2], + // 0,1: + -m[0][1] * m[2][2] * m[3][3] + + m[0][1] * m[2][3] * m[3][2] + + m[2][1] * m[0][2] * m[3][3] - + m[2][1] * m[0][3] * m[3][2] - + m[3][1] * m[0][2] * m[2][3] + + m[3][1] * m[0][3] * m[2][2], + // 0,2: + m[0][1] * m[1][2] * m[3][3] - + m[0][1] * m[1][3] * m[3][2] - + m[1][1] * m[0][2] * m[3][3] + + m[1][1] * m[0][3] * m[3][2] + + m[3][1] * m[0][2] * m[1][3] - + m[3][1] * m[0][3] * m[1][2], + // 0,3: + -m[0][1] * m[1][2] * m[2][3] + + m[0][1] * m[1][3] * m[2][2] + + m[1][1] * m[0][2] * m[2][3] - + m[1][1] * m[0][3] * m[2][2] - + m[2][1] * m[0][2] * m[1][3] + + m[2][1] * m[0][3] * m[1][2], + ], + // row 1 + [ + // 1,0: + -m[1][0] * m[2][2] * m[3][3] + + m[1][0] * m[2][3] * m[3][2] + + m[2][0] * m[1][2] * m[3][3] - + m[2][0] * m[1][3] * m[3][2] - + m[3][0] * m[1][2] * m[2][3] + + m[3][0] * m[1][3] * m[2][2], + // 1,1: + m[0][0] * m[2][2] * m[3][3] - + m[0][0] * m[2][3] * m[3][2] - + m[2][0] * m[0][2] * m[3][3] + + m[2][0] * m[0][3] * m[3][2] + + m[3][0] * m[0][2] * m[2][3] - + m[3][0] * m[0][3] * m[2][2], + // 1,2: + -m[0][0] * m[1][2] * m[3][3] + + m[0][0] * m[1][3] * m[3][2] + + m[1][0] * m[0][2] * m[3][3] - + m[1][0] * m[0][3] * m[3][2] - + m[3][0] * m[0][2] * m[1][3] + + m[3][0] * m[0][3] * m[1][2], + // 1,3: + m[0][0] * m[1][2] * m[2][3] - + m[0][0] * m[1][3] * m[2][2] - + m[1][0] * m[0][2] * m[2][3] + + m[1][0] * m[0][3] * m[2][2] + + m[2][0] * m[0][2] * m[1][3] - + m[2][0] * m[0][3] * m[1][2], + ], + // row 2 + [ + // 2,0: + m[1][0] * m[2][1] * m[3][3] - + m[1][0] * m[2][3] * m[3][1] - + m[2][0] * m[1][1] * m[3][3] + + m[2][0] * m[1][3] * m[3][1] + + m[3][0] * m[1][1] * m[2][3] - + m[3][0] * m[1][3] * m[2][1], + // 2,1: + -m[0][0] * m[2][1] * m[3][3] + + m[0][0] * m[2][3] * m[3][1] + + m[2][0] * m[0][1] * m[3][3] - + m[2][0] * m[0][3] * m[3][1] - + m[3][0] * m[0][1] * m[2][3] + + m[3][0] * m[0][3] * m[2][1], + // 2,2: + m[0][0] * m[1][1] * m[3][3] - + m[0][0] * m[1][3] * m[3][1] - + m[1][0] * m[0][1] * m[3][3] + + m[1][0] * m[0][3] * m[3][1] + + m[3][0] * m[0][1] * m[1][3] 
- + m[3][0] * m[0][3] * m[1][1], + // 2,3: + -m[0][0] * m[1][1] * m[2][3] + + m[0][0] * m[1][3] * m[2][1] + + m[1][0] * m[0][1] * m[2][3] - + m[1][0] * m[0][3] * m[2][1] - + m[2][0] * m[0][1] * m[1][3] + + m[2][0] * m[0][3] * m[1][1], + ], + // row 3 + [ + // 3,0: + -m[1][0] * m[2][1] * m[3][2] + + m[1][0] * m[2][2] * m[3][1] + + m[2][0] * m[1][1] * m[3][2] - + m[2][0] * m[1][2] * m[3][1] - + m[3][0] * m[1][1] * m[2][2] + + m[3][0] * m[1][2] * m[2][1], + // 3,1: + m[0][0] * m[2][1] * m[3][2] - + m[0][0] * m[2][2] * m[3][1] - + m[2][0] * m[0][1] * m[3][2] + + m[2][0] * m[0][2] * m[3][1] + + m[3][0] * m[0][1] * m[2][2] - + m[3][0] * m[0][2] * m[2][1], + // 3,2: + -m[0][0] * m[1][1] * m[3][2] + + m[0][0] * m[1][2] * m[3][1] + + m[1][0] * m[0][1] * m[3][2] - + m[1][0] * m[0][2] * m[3][1] - + m[3][0] * m[0][1] * m[1][2] + + m[3][0] * m[0][2] * m[1][1], + // 3,3: + m[0][0] * m[1][1] * m[2][2] - + m[0][0] * m[1][2] * m[2][1] - + m[1][0] * m[0][1] * m[2][2] + + m[1][0] * m[0][2] * m[2][1] + + m[2][0] * m[0][1] * m[1][2] - + m[2][0] * m[0][2] * m[1][1], + ], + ]; + + let det = m[0][0] * inv[0][0] + m[0][1] * inv[1][0] + m[0][2] * inv[2][0] + m[0][3] * inv[3][0]; + if det == 0. { + return None; + } + + let det_inv = 1. / det; + + for row in &mut inv { + for elem in row.iter_mut() { + *elem *= det_inv; + } + } + + Some(Matrix4x4(inv)) +} + +pub fn simd_inv4x4(m: Matrix4x4) -> Option<Matrix4x4> { + let m = m.0; + let m_0 = f32x4::from_array(m[0]); + let m_1 = f32x4::from_array(m[1]); + let m_2 = f32x4::from_array(m[2]); + let m_3 = f32x4::from_array(m[3]); + + const SHUFFLE01: [Which; 4] = [First(0), First(1), Second(0), Second(1)]; + const SHUFFLE02: [Which; 4] = [First(0), First(2), Second(0), Second(2)]; + const SHUFFLE13: [Which; 4] = [First(1), First(3), Second(1), Second(3)]; + const SHUFFLE23: [Which; 4] = [First(2), First(3), Second(2), Second(3)]; + + let tmp = simd_swizzle!(m_0, m_1, SHUFFLE01); + let row1 = simd_swizzle!(m_2, m_3, SHUFFLE01); + + let row0 = simd_swizzle!(tmp, row1, SHUFFLE02); + let row1 = simd_swizzle!(row1, tmp, SHUFFLE13); + + let tmp = simd_swizzle!(m_0, m_1, SHUFFLE23); + let row3 = simd_swizzle!(m_2, m_3, SHUFFLE23); + let row2 = simd_swizzle!(tmp, row3, SHUFFLE02); + let row3 = simd_swizzle!(row3, tmp, SHUFFLE13); + + let tmp = (row2 * row3).reverse().rotate_lanes_right::<2>(); + let minor0 = row1 * tmp; + let minor1 = row0 * tmp; + let tmp = tmp.rotate_lanes_right::<2>(); + let minor0 = (row1 * tmp) - minor0; + let minor1 = (row0 * tmp) - minor1; + let minor1 = minor1.rotate_lanes_right::<2>(); + + let tmp = (row1 * row2).reverse().rotate_lanes_right::<2>(); + let minor0 = (row3 * tmp) + minor0; + let minor3 = row0 * tmp; + let tmp = tmp.rotate_lanes_right::<2>(); + + let minor0 = minor0 - row3 * tmp; + let minor3 = row0 * tmp - minor3; + let minor3 = minor3.rotate_lanes_right::<2>(); + + let tmp = (row3 * row1.rotate_lanes_right::<2>()) + .reverse() + .rotate_lanes_right::<2>(); + let row2 = row2.rotate_lanes_right::<2>(); + let minor0 = row2 * tmp + minor0; + let minor2 = row0 * tmp; + let tmp = tmp.rotate_lanes_right::<2>(); + let minor0 = minor0 - row2 * tmp; + let minor2 = row0 * tmp - minor2; + let minor2 = minor2.rotate_lanes_right::<2>(); + + let tmp = (row0 * row1).reverse().rotate_lanes_right::<2>(); + let minor2 = minor2 + row3 * tmp; + let minor3 = row2 * tmp - minor3; + let tmp = tmp.rotate_lanes_right::<2>(); + let minor2 = row3 * tmp - minor2; + let minor3 = minor3 - row2 * tmp; + + let tmp = (row0 * row3).reverse().rotate_lanes_right::<2>(); + let 
minor1 = minor1 - row2 * tmp; + let minor2 = row1 * tmp + minor2; + let tmp = tmp.rotate_lanes_right::<2>(); + let minor1 = row2 * tmp + minor1; + let minor2 = minor2 - row1 * tmp; + + let tmp = (row0 * row2).reverse().rotate_lanes_right::<2>(); + let minor1 = row3 * tmp + minor1; + let minor3 = minor3 - row1 * tmp; + let tmp = tmp.rotate_lanes_right::<2>(); + let minor1 = minor1 - row3 * tmp; + let minor3 = row1 * tmp + minor3; + + let det = row0 * minor0; + let det = det.rotate_lanes_right::<2>() + det; + let det = det.reverse().rotate_lanes_right::<2>() + det; + + if det.reduce_sum() == 0. { + return None; + } + // calculate the reciprocal + let tmp = f32x4::splat(1.0) / det; + let det = tmp + tmp - det * tmp * tmp; + + let res0 = minor0 * det; + let res1 = minor1 * det; + let res2 = minor2 * det; + let res3 = minor3 * det; + + let mut m = m; + + m[0] = res0.to_array(); + m[1] = res1.to_array(); + m[2] = res2.to_array(); + m[3] = res3.to_array(); + + Some(Matrix4x4(m)) +} + +#[cfg(test)] +#[rustfmt::skip] +mod tests { + use super::*; + + #[test] + fn test() { + let tests: &[(Matrix4x4, Option<Matrix4x4>)] = &[ + // Identity: + (Matrix4x4([ + [1., 0., 0., 0.], + [0., 1., 0., 0.], + [0., 0., 1., 0.], + [0., 0., 0., 1.], + ]), + Some(Matrix4x4([ + [1., 0., 0., 0.], + [0., 1., 0., 0.], + [0., 0., 1., 0.], + [0., 0., 0., 1.], + ])) + ), + // None: + (Matrix4x4([ + [1., 2., 3., 4.], + [12., 11., 10., 9.], + [5., 6., 7., 8.], + [16., 15., 14., 13.], + ]), + None + ), + // Other: + (Matrix4x4([ + [1., 1., 1., 0.], + [0., 3., 1., 2.], + [2., 3., 1., 0.], + [1., 0., 2., 1.], + ]), + Some(Matrix4x4([ + [-3., -0.5, 1.5, 1.0], + [ 1., 0.25, -0.25, -0.5], + [ 3., 0.25, -1.25, -0.5], + [-3., 0.0, 1.0, 1.0], + ])) + ), + + + ]; + + for &(input, output) in tests { + assert_eq!(scalar_inv4x4(input), output); + assert_eq!(simd_inv4x4(input), output); + } + } +} + +fn main() { + // Empty main to make cargo happy +} diff --git a/library/portable-simd/crates/core_simd/examples/nbody.rs b/library/portable-simd/crates/core_simd/examples/nbody.rs new file mode 100644 index 000000000..df38a0096 --- /dev/null +++ b/library/portable-simd/crates/core_simd/examples/nbody.rs @@ -0,0 +1,193 @@ +#![feature(portable_simd)] +extern crate std_float; + +/// Benchmarks game nbody code +/// Taken from the `packed_simd` crate +/// Run this benchmark with `cargo test --example nbody` +mod nbody { + use core_simd::simd::*; + #[allow(unused)] // False positive? 
+ use std_float::StdFloat; + + use std::f64::consts::PI; + const SOLAR_MASS: f64 = 4.0 * PI * PI; + const DAYS_PER_YEAR: f64 = 365.24; + + #[derive(Debug, Clone, Copy)] + struct Body { + pub x: f64x4, + pub v: f64x4, + pub mass: f64, + } + + const N_BODIES: usize = 5; + const BODIES: [Body; N_BODIES] = [ + // sun: + Body { + x: f64x4::from_array([0., 0., 0., 0.]), + v: f64x4::from_array([0., 0., 0., 0.]), + mass: SOLAR_MASS, + }, + // jupiter: + Body { + x: f64x4::from_array([ + 4.84143144246472090e+00, + -1.16032004402742839e+00, + -1.03622044471123109e-01, + 0., + ]), + v: f64x4::from_array([ + 1.66007664274403694e-03 * DAYS_PER_YEAR, + 7.69901118419740425e-03 * DAYS_PER_YEAR, + -6.90460016972063023e-05 * DAYS_PER_YEAR, + 0., + ]), + mass: 9.54791938424326609e-04 * SOLAR_MASS, + }, + // saturn: + Body { + x: f64x4::from_array([ + 8.34336671824457987e+00, + 4.12479856412430479e+00, + -4.03523417114321381e-01, + 0., + ]), + v: f64x4::from_array([ + -2.76742510726862411e-03 * DAYS_PER_YEAR, + 4.99852801234917238e-03 * DAYS_PER_YEAR, + 2.30417297573763929e-05 * DAYS_PER_YEAR, + 0., + ]), + mass: 2.85885980666130812e-04 * SOLAR_MASS, + }, + // uranus: + Body { + x: f64x4::from_array([ + 1.28943695621391310e+01, + -1.51111514016986312e+01, + -2.23307578892655734e-01, + 0., + ]), + v: f64x4::from_array([ + 2.96460137564761618e-03 * DAYS_PER_YEAR, + 2.37847173959480950e-03 * DAYS_PER_YEAR, + -2.96589568540237556e-05 * DAYS_PER_YEAR, + 0., + ]), + mass: 4.36624404335156298e-05 * SOLAR_MASS, + }, + // neptune: + Body { + x: f64x4::from_array([ + 1.53796971148509165e+01, + -2.59193146099879641e+01, + 1.79258772950371181e-01, + 0., + ]), + v: f64x4::from_array([ + 2.68067772490389322e-03 * DAYS_PER_YEAR, + 1.62824170038242295e-03 * DAYS_PER_YEAR, + -9.51592254519715870e-05 * DAYS_PER_YEAR, + 0., + ]), + mass: 5.15138902046611451e-05 * SOLAR_MASS, + }, + ]; + + fn offset_momentum(bodies: &mut [Body; N_BODIES]) { + let (sun, rest) = bodies.split_at_mut(1); + let sun = &mut sun[0]; + for body in rest { + let m_ratio = body.mass / SOLAR_MASS; + sun.v -= body.v * Simd::splat(m_ratio); + } + } + + fn energy(bodies: &[Body; N_BODIES]) -> f64 { + let mut e = 0.; + for i in 0..N_BODIES { + let bi = &bodies[i]; + e += bi.mass * (bi.v * bi.v).reduce_sum() * 0.5; + for bj in bodies.iter().take(N_BODIES).skip(i + 1) { + let dx = bi.x - bj.x; + e -= bi.mass * bj.mass / (dx * dx).reduce_sum().sqrt() + } + } + e + } + + fn advance(bodies: &mut [Body; N_BODIES], dt: f64) { + const N: usize = N_BODIES * (N_BODIES - 1) / 2; + + // compute distance between bodies: + let mut r = [f64x4::splat(0.); N]; + { + let mut i = 0; + for j in 0..N_BODIES { + for k in j + 1..N_BODIES { + r[i] = bodies[j].x - bodies[k].x; + i += 1; + } + } + } + + let mut mag = [0.0; N]; + for i in (0..N).step_by(2) { + let d2s = f64x2::from_array([ + (r[i] * r[i]).reduce_sum(), + (r[i + 1] * r[i + 1]).reduce_sum(), + ]); + let dmags = f64x2::splat(dt) / (d2s * d2s.sqrt()); + mag[i] = dmags[0]; + mag[i + 1] = dmags[1]; + } + + let mut i = 0; + for j in 0..N_BODIES { + for k in j + 1..N_BODIES { + let f = r[i] * Simd::splat(mag[i]); + bodies[j].v -= f * Simd::splat(bodies[k].mass); + bodies[k].v += f * Simd::splat(bodies[j].mass); + i += 1 + } + } + for body in bodies { + body.x += Simd::splat(dt) * body.v + } + } + + pub fn run(n: usize) -> (f64, f64) { + let mut bodies = BODIES; + offset_momentum(&mut bodies); + let energy_before = energy(&bodies); + for _ in 0..n { + advance(&mut bodies, 0.01); + } + let energy_after = energy(&bodies); + + 
(energy_before, energy_after) + } +} + +#[cfg(test)] +mod tests { + // Good enough for demonstration purposes, not going for strictness here. + fn approx_eq_f64(a: f64, b: f64) -> bool { + (a - b).abs() < 0.00001 + } + #[test] + fn test() { + const OUTPUT: [f64; 2] = [-0.169075164, -0.169087605]; + let (energy_before, energy_after) = super::nbody::run(1000); + assert!(approx_eq_f64(energy_before, OUTPUT[0])); + assert!(approx_eq_f64(energy_after, OUTPUT[1])); + } +} + +fn main() { + { + let (energy_before, energy_after) = nbody::run(1000); + println!("Energy before: {energy_before}"); + println!("Energy after: {energy_after}"); + } +} diff --git a/library/portable-simd/crates/core_simd/examples/spectral_norm.rs b/library/portable-simd/crates/core_simd/examples/spectral_norm.rs new file mode 100644 index 000000000..012182e09 --- /dev/null +++ b/library/portable-simd/crates/core_simd/examples/spectral_norm.rs @@ -0,0 +1,77 @@ +#![feature(portable_simd)] + +use core_simd::simd::*; + +fn a(i: usize, j: usize) -> f64 { + ((i + j) * (i + j + 1) / 2 + i + 1) as f64 +} + +fn mult_av(v: &[f64], out: &mut [f64]) { + assert!(v.len() == out.len()); + assert!(v.len() % 2 == 0); + + for (i, out) in out.iter_mut().enumerate() { + let mut sum = f64x2::splat(0.0); + + let mut j = 0; + while j < v.len() { + let b = f64x2::from_slice(&v[j..]); + let a = f64x2::from_array([a(i, j), a(i, j + 1)]); + sum += b / a; + j += 2 + } + *out = sum.reduce_sum(); + } +} + +fn mult_atv(v: &[f64], out: &mut [f64]) { + assert!(v.len() == out.len()); + assert!(v.len() % 2 == 0); + + for (i, out) in out.iter_mut().enumerate() { + let mut sum = f64x2::splat(0.0); + + let mut j = 0; + while j < v.len() { + let b = f64x2::from_slice(&v[j..]); + let a = f64x2::from_array([a(j, i), a(j + 1, i)]); + sum += b / a; + j += 2 + } + *out = sum.reduce_sum(); + } +} + +fn mult_atav(v: &[f64], out: &mut [f64], tmp: &mut [f64]) { + mult_av(v, tmp); + mult_atv(tmp, out); +} + +pub fn spectral_norm(n: usize) -> f64 { + assert!(n % 2 == 0, "only even lengths are accepted"); + + let mut u = vec![1.0; n]; + let mut v = u.clone(); + let mut tmp = u.clone(); + + for _ in 0..10 { + mult_atav(&u, &mut v, &mut tmp); + mult_atav(&v, &mut u, &mut tmp); + } + (dot(&u, &v) / dot(&v, &v)).sqrt() +} + +fn dot(x: &[f64], y: &[f64]) -> f64 { + // This is auto-vectorized: + x.iter().zip(y).map(|(&x, &y)| x * y).sum() +} + +#[cfg(test)] +#[test] +fn test() { + assert_eq!(&format!("{:.9}", spectral_norm(100)), "1.274219991"); +} + +fn main() { + // Empty main to make cargo happy +} diff --git a/library/portable-simd/crates/core_simd/src/core_simd_docs.md b/library/portable-simd/crates/core_simd/src/core_simd_docs.md new file mode 100644 index 000000000..15e8ed025 --- /dev/null +++ b/library/portable-simd/crates/core_simd/src/core_simd_docs.md @@ -0,0 +1,4 @@ +Portable SIMD module. + +This module offers a portable abstraction for SIMD operations +that is not bound to any particular hardware architecture. 
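As a quick orientation before the module's own source files, the following sketch (illustrative, not part of this patch) shows how the API defined across these files fits together. It assumes a nightly toolchain with the `portable_simd` feature enabled, matching the crate's own doc-tests; the values and names are made up for the example.

```
// Minimal usage sketch (not part of this diff); assumes nightly Rust
// with `#![feature(portable_simd)]`, as in the doc examples below.
#![feature(portable_simd)]
use core::simd::{f32x4, SimdFloat, SimdPartialOrd};

fn main() {
    let a = f32x4::from_array([1.0, 2.0, 3.0, 4.0]);
    let b = f32x4::splat(2.5);

    // Lanewise arithmetic: one vector operation instead of a scalar loop.
    let sum = a + b;
    assert_eq!(sum.to_array(), [3.5, 4.5, 5.5, 6.5]);

    // Lanewise comparison yields a mask, which can blend two vectors
    // (the same pattern `simd_clamp` uses in elements/float.rs).
    let mask = a.simd_gt(b);
    let capped = mask.select(b, a);
    assert_eq!(capped.to_array(), [1.0, 2.0, 2.5, 2.5]);

    // Horizontal reduction across lanes, from the SimdFloat trait below.
    assert_eq!(a.reduce_sum(), 10.0);
}
```

The element-wise traits (`SimdFloat`, `SimdInt`, `SimdUint`) that back this API are introduced in the `elements` module that follows.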
diff --git a/library/portable-simd/crates/core_simd/src/elements.rs b/library/portable-simd/crates/core_simd/src/elements.rs new file mode 100644 index 000000000..701eb66b2 --- /dev/null +++ b/library/portable-simd/crates/core_simd/src/elements.rs @@ -0,0 +1,11 @@ +mod float; +mod int; +mod uint; + +mod sealed { + pub trait Sealed {} +} + +pub use float::*; +pub use int::*; +pub use uint::*; diff --git a/library/portable-simd/crates/core_simd/src/elements/float.rs b/library/portable-simd/crates/core_simd/src/elements/float.rs new file mode 100644 index 000000000..d60223270 --- /dev/null +++ b/library/portable-simd/crates/core_simd/src/elements/float.rs @@ -0,0 +1,357 @@ +use super::sealed::Sealed; +use crate::simd::{ + intrinsics, LaneCount, Mask, Simd, SimdElement, SimdPartialEq, SimdPartialOrd, + SupportedLaneCount, +}; + +/// Operations on SIMD vectors of floats. +pub trait SimdFloat: Copy + Sealed { + /// Mask type used for manipulating this SIMD vector type. + type Mask; + + /// Scalar type contained by this SIMD vector type. + type Scalar; + + /// Bit representation of this SIMD vector type. + type Bits; + + /// Raw transmutation to an unsigned integer vector type with the + /// same size and number of lanes. + #[must_use = "method returns a new vector and does not mutate the original value"] + fn to_bits(self) -> Self::Bits; + + /// Raw transmutation from an unsigned integer vector type with the + /// same size and number of lanes. + #[must_use = "method returns a new vector and does not mutate the original value"] + fn from_bits(bits: Self::Bits) -> Self; + + /// Produces a vector where every lane has the absolute value of the + /// equivalently-indexed lane in `self`. + #[must_use = "method returns a new vector and does not mutate the original value"] + fn abs(self) -> Self; + + /// Takes the reciprocal (inverse) of each lane, `1/x`. + #[must_use = "method returns a new vector and does not mutate the original value"] + fn recip(self) -> Self; + + /// Converts each lane from radians to degrees. + #[must_use = "method returns a new vector and does not mutate the original value"] + fn to_degrees(self) -> Self; + + /// Converts each lane from degrees to radians. + #[must_use = "method returns a new vector and does not mutate the original value"] + fn to_radians(self) -> Self; + + /// Returns true for each lane if it has a positive sign, including + /// `+0.0`, `NaN`s with positive sign bit and positive infinity. + #[must_use = "method returns a new mask and does not mutate the original value"] + fn is_sign_positive(self) -> Self::Mask; + + /// Returns true for each lane if it has a negative sign, including + /// `-0.0`, `NaN`s with negative sign bit and negative infinity. + #[must_use = "method returns a new mask and does not mutate the original value"] + fn is_sign_negative(self) -> Self::Mask; + + /// Returns true for each lane if its value is `NaN`. + #[must_use = "method returns a new mask and does not mutate the original value"] + fn is_nan(self) -> Self::Mask; + + /// Returns true for each lane if its value is positive infinity or negative infinity. + #[must_use = "method returns a new mask and does not mutate the original value"] + fn is_infinite(self) -> Self::Mask; + + /// Returns true for each lane if its value is neither infinite nor `NaN`. + #[must_use = "method returns a new mask and does not mutate the original value"] + fn is_finite(self) -> Self::Mask; + + /// Returns true for each lane if its value is subnormal. 
+ #[must_use = "method returns a new mask and does not mutate the original value"] + fn is_subnormal(self) -> Self::Mask; + + /// Returns true for each lane if its value is neither zero, infinite, + /// subnormal, nor `NaN`. + #[must_use = "method returns a new mask and does not mutate the original value"] + fn is_normal(self) -> Self::Mask; + + /// Replaces each lane with a number that represents its sign. + /// + /// * `1.0` if the number is positive, `+0.0`, or `INFINITY` + /// * `-1.0` if the number is negative, `-0.0`, or `NEG_INFINITY` + /// * `NAN` if the number is `NAN` + #[must_use = "method returns a new vector and does not mutate the original value"] + fn signum(self) -> Self; + + /// Returns each lane with the magnitude of `self` and the sign of `sign`. + /// + /// For any lane containing a `NAN`, a `NAN` with the sign of `sign` is returned. + #[must_use = "method returns a new vector and does not mutate the original value"] + fn copysign(self, sign: Self) -> Self; + + /// Returns the minimum of each lane. + /// + /// If one of the values is `NAN`, then the other value is returned. + #[must_use = "method returns a new vector and does not mutate the original value"] + fn simd_min(self, other: Self) -> Self; + + /// Returns the maximum of each lane. + /// + /// If one of the values is `NAN`, then the other value is returned. + #[must_use = "method returns a new vector and does not mutate the original value"] + fn simd_max(self, other: Self) -> Self; + + /// Restrict each lane to a certain interval unless it is NaN. + /// + /// For each lane in `self`, returns the corresponding lane in `max` if the lane is + /// greater than `max`, and the corresponding lane in `min` if the lane is less + /// than `min`. Otherwise returns the lane in `self`. + #[must_use = "method returns a new vector and does not mutate the original value"] + fn simd_clamp(self, min: Self, max: Self) -> Self; + + /// Returns the sum of the lanes of the vector. + /// + /// # Examples + /// + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{f32x2, SimdFloat}; + /// let v = f32x2::from_array([1., 2.]); + /// assert_eq!(v.reduce_sum(), 3.); + /// ``` + fn reduce_sum(self) -> Self::Scalar; + + /// Reducing multiply. Returns the product of the lanes of the vector. + /// + /// # Examples + /// + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{f32x2, SimdFloat}; + /// let v = f32x2::from_array([3., 4.]); + /// assert_eq!(v.reduce_product(), 12.); + /// ``` + fn reduce_product(self) -> Self::Scalar; + + /// Returns the maximum lane in the vector. + /// + /// Returns values based on equality, so a vector containing both `0.` and `-0.` may + /// return either. + /// + /// This function will not return `NaN` unless all lanes are `NaN`. + /// + /// # Examples + /// + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{f32x2, SimdFloat}; + /// let v = f32x2::from_array([1., 2.]); + /// assert_eq!(v.reduce_max(), 2.); + /// + /// // NaN values are skipped... 
+ /// let v = f32x2::from_array([1., f32::NAN]); + /// assert_eq!(v.reduce_max(), 1.); + /// + /// // ...unless all values are NaN + /// let v = f32x2::from_array([f32::NAN, f32::NAN]); + /// assert!(v.reduce_max().is_nan()); + /// ``` + fn reduce_max(self) -> Self::Scalar; + + /// Returns the minimum lane in the vector. + /// + /// Returns values based on equality, so a vector containing both `0.` and `-0.` may + /// return either. + /// + /// This function will not return `NaN` unless all lanes are `NaN`. + /// + /// # Examples + /// + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{f32x2, SimdFloat}; + /// let v = f32x2::from_array([3., 7.]); + /// assert_eq!(v.reduce_min(), 3.); + /// + /// // NaN values are skipped... + /// let v = f32x2::from_array([1., f32::NAN]); + /// assert_eq!(v.reduce_min(), 1.); + /// + /// // ...unless all values are NaN + /// let v = f32x2::from_array([f32::NAN, f32::NAN]); + /// assert!(v.reduce_min().is_nan()); + /// ``` + fn reduce_min(self) -> Self::Scalar; +} + +macro_rules! impl_trait { + { $($ty:ty { bits: $bits_ty:ty, mask: $mask_ty:ty }),* } => { + $( + impl<const LANES: usize> Sealed for Simd<$ty, LANES> + where + LaneCount<LANES>: SupportedLaneCount, + { + } + + impl<const LANES: usize> SimdFloat for Simd<$ty, LANES> + where + LaneCount<LANES>: SupportedLaneCount, + { + type Mask = Mask<<$mask_ty as SimdElement>::Mask, LANES>; + type Scalar = $ty; + type Bits = Simd<$bits_ty, LANES>; + + #[inline] + fn to_bits(self) -> Simd<$bits_ty, LANES> { + assert_eq!(core::mem::size_of::<Self>(), core::mem::size_of::<Self::Bits>()); + // Safety: transmuting between vector types is safe + unsafe { core::mem::transmute_copy(&self) } + } + + #[inline] + fn from_bits(bits: Simd<$bits_ty, LANES>) -> Self { + assert_eq!(core::mem::size_of::<Self>(), core::mem::size_of::<Self::Bits>()); + // Safety: transmuting between vector types is safe + unsafe { core::mem::transmute_copy(&bits) } + } + + #[inline] + fn abs(self) -> Self { + // Safety: `self` is a float vector + unsafe { intrinsics::simd_fabs(self) } + } + + #[inline] + fn recip(self) -> Self { + Self::splat(1.0) / self + } + + #[inline] + fn to_degrees(self) -> Self { + // to_degrees uses a special constant for better precision, so extract that constant + self * Self::splat(Self::Scalar::to_degrees(1.)) + } + + #[inline] + fn to_radians(self) -> Self { + self * Self::splat(Self::Scalar::to_radians(1.)) + } + + #[inline] + fn is_sign_positive(self) -> Self::Mask { + !self.is_sign_negative() + } + + #[inline] + fn is_sign_negative(self) -> Self::Mask { + let sign_bits = self.to_bits() & Simd::splat((!0 >> 1) + 1); + sign_bits.simd_gt(Simd::splat(0)) + } + + #[inline] + fn is_nan(self) -> Self::Mask { + self.simd_ne(self) + } + + #[inline] + fn is_infinite(self) -> Self::Mask { + self.abs().simd_eq(Self::splat(Self::Scalar::INFINITY)) + } + + #[inline] + fn is_finite(self) -> Self::Mask { + self.abs().simd_lt(Self::splat(Self::Scalar::INFINITY)) + } + + #[inline] + fn is_subnormal(self) -> Self::Mask { + self.abs().simd_ne(Self::splat(0.0)) & (self.to_bits() & Self::splat(Self::Scalar::INFINITY).to_bits()).simd_eq(Simd::splat(0)) + } + + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + fn is_normal(self) -> Self::Mask { + !(self.abs().simd_eq(Self::splat(0.0)) | self.is_nan() | self.is_subnormal() | self.is_infinite()) + } + + #[inline] + fn 
signum(self) -> Self { + self.is_nan().select(Self::splat(Self::Scalar::NAN), Self::splat(1.0).copysign(self)) + } + + #[inline] + fn copysign(self, sign: Self) -> Self { + let sign_bit = sign.to_bits() & Self::splat(-0.).to_bits(); + let magnitude = self.to_bits() & !Self::splat(-0.).to_bits(); + Self::from_bits(sign_bit | magnitude) + } + + #[inline] + fn simd_min(self, other: Self) -> Self { + // Safety: `self` and `other` are float vectors + unsafe { intrinsics::simd_fmin(self, other) } + } + + #[inline] + fn simd_max(self, other: Self) -> Self { + // Safety: `self` and `other` are floating point vectors + unsafe { intrinsics::simd_fmax(self, other) } + } + + #[inline] + fn simd_clamp(self, min: Self, max: Self) -> Self { + assert!( + min.simd_le(max).all(), + "each lane in `min` must be less than or equal to the corresponding lane in `max`", + ); + let mut x = self; + x = x.simd_lt(min).select(min, x); + x = x.simd_gt(max).select(max, x); + x + } + + #[inline] + fn reduce_sum(self) -> Self::Scalar { + // LLVM sum is inaccurate on i586 + if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) { + self.as_array().iter().sum() + } else { + // Safety: `self` is a float vector + unsafe { intrinsics::simd_reduce_add_ordered(self, 0.) } + } + } + + #[inline] + fn reduce_product(self) -> Self::Scalar { + // LLVM product is inaccurate on i586 + if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) { + self.as_array().iter().product() + } else { + // Safety: `self` is a float vector + unsafe { intrinsics::simd_reduce_mul_ordered(self, 1.) } + } + } + + #[inline] + fn reduce_max(self) -> Self::Scalar { + // Safety: `self` is a float vector + unsafe { intrinsics::simd_reduce_max(self) } + } + + #[inline] + fn reduce_min(self) -> Self::Scalar { + // Safety: `self` is a float vector + unsafe { intrinsics::simd_reduce_min(self) } + } + } + )* + } +} + +impl_trait! { f32 { bits: u32, mask: i32 }, f64 { bits: u64, mask: i64 } } diff --git a/library/portable-simd/crates/core_simd/src/elements/int.rs b/library/portable-simd/crates/core_simd/src/elements/int.rs new file mode 100644 index 000000000..9b8c37ed4 --- /dev/null +++ b/library/portable-simd/crates/core_simd/src/elements/int.rs @@ -0,0 +1,298 @@ +use super::sealed::Sealed; +use crate::simd::{ + intrinsics, LaneCount, Mask, Simd, SimdElement, SimdPartialOrd, SupportedLaneCount, +}; + +/// Operations on SIMD vectors of signed integers. +pub trait SimdInt: Copy + Sealed { + /// Mask type used for manipulating this SIMD vector type. + type Mask; + + /// Scalar type contained by this SIMD vector type. + type Scalar; + + /// Lanewise saturating add. + /// + /// # Examples + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{Simd, SimdInt}; + /// use core::i32::{MIN, MAX}; + /// let x = Simd::from_array([MIN, 0, 1, MAX]); + /// let max = Simd::splat(MAX); + /// let unsat = x + max; + /// let sat = x.saturating_add(max); + /// assert_eq!(unsat, Simd::from_array([-1, MAX, MIN, -2])); + /// assert_eq!(sat, Simd::from_array([-1, MAX, MAX, MAX])); + /// ``` + fn saturating_add(self, second: Self) -> Self; + + /// Lanewise saturating subtract. 
+    ///
+    /// # Examples
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::{Simd, SimdInt};
+    /// use core::i32::{MIN, MAX};
+    /// let x = Simd::from_array([MIN, -2, -1, MAX]);
+    /// let max = Simd::splat(MAX);
+    /// let unsat = x - max;
+    /// let sat = x.saturating_sub(max);
+    /// assert_eq!(unsat, Simd::from_array([1, MAX, MIN, 0]));
+    /// assert_eq!(sat, Simd::from_array([MIN, MIN, MIN, 0]));
+    /// ```
+    fn saturating_sub(self, second: Self) -> Self;
+
+    /// Lanewise absolute value, implemented in Rust.
+    /// Every lane becomes its absolute value.
+    ///
+    /// # Examples
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::{Simd, SimdInt};
+    /// use core::i32::{MIN, MAX};
+    /// let xs = Simd::from_array([MIN, MIN + 1, -5, 0]);
+    /// assert_eq!(xs.abs(), Simd::from_array([MIN, MAX, 5, 0]));
+    /// ```
+    fn abs(self) -> Self;
+
+    /// Lanewise saturating absolute value, implemented in Rust.
+    /// As abs(), except the MIN value becomes MAX instead of itself.
+    ///
+    /// # Examples
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::{Simd, SimdInt};
+    /// use core::i32::{MIN, MAX};
+    /// let xs = Simd::from_array([MIN, -2, 0, 3]);
+    /// let unsat = xs.abs();
+    /// let sat = xs.saturating_abs();
+    /// assert_eq!(unsat, Simd::from_array([MIN, 2, 0, 3]));
+    /// assert_eq!(sat, Simd::from_array([MAX, 2, 0, 3]));
+    /// ```
+    fn saturating_abs(self) -> Self;
+
+    /// Lanewise saturating negation, implemented in Rust.
+    /// As neg(), except the MIN value becomes MAX instead of itself.
+    ///
+    /// # Examples
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::{Simd, SimdInt};
+    /// use core::i32::{MIN, MAX};
+    /// let x = Simd::from_array([MIN, -2, 3, MAX]);
+    /// let unsat = -x;
+    /// let sat = x.saturating_neg();
+    /// assert_eq!(unsat, Simd::from_array([MIN, 2, -3, MIN + 1]));
+    /// assert_eq!(sat, Simd::from_array([MAX, 2, -3, MIN + 1]));
+    /// ```
+    fn saturating_neg(self) -> Self;
+
+    /// Returns true for each positive lane and false if it is zero or negative.
+    fn is_positive(self) -> Self::Mask;
+
+    /// Returns true for each negative lane and false if it is zero or positive.
+    fn is_negative(self) -> Self::Mask;
+
+    /// Returns numbers representing the sign of each lane.
+    /// * `0` if the number is zero
+    /// * `1` if the number is positive
+    /// * `-1` if the number is negative
+    fn signum(self) -> Self;
+
+    /// Returns the sum of the lanes of the vector, with wrapping addition.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::{i32x4, SimdInt};
+    /// let v = i32x4::from_array([1, 2, 3, 4]);
+    /// assert_eq!(v.reduce_sum(), 10);
+    ///
+    /// // SIMD integer addition is always wrapping
+    /// let v = i32x4::from_array([i32::MAX, 1, 0, 0]);
+    /// assert_eq!(v.reduce_sum(), i32::MIN);
+    /// ```
+    fn reduce_sum(self) -> Self::Scalar;
+
+    /// Returns the product of the lanes of the vector, with wrapping multiplication.
+ /// + /// # Examples + /// + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{i32x4, SimdInt}; + /// let v = i32x4::from_array([1, 2, 3, 4]); + /// assert_eq!(v.reduce_product(), 24); + /// + /// // SIMD integer multiplication is always wrapping + /// let v = i32x4::from_array([i32::MAX, 2, 1, 1]); + /// assert!(v.reduce_product() < i32::MAX); + /// ``` + fn reduce_product(self) -> Self::Scalar; + + /// Returns the maximum lane in the vector. + /// + /// # Examples + /// + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{i32x4, SimdInt}; + /// let v = i32x4::from_array([1, 2, 3, 4]); + /// assert_eq!(v.reduce_max(), 4); + /// ``` + fn reduce_max(self) -> Self::Scalar; + + /// Returns the minimum lane in the vector. + /// + /// # Examples + /// + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{i32x4, SimdInt}; + /// let v = i32x4::from_array([1, 2, 3, 4]); + /// assert_eq!(v.reduce_min(), 1); + /// ``` + fn reduce_min(self) -> Self::Scalar; + + /// Returns the cumulative bitwise "and" across the lanes of the vector. + fn reduce_and(self) -> Self::Scalar; + + /// Returns the cumulative bitwise "or" across the lanes of the vector. + fn reduce_or(self) -> Self::Scalar; + + /// Returns the cumulative bitwise "xor" across the lanes of the vector. + fn reduce_xor(self) -> Self::Scalar; +} + +macro_rules! impl_trait { + { $($ty:ty),* } => { + $( + impl<const LANES: usize> Sealed for Simd<$ty, LANES> + where + LaneCount<LANES>: SupportedLaneCount, + { + } + + impl<const LANES: usize> SimdInt for Simd<$ty, LANES> + where + LaneCount<LANES>: SupportedLaneCount, + { + type Mask = Mask<<$ty as SimdElement>::Mask, LANES>; + type Scalar = $ty; + + #[inline] + fn saturating_add(self, second: Self) -> Self { + // Safety: `self` is a vector + unsafe { intrinsics::simd_saturating_add(self, second) } + } + + #[inline] + fn saturating_sub(self, second: Self) -> Self { + // Safety: `self` is a vector + unsafe { intrinsics::simd_saturating_sub(self, second) } + } + + #[inline] + fn abs(self) -> Self { + const SHR: $ty = <$ty>::BITS as $ty - 1; + let m = self >> Simd::splat(SHR); + (self^m) - m + } + + #[inline] + fn saturating_abs(self) -> Self { + // arith shift for -1 or 0 mask based on sign bit, giving 2s complement + const SHR: $ty = <$ty>::BITS as $ty - 1; + let m = self >> Simd::splat(SHR); + (self^m).saturating_sub(m) + } + + #[inline] + fn saturating_neg(self) -> Self { + Self::splat(0).saturating_sub(self) + } + + #[inline] + fn is_positive(self) -> Self::Mask { + self.simd_gt(Self::splat(0)) + } + + #[inline] + fn is_negative(self) -> Self::Mask { + self.simd_lt(Self::splat(0)) + } + + #[inline] + fn signum(self) -> Self { + self.is_positive().select( + Self::splat(1), + self.is_negative().select(Self::splat(-1), Self::splat(0)) + ) + } + + #[inline] + fn reduce_sum(self) -> Self::Scalar { + // Safety: `self` is an integer vector + unsafe { intrinsics::simd_reduce_add_ordered(self, 0) } + } + + #[inline] + fn reduce_product(self) -> Self::Scalar { + // Safety: `self` is an integer vector + unsafe { intrinsics::simd_reduce_mul_ordered(self, 1) } + } + + #[inline] + fn reduce_max(self) -> Self::Scalar { + // Safety: 
`self` is an integer vector
+                unsafe { intrinsics::simd_reduce_max(self) }
+            }
+
+            #[inline]
+            fn reduce_min(self) -> Self::Scalar {
+                // Safety: `self` is an integer vector
+                unsafe { intrinsics::simd_reduce_min(self) }
+            }
+
+            #[inline]
+            fn reduce_and(self) -> Self::Scalar {
+                // Safety: `self` is an integer vector
+                unsafe { intrinsics::simd_reduce_and(self) }
+            }
+
+            #[inline]
+            fn reduce_or(self) -> Self::Scalar {
+                // Safety: `self` is an integer vector
+                unsafe { intrinsics::simd_reduce_or(self) }
+            }
+
+            #[inline]
+            fn reduce_xor(self) -> Self::Scalar {
+                // Safety: `self` is an integer vector
+                unsafe { intrinsics::simd_reduce_xor(self) }
+            }
+        }
+        )*
+    }
+}
+
+impl_trait! { i8, i16, i32, i64, isize }
diff --git a/library/portable-simd/crates/core_simd/src/elements/uint.rs b/library/portable-simd/crates/core_simd/src/elements/uint.rs
new file mode 100644
index 000000000..21e7e76eb
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/elements/uint.rs
@@ -0,0 +1,139 @@
+use super::sealed::Sealed;
+use crate::simd::{intrinsics, LaneCount, Simd, SupportedLaneCount};
+
+/// Operations on SIMD vectors of unsigned integers.
+pub trait SimdUint: Copy + Sealed {
+    /// Scalar type contained by this SIMD vector type.
+    type Scalar;
+
+    /// Lanewise saturating add.
+    ///
+    /// # Examples
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::{Simd, SimdUint};
+    /// use core::u32::MAX;
+    /// let x = Simd::from_array([2, 1, 0, MAX]);
+    /// let max = Simd::splat(MAX);
+    /// let unsat = x + max;
+    /// let sat = x.saturating_add(max);
+    /// assert_eq!(unsat, Simd::from_array([1, 0, MAX, MAX - 1]));
+    /// assert_eq!(sat, max);
+    /// ```
+    fn saturating_add(self, second: Self) -> Self;
+
+    /// Lanewise saturating subtract.
+    ///
+    /// # Examples
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::{Simd, SimdUint};
+    /// use core::u32::MAX;
+    /// let x = Simd::from_array([2, 1, 0, MAX]);
+    /// let max = Simd::splat(MAX);
+    /// let unsat = x - max;
+    /// let sat = x.saturating_sub(max);
+    /// assert_eq!(unsat, Simd::from_array([3, 2, 1, 0]));
+    /// assert_eq!(sat, Simd::splat(0));
+    /// ```
+    fn saturating_sub(self, second: Self) -> Self;
+
+    /// Returns the sum of the lanes of the vector, with wrapping addition.
+    fn reduce_sum(self) -> Self::Scalar;
+
+    /// Returns the product of the lanes of the vector, with wrapping multiplication.
+    fn reduce_product(self) -> Self::Scalar;
+
+    /// Returns the maximum lane in the vector.
+    fn reduce_max(self) -> Self::Scalar;
+
+    /// Returns the minimum lane in the vector.
+    fn reduce_min(self) -> Self::Scalar;
+
+    /// Returns the cumulative bitwise "and" across the lanes of the vector.
+    fn reduce_and(self) -> Self::Scalar;
+
+    /// Returns the cumulative bitwise "or" across the lanes of the vector.
+    fn reduce_or(self) -> Self::Scalar;
+
+    /// Returns the cumulative bitwise "xor" across the lanes of the vector.
+    fn reduce_xor(self) -> Self::Scalar;
+}
+
+macro_rules!
impl_trait { + { $($ty:ty),* } => { + $( + impl<const LANES: usize> Sealed for Simd<$ty, LANES> + where + LaneCount<LANES>: SupportedLaneCount, + { + } + + impl<const LANES: usize> SimdUint for Simd<$ty, LANES> + where + LaneCount<LANES>: SupportedLaneCount, + { + type Scalar = $ty; + + #[inline] + fn saturating_add(self, second: Self) -> Self { + // Safety: `self` is a vector + unsafe { intrinsics::simd_saturating_add(self, second) } + } + + #[inline] + fn saturating_sub(self, second: Self) -> Self { + // Safety: `self` is a vector + unsafe { intrinsics::simd_saturating_sub(self, second) } + } + + #[inline] + fn reduce_sum(self) -> Self::Scalar { + // Safety: `self` is an integer vector + unsafe { intrinsics::simd_reduce_add_ordered(self, 0) } + } + + #[inline] + fn reduce_product(self) -> Self::Scalar { + // Safety: `self` is an integer vector + unsafe { intrinsics::simd_reduce_mul_ordered(self, 1) } + } + + #[inline] + fn reduce_max(self) -> Self::Scalar { + // Safety: `self` is an integer vector + unsafe { intrinsics::simd_reduce_max(self) } + } + + #[inline] + fn reduce_min(self) -> Self::Scalar { + // Safety: `self` is an integer vector + unsafe { intrinsics::simd_reduce_min(self) } + } + + #[inline] + fn reduce_and(self) -> Self::Scalar { + // Safety: `self` is an integer vector + unsafe { intrinsics::simd_reduce_and(self) } + } + + #[inline] + fn reduce_or(self) -> Self::Scalar { + // Safety: `self` is an integer vector + unsafe { intrinsics::simd_reduce_or(self) } + } + + #[inline] + fn reduce_xor(self) -> Self::Scalar { + // Safety: `self` is an integer vector + unsafe { intrinsics::simd_reduce_xor(self) } + } + } + )* + } +} + +impl_trait! { u8, u16, u32, u64, usize } diff --git a/library/portable-simd/crates/core_simd/src/eq.rs b/library/portable-simd/crates/core_simd/src/eq.rs new file mode 100644 index 000000000..c7111f720 --- /dev/null +++ b/library/portable-simd/crates/core_simd/src/eq.rs @@ -0,0 +1,73 @@ +use crate::simd::{intrinsics, LaneCount, Mask, Simd, SimdElement, SupportedLaneCount}; + +/// Parallel `PartialEq`. +pub trait SimdPartialEq { + /// The mask type returned by each comparison. + type Mask; + + /// Test if each lane is equal to the corresponding lane in `other`. + #[must_use = "method returns a new mask and does not mutate the original value"] + fn simd_eq(self, other: Self) -> Self::Mask; + + /// Test if each lane is equal to the corresponding lane in `other`. + #[must_use = "method returns a new mask and does not mutate the original value"] + fn simd_ne(self, other: Self) -> Self::Mask; +} + +macro_rules! impl_number { + { $($number:ty),* } => { + $( + impl<const LANES: usize> SimdPartialEq for Simd<$number, LANES> + where + LaneCount<LANES>: SupportedLaneCount, + { + type Mask = Mask<<$number as SimdElement>::Mask, LANES>; + + #[inline] + fn simd_eq(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Mask::from_int_unchecked(intrinsics::simd_eq(self, other)) } + } + + #[inline] + fn simd_ne(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Mask::from_int_unchecked(intrinsics::simd_ne(self, other)) } + } + } + )* + } +} + +impl_number! { f32, f64, u8, u16, u32, u64, usize, i8, i16, i32, i64, isize } + +macro_rules! 
impl_mask { + { $($integer:ty),* } => { + $( + impl<const LANES: usize> SimdPartialEq for Mask<$integer, LANES> + where + LaneCount<LANES>: SupportedLaneCount, + { + type Mask = Self; + + #[inline] + fn simd_eq(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Self::from_int_unchecked(intrinsics::simd_eq(self.to_int(), other.to_int())) } + } + + #[inline] + fn simd_ne(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Self::from_int_unchecked(intrinsics::simd_ne(self.to_int(), other.to_int())) } + } + } + )* + } +} + +impl_mask! { i8, i16, i32, i64, isize } diff --git a/library/portable-simd/crates/core_simd/src/fmt.rs b/library/portable-simd/crates/core_simd/src/fmt.rs new file mode 100644 index 000000000..dbd9839c4 --- /dev/null +++ b/library/portable-simd/crates/core_simd/src/fmt.rs @@ -0,0 +1,39 @@ +use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount}; +use core::fmt; + +macro_rules! impl_fmt_trait { + { $($trait:ident,)* } => { + $( + impl<T, const LANES: usize> fmt::$trait for Simd<T, LANES> + where + LaneCount<LANES>: SupportedLaneCount, + T: SimdElement + fmt::$trait, + { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + #[repr(transparent)] + struct Wrapper<'a, T: fmt::$trait>(&'a T); + + impl<T: fmt::$trait> fmt::Debug for Wrapper<'_, T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } + } + + f.debug_list() + .entries(self.as_array().iter().map(|x| Wrapper(x))) + .finish() + } + } + )* + } +} + +impl_fmt_trait! { + Debug, + Binary, + LowerExp, + UpperExp, + Octal, + LowerHex, + UpperHex, +} diff --git a/library/portable-simd/crates/core_simd/src/intrinsics.rs b/library/portable-simd/crates/core_simd/src/intrinsics.rs new file mode 100644 index 000000000..962c83a78 --- /dev/null +++ b/library/portable-simd/crates/core_simd/src/intrinsics.rs @@ -0,0 +1,153 @@ +//! This module contains the LLVM intrinsics bindings that provide the functionality for this +//! crate. +//! +//! The LLVM assembly language is documented here: <https://llvm.org/docs/LangRef.html> +//! +//! A quick glossary of jargon that may appear in this module, mostly paraphrasing LLVM's LangRef: +//! - poison: "undefined behavior as a value". specifically, it is like uninit memory (such as padding bytes). it is "safe" to create poison, BUT +//! poison MUST NOT be observed from safe code, as operations on poison return poison, like NaN. unlike NaN, which has defined comparisons, +//! poison is neither true nor false, and LLVM may also convert it to undef (at which point it is both). so, it can't be conditioned on, either. +//! - undef: "a value that is every value". functionally like poison, insofar as Rust is concerned. poison may become this. note: +//! this means that division by poison or undef is like division by zero, which means it inflicts... +//! - "UB": poison and undef cover most of what people call "UB". "UB" means this operation immediately invalidates the program: +//! LLVM is allowed to lower it to `ud2` or other opcodes that may cause an illegal instruction exception, and this is the "good end". +//! The "bad end" is that LLVM may reverse time to the moment control flow diverged on a path towards undefined behavior, +//! and destroy the other branch, potentially deleting safe code and violating Rust's `unsafe` contract. +//! +//! 
Note that according to LLVM, vectors are not arrays, but they are equivalent when stored to and loaded from memory.
+//!
+//! Unless stated otherwise, all intrinsics for binary operations require SIMD vectors of equal types and lengths.
+
+// These intrinsics aren't linked directly from LLVM and are mostly undocumented; however, they are
+// mostly lowered to the matching LLVM instructions by the compiler in a fairly straightforward manner.
+// The associated LLVM instruction or intrinsic is documented alongside each Rust intrinsic function.
+extern "platform-intrinsic" {
+    /// add/fadd
+    pub(crate) fn simd_add<T>(x: T, y: T) -> T;
+
+    /// sub/fsub
+    pub(crate) fn simd_sub<T>(lhs: T, rhs: T) -> T;
+
+    /// mul/fmul
+    pub(crate) fn simd_mul<T>(x: T, y: T) -> T;
+
+    /// udiv/sdiv/fdiv
+    /// ints and uints: {s,u}div incur UB if division by zero occurs.
+    /// ints: sdiv is UB for int::MIN / -1.
+    /// floats: fdiv is never UB, but may create NaNs or infinities.
+    pub(crate) fn simd_div<T>(lhs: T, rhs: T) -> T;
+
+    /// urem/srem/frem
+    /// ints and uints: {s,u}rem incur UB if division by zero occurs.
+    /// ints: srem is UB for int::MIN / -1.
+    /// floats: frem is equivalent to libm::fmod in the "default" floating point environment, sans errno.
+    pub(crate) fn simd_rem<T>(lhs: T, rhs: T) -> T;
+
+    /// shl
+    /// for (u)ints. poison if rhs >= lhs::BITS
+    pub(crate) fn simd_shl<T>(lhs: T, rhs: T) -> T;
+
+    /// ints: ashr
+    /// uints: lshr
+    /// poison if rhs >= lhs::BITS
+    pub(crate) fn simd_shr<T>(lhs: T, rhs: T) -> T;
+
+    /// and
+    pub(crate) fn simd_and<T>(x: T, y: T) -> T;
+
+    /// or
+    pub(crate) fn simd_or<T>(x: T, y: T) -> T;
+
+    /// xor
+    pub(crate) fn simd_xor<T>(x: T, y: T) -> T;
+
+    /// getelementptr (without inbounds)
+    pub(crate) fn simd_arith_offset<T, U>(ptrs: T, offsets: U) -> T;
+
+    /// fptoui/fptosi/uitofp/sitofp
+    /// casting floats to integers is truncating, so it is safe to convert values such as 1.5,
+    /// but the truncated value must fit in the target type or the result is poison.
+    /// use `simd_as` instead for a cast that performs a saturating conversion.
+    pub(crate) fn simd_cast<T, U>(x: T) -> U;
+    /// follows Rust's `T as U` semantics, including saturating float casts
+    /// which amounts to the same as `simd_cast` for many cases
+    pub(crate) fn simd_as<T, U>(x: T) -> U;
+
+    /// neg/fneg
+    /// ints: ultimately becomes a call to cg_ssa's BuilderMethods::neg. cg_llvm equates this to `simd_sub(Simd::splat(0), x)`.
+    /// floats: LLVM's fneg, which changes the floating point sign bit. Some arches have instructions for it.
+    /// Rust panics for Neg::neg(int::MIN) due to overflow, but it is not UB in LLVM without `nsw`.
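+    ///
+    /// A sketch of the integer behavior in scalar form (illustrative, assuming
+    /// the wrapping semantics described above):
+    /// ```ignore
+    /// assert_eq!(i32::MIN.wrapping_neg(), i32::MIN); // wraps; no panic, no UB
+    /// ```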
+    pub(crate) fn simd_neg<T>(x: T) -> T;
+
+    /// fabs
+    pub(crate) fn simd_fabs<T>(x: T) -> T;
+
+    // minnum/maxnum
+    pub(crate) fn simd_fmin<T>(x: T, y: T) -> T;
+    pub(crate) fn simd_fmax<T>(x: T, y: T) -> T;
+
+    // these return Simd<int, N> with the same BITS size as the inputs
+    pub(crate) fn simd_eq<T, U>(x: T, y: T) -> U;
+    pub(crate) fn simd_ne<T, U>(x: T, y: T) -> U;
+    pub(crate) fn simd_lt<T, U>(x: T, y: T) -> U;
+    pub(crate) fn simd_le<T, U>(x: T, y: T) -> U;
+    pub(crate) fn simd_gt<T, U>(x: T, y: T) -> U;
+    pub(crate) fn simd_ge<T, U>(x: T, y: T) -> U;
+
+    // shufflevector
+    // idx: LLVM calls it a "shuffle mask vector constant", a vector of i32s
+    pub(crate) fn simd_shuffle<T, U, V>(x: T, y: T, idx: U) -> V;
+
+    /// llvm.masked.gather
+    /// like a loop of pointer reads
+    /// val: vector of values to select if a lane is masked
+    /// ptr: vector of pointers to read from
+    /// mask: a "wide" mask of integers, selects as if simd_select(mask, read(ptr), val)
+    /// note, the LLVM intrinsic accepts a mask vector of <N x i1>
+    /// FIXME: review this if/when we fix up our mask story in general?
+    pub(crate) fn simd_gather<T, U, V>(val: T, ptr: U, mask: V) -> T;
+    /// llvm.masked.scatter
+    /// like gather, but more spicy, as it writes instead of reads
+    pub(crate) fn simd_scatter<T, U, V>(val: T, ptr: U, mask: V);
+
+    // {s,u}add.sat
+    pub(crate) fn simd_saturating_add<T>(x: T, y: T) -> T;
+
+    // {s,u}sub.sat
+    pub(crate) fn simd_saturating_sub<T>(lhs: T, rhs: T) -> T;
+
+    // reductions
+    // llvm.vector.reduce.{add,fadd}
+    pub(crate) fn simd_reduce_add_ordered<T, U>(x: T, y: U) -> U;
+    // llvm.vector.reduce.{mul,fmul}
+    pub(crate) fn simd_reduce_mul_ordered<T, U>(x: T, y: U) -> U;
+    #[allow(unused)]
+    pub(crate) fn simd_reduce_all<T>(x: T) -> bool;
+    #[allow(unused)]
+    pub(crate) fn simd_reduce_any<T>(x: T) -> bool;
+    pub(crate) fn simd_reduce_max<T, U>(x: T) -> U;
+    pub(crate) fn simd_reduce_min<T, U>(x: T) -> U;
+    pub(crate) fn simd_reduce_and<T, U>(x: T) -> U;
+    pub(crate) fn simd_reduce_or<T, U>(x: T) -> U;
+    pub(crate) fn simd_reduce_xor<T, U>(x: T) -> U;
+
+    // truncate integer vector to bitmask
+    // `fn simd_bitmask(vector) -> unsigned integer` takes a vector of integers and
+    // returns either an unsigned integer or array of `u8`.
+    // Every element in the vector becomes a single bit in the returned bitmask.
+    // If the vector has fewer than 8 lanes, a u8 is returned with zeroed trailing bits.
+    // The bit order of the result depends on the byte endianness: LSB-first for little
+    // endian and MSB-first for big endian.
+    //
+    // UB if called on a vector with values other than 0 and -1.
+    #[allow(unused)]
+    pub(crate) fn simd_bitmask<T, U>(x: T) -> U;
+
+    // select
+    // first argument is a vector of integers, -1 (all bits 1) is "true"
+    // logically equivalent to `(yes & m) | (no & (m ^ -1))`,
+    // but you can use it on floats.
+    pub(crate) fn simd_select<M, T>(m: M, yes: T, no: T) -> T;
+    #[allow(unused)]
+    pub(crate) fn simd_select_bitmask<M, T>(m: M, yes: T, no: T) -> T;
+}
diff --git a/library/portable-simd/crates/core_simd/src/iter.rs b/library/portable-simd/crates/core_simd/src/iter.rs
new file mode 100644
index 000000000..3275b4db8
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/iter.rs
@@ -0,0 +1,58 @@
+use crate::simd::{LaneCount, Simd, SupportedLaneCount};
+use core::{
+    iter::{Product, Sum},
+    ops::{Add, Mul},
+};
+
+macro_rules!
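+// A sketch of the impls the macro below generates, assuming the `Simd` API in
+// this crate (illustrative values):
+//
+//     let chunks = [Simd::<u32, 4>::splat(1), Simd::splat(2), Simd::splat(3)];
+//     let total: Simd<u32, 4> = chunks.into_iter().sum();
+//     assert_eq!(total, Simd::splat(6));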
impl_traits { + { $type:ty } => { + impl<const LANES: usize> Sum<Self> for Simd<$type, LANES> + where + LaneCount<LANES>: SupportedLaneCount, + { + fn sum<I: Iterator<Item = Self>>(iter: I) -> Self { + iter.fold(Simd::splat(0 as $type), Add::add) + } + } + + impl<const LANES: usize> Product<Self> for Simd<$type, LANES> + where + LaneCount<LANES>: SupportedLaneCount, + { + fn product<I: Iterator<Item = Self>>(iter: I) -> Self { + iter.fold(Simd::splat(1 as $type), Mul::mul) + } + } + + impl<'a, const LANES: usize> Sum<&'a Self> for Simd<$type, LANES> + where + LaneCount<LANES>: SupportedLaneCount, + { + fn sum<I: Iterator<Item = &'a Self>>(iter: I) -> Self { + iter.fold(Simd::splat(0 as $type), Add::add) + } + } + + impl<'a, const LANES: usize> Product<&'a Self> for Simd<$type, LANES> + where + LaneCount<LANES>: SupportedLaneCount, + { + fn product<I: Iterator<Item = &'a Self>>(iter: I) -> Self { + iter.fold(Simd::splat(1 as $type), Mul::mul) + } + } + } +} + +impl_traits! { f32 } +impl_traits! { f64 } +impl_traits! { u8 } +impl_traits! { u16 } +impl_traits! { u32 } +impl_traits! { u64 } +impl_traits! { usize } +impl_traits! { i8 } +impl_traits! { i16 } +impl_traits! { i32 } +impl_traits! { i64 } +impl_traits! { isize } diff --git a/library/portable-simd/crates/core_simd/src/lane_count.rs b/library/portable-simd/crates/core_simd/src/lane_count.rs new file mode 100644 index 000000000..63723e2ec --- /dev/null +++ b/library/portable-simd/crates/core_simd/src/lane_count.rs @@ -0,0 +1,46 @@ +mod sealed { + pub trait Sealed {} +} +use sealed::Sealed; + +/// Specifies the number of lanes in a SIMD vector as a type. +pub struct LaneCount<const LANES: usize>; + +impl<const LANES: usize> LaneCount<LANES> { + /// The number of bytes in a bitmask with this many lanes. + pub const BITMASK_LEN: usize = (LANES + 7) / 8; +} + +/// Statically guarantees that a lane count is marked as supported. +/// +/// This trait is *sealed*: the list of implementors below is total. +/// Users do not have the ability to mark additional `LaneCount<N>` values as supported. +/// Only SIMD vectors with supported lane counts are constructable. 
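+///
+/// A sketch of the effect, assuming the `Simd` type in this crate (illustrative):
+/// ```ignore
+/// let _ok = Simd::<u8, 8>::splat(0);  // LaneCount<8>: SupportedLaneCount holds
+/// // Simd::<u8, 3>::splat(0);        // would not compile: 3 is unsupported
+/// ```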
+pub trait SupportedLaneCount: Sealed {
+    #[doc(hidden)]
+    type BitMask: Copy + Default + AsRef<[u8]> + AsMut<[u8]>;
+}
+
+impl<const LANES: usize> Sealed for LaneCount<LANES> {}
+
+impl SupportedLaneCount for LaneCount<1> {
+    type BitMask = [u8; 1];
+}
+impl SupportedLaneCount for LaneCount<2> {
+    type BitMask = [u8; 1];
+}
+impl SupportedLaneCount for LaneCount<4> {
+    type BitMask = [u8; 1];
+}
+impl SupportedLaneCount for LaneCount<8> {
+    type BitMask = [u8; 1];
+}
+impl SupportedLaneCount for LaneCount<16> {
+    type BitMask = [u8; 2];
+}
+impl SupportedLaneCount for LaneCount<32> {
+    type BitMask = [u8; 4];
+}
+impl SupportedLaneCount for LaneCount<64> {
+    type BitMask = [u8; 8];
+}
diff --git a/library/portable-simd/crates/core_simd/src/lib.rs b/library/portable-simd/crates/core_simd/src/lib.rs
new file mode 100644
index 000000000..715f258f6
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/lib.rs
@@ -0,0 +1,22 @@
+#![no_std]
+#![feature(
+    convert_float_to_int,
+    decl_macro,
+    intra_doc_pointers,
+    platform_intrinsics,
+    repr_simd,
+    simd_ffi,
+    staged_api,
+    stdsimd
+)]
+#![cfg_attr(feature = "generic_const_exprs", feature(generic_const_exprs))]
+#![cfg_attr(feature = "generic_const_exprs", allow(incomplete_features))]
+#![warn(missing_docs)]
+#![deny(unsafe_op_in_unsafe_fn, clippy::undocumented_unsafe_blocks)]
+#![unstable(feature = "portable_simd", issue = "86656")]
+//! Portable SIMD module.
+
+#[path = "mod.rs"]
+mod core_simd;
+pub use self::core_simd::simd;
+pub use simd::*;
diff --git a/library/portable-simd/crates/core_simd/src/masks.rs b/library/portable-simd/crates/core_simd/src/masks.rs
new file mode 100644
index 000000000..c36c336d8
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/masks.rs
@@ -0,0 +1,595 @@
+//! Types and traits associated with masking lanes of vectors.
+#![allow(non_camel_case_types)]
+
+#[cfg_attr(
+    not(all(target_arch = "x86_64", target_feature = "avx512f")),
+    path = "masks/full_masks.rs"
+)]
+#[cfg_attr(
+    all(target_arch = "x86_64", target_feature = "avx512f"),
+    path = "masks/bitmask.rs"
+)]
+mod mask_impl;
+
+mod to_bitmask;
+pub use to_bitmask::ToBitMask;
+
+#[cfg(feature = "generic_const_exprs")]
+pub use to_bitmask::{bitmask_len, ToBitMaskArray};
+
+use crate::simd::{intrinsics, LaneCount, Simd, SimdElement, SimdPartialEq, SupportedLaneCount};
+use core::cmp::Ordering;
+use core::{fmt, mem};
+
+mod sealed {
+    use super::*;
+
+    /// Not only does this seal the `MaskElement` trait, but these functions prevent other traits
+    /// from bleeding into the parent bounds.
+    ///
+    /// For example, `eq` could be provided by requiring `MaskElement: PartialEq`, but that would
+    /// prevent us from ever removing that bound, or from implementing `MaskElement` on
+    /// non-`PartialEq` types in the future.
+    pub trait Sealed {
+        fn valid<const LANES: usize>(values: Simd<Self, LANES>) -> bool
+        where
+            LaneCount<LANES>: SupportedLaneCount,
+            Self: SimdElement;
+
+        fn eq(self, other: Self) -> bool;
+
+        const TRUE: Self;
+
+        const FALSE: Self;
+    }
+}
+use sealed::Sealed;
+
+/// Marker trait for types that may be used as SIMD mask elements.
+///
+/// # Safety
+/// Type must be a signed integer.
+pub unsafe trait MaskElement: SimdElement + Sealed {}
+
+macro_rules!
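+// A sketch of the validity check the macro below generates (illustrative):
+// a lane holding e.g. 3 is neither 0 nor -1, so `valid` returns false and
+// `Mask::from_int(Simd::from_array([0, -1, 3, 0]))` would panic, while
+// replacing the 3 with 0 or -1 succeeds.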
impl_element { + { $ty:ty } => { + impl Sealed for $ty { + fn valid<const LANES: usize>(value: Simd<Self, LANES>) -> bool + where + LaneCount<LANES>: SupportedLaneCount, + { + (value.simd_eq(Simd::splat(0 as _)) | value.simd_eq(Simd::splat(-1 as _))).all() + } + + fn eq(self, other: Self) -> bool { self == other } + + const TRUE: Self = -1; + const FALSE: Self = 0; + } + + // Safety: this is a valid mask element type + unsafe impl MaskElement for $ty {} + } +} + +impl_element! { i8 } +impl_element! { i16 } +impl_element! { i32 } +impl_element! { i64 } +impl_element! { isize } + +/// A SIMD vector mask for `LANES` elements of width specified by `Element`. +/// +/// Masks represent boolean inclusion/exclusion on a per-lane basis. +/// +/// The layout of this type is unspecified. +#[repr(transparent)] +pub struct Mask<T, const LANES: usize>(mask_impl::Mask<T, LANES>) +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount; + +impl<T, const LANES: usize> Copy for Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ +} + +impl<T, const LANES: usize> Clone for Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + fn clone(&self) -> Self { + *self + } +} + +impl<T, const LANES: usize> Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + /// Construct a mask by setting all lanes to the given value. + pub fn splat(value: bool) -> Self { + Self(mask_impl::Mask::splat(value)) + } + + /// Converts an array of bools to a SIMD mask. + pub fn from_array(array: [bool; LANES]) -> Self { + // SAFETY: Rust's bool has a layout of 1 byte (u8) with a value of + // true: 0b_0000_0001 + // false: 0b_0000_0000 + // Thus, an array of bools is also a valid array of bytes: [u8; N] + // This would be hypothetically valid as an "in-place" transmute, + // but these are "dependently-sized" types, so copy elision it is! + unsafe { + let bytes: [u8; LANES] = mem::transmute_copy(&array); + let bools: Simd<i8, LANES> = + intrinsics::simd_ne(Simd::from_array(bytes), Simd::splat(0u8)); + Mask::from_int_unchecked(intrinsics::simd_cast(bools)) + } + } + + /// Converts a SIMD mask to an array of bools. + pub fn to_array(self) -> [bool; LANES] { + // This follows mostly the same logic as from_array. + // SAFETY: Rust's bool has a layout of 1 byte (u8) with a value of + // true: 0b_0000_0001 + // false: 0b_0000_0000 + // Thus, an array of bools is also a valid array of bytes: [u8; N] + // Since our masks are equal to integers where all bits are set, + // we can simply convert them to i8s, and then bitand them by the + // bitpattern for Rust's "true" bool. + // This would be hypothetically valid as an "in-place" transmute, + // but these are "dependently-sized" types, so copy elision it is! + unsafe { + let mut bytes: Simd<i8, LANES> = intrinsics::simd_cast(self.to_int()); + bytes &= Simd::splat(1i8); + mem::transmute_copy(&bytes) + } + } + + /// Converts a vector of integers to a mask, where 0 represents `false` and -1 + /// represents `true`. + /// + /// # Safety + /// All lanes must be either 0 or -1. + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self { + // Safety: the caller must confirm this invariant + unsafe { Self(mask_impl::Mask::from_int_unchecked(value)) } + } + + /// Converts a vector of integers to a mask, where 0 represents `false` and -1 + /// represents `true`. 
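+    ///
+    /// A sketch of the round-trip, assuming the API declared in this file
+    /// (illustrative values):
+    /// ```ignore
+    /// let m = Mask::<i8, 4>::from_int(Simd::from_array([0, -1, -1, 0]));
+    /// assert_eq!(m.to_array(), [false, true, true, false]);
+    /// ```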
+ /// + /// # Panics + /// Panics if any lane is not 0 or -1. + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + pub fn from_int(value: Simd<T, LANES>) -> Self { + assert!(T::valid(value), "all values must be either 0 or -1",); + // Safety: the validity has been checked + unsafe { Self::from_int_unchecked(value) } + } + + /// Converts the mask to a vector of integers, where 0 represents `false` and -1 + /// represents `true`. + #[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] + pub fn to_int(self) -> Simd<T, LANES> { + self.0.to_int() + } + + /// Converts the mask to a mask of any other lane size. + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + pub fn cast<U: MaskElement>(self) -> Mask<U, LANES> { + Mask(self.0.convert()) + } + + /// Tests the value of the specified lane. + /// + /// # Safety + /// `lane` must be less than `LANES`. + #[inline] + #[must_use = "method returns a new bool and does not mutate the original value"] + pub unsafe fn test_unchecked(&self, lane: usize) -> bool { + // Safety: the caller must confirm this invariant + unsafe { self.0.test_unchecked(lane) } + } + + /// Tests the value of the specified lane. + /// + /// # Panics + /// Panics if `lane` is greater than or equal to the number of lanes in the vector. + #[inline] + #[must_use = "method returns a new bool and does not mutate the original value"] + pub fn test(&self, lane: usize) -> bool { + assert!(lane < LANES, "lane index out of range"); + // Safety: the lane index has been checked + unsafe { self.test_unchecked(lane) } + } + + /// Sets the value of the specified lane. + /// + /// # Safety + /// `lane` must be less than `LANES`. + #[inline] + pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) { + // Safety: the caller must confirm this invariant + unsafe { + self.0.set_unchecked(lane, value); + } + } + + /// Sets the value of the specified lane. + /// + /// # Panics + /// Panics if `lane` is greater than or equal to the number of lanes in the vector. + #[inline] + pub fn set(&mut self, lane: usize, value: bool) { + assert!(lane < LANES, "lane index out of range"); + // Safety: the lane index has been checked + unsafe { + self.set_unchecked(lane, value); + } + } + + /// Returns true if any lane is set, or false otherwise. + #[inline] + #[must_use = "method returns a new bool and does not mutate the original value"] + pub fn any(self) -> bool { + self.0.any() + } + + /// Returns true if all lanes are set, or false otherwise. 
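+    ///
+    /// A sketch of `any` vs `all`, using the alias types defined below
+    /// (illustrative values):
+    /// ```ignore
+    /// let m = mask32x4::from_array([true, true, false, true]);
+    /// assert!(m.any());
+    /// assert!(!m.all());
+    /// ```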
+ #[inline] + #[must_use = "method returns a new bool and does not mutate the original value"] + pub fn all(self) -> bool { + self.0.all() + } +} + +// vector/array conversion +impl<T, const LANES: usize> From<[bool; LANES]> for Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + fn from(array: [bool; LANES]) -> Self { + Self::from_array(array) + } +} + +impl<T, const LANES: usize> From<Mask<T, LANES>> for [bool; LANES] +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + fn from(vector: Mask<T, LANES>) -> Self { + vector.to_array() + } +} + +impl<T, const LANES: usize> Default for Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + #[inline] + #[must_use = "method returns a defaulted mask with all lanes set to false (0)"] + fn default() -> Self { + Self::splat(false) + } +} + +impl<T, const LANES: usize> PartialEq for Mask<T, LANES> +where + T: MaskElement + PartialEq, + LaneCount<LANES>: SupportedLaneCount, +{ + #[inline] + #[must_use = "method returns a new bool and does not mutate the original value"] + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 + } +} + +impl<T, const LANES: usize> PartialOrd for Mask<T, LANES> +where + T: MaskElement + PartialOrd, + LaneCount<LANES>: SupportedLaneCount, +{ + #[inline] + #[must_use = "method returns a new Ordering and does not mutate the original value"] + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + self.0.partial_cmp(&other.0) + } +} + +impl<T, const LANES: usize> fmt::Debug for Mask<T, LANES> +where + T: MaskElement + fmt::Debug, + LaneCount<LANES>: SupportedLaneCount, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_list() + .entries((0..LANES).map(|lane| self.test(lane))) + .finish() + } +} + +impl<T, const LANES: usize> core::ops::BitAnd for Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + type Output = Self; + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + fn bitand(self, rhs: Self) -> Self { + Self(self.0 & rhs.0) + } +} + +impl<T, const LANES: usize> core::ops::BitAnd<bool> for Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + type Output = Self; + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + fn bitand(self, rhs: bool) -> Self { + self & Self::splat(rhs) + } +} + +impl<T, const LANES: usize> core::ops::BitAnd<Mask<T, LANES>> for bool +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + type Output = Mask<T, LANES>; + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + fn bitand(self, rhs: Mask<T, LANES>) -> Mask<T, LANES> { + Mask::splat(self) & rhs + } +} + +impl<T, const LANES: usize> core::ops::BitOr for Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + type Output = Self; + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + fn bitor(self, rhs: Self) -> Self { + Self(self.0 | rhs.0) + } +} + +impl<T, const LANES: usize> core::ops::BitOr<bool> for Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + type Output = Self; + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + fn bitor(self, rhs: bool) -> Self { + self | Self::splat(rhs) + } +} + +impl<T, const LANES: usize> core::ops::BitOr<Mask<T, LANES>> for bool +where + T: 
MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + type Output = Mask<T, LANES>; + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + fn bitor(self, rhs: Mask<T, LANES>) -> Mask<T, LANES> { + Mask::splat(self) | rhs + } +} + +impl<T, const LANES: usize> core::ops::BitXor for Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + type Output = Self; + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + fn bitxor(self, rhs: Self) -> Self::Output { + Self(self.0 ^ rhs.0) + } +} + +impl<T, const LANES: usize> core::ops::BitXor<bool> for Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + type Output = Self; + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + fn bitxor(self, rhs: bool) -> Self::Output { + self ^ Self::splat(rhs) + } +} + +impl<T, const LANES: usize> core::ops::BitXor<Mask<T, LANES>> for bool +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + type Output = Mask<T, LANES>; + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + fn bitxor(self, rhs: Mask<T, LANES>) -> Self::Output { + Mask::splat(self) ^ rhs + } +} + +impl<T, const LANES: usize> core::ops::Not for Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + type Output = Mask<T, LANES>; + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + fn not(self) -> Self::Output { + Self(!self.0) + } +} + +impl<T, const LANES: usize> core::ops::BitAndAssign for Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + #[inline] + fn bitand_assign(&mut self, rhs: Self) { + self.0 = self.0 & rhs.0; + } +} + +impl<T, const LANES: usize> core::ops::BitAndAssign<bool> for Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + #[inline] + fn bitand_assign(&mut self, rhs: bool) { + *self &= Self::splat(rhs); + } +} + +impl<T, const LANES: usize> core::ops::BitOrAssign for Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + #[inline] + fn bitor_assign(&mut self, rhs: Self) { + self.0 = self.0 | rhs.0; + } +} + +impl<T, const LANES: usize> core::ops::BitOrAssign<bool> for Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + #[inline] + fn bitor_assign(&mut self, rhs: bool) { + *self |= Self::splat(rhs); + } +} + +impl<T, const LANES: usize> core::ops::BitXorAssign for Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + #[inline] + fn bitxor_assign(&mut self, rhs: Self) { + self.0 = self.0 ^ rhs.0; + } +} + +impl<T, const LANES: usize> core::ops::BitXorAssign<bool> for Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + #[inline] + fn bitxor_assign(&mut self, rhs: bool) { + *self ^= Self::splat(rhs); + } +} + +/// A mask for SIMD vectors with eight elements of 8 bits. +pub type mask8x8 = Mask<i8, 8>; + +/// A mask for SIMD vectors with 16 elements of 8 bits. +pub type mask8x16 = Mask<i8, 16>; + +/// A mask for SIMD vectors with 32 elements of 8 bits. +pub type mask8x32 = Mask<i8, 32>; + +/// A mask for SIMD vectors with 64 elements of 8 bits. +pub type mask8x64 = Mask<i8, 64>; + +/// A mask for SIMD vectors with four elements of 16 bits. 
+pub type mask16x4 = Mask<i16, 4>; + +/// A mask for SIMD vectors with eight elements of 16 bits. +pub type mask16x8 = Mask<i16, 8>; + +/// A mask for SIMD vectors with 16 elements of 16 bits. +pub type mask16x16 = Mask<i16, 16>; + +/// A mask for SIMD vectors with 32 elements of 16 bits. +pub type mask16x32 = Mask<i16, 32>; + +/// A mask for SIMD vectors with two elements of 32 bits. +pub type mask32x2 = Mask<i32, 2>; + +/// A mask for SIMD vectors with four elements of 32 bits. +pub type mask32x4 = Mask<i32, 4>; + +/// A mask for SIMD vectors with eight elements of 32 bits. +pub type mask32x8 = Mask<i32, 8>; + +/// A mask for SIMD vectors with 16 elements of 32 bits. +pub type mask32x16 = Mask<i32, 16>; + +/// A mask for SIMD vectors with two elements of 64 bits. +pub type mask64x2 = Mask<i64, 2>; + +/// A mask for SIMD vectors with four elements of 64 bits. +pub type mask64x4 = Mask<i64, 4>; + +/// A mask for SIMD vectors with eight elements of 64 bits. +pub type mask64x8 = Mask<i64, 8>; + +/// A mask for SIMD vectors with two elements of pointer width. +pub type masksizex2 = Mask<isize, 2>; + +/// A mask for SIMD vectors with four elements of pointer width. +pub type masksizex4 = Mask<isize, 4>; + +/// A mask for SIMD vectors with eight elements of pointer width. +pub type masksizex8 = Mask<isize, 8>; + +macro_rules! impl_from { + { $from:ty => $($to:ty),* } => { + $( + impl<const LANES: usize> From<Mask<$from, LANES>> for Mask<$to, LANES> + where + LaneCount<LANES>: SupportedLaneCount, + { + fn from(value: Mask<$from, LANES>) -> Self { + value.cast() + } + } + )* + } +} +impl_from! { i8 => i16, i32, i64, isize } +impl_from! { i16 => i32, i64, isize, i8 } +impl_from! { i32 => i64, isize, i8, i16 } +impl_from! { i64 => isize, i8, i16, i32 } +impl_from! { isize => i8, i16, i32, i64 } diff --git a/library/portable-simd/crates/core_simd/src/masks/bitmask.rs b/library/portable-simd/crates/core_simd/src/masks/bitmask.rs new file mode 100644 index 000000000..365ecc0a3 --- /dev/null +++ b/library/portable-simd/crates/core_simd/src/masks/bitmask.rs @@ -0,0 +1,246 @@ +#![allow(unused_imports)] +use super::MaskElement; +use crate::simd::intrinsics; +use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask}; +use core::marker::PhantomData; + +/// A mask where each lane is represented by a single bit. 
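+///
+/// As a sketch of the layout (illustrative): for `LANES = 4` the backing store
+/// is `[u8; 1]`, lane `i` maps to bit `i`, so the lanes `[true, false, true, true]`
+/// are stored as the byte `0b0000_1101`.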
+#[repr(transparent)] +pub struct Mask<T, const LANES: usize>( + <LaneCount<LANES> as SupportedLaneCount>::BitMask, + PhantomData<T>, +) +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount; + +impl<T, const LANES: usize> Copy for Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ +} + +impl<T, const LANES: usize> Clone for Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + fn clone(&self) -> Self { + *self + } +} + +impl<T, const LANES: usize> PartialEq for Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + fn eq(&self, other: &Self) -> bool { + self.0.as_ref() == other.0.as_ref() + } +} + +impl<T, const LANES: usize> PartialOrd for Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> { + self.0.as_ref().partial_cmp(other.0.as_ref()) + } +} + +impl<T, const LANES: usize> Eq for Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ +} + +impl<T, const LANES: usize> Ord for Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + fn cmp(&self, other: &Self) -> core::cmp::Ordering { + self.0.as_ref().cmp(other.0.as_ref()) + } +} + +impl<T, const LANES: usize> Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + pub fn splat(value: bool) -> Self { + let mut mask = <LaneCount<LANES> as SupportedLaneCount>::BitMask::default(); + if value { + mask.as_mut().fill(u8::MAX) + } else { + mask.as_mut().fill(u8::MIN) + } + if LANES % 8 > 0 { + *mask.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - LANES % 8); + } + Self(mask, PhantomData) + } + + #[inline] + #[must_use = "method returns a new bool and does not mutate the original value"] + pub unsafe fn test_unchecked(&self, lane: usize) -> bool { + (self.0.as_ref()[lane / 8] >> (lane % 8)) & 0x1 > 0 + } + + #[inline] + pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) { + unsafe { + self.0.as_mut()[lane / 8] ^= ((value ^ self.test_unchecked(lane)) as u8) << (lane % 8) + } + } + + #[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] + pub fn to_int(self) -> Simd<T, LANES> { + unsafe { + intrinsics::simd_select_bitmask(self.0, Simd::splat(T::TRUE), Simd::splat(T::FALSE)) + } + } + + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self { + unsafe { Self(intrinsics::simd_bitmask(value), PhantomData) } + } + + #[cfg(feature = "generic_const_exprs")] + #[inline] + #[must_use = "method returns a new array and does not mutate the original value"] + pub fn to_bitmask_array<const N: usize>(self) -> [u8; N] { + assert!(core::mem::size_of::<Self>() == N); + + // Safety: converting an integer to an array of bytes of the same size is safe + unsafe { core::mem::transmute_copy(&self.0) } + } + + #[cfg(feature = "generic_const_exprs")] + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + pub fn from_bitmask_array<const N: usize>(bitmask: [u8; N]) -> Self { + assert!(core::mem::size_of::<Self>() == N); + + // Safety: converting an array of bytes to an integer of the same size is safe + Self(unsafe { core::mem::transmute_copy(&bitmask) }, PhantomData) + } + + #[inline] + pub fn 
to_bitmask_integer<U>(self) -> U + where + super::Mask<T, LANES>: ToBitMask<BitMask = U>, + { + // Safety: these are the same types + unsafe { core::mem::transmute_copy(&self.0) } + } + + #[inline] + pub fn from_bitmask_integer<U>(bitmask: U) -> Self + where + super::Mask<T, LANES>: ToBitMask<BitMask = U>, + { + // Safety: these are the same types + unsafe { Self(core::mem::transmute_copy(&bitmask), PhantomData) } + } + + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + pub fn convert<U>(self) -> Mask<U, LANES> + where + U: MaskElement, + { + // Safety: bitmask layout does not depend on the element width + unsafe { core::mem::transmute_copy(&self) } + } + + #[inline] + #[must_use = "method returns a new bool and does not mutate the original value"] + pub fn any(self) -> bool { + self != Self::splat(false) + } + + #[inline] + #[must_use = "method returns a new bool and does not mutate the original value"] + pub fn all(self) -> bool { + self == Self::splat(true) + } +} + +impl<T, const LANES: usize> core::ops::BitAnd for Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, + <LaneCount<LANES> as SupportedLaneCount>::BitMask: AsRef<[u8]> + AsMut<[u8]>, +{ + type Output = Self; + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + fn bitand(mut self, rhs: Self) -> Self { + for (l, r) in self.0.as_mut().iter_mut().zip(rhs.0.as_ref().iter()) { + *l &= r; + } + self + } +} + +impl<T, const LANES: usize> core::ops::BitOr for Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, + <LaneCount<LANES> as SupportedLaneCount>::BitMask: AsRef<[u8]> + AsMut<[u8]>, +{ + type Output = Self; + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + fn bitor(mut self, rhs: Self) -> Self { + for (l, r) in self.0.as_mut().iter_mut().zip(rhs.0.as_ref().iter()) { + *l |= r; + } + self + } +} + +impl<T, const LANES: usize> core::ops::BitXor for Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + type Output = Self; + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + fn bitxor(mut self, rhs: Self) -> Self::Output { + for (l, r) in self.0.as_mut().iter_mut().zip(rhs.0.as_ref().iter()) { + *l ^= r; + } + self + } +} + +impl<T, const LANES: usize> core::ops::Not for Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + type Output = Self; + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + fn not(mut self) -> Self::Output { + for x in self.0.as_mut() { + *x = !*x; + } + if LANES % 8 > 0 { + *self.0.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - LANES % 8); + } + self + } +} diff --git a/library/portable-simd/crates/core_simd/src/masks/full_masks.rs b/library/portable-simd/crates/core_simd/src/masks/full_masks.rs new file mode 100644 index 000000000..adf0fcbea --- /dev/null +++ b/library/portable-simd/crates/core_simd/src/masks/full_masks.rs @@ -0,0 +1,323 @@ +//! Masks that take up full SIMD vector registers. 
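+//!
+//! As a sketch (illustrative): each lane is a full integer element, `0` for
+//! false and `-1` (all bits set) for true, so a `Mask<i32, 4>` occupies the
+//! same 16 bytes as a `Simd<i32, 4>`.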
+ +use super::MaskElement; +use crate::simd::intrinsics; +use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask}; + +#[cfg(feature = "generic_const_exprs")] +use crate::simd::ToBitMaskArray; + +#[repr(transparent)] +pub struct Mask<T, const LANES: usize>(Simd<T, LANES>) +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount; + +impl<T, const LANES: usize> Copy for Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ +} + +impl<T, const LANES: usize> Clone for Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + fn clone(&self) -> Self { + *self + } +} + +impl<T, const LANES: usize> PartialEq for Mask<T, LANES> +where + T: MaskElement + PartialEq, + LaneCount<LANES>: SupportedLaneCount, +{ + fn eq(&self, other: &Self) -> bool { + self.0.eq(&other.0) + } +} + +impl<T, const LANES: usize> PartialOrd for Mask<T, LANES> +where + T: MaskElement + PartialOrd, + LaneCount<LANES>: SupportedLaneCount, +{ + fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> { + self.0.partial_cmp(&other.0) + } +} + +impl<T, const LANES: usize> Eq for Mask<T, LANES> +where + T: MaskElement + Eq, + LaneCount<LANES>: SupportedLaneCount, +{ +} + +impl<T, const LANES: usize> Ord for Mask<T, LANES> +where + T: MaskElement + Ord, + LaneCount<LANES>: SupportedLaneCount, +{ + fn cmp(&self, other: &Self) -> core::cmp::Ordering { + self.0.cmp(&other.0) + } +} + +// Used for bitmask bit order workaround +pub(crate) trait ReverseBits { + // Reverse the least significant `n` bits of `self`. + // (Remaining bits must be 0.) + fn reverse_bits(self, n: usize) -> Self; +} + +macro_rules! impl_reverse_bits { + { $($int:ty),* } => { + $( + impl ReverseBits for $int { + #[inline(always)] + fn reverse_bits(self, n: usize) -> Self { + let rev = <$int>::reverse_bits(self); + let bitsize = core::mem::size_of::<$int>() * 8; + if n < bitsize { + // Shift things back to the right + rev >> (bitsize - n) + } else { + rev + } + } + } + )* + } +} + +impl_reverse_bits! { u8, u16, u32, u64 } + +impl<T, const LANES: usize> Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + pub fn splat(value: bool) -> Self { + Self(Simd::splat(if value { T::TRUE } else { T::FALSE })) + } + + #[inline] + #[must_use = "method returns a new bool and does not mutate the original value"] + pub unsafe fn test_unchecked(&self, lane: usize) -> bool { + T::eq(self.0[lane], T::TRUE) + } + + #[inline] + pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) { + self.0[lane] = if value { T::TRUE } else { T::FALSE } + } + + #[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] + pub fn to_int(self) -> Simd<T, LANES> { + self.0 + } + + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self { + Self(value) + } + + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + pub fn convert<U>(self) -> Mask<U, LANES> + where + U: MaskElement, + { + // Safety: masks are simply integer vectors of 0 and -1, and we can cast the element type. 
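+        // (Sketch: casting e.g. Mask<i8, N> to Mask<i32, N> sign-extends each
+        // lane, so 0 stays 0 and -1 stays -1, preserving mask validity.)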
+        unsafe { Mask(intrinsics::simd_cast(self.0)) }
+    }
+
+    #[cfg(feature = "generic_const_exprs")]
+    #[inline]
+    #[must_use = "method returns a new array and does not mutate the original value"]
+    pub fn to_bitmask_array<const N: usize>(self) -> [u8; N]
+    where
+        super::Mask<T, LANES>: ToBitMaskArray,
+        [(); <super::Mask<T, LANES> as ToBitMaskArray>::BYTES]: Sized,
+    {
+        assert_eq!(<super::Mask<T, LANES> as ToBitMaskArray>::BYTES, N);
+
+        // Safety: N is the correct bitmask size
+        unsafe {
+            // Compute the bitmask
+            let bitmask: [u8; <super::Mask<T, LANES> as ToBitMaskArray>::BYTES] =
+                intrinsics::simd_bitmask(self.0);
+
+            // Transmute to the return type, previously asserted to be the same size
+            let mut bitmask: [u8; N] = core::mem::transmute_copy(&bitmask);
+
+            // LLVM assumes bit order should match endianness
+            if cfg!(target_endian = "big") {
+                for x in bitmask.as_mut() {
+                    *x = x.reverse_bits();
+                }
+            };
+
+            bitmask
+        }
+    }
+
+    #[cfg(feature = "generic_const_exprs")]
+    #[inline]
+    #[must_use = "method returns a new mask and does not mutate the original value"]
+    pub fn from_bitmask_array<const N: usize>(mut bitmask: [u8; N]) -> Self
+    where
+        super::Mask<T, LANES>: ToBitMaskArray,
+        [(); <super::Mask<T, LANES> as ToBitMaskArray>::BYTES]: Sized,
+    {
+        assert_eq!(<super::Mask<T, LANES> as ToBitMaskArray>::BYTES, N);
+
+        // Safety: N is the correct bitmask size
+        unsafe {
+            // LLVM assumes bit order should match endianness
+            if cfg!(target_endian = "big") {
+                for x in bitmask.as_mut() {
+                    *x = x.reverse_bits();
+                }
+            }
+
+            // Transmute to the bitmask type, previously asserted to be the same size
+            let bitmask: [u8; <super::Mask<T, LANES> as ToBitMaskArray>::BYTES] =
+                core::mem::transmute_copy(&bitmask);
+
+            // Compute the regular mask
+            Self::from_int_unchecked(intrinsics::simd_select_bitmask(
+                bitmask,
+                Self::splat(true).to_int(),
+                Self::splat(false).to_int(),
+            ))
+        }
+    }
+
+    #[inline]
+    pub(crate) fn to_bitmask_integer<U: ReverseBits>(self) -> U
+    where
+        super::Mask<T, LANES>: ToBitMask<BitMask = U>,
+    {
+        // Safety: U is required to be the appropriate bitmask type
+        let bitmask: U = unsafe { intrinsics::simd_bitmask(self.0) };
+
+        // LLVM assumes bit order should match endianness
+        if cfg!(target_endian = "big") {
+            bitmask.reverse_bits(LANES)
+        } else {
+            bitmask
+        }
+    }
+
+    #[inline]
+    pub(crate) fn from_bitmask_integer<U: ReverseBits>(bitmask: U) -> Self
+    where
+        super::Mask<T, LANES>: ToBitMask<BitMask = U>,
+    {
+        // LLVM assumes bit order should match endianness
+        let bitmask = if cfg!(target_endian = "big") {
+            bitmask.reverse_bits(LANES)
+        } else {
+            bitmask
+        };
+
+        // Safety: U is required to be the appropriate bitmask type
+        unsafe {
+            Self::from_int_unchecked(intrinsics::simd_select_bitmask(
+                bitmask,
+                Self::splat(true).to_int(),
+                Self::splat(false).to_int(),
+            ))
+        }
+    }
+
+    #[inline]
+    #[must_use = "method returns a new bool and does not mutate the original value"]
+    pub fn any(self) -> bool {
+        // Safety: use `self` as an integer vector
+        unsafe { intrinsics::simd_reduce_any(self.to_int()) }
+    }
+
+    #[inline]
+    #[must_use = "method returns a new bool and does not mutate the original value"]
+    pub fn all(self) -> bool {
+        // Safety: use `self` as an integer vector
+        unsafe { intrinsics::simd_reduce_all(self.to_int()) }
+    }
+}
+
+impl<T, const LANES: usize> core::convert::From<Mask<T, LANES>> for Simd<T, LANES>
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    fn from(value: Mask<T, LANES>) -> Self {
+        value.0
+    }
+}
+
+impl<T, const LANES:
usize> core::ops::BitAnd for Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + type Output = Self; + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + fn bitand(self, rhs: Self) -> Self { + // Safety: `self` is an integer vector + unsafe { Self(intrinsics::simd_and(self.0, rhs.0)) } + } +} + +impl<T, const LANES: usize> core::ops::BitOr for Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + type Output = Self; + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + fn bitor(self, rhs: Self) -> Self { + // Safety: `self` is an integer vector + unsafe { Self(intrinsics::simd_or(self.0, rhs.0)) } + } +} + +impl<T, const LANES: usize> core::ops::BitXor for Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + type Output = Self; + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + fn bitxor(self, rhs: Self) -> Self { + // Safety: `self` is an integer vector + unsafe { Self(intrinsics::simd_xor(self.0, rhs.0)) } + } +} + +impl<T, const LANES: usize> core::ops::Not for Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + type Output = Self; + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + fn not(self) -> Self::Output { + Self::splat(true) ^ self + } +} diff --git a/library/portable-simd/crates/core_simd/src/masks/to_bitmask.rs b/library/portable-simd/crates/core_simd/src/masks/to_bitmask.rs new file mode 100644 index 000000000..65d3ce9be --- /dev/null +++ b/library/portable-simd/crates/core_simd/src/masks/to_bitmask.rs @@ -0,0 +1,93 @@ +use super::{mask_impl, Mask, MaskElement}; +use crate::simd::{LaneCount, SupportedLaneCount}; + +mod sealed { + pub trait Sealed {} +} +pub use sealed::Sealed; + +impl<T, const LANES: usize> Sealed for Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ +} + +/// Converts masks to and from integer bitmasks. +/// +/// Each bit of the bitmask corresponds to a mask lane, starting with the LSB. +pub trait ToBitMask: Sealed { + /// The integer bitmask type. + type BitMask; + + /// Converts a mask to a bitmask. + fn to_bitmask(self) -> Self::BitMask; + + /// Converts a bitmask to a mask. + fn from_bitmask(bitmask: Self::BitMask) -> Self; +} + +/// Converts masks to and from byte array bitmasks. +/// +/// Each bit of the bitmask corresponds to a mask lane, starting with the LSB of the first byte. +#[cfg(feature = "generic_const_exprs")] +pub trait ToBitMaskArray: Sealed { + /// The length of the bitmask array. + const BYTES: usize; + + /// Converts a mask to a bitmask. + fn to_bitmask_array(self) -> [u8; Self::BYTES]; + + /// Converts a bitmask to a mask. + fn from_bitmask_array(bitmask: [u8; Self::BYTES]) -> Self; +} + +macro_rules! impl_integer_intrinsic { + { $(impl ToBitMask<BitMask=$int:ty> for Mask<_, $lanes:literal>)* } => { + $( + impl<T: MaskElement> ToBitMask for Mask<T, $lanes> { + type BitMask = $int; + + fn to_bitmask(self) -> $int { + self.0.to_bitmask_integer() + } + + fn from_bitmask(bitmask: $int) -> Self { + Self(mask_impl::Mask::from_bitmask_integer(bitmask)) + } + } + )* + } +} + +impl_integer_intrinsic! 
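+// A sketch of the LSB-first convention implemented below (illustrative values):
+//
+//     let m = Mask::<i8, 8>::from_array([true, false, true, false, false, false, false, false]);
+//     assert_eq!(m.to_bitmask(), 0b0000_0101u8);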
{
+    impl ToBitMask<BitMask=u8> for Mask<_, 1>
+    impl ToBitMask<BitMask=u8> for Mask<_, 2>
+    impl ToBitMask<BitMask=u8> for Mask<_, 4>
+    impl ToBitMask<BitMask=u8> for Mask<_, 8>
+    impl ToBitMask<BitMask=u16> for Mask<_, 16>
+    impl ToBitMask<BitMask=u32> for Mask<_, 32>
+    impl ToBitMask<BitMask=u64> for Mask<_, 64>
+}
+
+/// Returns the minimum number of bytes in a bitmask with `lanes` lanes.
+#[cfg(feature = "generic_const_exprs")]
+pub const fn bitmask_len(lanes: usize) -> usize {
+    (lanes + 7) / 8
+}
+
+#[cfg(feature = "generic_const_exprs")]
+impl<T: MaskElement, const LANES: usize> ToBitMaskArray for Mask<T, LANES>
+where
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    const BYTES: usize = bitmask_len(LANES);
+
+    fn to_bitmask_array(self) -> [u8; Self::BYTES] {
+        self.0.to_bitmask_array()
+    }
+
+    fn from_bitmask_array(bitmask: [u8; Self::BYTES]) -> Self {
+        Mask(mask_impl::Mask::from_bitmask_array(bitmask))
+    }
+}
diff --git a/library/portable-simd/crates/core_simd/src/mod.rs b/library/portable-simd/crates/core_simd/src/mod.rs
new file mode 100644
index 000000000..b472aa3ab
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/mod.rs
@@ -0,0 +1,32 @@
+#[macro_use]
+mod swizzle;
+
+pub(crate) mod intrinsics;
+
+#[cfg(feature = "generic_const_exprs")]
+mod to_bytes;
+
+mod elements;
+mod eq;
+mod fmt;
+mod iter;
+mod lane_count;
+mod masks;
+mod ops;
+mod ord;
+mod select;
+mod vector;
+mod vendor;
+
+#[doc = include_str!("core_simd_docs.md")]
+pub mod simd {
+    pub(crate) use crate::core_simd::intrinsics;
+
+    pub use crate::core_simd::elements::*;
+    pub use crate::core_simd::eq::*;
+    pub use crate::core_simd::lane_count::{LaneCount, SupportedLaneCount};
+    pub use crate::core_simd::masks::*;
+    pub use crate::core_simd::ord::*;
+    pub use crate::core_simd::swizzle::*;
+    pub use crate::core_simd::vector::*;
+}
diff --git a/library/portable-simd/crates/core_simd/src/ops.rs b/library/portable-simd/crates/core_simd/src/ops.rs
new file mode 100644
index 000000000..5a077a469
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/ops.rs
@@ -0,0 +1,254 @@
+use crate::simd::{LaneCount, Simd, SimdElement, SimdPartialEq, SupportedLaneCount};
+use core::ops::{Add, Mul};
+use core::ops::{BitAnd, BitOr, BitXor};
+use core::ops::{Div, Rem, Sub};
+use core::ops::{Shl, Shr};
+
+mod assign;
+mod deref;
+mod unary;
+
+impl<I, T, const LANES: usize> core::ops::Index<I> for Simd<T, LANES>
+where
+    T: SimdElement,
+    LaneCount<LANES>: SupportedLaneCount,
+    I: core::slice::SliceIndex<[T]>,
+{
+    type Output = I::Output;
+    fn index(&self, index: I) -> &Self::Output {
+        &self.as_array()[index]
+    }
+}
+
+impl<I, T, const LANES: usize> core::ops::IndexMut<I> for Simd<T, LANES>
+where
+    T: SimdElement,
+    LaneCount<LANES>: SupportedLaneCount,
+    I: core::slice::SliceIndex<[T]>,
+{
+    fn index_mut(&mut self, index: I) -> &mut Self::Output {
+        &mut self.as_mut_array()[index]
+    }
+}
+
+macro_rules! unsafe_base {
+    ($lhs:ident, $rhs:ident, {$simd_call:ident}, $($_:tt)*) => {
+        // Safety: $lhs and $rhs are vectors
+        unsafe { $crate::simd::intrinsics::$simd_call($lhs, $rhs) }
+    };
+}
+
+/// SAFETY: This macro should not be used for anything except Shl or Shr, and passed the appropriate shift intrinsic.
+/// It handles performing a bitand in addition to calling the shift operator, so that the result
+/// is well-defined: LLVM returns a poison value if you shl, lshr, or ashr with rhs >= <Int>::BITS.
+/// For example, with u32 lanes a shift count of 33 is masked to 33 & 31 == 1, so `x << 33` computes `x << 1`.
+/// At worst, this may add another instruction and cycle;
+/// at best, it may open up more optimization opportunities,
+/// or simply be elided entirely, especially for SIMD ISAs which default to this.
+///
+// FIXME: Consider implementing this in cg_llvm instead?
+// cg_clif defaults to this, and scalar MIR shifts also default to wrapping
macro_rules! wrap_bitshift {
+    ($lhs:ident, $rhs:ident, {$simd_call:ident}, $int:ident) => {
+        #[allow(clippy::suspicious_arithmetic_impl)]
+        // Safety: $lhs and the bitand result are vectors
+        unsafe {
+            $crate::simd::intrinsics::$simd_call(
+                $lhs,
+                $rhs.bitand(Simd::splat(<$int>::BITS as $int - 1)),
+            )
+        }
+    };
+}
+
+/// SAFETY: This macro must only be used to impl Div or Rem and given the matching intrinsic.
+/// It guards against LLVM's UB conditions for integer div or rem using masks and selects,
+/// thus guaranteeing a well-defined Rust value is returned instead.
+///
+/// |                  | LLVM | Rust
+/// | :--------------: | :--- | :----------
+/// | N {/,%} 0        | UB   | panic!()
+/// | <$int>::MIN / -1 | UB   | <$int>::MIN
+/// | <$int>::MIN % -1 | UB   | 0
+///
+macro_rules! int_divrem_guard {
+    (   $lhs:ident,
+        $rhs:ident,
+        {   const PANIC_ZERO: &'static str = $zero:literal;
+            $simd_call:ident
+        },
+        $int:ident ) => {
+        if $rhs.simd_eq(Simd::splat(0 as _)).any() {
+            panic!($zero);
+        } else {
+            // Prevent otherwise-UB overflow on the MIN / -1 case.
+            let rhs = if <$int>::MIN != 0 {
+                // This should, at worst, optimize to a few branchless logical ops
+                // Ideally, this entire conditional should evaporate
+                // Fire LLVM and implement those manually if it doesn't get the hint
+                ($lhs.simd_eq(Simd::splat(<$int>::MIN))
+                // type inference can break here, so cut an SInt to size
+                & $rhs.simd_eq(Simd::splat(-1i64 as _)))
+                .select(Simd::splat(1 as _), $rhs)
+            } else {
+                // Nice base case to make it easy to const-fold away the other branch.
+                $rhs
+            };
+            // Safety: $lhs and rhs are vectors
+            unsafe { $crate::simd::intrinsics::$simd_call($lhs, rhs) }
+        }
+    };
+}
+
+macro_rules! for_base_types {
+    (   T = ($($scalar:ident),*);
+        type Lhs = Simd<T, N>;
+        type Rhs = Simd<T, N>;
+        type Output = $out:ty;
+
+        impl $op:ident::$call:ident {
+            $macro_impl:ident $inner:tt
+        }) => {
+            $(
+                impl<const N: usize> $op<Self> for Simd<$scalar, N>
+                where
+                    $scalar: SimdElement,
+                    LaneCount<N>: SupportedLaneCount,
+                {
+                    type Output = $out;
+
+                    #[inline]
+                    #[must_use = "operator returns a new vector without mutating the inputs"]
+                    fn $call(self, rhs: Self) -> Self::Output {
+                        $macro_impl!(self, rhs, $inner, $scalar)
+                    }
+                })*
+    }
+}
+
+// A "TokenTree muncher": takes a set of scalar types `T = {};`
+// type parameters for the ops it implements, `Op::fn` names,
+// and a macro that expands into an expr, substituting in an intrinsic.
+// It passes that to for_base_types, which expands an impl for the types,
+// using the expanded expr in the function, and recurses with itself.
+//
+// tl;dr impls a set of ops::{Traits} for a set of types
+macro_rules! for_base_ops {
+    (
+        T = $types:tt;
+        type Lhs = Simd<T, N>;
+        type Rhs = Simd<T, N>;
+        type Output = $out:ident;
+        impl $op:ident::$call:ident
+            $inner:tt
+        $($rest:tt)*
+    ) => {
+        for_base_types! {
+            T = $types;
+            type Lhs = Simd<T, N>;
+            type Rhs = Simd<T, N>;
+            type Output = $out;
+            impl $op::$call
+                $inner
+        }
+        for_base_ops!
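+// A sketch of the guarded semantics wired up below (illustrative values):
+//
+//     let x = Simd::from_array([i32::MIN, 6, 7, 8]);
+//     let y = Simd::from_array([-1, 3, 2, 4]);
+//     assert_eq!(x / y, Simd::from_array([i32::MIN, 2, 3, 2]));
+//     // a zero lane in the divisor panics instead of hitting LLVM UB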
{ + T = $types; + type Lhs = Simd<T, N>; + type Rhs = Simd<T, N>; + type Output = $out; + $($rest)* + } + }; + ($($done:tt)*) => { + // Done. + } +} + +// Integers can always accept add, mul, sub, bitand, bitor, and bitxor. +// For all of these operations, simd_* intrinsics apply wrapping logic. +for_base_ops! { + T = (i8, i16, i32, i64, isize, u8, u16, u32, u64, usize); + type Lhs = Simd<T, N>; + type Rhs = Simd<T, N>; + type Output = Self; + + impl Add::add { + unsafe_base { simd_add } + } + + impl Mul::mul { + unsafe_base { simd_mul } + } + + impl Sub::sub { + unsafe_base { simd_sub } + } + + impl BitAnd::bitand { + unsafe_base { simd_and } + } + + impl BitOr::bitor { + unsafe_base { simd_or } + } + + impl BitXor::bitxor { + unsafe_base { simd_xor } + } + + impl Div::div { + int_divrem_guard { + const PANIC_ZERO: &'static str = "attempt to divide by zero"; + simd_div + } + } + + impl Rem::rem { + int_divrem_guard { + const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero"; + simd_rem + } + } + + // The only question is how to handle shifts >= <Int>::BITS? + // Our current solution uses wrapping logic. + impl Shl::shl { + wrap_bitshift { simd_shl } + } + + impl Shr::shr { + wrap_bitshift { + // This automatically monomorphizes to lshr or ashr, depending, + // so it's fine to use it for both UInts and SInts. + simd_shr + } + } +} + +// We don't need any special precautions here: +// Floats always accept arithmetic ops, but may become NaN. +for_base_ops! { + T = (f32, f64); + type Lhs = Simd<T, N>; + type Rhs = Simd<T, N>; + type Output = Self; + + impl Add::add { + unsafe_base { simd_add } + } + + impl Mul::mul { + unsafe_base { simd_mul } + } + + impl Sub::sub { + unsafe_base { simd_sub } + } + + impl Div::div { + unsafe_base { simd_div } + } + + impl Rem::rem { + unsafe_base { simd_rem } + } +} diff --git a/library/portable-simd/crates/core_simd/src/ops/assign.rs b/library/portable-simd/crates/core_simd/src/ops/assign.rs new file mode 100644 index 000000000..d2b48614f --- /dev/null +++ b/library/portable-simd/crates/core_simd/src/ops/assign.rs @@ -0,0 +1,124 @@ +//! Assignment operators +use super::*; +use core::ops::{AddAssign, MulAssign}; // commutative binary op-assignment +use core::ops::{BitAndAssign, BitOrAssign, BitXorAssign}; // commutative bit binary op-assignment +use core::ops::{DivAssign, RemAssign, SubAssign}; // non-commutative binary op-assignment +use core::ops::{ShlAssign, ShrAssign}; // non-commutative bit binary op-assignment + +// Arithmetic + +macro_rules! assign_ops { + ($(impl<T, U, const LANES: usize> $assignTrait:ident<U> for Simd<T, LANES> + where + Self: $trait:ident, + { + fn $assign_call:ident(rhs: U) { + $call:ident + } + })*) => { + $(impl<T, U, const LANES: usize> $assignTrait<U> for Simd<T, LANES> + where + Self: $trait<U, Output = Self>, + T: SimdElement, + LaneCount<LANES>: SupportedLaneCount, + { + #[inline] + fn $assign_call(&mut self, rhs: U) { + *self = self.$call(rhs); + } + })* + } +} + +assign_ops! 
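+// Sketch: each impl generated below forwards to the binary op, so for vectors
+// `a` and `b`, `a += b;` behaves exactly as `a = a + b;` (illustrative).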
{ + // Arithmetic + impl<T, U, const LANES: usize> AddAssign<U> for Simd<T, LANES> + where + Self: Add, + { + fn add_assign(rhs: U) { + add + } + } + + impl<T, U, const LANES: usize> MulAssign<U> for Simd<T, LANES> + where + Self: Mul, + { + fn mul_assign(rhs: U) { + mul + } + } + + impl<T, U, const LANES: usize> SubAssign<U> for Simd<T, LANES> + where + Self: Sub, + { + fn sub_assign(rhs: U) { + sub + } + } + + impl<T, U, const LANES: usize> DivAssign<U> for Simd<T, LANES> + where + Self: Div, + { + fn div_assign(rhs: U) { + div + } + } + impl<T, U, const LANES: usize> RemAssign<U> for Simd<T, LANES> + where + Self: Rem, + { + fn rem_assign(rhs: U) { + rem + } + } + + // Bitops + impl<T, U, const LANES: usize> BitAndAssign<U> for Simd<T, LANES> + where + Self: BitAnd, + { + fn bitand_assign(rhs: U) { + bitand + } + } + + impl<T, U, const LANES: usize> BitOrAssign<U> for Simd<T, LANES> + where + Self: BitOr, + { + fn bitor_assign(rhs: U) { + bitor + } + } + + impl<T, U, const LANES: usize> BitXorAssign<U> for Simd<T, LANES> + where + Self: BitXor, + { + fn bitxor_assign(rhs: U) { + bitxor + } + } + + impl<T, U, const LANES: usize> ShlAssign<U> for Simd<T, LANES> + where + Self: Shl, + { + fn shl_assign(rhs: U) { + shl + } + } + + impl<T, U, const LANES: usize> ShrAssign<U> for Simd<T, LANES> + where + Self: Shr, + { + fn shr_assign(rhs: U) { + shr + } + } +} diff --git a/library/portable-simd/crates/core_simd/src/ops/deref.rs b/library/portable-simd/crates/core_simd/src/ops/deref.rs new file mode 100644 index 000000000..9883a74c9 --- /dev/null +++ b/library/portable-simd/crates/core_simd/src/ops/deref.rs @@ -0,0 +1,124 @@ +//! This module hacks in "implicit deref" for Simd's operators. +//! Ideally, Rust would take care of this itself, +//! and method calls usually handle the LHS implicitly. +//! But this is not the case with arithmetic ops. +use super::*; + +macro_rules! deref_lhs { + (impl<T, const LANES: usize> $trait:ident for $simd:ty { + fn $call:ident + }) => { + impl<T, const LANES: usize> $trait<$simd> for &$simd + where + T: SimdElement, + $simd: $trait<$simd, Output = $simd>, + LaneCount<LANES>: SupportedLaneCount, + { + type Output = Simd<T, LANES>; + + #[inline] + #[must_use = "operator returns a new vector without mutating the inputs"] + fn $call(self, rhs: $simd) -> Self::Output { + (*self).$call(rhs) + } + } + }; +} + +macro_rules! deref_rhs { + (impl<T, const LANES: usize> $trait:ident for $simd:ty { + fn $call:ident + }) => { + impl<T, const LANES: usize> $trait<&$simd> for $simd + where + T: SimdElement, + $simd: $trait<$simd, Output = $simd>, + LaneCount<LANES>: SupportedLaneCount, + { + type Output = Simd<T, LANES>; + + #[inline] + #[must_use = "operator returns a new vector without mutating the inputs"] + fn $call(self, rhs: &$simd) -> Self::Output { + self.$call(*rhs) + } + } + }; +} + +macro_rules! deref_ops { + ($(impl<T, const LANES: usize> $trait:ident for $simd:ty { + fn $call:ident + })*) => { + $( + deref_rhs! { + impl<T, const LANES: usize> $trait for $simd { + fn $call + } + } + deref_lhs! { + impl<T, const LANES: usize> $trait for $simd { + fn $call + } + } + impl<'lhs, 'rhs, T, const LANES: usize> $trait<&'rhs $simd> for &'lhs $simd + where + T: SimdElement, + $simd: $trait<$simd, Output = $simd>, + LaneCount<LANES>: SupportedLaneCount, + { + type Output = $simd; + + #[inline] + #[must_use = "operator returns a new vector without mutating the inputs"] + fn $call(self, rhs: &$simd) -> Self::Output { + (*self).$call(*rhs) + } + } + )* + } +} + +deref_ops! 
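+// A sketch of what these impls enable (illustrative): for `a, b: Simd<f32, 4>`,
+// `&a + b`, `a + &b`, and `&a + &b` all compile and produce the same vector
+// as `a + b`.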
{ + // Arithmetic + impl<T, const LANES: usize> Add for Simd<T, LANES> { + fn add + } + + impl<T, const LANES: usize> Mul for Simd<T, LANES> { + fn mul + } + + impl<T, const LANES: usize> Sub for Simd<T, LANES> { + fn sub + } + + impl<T, const LANES: usize> Div for Simd<T, LANES> { + fn div + } + + impl<T, const LANES: usize> Rem for Simd<T, LANES> { + fn rem + } + + // Bitops + impl<T, const LANES: usize> BitAnd for Simd<T, LANES> { + fn bitand + } + + impl<T, const LANES: usize> BitOr for Simd<T, LANES> { + fn bitor + } + + impl<T, const LANES: usize> BitXor for Simd<T, LANES> { + fn bitxor + } + + impl<T, const LANES: usize> Shl for Simd<T, LANES> { + fn shl + } + + impl<T, const LANES: usize> Shr for Simd<T, LANES> { + fn shr + } +} diff --git a/library/portable-simd/crates/core_simd/src/ops/unary.rs b/library/portable-simd/crates/core_simd/src/ops/unary.rs new file mode 100644 index 000000000..4ad022150 --- /dev/null +++ b/library/portable-simd/crates/core_simd/src/ops/unary.rs @@ -0,0 +1,78 @@ +use crate::simd::intrinsics; +use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount}; +use core::ops::{Neg, Not}; // unary ops + +macro_rules! neg { + ($(impl<const LANES: usize> Neg for Simd<$scalar:ty, LANES>)*) => { + $(impl<const LANES: usize> Neg for Simd<$scalar, LANES> + where + $scalar: SimdElement, + LaneCount<LANES>: SupportedLaneCount, + { + type Output = Self; + + #[inline] + #[must_use = "operator returns a new vector without mutating the input"] + fn neg(self) -> Self::Output { + // Safety: `self` is a signed vector + unsafe { intrinsics::simd_neg(self) } + } + })* + } +} + +neg! { + impl<const LANES: usize> Neg for Simd<f32, LANES> + + impl<const LANES: usize> Neg for Simd<f64, LANES> + + impl<const LANES: usize> Neg for Simd<i8, LANES> + + impl<const LANES: usize> Neg for Simd<i16, LANES> + + impl<const LANES: usize> Neg for Simd<i32, LANES> + + impl<const LANES: usize> Neg for Simd<i64, LANES> + + impl<const LANES: usize> Neg for Simd<isize, LANES> +} + +macro_rules! not { + ($(impl<const LANES: usize> Not for Simd<$scalar:ty, LANES>)*) => { + $(impl<const LANES: usize> Not for Simd<$scalar, LANES> + where + $scalar: SimdElement, + LaneCount<LANES>: SupportedLaneCount, + { + type Output = Self; + + #[inline] + #[must_use = "operator returns a new vector without mutating the input"] + fn not(self) -> Self::Output { + self ^ (Simd::splat(!(0 as $scalar))) + } + })* + } +} + +not! { + impl<const LANES: usize> Not for Simd<i8, LANES> + + impl<const LANES: usize> Not for Simd<i16, LANES> + + impl<const LANES: usize> Not for Simd<i32, LANES> + + impl<const LANES: usize> Not for Simd<i64, LANES> + + impl<const LANES: usize> Not for Simd<isize, LANES> + + impl<const LANES: usize> Not for Simd<u8, LANES> + + impl<const LANES: usize> Not for Simd<u16, LANES> + + impl<const LANES: usize> Not for Simd<u32, LANES> + + impl<const LANES: usize> Not for Simd<u64, LANES> + + impl<const LANES: usize> Not for Simd<usize, LANES> +} diff --git a/library/portable-simd/crates/core_simd/src/ord.rs b/library/portable-simd/crates/core_simd/src/ord.rs new file mode 100644 index 000000000..9a87bc2e3 --- /dev/null +++ b/library/portable-simd/crates/core_simd/src/ord.rs @@ -0,0 +1,213 @@ +use crate::simd::{intrinsics, LaneCount, Mask, Simd, SimdPartialEq, SupportedLaneCount}; + +/// Parallel `PartialOrd`. +pub trait SimdPartialOrd: SimdPartialEq { + /// Test if each lane is less than the corresponding lane in `other`. 
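Combined with the unary ops, the deref impls let owned vectors and references mix freely in expressions. A sketch:

```rust
#![feature(portable_simd)]
use core::simd::Simd;

fn main() {
    let a = Simd::from_array([1i32, -2, 3, -4]);
    let b = Simd::splat(1);
    // The deref impls accept `&Simd` on either side, or both.
    assert_eq!((&a + &b).to_array(), [2, -1, 4, -3]);
    // `Neg` lowers to `simd_neg`; `Not` is XOR against all-ones lanes.
    assert_eq!((-a).to_array(), [-1, 2, -3, 4]);
    assert_eq!((!Simd::<u8, 4>::splat(0)).to_array(), [0xff; 4]);
}
```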
+ #[must_use = "method returns a new mask and does not mutate the original value"] + fn simd_lt(self, other: Self) -> Self::Mask; + + /// Test if each lane is less than or equal to the corresponding lane in `other`. + #[must_use = "method returns a new mask and does not mutate the original value"] + fn simd_le(self, other: Self) -> Self::Mask; + + /// Test if each lane is greater than the corresponding lane in `other`. + #[must_use = "method returns a new mask and does not mutate the original value"] + fn simd_gt(self, other: Self) -> Self::Mask; + + /// Test if each lane is greater than or equal to the corresponding lane in `other`. + #[must_use = "method returns a new mask and does not mutate the original value"] + fn simd_ge(self, other: Self) -> Self::Mask; +} + +/// Parallel `Ord`. +pub trait SimdOrd: SimdPartialOrd { + /// Returns the lane-wise maximum with `other`. + #[must_use = "method returns a new vector and does not mutate the original value"] + fn simd_max(self, other: Self) -> Self; + + /// Returns the lane-wise minimum with `other`. + #[must_use = "method returns a new vector and does not mutate the original value"] + fn simd_min(self, other: Self) -> Self; + + /// Restrict each lane to a certain interval. + /// + /// For each lane, returns `max` if `self` is greater than `max`, and `min` if `self` is + /// less than `min`. Otherwise returns `self`. + /// + /// # Panics + /// + /// Panics if `min > max` on any lane. + #[must_use = "method returns a new vector and does not mutate the original value"] + fn simd_clamp(self, min: Self, max: Self) -> Self; +} + +macro_rules! impl_integer { + { $($integer:ty),* } => { + $( + impl<const LANES: usize> SimdPartialOrd for Simd<$integer, LANES> + where + LaneCount<LANES>: SupportedLaneCount, + { + #[inline] + fn simd_lt(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Mask::from_int_unchecked(intrinsics::simd_lt(self, other)) } + } + + #[inline] + fn simd_le(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Mask::from_int_unchecked(intrinsics::simd_le(self, other)) } + } + + #[inline] + fn simd_gt(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Mask::from_int_unchecked(intrinsics::simd_gt(self, other)) } + } + + #[inline] + fn simd_ge(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) } + } + } + + impl<const LANES: usize> SimdOrd for Simd<$integer, LANES> + where + LaneCount<LANES>: SupportedLaneCount, + { + #[inline] + fn simd_max(self, other: Self) -> Self { + self.simd_lt(other).select(other, self) + } + + #[inline] + fn simd_min(self, other: Self) -> Self { + self.simd_gt(other).select(other, self) + } + + #[inline] + fn simd_clamp(self, min: Self, max: Self) -> Self { + assert!( + min.simd_le(max).all(), + "each lane in `min` must be less than or equal to the corresponding lane in `max`", + ); + self.simd_max(min).simd_min(max) + } + } + )* + } +} + +impl_integer! { u8, u16, u32, u64, usize, i8, i16, i32, i64, isize } + +macro_rules! 
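For the integer impls above, the comparisons produce `Mask`s, and `simd_clamp` is `self.simd_max(min).simd_min(max)` applied per lane. A sketch:

```rust
#![feature(portable_simd)]
use core::simd::{Simd, SimdOrd, SimdPartialOrd};

fn main() {
    let a = Simd::from_array([1i32, 5, -3, 9]);
    let b = Simd::splat(4);
    assert_eq!(a.simd_lt(b).to_array(), [true, false, true, false]);
    // Per lane: raise to at least 0, then cap at 6.
    let clamped = a.simd_clamp(Simd::splat(0), Simd::splat(6));
    assert_eq!(clamped.to_array(), [1, 5, 0, 6]);
}
```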
impl_float { + { $($float:ty),* } => { + $( + impl<const LANES: usize> SimdPartialOrd for Simd<$float, LANES> + where + LaneCount<LANES>: SupportedLaneCount, + { + #[inline] + fn simd_lt(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Mask::from_int_unchecked(intrinsics::simd_lt(self, other)) } + } + + #[inline] + fn simd_le(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Mask::from_int_unchecked(intrinsics::simd_le(self, other)) } + } + + #[inline] + fn simd_gt(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Mask::from_int_unchecked(intrinsics::simd_gt(self, other)) } + } + + #[inline] + fn simd_ge(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) } + } + } + )* + } +} + +impl_float! { f32, f64 } + +macro_rules! impl_mask { + { $($integer:ty),* } => { + $( + impl<const LANES: usize> SimdPartialOrd for Mask<$integer, LANES> + where + LaneCount<LANES>: SupportedLaneCount, + { + #[inline] + fn simd_lt(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Self::from_int_unchecked(intrinsics::simd_lt(self.to_int(), other.to_int())) } + } + + #[inline] + fn simd_le(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Self::from_int_unchecked(intrinsics::simd_le(self.to_int(), other.to_int())) } + } + + #[inline] + fn simd_gt(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Self::from_int_unchecked(intrinsics::simd_gt(self.to_int(), other.to_int())) } + } + + #[inline] + fn simd_ge(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Self::from_int_unchecked(intrinsics::simd_ge(self.to_int(), other.to_int())) } + } + } + + impl<const LANES: usize> SimdOrd for Mask<$integer, LANES> + where + LaneCount<LANES>: SupportedLaneCount, + { + #[inline] + fn simd_max(self, other: Self) -> Self { + self.simd_gt(other).select_mask(other, self) + } + + #[inline] + fn simd_min(self, other: Self) -> Self { + self.simd_lt(other).select_mask(other, self) + } + + #[inline] + fn simd_clamp(self, min: Self, max: Self) -> Self { + assert!( + min.simd_le(max).all(), + "each lane in `min` must be less than or equal to the corresponding lane in `max`", + ); + self.simd_max(min).simd_min(max) + } + } + )* + } +} + +impl_mask! { i8, i16, i32, i64, isize } diff --git a/library/portable-simd/crates/core_simd/src/select.rs b/library/portable-simd/crates/core_simd/src/select.rs new file mode 100644 index 000000000..065c5987d --- /dev/null +++ b/library/portable-simd/crates/core_simd/src/select.rs @@ -0,0 +1,59 @@ +use crate::simd::intrinsics; +use crate::simd::{LaneCount, Mask, MaskElement, Simd, SimdElement, SupportedLaneCount}; + +impl<T, const LANES: usize> Mask<T, LANES> +where + T: MaskElement, + LaneCount<LANES>: SupportedLaneCount, +{ + /// Choose lanes from two vectors. 
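The float impls reuse the same intrinsics, so each lane follows IEEE 754 partial ordering: every comparison against a NaN lane is false. A sketch:

```rust
#![feature(portable_simd)]
use core::simd::{Simd, SimdPartialOrd};

fn main() {
    let a = Simd::from_array([1.0f32, f32::NAN, 3.0, 4.0]);
    let b = Simd::splat(2.0);
    // The NaN lane compares false for every ordering predicate.
    assert_eq!(a.simd_lt(b).to_array(), [true, false, false, false]);
    assert_eq!(a.simd_ge(b).to_array(), [false, false, true, true]);
}
```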
+ /// + /// For each lane in the mask, choose the corresponding lane from `true_values` if + /// that lane mask is true, and `false_values` if that lane mask is false. + /// + /// # Examples + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::{Simd, Mask}; + /// let a = Simd::from_array([0, 1, 2, 3]); + /// let b = Simd::from_array([4, 5, 6, 7]); + /// let mask = Mask::from_array([true, false, false, true]); + /// let c = mask.select(a, b); + /// assert_eq!(c.to_array(), [0, 5, 6, 3]); + /// ``` + #[inline] + #[must_use = "method returns a new vector and does not mutate the original inputs"] + pub fn select<U>( + self, + true_values: Simd<U, LANES>, + false_values: Simd<U, LANES>, + ) -> Simd<U, LANES> + where + U: SimdElement<Mask = T>, + { + // Safety: The mask has been cast to a vector of integers, + // and the operands to select between are vectors of the same type and length. + unsafe { intrinsics::simd_select(self.to_int(), true_values, false_values) } + } + + /// Choose lanes from two masks. + /// + /// For each lane in the mask, choose the corresponding lane from `true_values` if + /// that lane mask is true, and `false_values` if that lane mask is false. + /// + /// # Examples + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::Mask; + /// let a = Mask::<i32, 4>::from_array([true, true, false, false]); + /// let b = Mask::<i32, 4>::from_array([false, false, true, true]); + /// let mask = Mask::<i32, 4>::from_array([true, false, false, true]); + /// let c = mask.select_mask(a, b); + /// assert_eq!(c.to_array(), [true, false, true, false]); + /// ``` + #[inline] + #[must_use = "method returns a new mask and does not mutate the original inputs"] + pub fn select_mask(self, true_values: Self, false_values: Self) -> Self { + self & true_values | !self & false_values + } +} diff --git a/library/portable-simd/crates/core_simd/src/swizzle.rs b/library/portable-simd/crates/core_simd/src/swizzle.rs new file mode 100644 index 000000000..22999d249 --- /dev/null +++ b/library/portable-simd/crates/core_simd/src/swizzle.rs @@ -0,0 +1,385 @@ +use crate::simd::intrinsics; +use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount}; + +/// Constructs a new SIMD vector by copying elements from selected lanes in other vectors. +/// +/// When swizzling one vector, lanes are selected by a `const` array of `usize`, +/// like [`Swizzle`]. +/// +/// When swizzling two vectors, lanes are selected by a `const` array of [`Which`], +/// like [`Swizzle2`]. 
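A comparison followed by `select` is the idiomatic branchless blend with this API; for instance, a lane-wise absolute value (a sketch, nightly `portable_simd` assumed):

```rust
#![feature(portable_simd)]
use core::simd::{Simd, SimdPartialOrd};

fn main() {
    let x = Simd::from_array([10i32, -5, 7, -1]);
    // Build a mask with a comparison, then blend two candidate vectors.
    let abs = x.simd_lt(Simd::splat(0)).select(-x, x);
    assert_eq!(abs.to_array(), [10, 5, 7, 1]);
}
```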
+/// +/// # Examples +/// +/// With a single SIMD vector, the const array specifies lane indices in that vector: +/// ``` +/// # #![feature(portable_simd)] +/// # use core::simd::{u32x2, u32x4, simd_swizzle}; +/// let v = u32x4::from_array([10, 11, 12, 13]); +/// +/// // Keeping the same size +/// let r: u32x4 = simd_swizzle!(v, [3, 0, 1, 2]); +/// assert_eq!(r.to_array(), [13, 10, 11, 12]); +/// +/// // Changing the number of lanes +/// let r: u32x2 = simd_swizzle!(v, [3, 1]); +/// assert_eq!(r.to_array(), [13, 11]); +/// ``` +/// +/// With two input SIMD vectors, the const array uses `Which` to specify the source of each index: +/// ``` +/// # #![feature(portable_simd)] +/// # use core::simd::{u32x2, u32x4, simd_swizzle, Which}; +/// use Which::{First, Second}; +/// let a = u32x4::from_array([0, 1, 2, 3]); +/// let b = u32x4::from_array([4, 5, 6, 7]); +/// +/// // Keeping the same size +/// let r: u32x4 = simd_swizzle!(a, b, [First(0), First(1), Second(2), Second(3)]); +/// assert_eq!(r.to_array(), [0, 1, 6, 7]); +/// +/// // Changing the number of lanes +/// let r: u32x2 = simd_swizzle!(a, b, [First(0), Second(0)]); +/// assert_eq!(r.to_array(), [0, 4]); +/// ``` +#[allow(unused_macros)] +pub macro simd_swizzle { + ( + $vector:expr, $index:expr $(,)? + ) => { + { + use $crate::simd::Swizzle; + struct Impl; + impl<const LANES: usize> Swizzle<LANES, {$index.len()}> for Impl { + const INDEX: [usize; {$index.len()}] = $index; + } + Impl::swizzle($vector) + } + }, + ( + $first:expr, $second:expr, $index:expr $(,)? + ) => { + { + use $crate::simd::{Which, Swizzle2}; + struct Impl; + impl<const LANES: usize> Swizzle2<LANES, {$index.len()}> for Impl { + const INDEX: [Which; {$index.len()}] = $index; + } + Impl::swizzle2($first, $second) + } + } +} + +/// Specifies a lane index into one of two SIMD vectors. +/// +/// This is an input type for [Swizzle2] and helper macros like [simd_swizzle]. +#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum Which { + /// Index of a lane in the first input SIMD vector. + First(usize), + /// Index of a lane in the second input SIMD vector. + Second(usize), +} + +/// Create a vector from the elements of another vector. +pub trait Swizzle<const INPUT_LANES: usize, const OUTPUT_LANES: usize> { + /// Map from the lanes of the input vector to the output vector. + const INDEX: [usize; OUTPUT_LANES]; + + /// Create a new vector from the lanes of `vector`. + /// + /// Lane `i` of the output is `vector[Self::INDEX[i]]`. + #[inline] + #[must_use = "method returns a new vector and does not mutate the original inputs"] + fn swizzle<T>(vector: Simd<T, INPUT_LANES>) -> Simd<T, OUTPUT_LANES> + where + T: SimdElement, + LaneCount<INPUT_LANES>: SupportedLaneCount, + LaneCount<OUTPUT_LANES>: SupportedLaneCount, + { + // Safety: `vector` is a vector, and `INDEX_IMPL` is a const array of u32. + unsafe { intrinsics::simd_shuffle(vector, vector, Self::INDEX_IMPL) } + } +} + +/// Create a vector from the elements of two other vectors. +pub trait Swizzle2<const INPUT_LANES: usize, const OUTPUT_LANES: usize> { + /// Map from the lanes of the input vectors to the output vector + const INDEX: [Which; OUTPUT_LANES]; + + /// Create a new vector from the lanes of `first` and `second`. + /// + /// Lane `i` is `first[j]` when `Self::INDEX[i]` is `First(j)`, or `second[j]` when it is + /// `Second(j)`. 
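`simd_swizzle!` is sugar over these traits; when an index pattern is reused, `Swizzle` can also be implemented by hand. A sketch with a hypothetical `BroadcastFirst` swizzle:

```rust
#![feature(portable_simd)]
use core::simd::{Simd, Swizzle};

fn main() {
    // A named swizzle: broadcast lane 0 across a 4-lane vector.
    struct BroadcastFirst;
    impl Swizzle<4, 4> for BroadcastFirst {
        const INDEX: [usize; 4] = [0; 4];
    }
    let v = Simd::from_array([7u32, 8, 9, 10]);
    assert_eq!(BroadcastFirst::swizzle(v).to_array(), [7, 7, 7, 7]);
}
```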
+ #[inline] + #[must_use = "method returns a new vector and does not mutate the original inputs"] + fn swizzle2<T>( + first: Simd<T, INPUT_LANES>, + second: Simd<T, INPUT_LANES>, + ) -> Simd<T, OUTPUT_LANES> + where + T: SimdElement, + LaneCount<INPUT_LANES>: SupportedLaneCount, + LaneCount<OUTPUT_LANES>: SupportedLaneCount, + { + // Safety: `first` and `second` are vectors, and `INDEX_IMPL` is a const array of u32. + unsafe { intrinsics::simd_shuffle(first, second, Self::INDEX_IMPL) } + } +} + +/// The `simd_shuffle` intrinsic expects `u32`, so do error checking and conversion here. +/// This trait hides `INDEX_IMPL` from the public API. +trait SwizzleImpl<const INPUT_LANES: usize, const OUTPUT_LANES: usize> { + const INDEX_IMPL: [u32; OUTPUT_LANES]; +} + +impl<T, const INPUT_LANES: usize, const OUTPUT_LANES: usize> SwizzleImpl<INPUT_LANES, OUTPUT_LANES> + for T +where + T: Swizzle<INPUT_LANES, OUTPUT_LANES> + ?Sized, +{ + const INDEX_IMPL: [u32; OUTPUT_LANES] = { + let mut output = [0; OUTPUT_LANES]; + let mut i = 0; + while i < OUTPUT_LANES { + let index = Self::INDEX[i]; + assert!(index as u32 as usize == index); + assert!(index < INPUT_LANES, "source lane exceeds input lane count",); + output[i] = index as u32; + i += 1; + } + output + }; +} + +/// The `simd_shuffle` intrinsic expects `u32`, so do error checking and conversion here. +/// This trait hides `INDEX_IMPL` from the public API. +trait Swizzle2Impl<const INPUT_LANES: usize, const OUTPUT_LANES: usize> { + const INDEX_IMPL: [u32; OUTPUT_LANES]; +} + +impl<T, const INPUT_LANES: usize, const OUTPUT_LANES: usize> Swizzle2Impl<INPUT_LANES, OUTPUT_LANES> + for T +where + T: Swizzle2<INPUT_LANES, OUTPUT_LANES> + ?Sized, +{ + const INDEX_IMPL: [u32; OUTPUT_LANES] = { + let mut output = [0; OUTPUT_LANES]; + let mut i = 0; + while i < OUTPUT_LANES { + let (offset, index) = match Self::INDEX[i] { + Which::First(index) => (false, index), + Which::Second(index) => (true, index), + }; + assert!(index < INPUT_LANES, "source lane exceeds input lane count",); + + // lanes are indexed by the first vector, then second vector + let index = if offset { index + INPUT_LANES } else { index }; + assert!(index as u32 as usize == index); + output[i] = index as u32; + i += 1; + } + output + }; +} + +impl<T, const LANES: usize> Simd<T, LANES> +where + T: SimdElement, + LaneCount<LANES>: SupportedLaneCount, +{ + /// Reverse the order of the lanes in the vector. + #[inline] + #[must_use = "method returns a new vector and does not mutate the original inputs"] + pub fn reverse(self) -> Self { + const fn reverse_index<const LANES: usize>() -> [usize; LANES] { + let mut index = [0; LANES]; + let mut i = 0; + while i < LANES { + index[i] = LANES - i - 1; + i += 1; + } + index + } + + struct Reverse; + + impl<const LANES: usize> Swizzle<LANES, LANES> for Reverse { + const INDEX: [usize; LANES] = reverse_index::<LANES>(); + } + + Reverse::swizzle(self) + } + + /// Rotates the vector such that the first `OFFSET` elements of the slice move to the end + /// while the last `LANES - OFFSET` elements move to the front. After calling `rotate_lanes_left`, + /// the element previously in lane `OFFSET` will become the first element in the slice. 
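`reverse` above is just a `Swizzle` whose index array is computed in a `const` context, the same pattern the rotate and interleave methods below use. A usage sketch:

```rust
#![feature(portable_simd)]
use core::simd::Simd;

fn main() {
    let v = Simd::from_array([1u32, 2, 3, 4]);
    assert_eq!(v.reverse().to_array(), [4, 3, 2, 1]);
}
```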
+ #[inline] + #[must_use = "method returns a new vector and does not mutate the original inputs"] + pub fn rotate_lanes_left<const OFFSET: usize>(self) -> Self { + const fn rotate_index<const OFFSET: usize, const LANES: usize>() -> [usize; LANES] { + let offset = OFFSET % LANES; + let mut index = [0; LANES]; + let mut i = 0; + while i < LANES { + index[i] = (i + offset) % LANES; + i += 1; + } + index + } + + struct Rotate<const OFFSET: usize>; + + impl<const OFFSET: usize, const LANES: usize> Swizzle<LANES, LANES> for Rotate<OFFSET> { + const INDEX: [usize; LANES] = rotate_index::<OFFSET, LANES>(); + } + + Rotate::<OFFSET>::swizzle(self) + } + + /// Rotates the vector such that the first `LANES - OFFSET` elements of the vector move to + /// the end while the last `OFFSET` elements move to the front. After calling `rotate_lanes_right`, + /// the element previously at index `LANES - OFFSET` will become the first element in the slice. + #[inline] + #[must_use = "method returns a new vector and does not mutate the original inputs"] + pub fn rotate_lanes_right<const OFFSET: usize>(self) -> Self { + const fn rotate_index<const OFFSET: usize, const LANES: usize>() -> [usize; LANES] { + let offset = LANES - OFFSET % LANES; + let mut index = [0; LANES]; + let mut i = 0; + while i < LANES { + index[i] = (i + offset) % LANES; + i += 1; + } + index + } + + struct Rotate<const OFFSET: usize>; + + impl<const OFFSET: usize, const LANES: usize> Swizzle<LANES, LANES> for Rotate<OFFSET> { + const INDEX: [usize; LANES] = rotate_index::<OFFSET, LANES>(); + } + + Rotate::<OFFSET>::swizzle(self) + } + + /// Interleave two vectors. + /// + /// Produces two vectors with lanes taken alternately from `self` and `other`. + /// + /// The first result contains the first `LANES / 2` lanes from `self` and `other`, + /// alternating, starting with the first lane of `self`. + /// + /// The second result contains the last `LANES / 2` lanes from `self` and `other`, + /// alternating, starting with the lane `LANES / 2` from the start of `self`. + /// + /// ``` + /// #![feature(portable_simd)] + /// # use core::simd::Simd; + /// let a = Simd::from_array([0, 1, 2, 3]); + /// let b = Simd::from_array([4, 5, 6, 7]); + /// let (x, y) = a.interleave(b); + /// assert_eq!(x.to_array(), [0, 4, 1, 5]); + /// assert_eq!(y.to_array(), [2, 6, 3, 7]); + /// ``` + #[inline] + #[must_use = "method returns a new vector and does not mutate the original inputs"] + pub fn interleave(self, other: Self) -> (Self, Self) { + const fn lo<const LANES: usize>() -> [Which; LANES] { + let mut idx = [Which::First(0); LANES]; + let mut i = 0; + while i < LANES { + let offset = i / 2; + idx[i] = if i % 2 == 0 { + Which::First(offset) + } else { + Which::Second(offset) + }; + i += 1; + } + idx + } + const fn hi<const LANES: usize>() -> [Which; LANES] { + let mut idx = [Which::First(0); LANES]; + let mut i = 0; + while i < LANES { + let offset = (LANES + i) / 2; + idx[i] = if i % 2 == 0 { + Which::First(offset) + } else { + Which::Second(offset) + }; + i += 1; + } + idx + } + + struct Lo; + struct Hi; + + impl<const LANES: usize> Swizzle2<LANES, LANES> for Lo { + const INDEX: [Which; LANES] = lo::<LANES>(); + } + + impl<const LANES: usize> Swizzle2<LANES, LANES> for Hi { + const INDEX: [Which; LANES] = hi::<LANES>(); + } + + (Lo::swizzle2(self, other), Hi::swizzle2(self, other)) + } + + /// Deinterleave two vectors. + /// + /// The first result takes every other lane of `self` and then `other`, starting with + /// the first lane. 
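The rotate helpers shift lanes circularly by a `const` offset, with the offset reduced modulo `LANES`. A sketch of both directions:

```rust
#![feature(portable_simd)]
use core::simd::Simd;

fn main() {
    let v = Simd::from_array([1u32, 2, 3, 4]);
    assert_eq!(v.rotate_lanes_left::<1>().to_array(), [2, 3, 4, 1]);
    assert_eq!(v.rotate_lanes_right::<1>().to_array(), [4, 1, 2, 3]);
}
```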
+ /// + /// The second result takes every other lane of `self` and then `other`, starting with + /// the second lane. + /// + /// ``` + /// #![feature(portable_simd)] + /// # use core::simd::Simd; + /// let a = Simd::from_array([0, 4, 1, 5]); + /// let b = Simd::from_array([2, 6, 3, 7]); + /// let (x, y) = a.deinterleave(b); + /// assert_eq!(x.to_array(), [0, 1, 2, 3]); + /// assert_eq!(y.to_array(), [4, 5, 6, 7]); + /// ``` + #[inline] + #[must_use = "method returns a new vector and does not mutate the original inputs"] + pub fn deinterleave(self, other: Self) -> (Self, Self) { + const fn even<const LANES: usize>() -> [Which; LANES] { + let mut idx = [Which::First(0); LANES]; + let mut i = 0; + while i < LANES / 2 { + idx[i] = Which::First(2 * i); + idx[i + LANES / 2] = Which::Second(2 * i); + i += 1; + } + idx + } + const fn odd<const LANES: usize>() -> [Which; LANES] { + let mut idx = [Which::First(0); LANES]; + let mut i = 0; + while i < LANES / 2 { + idx[i] = Which::First(2 * i + 1); + idx[i + LANES / 2] = Which::Second(2 * i + 1); + i += 1; + } + idx + } + + struct Even; + struct Odd; + + impl<const LANES: usize> Swizzle2<LANES, LANES> for Even { + const INDEX: [Which; LANES] = even::<LANES>(); + } + + impl<const LANES: usize> Swizzle2<LANES, LANES> for Odd { + const INDEX: [Which; LANES] = odd::<LANES>(); + } + + (Even::swizzle2(self, other), Odd::swizzle2(self, other)) + } +} diff --git a/library/portable-simd/crates/core_simd/src/to_bytes.rs b/library/portable-simd/crates/core_simd/src/to_bytes.rs new file mode 100644 index 000000000..b36b1a347 --- /dev/null +++ b/library/portable-simd/crates/core_simd/src/to_bytes.rs @@ -0,0 +1,41 @@ +macro_rules! impl_to_bytes { + { $ty:ty, $size:literal } => { + impl<const LANES: usize> crate::simd::Simd<$ty, LANES> + where + crate::simd::LaneCount<LANES>: crate::simd::SupportedLaneCount, + crate::simd::LaneCount<{{ $size * LANES }}>: crate::simd::SupportedLaneCount, + { + /// Return the memory representation of this integer as a byte array in native byte + /// order. + pub fn to_ne_bytes(self) -> crate::simd::Simd<u8, {{ $size * LANES }}> { + // Safety: transmuting between vectors is safe + unsafe { core::mem::transmute_copy(&self) } + } + + /// Create a native endian integer value from its memory representation as a byte array + /// in native endianness. + pub fn from_ne_bytes(bytes: crate::simd::Simd<u8, {{ $size * LANES }}>) -> Self { + // Safety: transmuting between vectors is safe + unsafe { core::mem::transmute_copy(&bytes) } + } + } + } +} + +impl_to_bytes! { u8, 1 } +impl_to_bytes! { u16, 2 } +impl_to_bytes! { u32, 4 } +impl_to_bytes! { u64, 8 } +#[cfg(target_pointer_width = "32")] +impl_to_bytes! { usize, 4 } +#[cfg(target_pointer_width = "64")] +impl_to_bytes! { usize, 8 } + +impl_to_bytes! { i8, 1 } +impl_to_bytes! { i16, 2 } +impl_to_bytes! { i32, 4 } +impl_to_bytes! { i64, 8 } +#[cfg(target_pointer_width = "32")] +impl_to_bytes! { isize, 4 } +#[cfg(target_pointer_width = "64")] +impl_to_bytes! { isize, 8 } diff --git a/library/portable-simd/crates/core_simd/src/vector.rs b/library/portable-simd/crates/core_simd/src/vector.rs new file mode 100644 index 000000000..78f56402e --- /dev/null +++ b/library/portable-simd/crates/core_simd/src/vector.rs @@ -0,0 +1,742 @@ +mod float; +mod int; +mod uint; + +pub use float::*; +pub use int::*; +pub use uint::*; + +// Vectors of pointers are not for public use at the current time. 
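The byte reinterpretation above is a plain transmute: lane order is preserved, and byte order within each lane is the target's native endianness. A sketch (the byte-level assert assumes a little-endian target):

```rust
#![feature(portable_simd)]
use core::simd::Simd;

fn main() {
    let v = Simd::from_array([0x0102u16, 0x0304]);
    let bytes = v.to_ne_bytes(); // Simd<u8, 4>
    // On a little-endian target, each u16 lane is emitted low byte first.
    #[cfg(target_endian = "little")]
    assert_eq!(bytes.to_array(), [0x02, 0x01, 0x04, 0x03]);
    // The conversion round-trips losslessly.
    assert_eq!(Simd::<u16, 2>::from_ne_bytes(bytes), v);
}
```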
+pub(crate) mod ptr;
+
+use crate::simd::{
+    intrinsics, LaneCount, Mask, MaskElement, SimdPartialOrd, SupportedLaneCount, Swizzle,
+};
+
+/// A SIMD vector of `LANES` elements of type `T`. `Simd<T, N>` has the same shape as [`[T; N]`](array), but operates like `T`.
+///
+/// Two vectors of the same type and length will, by convention, support the operators (+, *, etc.) that `T` does.
+/// These take the lanes at each index on the left-hand side and right-hand side, perform the operation,
+/// and return the result in the same lane in a vector of equal size. For a given operator, this is equivalent to zipping
+/// the two arrays together and mapping the operator over each lane.
+///
+/// ```rust
+/// # #![feature(array_zip, portable_simd)]
+/// # use core::simd::{Simd};
+/// let a0: [i32; 4] = [-2, 0, 2, 4];
+/// let a1 = [10, 9, 8, 7];
+/// let zm_add = a0.zip(a1).map(|(lhs, rhs)| lhs + rhs);
+/// let zm_mul = a0.zip(a1).map(|(lhs, rhs)| lhs * rhs);
+///
+/// // `Simd<T, N>` implements `From<[T; N]>`.
+/// let (v0, v1) = (Simd::from(a0), Simd::from(a1));
+/// // Which means arrays implement `Into<Simd<T, N>>`.
+/// assert_eq!(v0 + v1, zm_add.into());
+/// assert_eq!(v0 * v1, zm_mul.into());
+/// ```
+///
+/// `Simd` with integers has the quirk that these operations are also inherently wrapping, as if `T` were [`Wrapping<T>`].
+/// Thus, `Simd` does not implement `wrapping_add`, because that is the default behavior.
+/// This means there is no warning on overflows, even in "debug" builds.
+/// For most applications where `Simd` is appropriate, it is "not a bug" to wrap,
+/// and even "debug builds" are unlikely to tolerate the loss of performance.
+/// You may want to consider using explicitly checked arithmetic if such is required.
+/// Division by zero still causes a panic, so you may want to consider using floating point numbers if that is unacceptable.
+///
+/// [`Wrapping<T>`]: core::num::Wrapping
+///
+/// # Layout
+/// `Simd<T, N>` has a layout similar to `[T; N]` (identical "shapes"), but with a greater alignment.
+/// `[T; N]` is aligned to `T`, but `Simd<T, N>` will have an alignment based on both `T` and `N`.
+/// It is thus sound to [`transmute`] `Simd<T, N>` to `[T; N]`, and will typically optimize to zero cost,
+/// but the reverse transmutation is more likely to require a copy the compiler cannot simply elide.
+///
+/// # ABI "Features"
+/// Due to Rust's safety guarantees, `Simd<T, N>` is currently passed to and from functions via memory, not SIMD registers,
+/// except as an optimization. `#[inline]` hints are recommended on functions that accept `Simd<T, N>` or return it.
+/// The need for this may be corrected in the future.
+///
+/// # Safe SIMD with Unsafe Rust
+///
+/// Operations with `Simd` are typically safe, but there are many reasons to want to combine SIMD with `unsafe` code.
+/// Care must be taken to respect differences between `Simd` and other types it may be transformed into or derived from.
+/// In particular, the layout of `Simd<T, N>` may be similar to `[T; N]`, and may allow some transmutations,
+/// but references to `[T; N]` are not interchangeable with those to `Simd<T, N>`.
+/// Thus, when using `unsafe` Rust to read and write `Simd<T, N>` through [raw pointers], it is a good idea to first try with
+/// [`read_unaligned`] and [`write_unaligned`].
This is because:
+/// - [`read`] and [`write`] require full alignment (in this case, `Simd<T, N>`'s alignment)
+/// - the likely source for reading or destination for writing `Simd<T, N>` is [`[T]`](slice) and similar types, aligned to `T`
+/// - combining these actions would violate the `unsafe` contract and explode the program into a puff of **undefined behavior**
+/// - the compiler can implicitly adjust layouts to make unaligned reads or writes fully aligned if it sees the optimization
+/// - most contemporary processors suffer no performance penalty for "unaligned" reads and writes that are aligned at runtime
+///
+/// By imposing fewer obligations, unaligned functions are less likely to make the program unsound,
+/// and may be just as fast as stricter alternatives.
+/// When trying to guarantee alignment, [`[T]::as_simd`][as_simd] is an option for converting `[T]` to `[Simd<T, N>]`,
+/// and allows soundly operating on an aligned SIMD body, but it may cost more time when handling the scalar head and tail.
+/// If these are not sufficient, then it is best to design data structures to be already aligned
+/// to the `Simd<T, N>` you wish to use before using `unsafe` Rust to read or write.
+/// More conventional ways to compensate for these facts, like materializing `Simd` to or from an array first,
+/// are handled by safe methods like [`Simd::from_array`] and [`Simd::from_slice`].
+///
+/// [`transmute`]: core::mem::transmute
+/// [raw pointers]: pointer
+/// [`read_unaligned`]: pointer::read_unaligned
+/// [`write_unaligned`]: pointer::write_unaligned
+/// [`read`]: pointer::read
+/// [`write`]: pointer::write
+/// [as_simd]: slice::as_simd
+#[repr(simd)]
+pub struct Simd<T, const LANES: usize>([T; LANES])
+where
+    T: SimdElement,
+    LaneCount<LANES>: SupportedLaneCount;
+
+impl<T, const LANES: usize> Simd<T, LANES>
+where
+    LaneCount<LANES>: SupportedLaneCount,
+    T: SimdElement,
+{
+    /// Number of lanes in this vector.
+    pub const LANES: usize = LANES;
+
+    /// Returns the number of lanes in this SIMD vector.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # use core::simd::u32x4;
+    /// let v = u32x4::splat(0);
+    /// assert_eq!(v.lanes(), 4);
+    /// ```
+    pub const fn lanes(&self) -> usize {
+        LANES
+    }
+
+    /// Constructs a new SIMD vector with all lanes set to the given value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # use core::simd::u32x4;
+    /// let v = u32x4::splat(8);
+    /// assert_eq!(v.as_array(), &[8, 8, 8, 8]);
+    /// ```
+    pub fn splat(value: T) -> Self {
+        // This is preferred over `[value; LANES]`, since it's explicitly a splat:
+        // https://github.com/rust-lang/rust/issues/97804
+        struct Splat;
+        impl<const LANES: usize> Swizzle<1, LANES> for Splat {
+            const INDEX: [usize; LANES] = [0; LANES];
+        }
+        Splat::swizzle(Simd::<T, 1>::from([value]))
+    }
+
+    /// Returns an array reference containing the entire SIMD vector.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # use core::simd::{Simd, u64x4};
+    /// let v: u64x4 = Simd::from_array([0, 1, 2, 3]);
+    /// assert_eq!(v.as_array(), &[0, 1, 2, 3]);
+    /// ```
+    pub const fn as_array(&self) -> &[T; LANES] {
+        &self.0
+    }
+
+    /// Returns a mutable array reference containing the entire SIMD vector.
+    pub fn as_mut_array(&mut self) -> &mut [T; LANES] {
+        &mut self.0
+    }
+
+    /// Converts an array to a SIMD vector.
+    pub const fn from_array(array: [T; LANES]) -> Self {
+        Self(array)
+    }
+
+    /// Converts a SIMD vector to an array.
+ pub const fn to_array(self) -> [T; LANES] { + self.0 + } + + /// Converts a slice to a SIMD vector containing `slice[..LANES]`. + /// + /// # Panics + /// + /// Panics if the slice's length is less than the vector's `Simd::LANES`. + /// + /// # Examples + /// + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::u32x4; + /// let source = vec![1, 2, 3, 4, 5, 6]; + /// let v = u32x4::from_slice(&source); + /// assert_eq!(v.as_array(), &[1, 2, 3, 4]); + /// ``` + #[must_use] + pub const fn from_slice(slice: &[T]) -> Self { + assert!(slice.len() >= LANES, "slice length must be at least the number of lanes"); + let mut array = [slice[0]; LANES]; + let mut i = 0; + while i < LANES { + array[i] = slice[i]; + i += 1; + } + Self(array) + } + + /// Performs lanewise conversion of a SIMD vector's elements to another SIMD-valid type. + /// + /// This follows the semantics of Rust's `as` conversion for casting + /// integers to unsigned integers (interpreting as the other type, so `-1` to `MAX`), + /// and from floats to integers (truncating, or saturating at the limits) for each lane, + /// or vice versa. + /// + /// # Examples + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::Simd; + /// let floats: Simd<f32, 4> = Simd::from_array([1.9, -4.5, f32::INFINITY, f32::NAN]); + /// let ints = floats.cast::<i32>(); + /// assert_eq!(ints, Simd::from_array([1, -4, i32::MAX, 0])); + /// + /// // Formally equivalent, but `Simd::cast` can optimize better. + /// assert_eq!(ints, Simd::from_array(floats.to_array().map(|x| x as i32))); + /// + /// // The float conversion does not round-trip. + /// let floats_again = ints.cast(); + /// assert_ne!(floats, floats_again); + /// assert_eq!(floats_again, Simd::from_array([1.0, -4.0, 2147483647.0, 0.0])); + /// ``` + #[must_use] + #[inline] + pub fn cast<U: SimdElement>(self) -> Simd<U, LANES> { + // Safety: The input argument is a vector of a valid SIMD element type. + unsafe { intrinsics::simd_as(self) } + } + + /// Rounds toward zero and converts to the same-width integer type, assuming that + /// the value is finite and fits in that type. + /// + /// # Safety + /// The value must: + /// + /// * Not be NaN + /// * Not be infinite + /// * Be representable in the return type, after truncating off its fractional part + /// + /// If these requirements are infeasible or costly, consider using the safe function [cast], + /// which saturates on conversion. + /// + /// [cast]: Simd::cast + #[inline] + pub unsafe fn to_int_unchecked<I>(self) -> Simd<I, LANES> + where + T: core::convert::FloatToInt<I>, + I: SimdElement, + { + // Safety: `self` is a vector, and `FloatToInt` ensures the type can be casted to + // an integer. + unsafe { intrinsics::simd_cast(self) } + } + + /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector. + /// If an index is out-of-bounds, the lane is instead selected from the `or` vector. + /// + /// # Examples + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::Simd; + /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; + /// let idxs = Simd::from_array([9, 3, 0, 5]); + /// let alt = Simd::from_array([-5, -4, -3, -2]); + /// + /// let result = Simd::gather_or(&vec, idxs, alt); // Note the lane that is out-of-bounds. 
+ /// assert_eq!(result, Simd::from_array([-5, 13, 10, 15])); + /// ``` + #[must_use] + #[inline] + pub fn gather_or(slice: &[T], idxs: Simd<usize, LANES>, or: Self) -> Self { + Self::gather_select(slice, Mask::splat(true), idxs, or) + } + + /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector. + /// If an index is out-of-bounds, the lane is set to the default value for the type. + /// + /// # Examples + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::Simd; + /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; + /// let idxs = Simd::from_array([9, 3, 0, 5]); + /// + /// let result = Simd::gather_or_default(&vec, idxs); // Note the lane that is out-of-bounds. + /// assert_eq!(result, Simd::from_array([0, 13, 10, 15])); + /// ``` + #[must_use] + #[inline] + pub fn gather_or_default(slice: &[T], idxs: Simd<usize, LANES>) -> Self + where + T: Default, + { + Self::gather_or(slice, idxs, Self::splat(T::default())) + } + + /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector. + /// The mask `enable`s all `true` lanes and disables all `false` lanes. + /// If an index is disabled or is out-of-bounds, the lane is selected from the `or` vector. + /// + /// # Examples + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::{Simd, Mask}; + /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; + /// let idxs = Simd::from_array([9, 3, 0, 5]); + /// let alt = Simd::from_array([-5, -4, -3, -2]); + /// let enable = Mask::from_array([true, true, true, false]); // Note the mask of the last lane. + /// + /// let result = Simd::gather_select(&vec, enable, idxs, alt); // Note the lane that is out-of-bounds. + /// assert_eq!(result, Simd::from_array([-5, 13, 10, -2])); + /// ``` + #[must_use] + #[inline] + pub fn gather_select( + slice: &[T], + enable: Mask<isize, LANES>, + idxs: Simd<usize, LANES>, + or: Self, + ) -> Self { + let enable: Mask<isize, LANES> = enable & idxs.simd_lt(Simd::splat(slice.len())); + // Safety: We have masked-off out-of-bounds lanes. + unsafe { Self::gather_select_unchecked(slice, enable, idxs, or) } + } + + /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector. + /// The mask `enable`s all `true` lanes and disables all `false` lanes. + /// If an index is disabled, the lane is selected from the `or` vector. + /// + /// # Safety + /// + /// Calling this function with an `enable`d out-of-bounds index is *[undefined behavior]* + /// even if the resulting value is not used. + /// + /// # Examples + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{Simd, SimdPartialOrd, Mask}; + /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; + /// let idxs = Simd::from_array([9, 3, 0, 5]); + /// let alt = Simd::from_array([-5, -4, -3, -2]); + /// let enable = Mask::from_array([true, true, true, false]); // Note the final mask lane. + /// // If this mask was used to gather, it would be unsound. Let's fix that. + /// let enable = enable & idxs.simd_lt(Simd::splat(vec.len())); + /// + /// // We have masked the OOB lane, so it's safe to gather now. 
+ /// let result = unsafe { Simd::gather_select_unchecked(&vec, enable, idxs, alt) }; + /// assert_eq!(result, Simd::from_array([-5, 13, 10, -2])); + /// ``` + /// [undefined behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[must_use] + #[inline] + pub unsafe fn gather_select_unchecked( + slice: &[T], + enable: Mask<isize, LANES>, + idxs: Simd<usize, LANES>, + or: Self, + ) -> Self { + let base_ptr = crate::simd::ptr::SimdConstPtr::splat(slice.as_ptr()); + // Ferris forgive me, I have done pointer arithmetic here. + let ptrs = base_ptr.wrapping_add(idxs); + // Safety: The ptrs have been bounds-masked to prevent memory-unsafe reads insha'allah + unsafe { intrinsics::simd_gather(or, ptrs, enable.to_int()) } + } + + /// Writes the values in a SIMD vector to potentially discontiguous indices in `slice`. + /// If two lanes in the scattered vector would write to the same index + /// only the last lane is guaranteed to actually be written. + /// + /// # Examples + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::Simd; + /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; + /// let idxs = Simd::from_array([9, 3, 0, 0]); + /// let vals = Simd::from_array([-27, 82, -41, 124]); + /// + /// vals.scatter(&mut vec, idxs); // index 0 receives two writes. + /// assert_eq!(vec, vec![124, 11, 12, 82, 14, 15, 16, 17, 18]); + /// ``` + #[inline] + pub fn scatter(self, slice: &mut [T], idxs: Simd<usize, LANES>) { + self.scatter_select(slice, Mask::splat(true), idxs) + } + + /// Writes the values in a SIMD vector to multiple potentially discontiguous indices in `slice`. + /// The mask `enable`s all `true` lanes and disables all `false` lanes. + /// If an enabled index is out-of-bounds, the lane is not written. + /// If two enabled lanes in the scattered vector would write to the same index, + /// only the last lane is guaranteed to actually be written. + /// + /// # Examples + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{Simd, Mask}; + /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; + /// let idxs = Simd::from_array([9, 3, 0, 0]); + /// let vals = Simd::from_array([-27, 82, -41, 124]); + /// let enable = Mask::from_array([true, true, true, false]); // Note the mask of the last lane. + /// + /// vals.scatter_select(&mut vec, enable, idxs); // index 0's second write is masked, thus omitted. + /// assert_eq!(vec, vec![-41, 11, 12, 82, 14, 15, 16, 17, 18]); + /// ``` + #[inline] + pub fn scatter_select( + self, + slice: &mut [T], + enable: Mask<isize, LANES>, + idxs: Simd<usize, LANES>, + ) { + let enable: Mask<isize, LANES> = enable & idxs.simd_lt(Simd::splat(slice.len())); + // Safety: We have masked-off out-of-bounds lanes. + unsafe { self.scatter_select_unchecked(slice, enable, idxs) } + } + + /// Writes the values in a SIMD vector to multiple potentially discontiguous indices in `slice`. + /// The mask `enable`s all `true` lanes and disables all `false` lanes. + /// If two enabled lanes in the scattered vector would write to the same index, + /// only the last lane is guaranteed to actually be written. + /// + /// # Safety + /// + /// Calling this function with an enabled out-of-bounds index is *[undefined behavior]*, + /// and may lead to memory corruption. 
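Taken together, the checked gather/scatter entry points stay safe by masking out-of-bounds lanes before any pointer is formed. A sketch of the safe API:

```rust
#![feature(portable_simd)]
use core::simd::Simd;

fn main() {
    let mut data = vec![10i32, 11, 12, 13, 14];
    let idxs = Simd::from_array([4, 2, 0, 9]); // lane 3 is out of bounds
    // Out-of-bounds lanes fall back to the `or` vector on a gather...
    let gathered = Simd::gather_or(&data, idxs, Simd::splat(-1));
    assert_eq!(gathered.to_array(), [14, 12, 10, -1]);
    // ...and are simply skipped on a scatter.
    let vals = Simd::from_array([-1i32, -2, -3, -4]);
    vals.scatter(&mut data, idxs);
    assert_eq!(data, vec![-3, 11, -2, 13, -1]);
}
```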
+ /// + /// # Examples + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{Simd, SimdPartialOrd, Mask}; + /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; + /// let idxs = Simd::from_array([9, 3, 0, 0]); + /// let vals = Simd::from_array([-27, 82, -41, 124]); + /// let enable = Mask::from_array([true, true, true, false]); // Note the mask of the last lane. + /// // If this mask was used to scatter, it would be unsound. Let's fix that. + /// let enable = enable & idxs.simd_lt(Simd::splat(vec.len())); + /// + /// // We have masked the OOB lane, so it's safe to scatter now. + /// unsafe { vals.scatter_select_unchecked(&mut vec, enable, idxs); } + /// // index 0's second write is masked, thus was omitted. + /// assert_eq!(vec, vec![-41, 11, 12, 82, 14, 15, 16, 17, 18]); + /// ``` + /// [undefined behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + pub unsafe fn scatter_select_unchecked( + self, + slice: &mut [T], + enable: Mask<isize, LANES>, + idxs: Simd<usize, LANES>, + ) { + // Safety: This block works with *mut T derived from &mut 'a [T], + // which means it is delicate in Rust's borrowing model, circa 2021: + // &mut 'a [T] asserts uniqueness, so deriving &'a [T] invalidates live *mut Ts! + // Even though this block is largely safe methods, it must be exactly this way + // to prevent invalidating the raw ptrs while they're live. + // Thus, entering this block requires all values to use being already ready: + // 0. idxs we want to write to, which are used to construct the mask. + // 1. enable, which depends on an initial &'a [T] and the idxs. + // 2. actual values to scatter (self). + // 3. &mut [T] which will become our base ptr. + unsafe { + // Now Entering ☢️ *mut T Zone + let base_ptr = crate::simd::ptr::SimdMutPtr::splat(slice.as_mut_ptr()); + // Ferris forgive me, I have done pointer arithmetic here. + let ptrs = base_ptr.wrapping_add(idxs); + // The ptrs have been bounds-masked to prevent memory-unsafe writes insha'allah + intrinsics::simd_scatter(self, ptrs, enable.to_int()) + // Cleared ☢️ *mut T Zone + } + } +} + +impl<T, const LANES: usize> Copy for Simd<T, LANES> +where + T: SimdElement, + LaneCount<LANES>: SupportedLaneCount, +{ +} + +impl<T, const LANES: usize> Clone for Simd<T, LANES> +where + T: SimdElement, + LaneCount<LANES>: SupportedLaneCount, +{ + fn clone(&self) -> Self { + *self + } +} + +impl<T, const LANES: usize> Default for Simd<T, LANES> +where + LaneCount<LANES>: SupportedLaneCount, + T: SimdElement + Default, +{ + #[inline] + fn default() -> Self { + Self::splat(T::default()) + } +} + +impl<T, const LANES: usize> PartialEq for Simd<T, LANES> +where + LaneCount<LANES>: SupportedLaneCount, + T: SimdElement + PartialEq, +{ + #[inline] + fn eq(&self, other: &Self) -> bool { + // Safety: All SIMD vectors are SimdPartialEq, and the comparison produces a valid mask. + let mask = unsafe { + let tfvec: Simd<<T as SimdElement>::Mask, LANES> = intrinsics::simd_eq(*self, *other); + Mask::from_int_unchecked(tfvec) + }; + + // Two vectors are equal if all lanes tested true for vertical equality. + mask.all() + } + + #[allow(clippy::partialeq_ne_impl)] + #[inline] + fn ne(&self, other: &Self) -> bool { + // Safety: All SIMD vectors are SimdPartialEq, and the comparison produces a valid mask. 
+ let mask = unsafe { + let tfvec: Simd<<T as SimdElement>::Mask, LANES> = intrinsics::simd_ne(*self, *other); + Mask::from_int_unchecked(tfvec) + }; + + // Two vectors are non-equal if any lane tested true for vertical non-equality. + mask.any() + } +} + +impl<T, const LANES: usize> PartialOrd for Simd<T, LANES> +where + LaneCount<LANES>: SupportedLaneCount, + T: SimdElement + PartialOrd, +{ + #[inline] + fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> { + // TODO use SIMD equality + self.to_array().partial_cmp(other.as_ref()) + } +} + +impl<T, const LANES: usize> Eq for Simd<T, LANES> +where + LaneCount<LANES>: SupportedLaneCount, + T: SimdElement + Eq, +{ +} + +impl<T, const LANES: usize> Ord for Simd<T, LANES> +where + LaneCount<LANES>: SupportedLaneCount, + T: SimdElement + Ord, +{ + #[inline] + fn cmp(&self, other: &Self) -> core::cmp::Ordering { + // TODO use SIMD equality + self.to_array().cmp(other.as_ref()) + } +} + +impl<T, const LANES: usize> core::hash::Hash for Simd<T, LANES> +where + LaneCount<LANES>: SupportedLaneCount, + T: SimdElement + core::hash::Hash, +{ + #[inline] + fn hash<H>(&self, state: &mut H) + where + H: core::hash::Hasher, + { + self.as_array().hash(state) + } +} + +// array references +impl<T, const LANES: usize> AsRef<[T; LANES]> for Simd<T, LANES> +where + LaneCount<LANES>: SupportedLaneCount, + T: SimdElement, +{ + #[inline] + fn as_ref(&self) -> &[T; LANES] { + &self.0 + } +} + +impl<T, const LANES: usize> AsMut<[T; LANES]> for Simd<T, LANES> +where + LaneCount<LANES>: SupportedLaneCount, + T: SimdElement, +{ + #[inline] + fn as_mut(&mut self) -> &mut [T; LANES] { + &mut self.0 + } +} + +// slice references +impl<T, const LANES: usize> AsRef<[T]> for Simd<T, LANES> +where + LaneCount<LANES>: SupportedLaneCount, + T: SimdElement, +{ + #[inline] + fn as_ref(&self) -> &[T] { + &self.0 + } +} + +impl<T, const LANES: usize> AsMut<[T]> for Simd<T, LANES> +where + LaneCount<LANES>: SupportedLaneCount, + T: SimdElement, +{ + #[inline] + fn as_mut(&mut self) -> &mut [T] { + &mut self.0 + } +} + +// vector/array conversion +impl<T, const LANES: usize> From<[T; LANES]> for Simd<T, LANES> +where + LaneCount<LANES>: SupportedLaneCount, + T: SimdElement, +{ + fn from(array: [T; LANES]) -> Self { + Self(array) + } +} + +impl<T, const LANES: usize> From<Simd<T, LANES>> for [T; LANES] +where + LaneCount<LANES>: SupportedLaneCount, + T: SimdElement, +{ + fn from(vector: Simd<T, LANES>) -> Self { + vector.to_array() + } +} + +mod sealed { + pub trait Sealed {} +} +use sealed::Sealed; + +/// Marker trait for types that may be used as SIMD vector elements. +/// +/// # Safety +/// This trait, when implemented, asserts the compiler can monomorphize +/// `#[repr(simd)]` structs with the marked type as an element. +/// Strictly, it is valid to impl if the vector will not be miscompiled. +/// Practically, it is user-unfriendly to impl it if the vector won't compile, +/// even when no soundness guarantees are broken by allowing the user to try. +pub unsafe trait SimdElement: Sealed + Copy { + /// The mask element type corresponding to this element type. 
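With the `From` impls above, vectors and arrays convert freely in both directions, and `==` reduces a lane-wise comparison through `Mask::all`. A sketch:

```rust
#![feature(portable_simd)]
use core::simd::Simd;

fn main() {
    let v: Simd<u32, 4> = [1, 2, 3, 4].into();
    let a: [u32; 4] = v.into();
    assert_eq!(a, [1, 2, 3, 4]);
    // `==` compares all lanes at once; any mismatching lane makes it false.
    assert_eq!(v, Simd::from_array([1, 2, 3, 4]));
    assert_ne!(v, Simd::from_array([1, 2, 3, 0]));
}
```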
+ type Mask: MaskElement; +} + +impl Sealed for u8 {} + +// Safety: u8 is a valid SIMD element type, and is supported by this API +unsafe impl SimdElement for u8 { + type Mask = i8; +} + +impl Sealed for u16 {} + +// Safety: u16 is a valid SIMD element type, and is supported by this API +unsafe impl SimdElement for u16 { + type Mask = i16; +} + +impl Sealed for u32 {} + +// Safety: u32 is a valid SIMD element type, and is supported by this API +unsafe impl SimdElement for u32 { + type Mask = i32; +} + +impl Sealed for u64 {} + +// Safety: u64 is a valid SIMD element type, and is supported by this API +unsafe impl SimdElement for u64 { + type Mask = i64; +} + +impl Sealed for usize {} + +// Safety: usize is a valid SIMD element type, and is supported by this API +unsafe impl SimdElement for usize { + type Mask = isize; +} + +impl Sealed for i8 {} + +// Safety: i8 is a valid SIMD element type, and is supported by this API +unsafe impl SimdElement for i8 { + type Mask = i8; +} + +impl Sealed for i16 {} + +// Safety: i16 is a valid SIMD element type, and is supported by this API +unsafe impl SimdElement for i16 { + type Mask = i16; +} + +impl Sealed for i32 {} + +// Safety: i32 is a valid SIMD element type, and is supported by this API +unsafe impl SimdElement for i32 { + type Mask = i32; +} + +impl Sealed for i64 {} + +// Safety: i64 is a valid SIMD element type, and is supported by this API +unsafe impl SimdElement for i64 { + type Mask = i64; +} + +impl Sealed for isize {} + +// Safety: isize is a valid SIMD element type, and is supported by this API +unsafe impl SimdElement for isize { + type Mask = isize; +} + +impl Sealed for f32 {} + +// Safety: f32 is a valid SIMD element type, and is supported by this API +unsafe impl SimdElement for f32 { + type Mask = i32; +} + +impl Sealed for f64 {} + +// Safety: f64 is a valid SIMD element type, and is supported by this API +unsafe impl SimdElement for f64 { + type Mask = i64; +} diff --git a/library/portable-simd/crates/core_simd/src/vector/float.rs b/library/portable-simd/crates/core_simd/src/vector/float.rs new file mode 100644 index 000000000..f836c99b1 --- /dev/null +++ b/library/portable-simd/crates/core_simd/src/vector/float.rs @@ -0,0 +1,24 @@ +#![allow(non_camel_case_types)] + +use crate::simd::Simd; + +/// A 64-bit SIMD vector with two elements of type `f32`. +pub type f32x2 = Simd<f32, 2>; + +/// A 128-bit SIMD vector with four elements of type `f32`. +pub type f32x4 = Simd<f32, 4>; + +/// A 256-bit SIMD vector with eight elements of type `f32`. +pub type f32x8 = Simd<f32, 8>; + +/// A 512-bit SIMD vector with 16 elements of type `f32`. +pub type f32x16 = Simd<f32, 16>; + +/// A 128-bit SIMD vector with two elements of type `f64`. +pub type f64x2 = Simd<f64, 2>; + +/// A 256-bit SIMD vector with four elements of type `f64`. +pub type f64x4 = Simd<f64, 4>; + +/// A 512-bit SIMD vector with eight elements of type `f64`. +pub type f64x8 = Simd<f64, 8>; diff --git a/library/portable-simd/crates/core_simd/src/vector/int.rs b/library/portable-simd/crates/core_simd/src/vector/int.rs new file mode 100644 index 000000000..20e56c7dc --- /dev/null +++ b/library/portable-simd/crates/core_simd/src/vector/int.rs @@ -0,0 +1,63 @@ +#![allow(non_camel_case_types)] + +use crate::simd::Simd; + +/// A SIMD vector with two elements of type `isize`. +pub type isizex2 = Simd<isize, 2>; + +/// A SIMD vector with four elements of type `isize`. +pub type isizex4 = Simd<isize, 4>; + +/// A SIMD vector with eight elements of type `isize`. 
+pub type isizex8 = Simd<isize, 8>; + +/// A 32-bit SIMD vector with two elements of type `i16`. +pub type i16x2 = Simd<i16, 2>; + +/// A 64-bit SIMD vector with four elements of type `i16`. +pub type i16x4 = Simd<i16, 4>; + +/// A 128-bit SIMD vector with eight elements of type `i16`. +pub type i16x8 = Simd<i16, 8>; + +/// A 256-bit SIMD vector with 16 elements of type `i16`. +pub type i16x16 = Simd<i16, 16>; + +/// A 512-bit SIMD vector with 32 elements of type `i16`. +pub type i16x32 = Simd<i16, 32>; + +/// A 64-bit SIMD vector with two elements of type `i32`. +pub type i32x2 = Simd<i32, 2>; + +/// A 128-bit SIMD vector with four elements of type `i32`. +pub type i32x4 = Simd<i32, 4>; + +/// A 256-bit SIMD vector with eight elements of type `i32`. +pub type i32x8 = Simd<i32, 8>; + +/// A 512-bit SIMD vector with 16 elements of type `i32`. +pub type i32x16 = Simd<i32, 16>; + +/// A 128-bit SIMD vector with two elements of type `i64`. +pub type i64x2 = Simd<i64, 2>; + +/// A 256-bit SIMD vector with four elements of type `i64`. +pub type i64x4 = Simd<i64, 4>; + +/// A 512-bit SIMD vector with eight elements of type `i64`. +pub type i64x8 = Simd<i64, 8>; + +/// A 32-bit SIMD vector with four elements of type `i8`. +pub type i8x4 = Simd<i8, 4>; + +/// A 64-bit SIMD vector with eight elements of type `i8`. +pub type i8x8 = Simd<i8, 8>; + +/// A 128-bit SIMD vector with 16 elements of type `i8`. +pub type i8x16 = Simd<i8, 16>; + +/// A 256-bit SIMD vector with 32 elements of type `i8`. +pub type i8x32 = Simd<i8, 32>; + +/// A 512-bit SIMD vector with 64 elements of type `i8`. +pub type i8x64 = Simd<i8, 64>; diff --git a/library/portable-simd/crates/core_simd/src/vector/ptr.rs b/library/portable-simd/crates/core_simd/src/vector/ptr.rs new file mode 100644 index 000000000..fa756344d --- /dev/null +++ b/library/portable-simd/crates/core_simd/src/vector/ptr.rs @@ -0,0 +1,51 @@ +//! Private implementation details of public gather/scatter APIs. +use crate::simd::intrinsics; +use crate::simd::{LaneCount, Simd, SupportedLaneCount}; + +/// A vector of *const T. +#[derive(Debug, Copy, Clone)] +#[repr(simd)] +pub(crate) struct SimdConstPtr<T, const LANES: usize>([*const T; LANES]); + +impl<T, const LANES: usize> SimdConstPtr<T, LANES> +where + LaneCount<LANES>: SupportedLaneCount, + T: Sized, +{ + #[inline] + #[must_use] + pub fn splat(ptr: *const T) -> Self { + Self([ptr; LANES]) + } + + #[inline] + #[must_use] + pub fn wrapping_add(self, addend: Simd<usize, LANES>) -> Self { + // Safety: this intrinsic doesn't have a precondition + unsafe { intrinsics::simd_arith_offset(self, addend) } + } +} + +/// A vector of *mut T. Be very careful around potential aliasing. 
+#[derive(Debug, Copy, Clone)] +#[repr(simd)] +pub(crate) struct SimdMutPtr<T, const LANES: usize>([*mut T; LANES]); + +impl<T, const LANES: usize> SimdMutPtr<T, LANES> +where + LaneCount<LANES>: SupportedLaneCount, + T: Sized, +{ + #[inline] + #[must_use] + pub fn splat(ptr: *mut T) -> Self { + Self([ptr; LANES]) + } + + #[inline] + #[must_use] + pub fn wrapping_add(self, addend: Simd<usize, LANES>) -> Self { + // Safety: this intrinsic doesn't have a precondition + unsafe { intrinsics::simd_arith_offset(self, addend) } + } +} diff --git a/library/portable-simd/crates/core_simd/src/vector/uint.rs b/library/portable-simd/crates/core_simd/src/vector/uint.rs new file mode 100644 index 000000000..b4a69c443 --- /dev/null +++ b/library/portable-simd/crates/core_simd/src/vector/uint.rs @@ -0,0 +1,63 @@ +#![allow(non_camel_case_types)] + +use crate::simd::Simd; + +/// A SIMD vector with two elements of type `usize`. +pub type usizex2 = Simd<usize, 2>; + +/// A SIMD vector with four elements of type `usize`. +pub type usizex4 = Simd<usize, 4>; + +/// A SIMD vector with eight elements of type `usize`. +pub type usizex8 = Simd<usize, 8>; + +/// A 32-bit SIMD vector with two elements of type `u16`. +pub type u16x2 = Simd<u16, 2>; + +/// A 64-bit SIMD vector with four elements of type `u16`. +pub type u16x4 = Simd<u16, 4>; + +/// A 128-bit SIMD vector with eight elements of type `u16`. +pub type u16x8 = Simd<u16, 8>; + +/// A 256-bit SIMD vector with 16 elements of type `u16`. +pub type u16x16 = Simd<u16, 16>; + +/// A 512-bit SIMD vector with 32 elements of type `u16`. +pub type u16x32 = Simd<u16, 32>; + +/// A 64-bit SIMD vector with two elements of type `u32`. +pub type u32x2 = Simd<u32, 2>; + +/// A 128-bit SIMD vector with four elements of type `u32`. +pub type u32x4 = Simd<u32, 4>; + +/// A 256-bit SIMD vector with eight elements of type `u32`. +pub type u32x8 = Simd<u32, 8>; + +/// A 512-bit SIMD vector with 16 elements of type `u32`. +pub type u32x16 = Simd<u32, 16>; + +/// A 128-bit SIMD vector with two elements of type `u64`. +pub type u64x2 = Simd<u64, 2>; + +/// A 256-bit SIMD vector with four elements of type `u64`. +pub type u64x4 = Simd<u64, 4>; + +/// A 512-bit SIMD vector with eight elements of type `u64`. +pub type u64x8 = Simd<u64, 8>; + +/// A 32-bit SIMD vector with four elements of type `u8`. +pub type u8x4 = Simd<u8, 4>; + +/// A 64-bit SIMD vector with eight elements of type `u8`. +pub type u8x8 = Simd<u8, 8>; + +/// A 128-bit SIMD vector with 16 elements of type `u8`. +pub type u8x16 = Simd<u8, 16>; + +/// A 256-bit SIMD vector with 32 elements of type `u8`. +pub type u8x32 = Simd<u8, 32>; + +/// A 512-bit SIMD vector with 64 elements of type `u8`. +pub type u8x64 = Simd<u8, 64>; diff --git a/library/portable-simd/crates/core_simd/src/vendor.rs b/library/portable-simd/crates/core_simd/src/vendor.rs new file mode 100644 index 000000000..9fb70218c --- /dev/null +++ b/library/portable-simd/crates/core_simd/src/vendor.rs @@ -0,0 +1,31 @@ +/// Provides implementations of `From<$a> for $b` and `From<$b> for $a` that transmutes the value. +#[allow(unused)] +macro_rules! 
from_transmute { + { unsafe $a:ty => $b:ty } => { + from_transmute!{ @impl $a => $b } + from_transmute!{ @impl $b => $a } + }; + { @impl $from:ty => $to:ty } => { + impl core::convert::From<$from> for $to { + #[inline] + fn from(value: $from) -> $to { + // Safety: transmuting between vectors is safe, but the caller of this macro + // checks the invariants + unsafe { core::mem::transmute(value) } + } + } + }; +} + +/// Conversions to x86's SIMD types. +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +mod x86; + +#[cfg(any(target_arch = "wasm32"))] +mod wasm32; + +#[cfg(any(target_arch = "aarch64", target_arch = "arm",))] +mod arm; + +#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] +mod powerpc; diff --git a/library/portable-simd/crates/core_simd/src/vendor/arm.rs b/library/portable-simd/crates/core_simd/src/vendor/arm.rs new file mode 100644 index 000000000..ff3b69ccf --- /dev/null +++ b/library/portable-simd/crates/core_simd/src/vendor/arm.rs @@ -0,0 +1,76 @@ +#![allow(unused)] +use crate::simd::*; + +#[cfg(target_arch = "arm")] +use core::arch::arm::*; + +#[cfg(target_arch = "aarch64")] +use core::arch::aarch64::*; + +#[cfg(any( + target_arch = "aarch64", + all(target_arch = "arm", target_feature = "v7"), +))] +mod neon { + use super::*; + + from_transmute! { unsafe f32x2 => float32x2_t } + from_transmute! { unsafe f32x4 => float32x4_t } + + from_transmute! { unsafe u8x8 => uint8x8_t } + from_transmute! { unsafe u8x16 => uint8x16_t } + from_transmute! { unsafe i8x8 => int8x8_t } + from_transmute! { unsafe i8x16 => int8x16_t } + from_transmute! { unsafe u8x8 => poly8x8_t } + from_transmute! { unsafe u8x16 => poly8x16_t } + + from_transmute! { unsafe u16x4 => uint16x4_t } + from_transmute! { unsafe u16x8 => uint16x8_t } + from_transmute! { unsafe i16x4 => int16x4_t } + from_transmute! { unsafe i16x8 => int16x8_t } + from_transmute! { unsafe u16x4 => poly16x4_t } + from_transmute! { unsafe u16x8 => poly16x8_t } + + from_transmute! { unsafe u32x2 => uint32x2_t } + from_transmute! { unsafe u32x4 => uint32x4_t } + from_transmute! { unsafe i32x2 => int32x2_t } + from_transmute! { unsafe i32x4 => int32x4_t } + + from_transmute! { unsafe Simd<u64, 1> => uint64x1_t } + from_transmute! { unsafe u64x2 => uint64x2_t } + from_transmute! { unsafe Simd<i64, 1> => int64x1_t } + from_transmute! { unsafe i64x2 => int64x2_t } + from_transmute! { unsafe Simd<u64, 1> => poly64x1_t } + from_transmute! { unsafe u64x2 => poly64x2_t } +} + +#[cfg(any( + all(target_feature = "v5te", not(target_feature = "mclass")), + all(target_feature = "mclass", target_feature = "dsp"), +))] +mod dsp { + use super::*; + + from_transmute! { unsafe Simd<u16, 2> => uint16x2_t } + from_transmute! { unsafe Simd<i16, 2> => int16x2_t } +} + +#[cfg(any( + all(target_feature = "v6", not(target_feature = "mclass")), + all(target_feature = "mclass", target_feature = "dsp"), +))] +mod simd32 { + use super::*; + + from_transmute! { unsafe Simd<u8, 4> => uint8x4_t } + from_transmute! { unsafe Simd<i8, 4> => int8x4_t } +} + +#[cfg(target_arch = "aarch64")] +mod aarch64 { + use super::neon::*; + use super::*; + + from_transmute! { unsafe Simd<f64, 1> => float64x1_t } + from_transmute! 
{ unsafe f64x2 => float64x2_t } +} diff --git a/library/portable-simd/crates/core_simd/src/vendor/powerpc.rs b/library/portable-simd/crates/core_simd/src/vendor/powerpc.rs new file mode 100644 index 000000000..92f97d471 --- /dev/null +++ b/library/portable-simd/crates/core_simd/src/vendor/powerpc.rs @@ -0,0 +1,11 @@ +use crate::simd::*; + +#[cfg(target_arch = "powerpc")] +use core::arch::powerpc::*; + +#[cfg(target_arch = "powerpc64")] +use core::arch::powerpc64::*; + +from_transmute! { unsafe f64x2 => vector_double } +from_transmute! { unsafe i64x2 => vector_signed_long } +from_transmute! { unsafe u64x2 => vector_unsigned_long } diff --git a/library/portable-simd/crates/core_simd/src/vendor/wasm32.rs b/library/portable-simd/crates/core_simd/src/vendor/wasm32.rs new file mode 100644 index 000000000..ef3baf885 --- /dev/null +++ b/library/portable-simd/crates/core_simd/src/vendor/wasm32.rs @@ -0,0 +1,30 @@ +use crate::simd::*; +use core::arch::wasm32::v128; + +from_transmute! { unsafe u8x16 => v128 } +from_transmute! { unsafe i8x16 => v128 } + +from_transmute! { unsafe u16x8 => v128 } +from_transmute! { unsafe i16x8 => v128 } + +from_transmute! { unsafe u32x4 => v128 } +from_transmute! { unsafe i32x4 => v128 } +from_transmute! { unsafe f32x4 => v128 } + +from_transmute! { unsafe u64x2 => v128 } +from_transmute! { unsafe i64x2 => v128 } +from_transmute! { unsafe f64x2 => v128 } + +#[cfg(target_pointer_width = "32")] +mod p32 { + use super::*; + from_transmute! { unsafe usizex4 => v128 } + from_transmute! { unsafe isizex4 => v128 } +} + +#[cfg(target_pointer_width = "64")] +mod p64 { + use super::*; + from_transmute! { unsafe usizex2 => v128 } + from_transmute! { unsafe isizex2 => v128 } +} diff --git a/library/portable-simd/crates/core_simd/src/vendor/x86.rs b/library/portable-simd/crates/core_simd/src/vendor/x86.rs new file mode 100644 index 000000000..0dd47015e --- /dev/null +++ b/library/portable-simd/crates/core_simd/src/vendor/x86.rs @@ -0,0 +1,63 @@ +use crate::simd::*; + +#[cfg(any(target_arch = "x86"))] +use core::arch::x86::*; + +#[cfg(target_arch = "x86_64")] +use core::arch::x86_64::*; + +from_transmute! { unsafe u8x16 => __m128i } +from_transmute! { unsafe u8x32 => __m256i } +from_transmute! { unsafe u8x64 => __m512i } +from_transmute! { unsafe i8x16 => __m128i } +from_transmute! { unsafe i8x32 => __m256i } +from_transmute! { unsafe i8x64 => __m512i } + +from_transmute! { unsafe u16x8 => __m128i } +from_transmute! { unsafe u16x16 => __m256i } +from_transmute! { unsafe u16x32 => __m512i } +from_transmute! { unsafe i16x8 => __m128i } +from_transmute! { unsafe i16x16 => __m256i } +from_transmute! { unsafe i16x32 => __m512i } + +from_transmute! { unsafe u32x4 => __m128i } +from_transmute! { unsafe u32x8 => __m256i } +from_transmute! { unsafe u32x16 => __m512i } +from_transmute! { unsafe i32x4 => __m128i } +from_transmute! { unsafe i32x8 => __m256i } +from_transmute! { unsafe i32x16 => __m512i } +from_transmute! { unsafe f32x4 => __m128 } +from_transmute! { unsafe f32x8 => __m256 } +from_transmute! { unsafe f32x16 => __m512 } + +from_transmute! { unsafe u64x2 => __m128i } +from_transmute! { unsafe u64x4 => __m256i } +from_transmute! { unsafe u64x8 => __m512i } +from_transmute! { unsafe i64x2 => __m128i } +from_transmute! { unsafe i64x4 => __m256i } +from_transmute! { unsafe i64x8 => __m512i } +from_transmute! { unsafe f64x2 => __m128d } +from_transmute! { unsafe f64x4 => __m256d } +from_transmute! 
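+// A usage sketch, assuming an x86_64 target: each pair above generates
+// bidirectional `From` impls, so portable vectors and vendor intrinsics can be
+// mixed without a `transmute` at every call site:
+//
+//     let v = u8x16::splat(1);
+//     let raw: core::arch::x86_64::__m128i = v.into();
+//     let back = u8x16::from(raw);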
{ unsafe f64x8 => __m512d } + +#[cfg(target_pointer_width = "32")] +mod p32 { + use super::*; + from_transmute! { unsafe usizex4 => __m128i } + from_transmute! { unsafe usizex8 => __m256i } + from_transmute! { unsafe Simd<usize, 16> => __m512i } + from_transmute! { unsafe isizex4 => __m128i } + from_transmute! { unsafe isizex8 => __m256i } + from_transmute! { unsafe Simd<isize, 16> => __m512i } +} + +#[cfg(target_pointer_width = "64")] +mod p64 { + use super::*; + from_transmute! { unsafe usizex2 => __m128i } + from_transmute! { unsafe usizex4 => __m256i } + from_transmute! { unsafe usizex8 => __m512i } + from_transmute! { unsafe isizex2 => __m128i } + from_transmute! { unsafe isizex4 => __m256i } + from_transmute! { unsafe isizex8 => __m512i } +} diff --git a/library/portable-simd/crates/core_simd/tests/autoderef.rs b/library/portable-simd/crates/core_simd/tests/autoderef.rs new file mode 100644 index 000000000..9359da16e --- /dev/null +++ b/library/portable-simd/crates/core_simd/tests/autoderef.rs @@ -0,0 +1,22 @@ +// Test that we handle all our "auto-deref" cases correctly. +#![feature(portable_simd)] +use core_simd::f32x4; + +#[cfg(target_arch = "wasm32")] +use wasm_bindgen_test::*; + +#[cfg(target_arch = "wasm32")] +wasm_bindgen_test_configure!(run_in_browser); + +#[test] +#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] +fn deref() { + let x = f32x4::splat(1.0); + let y = f32x4::splat(2.0); + let a = &x; + let b = &y; + assert_eq!(f32x4::splat(3.0), x + y); + assert_eq!(f32x4::splat(3.0), x + b); + assert_eq!(f32x4::splat(3.0), a + y); + assert_eq!(f32x4::splat(3.0), a + b); +} diff --git a/library/portable-simd/crates/core_simd/tests/cast.rs b/library/portable-simd/crates/core_simd/tests/cast.rs new file mode 100644 index 000000000..ab5650f07 --- /dev/null +++ b/library/portable-simd/crates/core_simd/tests/cast.rs @@ -0,0 +1,37 @@ +#![feature(portable_simd)] +macro_rules! cast_types { + ($start:ident, $($target:ident),*) => { + mod $start { + use core_simd::simd::Simd; + type Vector<const N: usize> = Simd<$start, N>; + $( + mod $target { + use super::*; + test_helpers::test_lanes! { + fn cast_as<const N: usize>() { + test_helpers::test_unary_elementwise( + &Vector::<N>::cast::<$target>, + &|x| x as $target, + &|_| true, + ) + } + } + } + )* + } + }; +} + +// The hypothesis is that widening conversions aren't terribly interesting. +cast_types!(f32, f64, i8, u8, usize, isize); +cast_types!(f64, f32, i8, u8, usize, isize); +cast_types!(i8, u8, f32); +cast_types!(u8, i8, f32); +cast_types!(i16, u16, i8, u8, f32); +cast_types!(u16, i16, i8, u8, f32); +cast_types!(i32, u32, i8, u8, f32, f64); +cast_types!(u32, i32, i8, u8, f32, f64); +cast_types!(i64, u64, i8, u8, isize, usize, f32, f64); +cast_types!(u64, i64, i8, u8, isize, usize, f32, f64); +cast_types!(isize, usize, i8, u8, f32, f64); +cast_types!(usize, isize, i8, u8, f32, f64); diff --git a/library/portable-simd/crates/core_simd/tests/f32_ops.rs b/library/portable-simd/crates/core_simd/tests/f32_ops.rs new file mode 100644 index 000000000..414a832b1 --- /dev/null +++ b/library/portable-simd/crates/core_simd/tests/f32_ops.rs @@ -0,0 +1,5 @@ +#![feature(portable_simd)] + +#[macro_use] +mod ops_macros; +impl_float_tests! 
{ f32, i32 } diff --git a/library/portable-simd/crates/core_simd/tests/f64_ops.rs b/library/portable-simd/crates/core_simd/tests/f64_ops.rs new file mode 100644 index 000000000..e0a1fa33f --- /dev/null +++ b/library/portable-simd/crates/core_simd/tests/f64_ops.rs @@ -0,0 +1,5 @@ +#![feature(portable_simd)] + +#[macro_use] +mod ops_macros; +impl_float_tests! { f64, i64 } diff --git a/library/portable-simd/crates/core_simd/tests/i16_ops.rs b/library/portable-simd/crates/core_simd/tests/i16_ops.rs new file mode 100644 index 000000000..f6c5d74fb --- /dev/null +++ b/library/portable-simd/crates/core_simd/tests/i16_ops.rs @@ -0,0 +1,5 @@ +#![feature(portable_simd)] + +#[macro_use] +mod ops_macros; +impl_signed_tests! { i16 } diff --git a/library/portable-simd/crates/core_simd/tests/i32_ops.rs b/library/portable-simd/crates/core_simd/tests/i32_ops.rs new file mode 100644 index 000000000..69a831c52 --- /dev/null +++ b/library/portable-simd/crates/core_simd/tests/i32_ops.rs @@ -0,0 +1,5 @@ +#![feature(portable_simd)] + +#[macro_use] +mod ops_macros; +impl_signed_tests! { i32 } diff --git a/library/portable-simd/crates/core_simd/tests/i64_ops.rs b/library/portable-simd/crates/core_simd/tests/i64_ops.rs new file mode 100644 index 000000000..37ac08117 --- /dev/null +++ b/library/portable-simd/crates/core_simd/tests/i64_ops.rs @@ -0,0 +1,5 @@ +#![feature(portable_simd)] + +#[macro_use] +mod ops_macros; +impl_signed_tests! { i64 } diff --git a/library/portable-simd/crates/core_simd/tests/i8_ops.rs b/library/portable-simd/crates/core_simd/tests/i8_ops.rs new file mode 100644 index 000000000..11e4a5cd6 --- /dev/null +++ b/library/portable-simd/crates/core_simd/tests/i8_ops.rs @@ -0,0 +1,5 @@ +#![feature(portable_simd)] + +#[macro_use] +mod ops_macros; +impl_signed_tests! { i8 } diff --git a/library/portable-simd/crates/core_simd/tests/isize_ops.rs b/library/portable-simd/crates/core_simd/tests/isize_ops.rs new file mode 100644 index 000000000..5cc9de2b7 --- /dev/null +++ b/library/portable-simd/crates/core_simd/tests/isize_ops.rs @@ -0,0 +1,5 @@ +#![feature(portable_simd)] + +#[macro_use] +mod ops_macros; +impl_signed_tests! { isize } diff --git a/library/portable-simd/crates/core_simd/tests/mask_ops.rs b/library/portable-simd/crates/core_simd/tests/mask_ops.rs new file mode 100644 index 000000000..f113b50cb --- /dev/null +++ b/library/portable-simd/crates/core_simd/tests/mask_ops.rs @@ -0,0 +1,3 @@ +#![feature(portable_simd)] + +mod mask_ops_impl; diff --git a/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask16.rs b/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask16.rs new file mode 100644 index 000000000..0fe82fa68 --- /dev/null +++ b/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask16.rs @@ -0,0 +1,4 @@ +mask_tests! { mask16x4, 4 } +mask_tests! { mask16x8, 8 } +mask_tests! { mask16x16, 16 } +mask_tests! { mask16x32, 32 } diff --git a/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask32.rs b/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask32.rs new file mode 100644 index 000000000..66d987a43 --- /dev/null +++ b/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask32.rs @@ -0,0 +1,4 @@ +mask_tests! { mask32x2, 2 } +mask_tests! { mask32x4, 4 } +mask_tests! { mask32x8, 8 } +mask_tests! 
{ mask32x16, 16 } diff --git a/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask64.rs b/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask64.rs new file mode 100644 index 000000000..a1f1f67b2 --- /dev/null +++ b/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask64.rs @@ -0,0 +1,3 @@ +mask_tests! { mask64x2, 2 } +mask_tests! { mask64x4, 4 } +mask_tests! { mask64x8, 8 } diff --git a/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask8.rs b/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask8.rs new file mode 100644 index 000000000..9c06fbc04 --- /dev/null +++ b/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask8.rs @@ -0,0 +1,3 @@ +mask_tests! { mask8x8, 8 } +mask_tests! { mask8x16, 16 } +mask_tests! { mask8x32, 32 } diff --git a/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask_macros.rs b/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask_macros.rs new file mode 100644 index 000000000..795f9e27c --- /dev/null +++ b/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mask_macros.rs @@ -0,0 +1,225 @@ +macro_rules! mask_tests { + { $vector:ident, $lanes:literal } => { + #[cfg(test)] + mod $vector { + use core_simd::$vector as Vector; + const LANES: usize = $lanes; + + #[cfg(target_arch = "wasm32")] + use wasm_bindgen_test::*; + + #[cfg(target_arch = "wasm32")] + wasm_bindgen_test_configure!(run_in_browser); + + fn from_slice(slice: &[bool]) -> Vector { + let mut value = Vector::default(); + for (i, b) in slice.iter().take(LANES).enumerate() { + value.set(i, *b); + } + value + } + + fn apply_unary_lanewise(x: Vector, f: impl Fn(bool) -> bool) -> Vector { + let mut value = Vector::default(); + for i in 0..LANES { + value.set(i, f(x.test(i))); + } + value + } + + fn apply_binary_lanewise(x: Vector, y: Vector, f: impl Fn(bool, bool) -> bool) -> Vector { + let mut value = Vector::default(); + for i in 0..LANES { + value.set(i, f(x.test(i), y.test(i))); + } + value + } + + fn apply_binary_scalar_lhs_lanewise(x: bool, mut y: Vector, f: impl Fn(bool, bool) -> bool) -> Vector { + for i in 0..LANES { + y.set(i, f(x, y.test(i))); + } + y + } + + fn apply_binary_scalar_rhs_lanewise(mut x: Vector, y: bool, f: impl Fn(bool, bool) -> bool) -> Vector { + for i in 0..LANES { + x.set(i, f(x.test(i), y)); + } + x + } + + const A: [bool; 64] = [ + false, true, false, true, false, false, true, true, + false, true, false, true, false, false, true, true, + false, true, false, true, false, false, true, true, + false, true, false, true, false, false, true, true, + false, true, false, true, false, false, true, true, + false, true, false, true, false, false, true, true, + false, true, false, true, false, false, true, true, + false, true, false, true, false, false, true, true, + ]; + const B: [bool; 64] = [ + false, false, true, true, false, true, false, true, + false, false, true, true, false, true, false, true, + false, false, true, true, false, true, false, true, + false, false, true, true, false, true, false, true, + false, false, true, true, false, true, false, true, + false, false, true, true, false, true, false, true, + false, false, true, true, false, true, false, true, + false, false, true, true, false, true, false, true, + ]; + + #[test] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn bitand() { + let a = from_slice(&A); + let b = from_slice(&B); + let expected = apply_binary_lanewise(a, b, core::ops::BitAnd::bitand); + assert_eq!(a & b, expected); + } + + #[test] + 
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn bitand_assign() { + let mut a = from_slice(&A); + let b = from_slice(&B); + let expected = apply_binary_lanewise(a, b, core::ops::BitAnd::bitand); + a &= b; + assert_eq!(a, expected); + } + + #[test] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn bitand_scalar_rhs() { + let a = from_slice(&A); + let expected = a; + assert_eq!(a & true, expected); + assert_eq!(a & false, Vector::splat(false)); + } + + #[test] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn bitand_scalar_lhs() { + let a = from_slice(&A); + let expected = a; + assert_eq!(true & a, expected); + assert_eq!(false & a, Vector::splat(false)); + } + + #[test] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn bitand_assign_scalar() { + let mut a = from_slice(&A); + let expected = a; + a &= true; + assert_eq!(a, expected); + a &= false; + assert_eq!(a, Vector::splat(false)); + } + + #[test] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn bitor() { + let a = from_slice(&A); + let b = from_slice(&B); + let expected = apply_binary_lanewise(a, b, core::ops::BitOr::bitor); + assert_eq!(a | b, expected); + } + + #[test] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn bitor_assign() { + let mut a = from_slice(&A); + let b = from_slice(&B); + let expected = apply_binary_lanewise(a, b, core::ops::BitOr::bitor); + a |= b; + assert_eq!(a, expected); + } + + #[test] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn bitor_scalar_rhs() { + let a = from_slice(&A); + assert_eq!(a | false, a); + assert_eq!(a | true, Vector::splat(true)); + } + + #[test] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn bitor_scalar_lhs() { + let a = from_slice(&A); + assert_eq!(false | a, a); + assert_eq!(true | a, Vector::splat(true)); + } + + #[test] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn bitor_assign_scalar() { + let mut a = from_slice(&A); + let expected = a; + a |= false; + assert_eq!(a, expected); + a |= true; + assert_eq!(a, Vector::splat(true)); + } + + #[test] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn bitxor() { + let a = from_slice(&A); + let b = from_slice(&B); + let expected = apply_binary_lanewise(a, b, core::ops::BitXor::bitxor); + assert_eq!(a ^ b, expected); + } + + #[test] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn bitxor_assign() { + let mut a = from_slice(&A); + let b = from_slice(&B); + let expected = apply_binary_lanewise(a, b, core::ops::BitXor::bitxor); + a ^= b; + assert_eq!(a, expected); + } + + #[test] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn bitxor_scalar_rhs() { + let a = from_slice(&A); + let expected = apply_binary_scalar_rhs_lanewise(a, true, core::ops::BitXor::bitxor); + assert_eq!(a ^ false, a); + assert_eq!(a ^ true, expected); + } + + #[test] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn bitxor_scalar_lhs() { + let a = from_slice(&A); + let expected = apply_binary_scalar_lhs_lanewise(true, a, core::ops::BitXor::bitxor); + assert_eq!(false ^ a, a); + assert_eq!(true ^ a, expected); + } + + #[test] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn bitxor_assign_scalar() { + let mut a = from_slice(&A); + let expected_unset = a; + let expected_set = apply_binary_scalar_rhs_lanewise(a, true, core::ops::BitXor::bitxor); + a ^= false; + assert_eq!(a, expected_unset); + a ^= true; + assert_eq!(a, expected_set); + } + + #[test] + #[cfg_attr(target_arch = "wasm32", 
wasm_bindgen_test)] + fn not() { + let v = from_slice(&A); + let expected = apply_unary_lanewise(v, core::ops::Not::not); + assert_eq!(!v, expected); + } + } + } +} diff --git a/library/portable-simd/crates/core_simd/tests/mask_ops_impl/masksize.rs b/library/portable-simd/crates/core_simd/tests/mask_ops_impl/masksize.rs new file mode 100644 index 000000000..e0a44d870 --- /dev/null +++ b/library/portable-simd/crates/core_simd/tests/mask_ops_impl/masksize.rs @@ -0,0 +1,3 @@ +mask_tests! { masksizex2, 2 } +mask_tests! { masksizex4, 4 } +mask_tests! { masksizex8, 8 } diff --git a/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mod.rs b/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mod.rs new file mode 100644 index 000000000..b9ec8462a --- /dev/null +++ b/library/portable-simd/crates/core_simd/tests/mask_ops_impl/mod.rs @@ -0,0 +1,9 @@ +#[macro_use] +mod mask_macros; + +#[rustfmt::skip] +mod mask8; +mod mask16; +mod mask32; +mod mask64; +mod masksize; diff --git a/library/portable-simd/crates/core_simd/tests/masks.rs b/library/portable-simd/crates/core_simd/tests/masks.rs new file mode 100644 index 000000000..673d0db93 --- /dev/null +++ b/library/portable-simd/crates/core_simd/tests/masks.rs @@ -0,0 +1,158 @@ +#![feature(portable_simd)] + +#[cfg(target_arch = "wasm32")] +use wasm_bindgen_test::*; + +#[cfg(target_arch = "wasm32")] +wasm_bindgen_test_configure!(run_in_browser); + +macro_rules! test_mask_api { + { $type:ident } => { + #[allow(non_snake_case)] + mod $type { + #[cfg(target_arch = "wasm32")] + use wasm_bindgen_test::*; + + #[test] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn set_and_test() { + let values = [true, false, false, true, false, false, true, false]; + let mut mask = core_simd::Mask::<$type, 8>::splat(false); + for (lane, value) in values.iter().copied().enumerate() { + mask.set(lane, value); + } + for (lane, value) in values.iter().copied().enumerate() { + assert_eq!(mask.test(lane), value); + } + } + + #[test] + #[should_panic] + fn set_invalid_lane() { + let mut mask = core_simd::Mask::<$type, 8>::splat(false); + mask.set(8, true); + let _ = mask; + } + + #[test] + #[should_panic] + fn test_invalid_lane() { + let mask = core_simd::Mask::<$type, 8>::splat(false); + let _ = mask.test(8); + } + + #[test] + fn any() { + assert!(!core_simd::Mask::<$type, 8>::splat(false).any()); + assert!(core_simd::Mask::<$type, 8>::splat(true).any()); + let mut v = core_simd::Mask::<$type, 8>::splat(false); + v.set(2, true); + assert!(v.any()); + } + + #[test] + fn all() { + assert!(!core_simd::Mask::<$type, 8>::splat(false).all()); + assert!(core_simd::Mask::<$type, 8>::splat(true).all()); + let mut v = core_simd::Mask::<$type, 8>::splat(false); + v.set(2, true); + assert!(!v.all()); + } + + #[test] + fn roundtrip_int_conversion() { + let values = [true, false, false, true, false, false, true, false]; + let mask = core_simd::Mask::<$type, 8>::from_array(values); + let int = mask.to_int(); + assert_eq!(int.to_array(), [-1, 0, 0, -1, 0, 0, -1, 0]); + assert_eq!(core_simd::Mask::<$type, 8>::from_int(int), mask); + } + + #[test] + fn roundtrip_bitmask_conversion() { + use core_simd::ToBitMask; + let values = [ + true, false, false, true, false, false, true, false, + true, true, false, false, false, false, false, true, + ]; + let mask = core_simd::Mask::<$type, 16>::from_array(values); + let bitmask = mask.to_bitmask(); + assert_eq!(bitmask, 0b1000001101001001); + assert_eq!(core_simd::Mask::<$type, 16>::from_bitmask(bitmask), mask); + } + + #[test] + fn 
roundtrip_bitmask_conversion_short() { + use core_simd::ToBitMask; + + let values = [ + false, false, false, true, + ]; + let mask = core_simd::Mask::<$type, 4>::from_array(values); + let bitmask = mask.to_bitmask(); + assert_eq!(bitmask, 0b1000); + assert_eq!(core_simd::Mask::<$type, 4>::from_bitmask(bitmask), mask); + + let values = [true, false]; + let mask = core_simd::Mask::<$type, 2>::from_array(values); + let bitmask = mask.to_bitmask(); + assert_eq!(bitmask, 0b01); + assert_eq!(core_simd::Mask::<$type, 2>::from_bitmask(bitmask), mask); + } + + #[test] + fn cast() { + fn cast_impl<T: core_simd::MaskElement>() + where + core_simd::Mask<$type, 8>: Into<core_simd::Mask<T, 8>>, + { + let values = [true, false, false, true, false, false, true, false]; + let mask = core_simd::Mask::<$type, 8>::from_array(values); + + let cast_mask = mask.cast::<T>(); + assert_eq!(values, cast_mask.to_array()); + + let into_mask: core_simd::Mask<T, 8> = mask.into(); + assert_eq!(values, into_mask.to_array()); + } + + cast_impl::<i8>(); + cast_impl::<i16>(); + cast_impl::<i32>(); + cast_impl::<i64>(); + cast_impl::<isize>(); + } + + #[cfg(feature = "generic_const_exprs")] + #[test] + fn roundtrip_bitmask_array_conversion() { + use core_simd::ToBitMaskArray; + let values = [ + true, false, false, true, false, false, true, false, + true, true, false, false, false, false, false, true, + ]; + let mask = core_simd::Mask::<$type, 16>::from_array(values); + let bitmask = mask.to_bitmask_array(); + assert_eq!(bitmask, [0b01001001, 0b10000011]); + assert_eq!(core_simd::Mask::<$type, 16>::from_bitmask_array(bitmask), mask); + } + } + } +} + +mod mask_api { + test_mask_api! { i8 } + test_mask_api! { i16 } + test_mask_api! { i32 } + test_mask_api! { i64 } + test_mask_api! { isize } +} + +#[test] +fn convert() { + let values = [true, false, false, true, false, false, true, false]; + assert_eq!( + core_simd::Mask::<i8, 8>::from_array(values), + core_simd::Mask::<i32, 8>::from_array(values).into() + ); +} diff --git a/library/portable-simd/crates/core_simd/tests/ops_macros.rs b/library/portable-simd/crates/core_simd/tests/ops_macros.rs new file mode 100644 index 000000000..f759394d0 --- /dev/null +++ b/library/portable-simd/crates/core_simd/tests/ops_macros.rs @@ -0,0 +1,607 @@ +/// Implements a test on a unary operation using proptest. +/// +/// Compares the vector operation to the equivalent scalar operation. +#[macro_export] +macro_rules! impl_unary_op_test { + { $scalar:ty, $trait:ident :: $fn:ident, $scalar_fn:expr } => { + test_helpers::test_lanes! { + fn $fn<const LANES: usize>() { + test_helpers::test_unary_elementwise( + &<core_simd::Simd<$scalar, LANES> as core::ops::$trait>::$fn, + &$scalar_fn, + &|_| true, + ); + } + } + }; + { $scalar:ty, $trait:ident :: $fn:ident } => { + impl_unary_op_test! { $scalar, $trait::$fn, <$scalar as core::ops::$trait>::$fn } + }; +} + +/// Implements a test on a binary operation using proptest. +/// +/// Compares the vector operation to the equivalent scalar operation. +#[macro_export] +macro_rules! impl_binary_op_test { + { $scalar:ty, $trait:ident :: $fn:ident, $trait_assign:ident :: $fn_assign:ident, $scalar_fn:expr } => { + mod $fn { + use super::*; + use core_simd::Simd; + + test_helpers::test_lanes! 
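+// Conceptually, every test generated below checks the vector operation against
+// the scalar one lane by lane: for random arrays `a` and `b` it asserts, per
+// lane `i` (pseudocode),
+//
+//     vector_fn(Simd::from_array(a), Simd::from_array(b))[i]  biteq  scalar_fn(a[i], b[i])
+//
+// where `biteq` compares exact bit patterns (see `test_helpers::biteq`).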
{ + fn normal<const LANES: usize>() { + test_helpers::test_binary_elementwise( + &<Simd<$scalar, LANES> as core::ops::$trait>::$fn, + &$scalar_fn, + &|_, _| true, + ); + } + + fn assign<const LANES: usize>() { + test_helpers::test_binary_elementwise( + &|mut a, b| { <Simd<$scalar, LANES> as core::ops::$trait_assign>::$fn_assign(&mut a, b); a }, + &$scalar_fn, + &|_, _| true, + ); + } + } + } + }; + { $scalar:ty, $trait:ident :: $fn:ident, $trait_assign:ident :: $fn_assign:ident } => { + impl_binary_op_test! { $scalar, $trait::$fn, $trait_assign::$fn_assign, <$scalar as core::ops::$trait>::$fn } + }; +} + +/// Implements a test on a binary operation using proptest. +/// +/// Like `impl_binary_op_test`, but allows providing a function for rejecting particular inputs +/// (like the `prop_assume!` macro). +/// +/// Compares the vector operation to the equivalent scalar operation. +#[macro_export] +macro_rules! impl_binary_checked_op_test { + { $scalar:ty, $trait:ident :: $fn:ident, $trait_assign:ident :: $fn_assign:ident, $scalar_fn:expr, $check_fn:expr } => { + mod $fn { + use super::*; + use core_simd::Simd; + + test_helpers::test_lanes! { + fn normal<const LANES: usize>() { + test_helpers::test_binary_elementwise( + &<Simd<$scalar, LANES> as core::ops::$trait>::$fn, + &$scalar_fn, + &|x, y| x.iter().zip(y.iter()).all(|(x, y)| $check_fn(*x, *y)), + ); + } + + fn assign<const LANES: usize>() { + test_helpers::test_binary_elementwise( + &|mut a, b| { <Simd<$scalar, LANES> as core::ops::$trait_assign>::$fn_assign(&mut a, b); a }, + &$scalar_fn, + &|x, y| x.iter().zip(y.iter()).all(|(x, y)| $check_fn(*x, *y)), + ) + } + } + } + }; + { $scalar:ty, $trait:ident :: $fn:ident, $trait_assign:ident :: $fn_assign:ident, $check_fn:expr } => { + impl_binary_checked_op_test! { $scalar, $trait::$fn, $trait_assign::$fn_assign, <$scalar as core::ops::$trait>::$fn, $check_fn } + }; +} + +#[macro_export] +macro_rules! impl_common_integer_tests { + { $vector:ident, $scalar:ident } => { + test_helpers::test_lanes! { + fn reduce_sum<const LANES: usize>() { + test_helpers::test_1(&|x| { + test_helpers::prop_assert_biteq! ( + $vector::<LANES>::from_array(x).reduce_sum(), + x.iter().copied().fold(0 as $scalar, $scalar::wrapping_add), + ); + Ok(()) + }); + } + + fn reduce_product<const LANES: usize>() { + test_helpers::test_1(&|x| { + test_helpers::prop_assert_biteq! ( + $vector::<LANES>::from_array(x).reduce_product(), + x.iter().copied().fold(1 as $scalar, $scalar::wrapping_mul), + ); + Ok(()) + }); + } + + fn reduce_and<const LANES: usize>() { + test_helpers::test_1(&|x| { + test_helpers::prop_assert_biteq! ( + $vector::<LANES>::from_array(x).reduce_and(), + x.iter().copied().fold(-1i8 as $scalar, <$scalar as core::ops::BitAnd>::bitand), + ); + Ok(()) + }); + } + + fn reduce_or<const LANES: usize>() { + test_helpers::test_1(&|x| { + test_helpers::prop_assert_biteq! ( + $vector::<LANES>::from_array(x).reduce_or(), + x.iter().copied().fold(0 as $scalar, <$scalar as core::ops::BitOr>::bitor), + ); + Ok(()) + }); + } + + fn reduce_xor<const LANES: usize>() { + test_helpers::test_1(&|x| { + test_helpers::prop_assert_biteq! ( + $vector::<LANES>::from_array(x).reduce_xor(), + x.iter().copied().fold(0 as $scalar, <$scalar as core::ops::BitXor>::bitxor), + ); + Ok(()) + }); + } + + fn reduce_max<const LANES: usize>() { + test_helpers::test_1(&|x| { + test_helpers::prop_assert_biteq! 
( + $vector::<LANES>::from_array(x).reduce_max(), + x.iter().copied().max().unwrap(), + ); + Ok(()) + }); + } + + fn reduce_min<const LANES: usize>() { + test_helpers::test_1(&|x| { + test_helpers::prop_assert_biteq! ( + $vector::<LANES>::from_array(x).reduce_min(), + x.iter().copied().min().unwrap(), + ); + Ok(()) + }); + } + } + } +} + +/// Implement tests for signed integers. +#[macro_export] +macro_rules! impl_signed_tests { + { $scalar:tt } => { + mod $scalar { + use core_simd::simd::SimdInt; + type Vector<const LANES: usize> = core_simd::Simd<Scalar, LANES>; + type Scalar = $scalar; + + impl_common_integer_tests! { Vector, Scalar } + + test_helpers::test_lanes! { + fn neg<const LANES: usize>() { + test_helpers::test_unary_elementwise( + &<Vector::<LANES> as core::ops::Neg>::neg, + &<Scalar as core::ops::Neg>::neg, + &|x| !x.contains(&Scalar::MIN), + ); + } + + fn is_positive<const LANES: usize>() { + test_helpers::test_unary_mask_elementwise( + &Vector::<LANES>::is_positive, + &Scalar::is_positive, + &|_| true, + ); + } + + fn is_negative<const LANES: usize>() { + test_helpers::test_unary_mask_elementwise( + &Vector::<LANES>::is_negative, + &Scalar::is_negative, + &|_| true, + ); + } + + fn signum<const LANES: usize>() { + test_helpers::test_unary_elementwise( + &Vector::<LANES>::signum, + &Scalar::signum, + &|_| true, + ) + } + + fn div_min_may_overflow<const LANES: usize>() { + let a = Vector::<LANES>::splat(Scalar::MIN); + let b = Vector::<LANES>::splat(-1); + assert_eq!(a / b, a); + } + + fn rem_min_may_overflow<const LANES: usize>() { + let a = Vector::<LANES>::splat(Scalar::MIN); + let b = Vector::<LANES>::splat(-1); + assert_eq!(a % b, Vector::<LANES>::splat(0)); + } + + fn simd_min<const LANES: usize>() { + use core_simd::simd::SimdOrd; + let a = Vector::<LANES>::splat(Scalar::MIN); + let b = Vector::<LANES>::splat(0); + assert_eq!(a.simd_min(b), a); + let a = Vector::<LANES>::splat(Scalar::MAX); + let b = Vector::<LANES>::splat(0); + assert_eq!(a.simd_min(b), b); + } + + fn simd_max<const LANES: usize>() { + use core_simd::simd::SimdOrd; + let a = Vector::<LANES>::splat(Scalar::MIN); + let b = Vector::<LANES>::splat(0); + assert_eq!(a.simd_max(b), b); + let a = Vector::<LANES>::splat(Scalar::MAX); + let b = Vector::<LANES>::splat(0); + assert_eq!(a.simd_max(b), a); + } + + fn simd_clamp<const LANES: usize>() { + use core_simd::simd::SimdOrd; + let min = Vector::<LANES>::splat(Scalar::MIN); + let max = Vector::<LANES>::splat(Scalar::MAX); + let zero = Vector::<LANES>::splat(0); + let one = Vector::<LANES>::splat(1); + let negone = Vector::<LANES>::splat(-1); + assert_eq!(zero.simd_clamp(min, max), zero); + assert_eq!(zero.simd_clamp(min, one), zero); + assert_eq!(zero.simd_clamp(one, max), one); + assert_eq!(zero.simd_clamp(min, negone), negone); + } + } + + test_helpers::test_lanes_panic! { + fn div_by_all_zeros_panics<const LANES: usize>() { + let a = Vector::<LANES>::splat(42); + let b = Vector::<LANES>::splat(0); + let _ = a / b; + } + + fn div_by_one_zero_panics<const LANES: usize>() { + let a = Vector::<LANES>::splat(42); + let mut b = Vector::<LANES>::splat(21); + b[0] = 0 as _; + let _ = a / b; + } + + fn rem_zero_panic<const LANES: usize>() { + let a = Vector::<LANES>::splat(42); + let b = Vector::<LANES>::splat(0); + let _ = a % b; + } + } + + test_helpers::test_lanes! 
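+// A sketch of the rationale for the tests below: unlike scalar division, where
+// both `i32::MIN / -1` and division by zero panic, the vector operators are
+// only expected to panic on a zero divisor; `MIN / -1` wraps instead, as
+// `div_min_may_overflow` above asserts, e.g.
+//
+//     assert_eq!(Simd::<i32, 4>::splat(i32::MIN) / Simd::splat(-1),
+//                Simd::splat(i32::MIN));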
{ + fn div_neg_one_no_panic<const LANES: usize>() { + let a = Vector::<LANES>::splat(42); + let b = Vector::<LANES>::splat(-1); + let _ = a / b; + } + + fn rem_neg_one_no_panic<const LANES: usize>() { + let a = Vector::<LANES>::splat(42); + let b = Vector::<LANES>::splat(-1); + let _ = a % b; + } + } + + impl_binary_op_test!(Scalar, Add::add, AddAssign::add_assign, Scalar::wrapping_add); + impl_binary_op_test!(Scalar, Sub::sub, SubAssign::sub_assign, Scalar::wrapping_sub); + impl_binary_op_test!(Scalar, Mul::mul, MulAssign::mul_assign, Scalar::wrapping_mul); + + // Exclude Div and Rem panicking cases + impl_binary_checked_op_test!(Scalar, Div::div, DivAssign::div_assign, Scalar::wrapping_div, |x, y| y != 0 && !(x == Scalar::MIN && y == -1)); + impl_binary_checked_op_test!(Scalar, Rem::rem, RemAssign::rem_assign, Scalar::wrapping_rem, |x, y| y != 0 && !(x == Scalar::MIN && y == -1)); + + impl_unary_op_test!(Scalar, Not::not); + impl_binary_op_test!(Scalar, BitAnd::bitand, BitAndAssign::bitand_assign); + impl_binary_op_test!(Scalar, BitOr::bitor, BitOrAssign::bitor_assign); + impl_binary_op_test!(Scalar, BitXor::bitxor, BitXorAssign::bitxor_assign); + } + } +} + +/// Implement tests for unsigned integers. +#[macro_export] +macro_rules! impl_unsigned_tests { + { $scalar:tt } => { + mod $scalar { + use core_simd::simd::SimdUint; + type Vector<const LANES: usize> = core_simd::Simd<Scalar, LANES>; + type Scalar = $scalar; + + impl_common_integer_tests! { Vector, Scalar } + + test_helpers::test_lanes_panic! { + fn rem_zero_panic<const LANES: usize>() { + let a = Vector::<LANES>::splat(42); + let b = Vector::<LANES>::splat(0); + let _ = a % b; + } + } + + impl_binary_op_test!(Scalar, Add::add, AddAssign::add_assign, Scalar::wrapping_add); + impl_binary_op_test!(Scalar, Sub::sub, SubAssign::sub_assign, Scalar::wrapping_sub); + impl_binary_op_test!(Scalar, Mul::mul, MulAssign::mul_assign, Scalar::wrapping_mul); + + // Exclude Div and Rem panicking cases + impl_binary_checked_op_test!(Scalar, Div::div, DivAssign::div_assign, Scalar::wrapping_div, |_, y| y != 0); + impl_binary_checked_op_test!(Scalar, Rem::rem, RemAssign::rem_assign, Scalar::wrapping_rem, |_, y| y != 0); + + impl_unary_op_test!(Scalar, Not::not); + impl_binary_op_test!(Scalar, BitAnd::bitand, BitAndAssign::bitand_assign); + impl_binary_op_test!(Scalar, BitOr::bitor, BitOrAssign::bitor_assign); + impl_binary_op_test!(Scalar, BitXor::bitxor, BitXorAssign::bitxor_assign); + } + } +} + +/// Implement tests for floating point numbers. +#[macro_export] +macro_rules! impl_float_tests { + { $scalar:tt, $int_scalar:tt } => { + mod $scalar { + use core_simd::SimdFloat; + type Vector<const LANES: usize> = core_simd::Simd<Scalar, LANES>; + type Scalar = $scalar; + + impl_unary_op_test!(Scalar, Neg::neg); + impl_binary_op_test!(Scalar, Add::add, AddAssign::add_assign); + impl_binary_op_test!(Scalar, Sub::sub, SubAssign::sub_assign); + impl_binary_op_test!(Scalar, Mul::mul, MulAssign::mul_assign); + impl_binary_op_test!(Scalar, Div::div, DivAssign::div_assign); + impl_binary_op_test!(Scalar, Rem::rem, RemAssign::rem_assign); + + test_helpers::test_lanes! 
{ + fn is_sign_positive<const LANES: usize>() { + test_helpers::test_unary_mask_elementwise( + &Vector::<LANES>::is_sign_positive, + &Scalar::is_sign_positive, + &|_| true, + ); + } + + fn is_sign_negative<const LANES: usize>() { + test_helpers::test_unary_mask_elementwise( + &Vector::<LANES>::is_sign_negative, + &Scalar::is_sign_negative, + &|_| true, + ); + } + + fn is_finite<const LANES: usize>() { + test_helpers::test_unary_mask_elementwise( + &Vector::<LANES>::is_finite, + &Scalar::is_finite, + &|_| true, + ); + } + + fn is_infinite<const LANES: usize>() { + test_helpers::test_unary_mask_elementwise( + &Vector::<LANES>::is_infinite, + &Scalar::is_infinite, + &|_| true, + ); + } + + fn is_nan<const LANES: usize>() { + test_helpers::test_unary_mask_elementwise( + &Vector::<LANES>::is_nan, + &Scalar::is_nan, + &|_| true, + ); + } + + fn is_normal<const LANES: usize>() { + test_helpers::test_unary_mask_elementwise( + &Vector::<LANES>::is_normal, + &Scalar::is_normal, + &|_| true, + ); + } + + fn is_subnormal<const LANES: usize>() { + test_helpers::test_unary_mask_elementwise( + &Vector::<LANES>::is_subnormal, + &Scalar::is_subnormal, + &|_| true, + ); + } + + fn abs<const LANES: usize>() { + test_helpers::test_unary_elementwise( + &Vector::<LANES>::abs, + &Scalar::abs, + &|_| true, + ) + } + + fn recip<const LANES: usize>() { + test_helpers::test_unary_elementwise( + &Vector::<LANES>::recip, + &Scalar::recip, + &|_| true, + ) + } + + fn to_degrees<const LANES: usize>() { + test_helpers::test_unary_elementwise( + &Vector::<LANES>::to_degrees, + &Scalar::to_degrees, + &|_| true, + ) + } + + fn to_radians<const LANES: usize>() { + test_helpers::test_unary_elementwise( + &Vector::<LANES>::to_radians, + &Scalar::to_radians, + &|_| true, + ) + } + + fn signum<const LANES: usize>() { + test_helpers::test_unary_elementwise( + &Vector::<LANES>::signum, + &Scalar::signum, + &|_| true, + ) + } + + fn copysign<const LANES: usize>() { + test_helpers::test_binary_elementwise( + &Vector::<LANES>::copysign, + &Scalar::copysign, + &|_, _| true, + ) + } + + fn simd_min<const LANES: usize>() { + // Regular conditions (both values aren't zero) + test_helpers::test_binary_elementwise( + &Vector::<LANES>::simd_min, + &Scalar::min, + // Reject the case where both values are zero with different signs + &|a, b| { + for (a, b) in a.iter().zip(b.iter()) { + if *a == 0. && *b == 0. && a.signum() != b.signum() { + return false; + } + } + true + } + ); + + // Special case where both values are zero + let p_zero = Vector::<LANES>::splat(0.); + let n_zero = Vector::<LANES>::splat(-0.); + assert!(p_zero.simd_min(n_zero).to_array().iter().all(|x| *x == 0.)); + assert!(n_zero.simd_min(p_zero).to_array().iter().all(|x| *x == 0.)); + } + + fn simd_max<const LANES: usize>() { + // Regular conditions (both values aren't zero) + test_helpers::test_binary_elementwise( + &Vector::<LANES>::simd_max, + &Scalar::max, + // Reject the case where both values are zero with different signs + &|a, b| { + for (a, b) in a.iter().zip(b.iter()) { + if *a == 0. && *b == 0. 
&& a.signum() != b.signum() { + return false; + } + } + true + } + ); + + // Special case where both values are zero + let p_zero = Vector::<LANES>::splat(0.); + let n_zero = Vector::<LANES>::splat(-0.); + assert!(p_zero.simd_max(n_zero).to_array().iter().all(|x| *x == 0.)); + assert!(n_zero.simd_max(p_zero).to_array().iter().all(|x| *x == 0.)); + } + + fn simd_clamp<const LANES: usize>() { + test_helpers::test_3(&|value: [Scalar; LANES], mut min: [Scalar; LANES], mut max: [Scalar; LANES]| { + for (min, max) in min.iter_mut().zip(max.iter_mut()) { + if max < min { + core::mem::swap(min, max); + } + if min.is_nan() { + *min = Scalar::NEG_INFINITY; + } + if max.is_nan() { + *max = Scalar::INFINITY; + } + } + + let mut result_scalar = [Scalar::default(); LANES]; + for i in 0..LANES { + result_scalar[i] = value[i].clamp(min[i], max[i]); + } + let result_vector = Vector::from_array(value).simd_clamp(min.into(), max.into()).to_array(); + test_helpers::prop_assert_biteq!(result_scalar, result_vector); + Ok(()) + }) + } + + fn reduce_sum<const LANES: usize>() { + test_helpers::test_1(&|x| { + test_helpers::prop_assert_biteq! ( + Vector::<LANES>::from_array(x).reduce_sum(), + x.iter().sum(), + ); + Ok(()) + }); + } + + fn reduce_product<const LANES: usize>() { + test_helpers::test_1(&|x| { + test_helpers::prop_assert_biteq! ( + Vector::<LANES>::from_array(x).reduce_product(), + x.iter().product(), + ); + Ok(()) + }); + } + + fn reduce_max<const LANES: usize>() { + test_helpers::test_1(&|x| { + let vmax = Vector::<LANES>::from_array(x).reduce_max(); + let smax = x.iter().copied().fold(Scalar::NAN, Scalar::max); + // 0 and -0 are treated the same + if !(x.contains(&0.) && x.contains(&-0.) && vmax.abs() == 0. && smax.abs() == 0.) { + test_helpers::prop_assert_biteq!(vmax, smax); + } + Ok(()) + }); + } + + fn reduce_min<const LANES: usize>() { + test_helpers::test_1(&|x| { + let vmin = Vector::<LANES>::from_array(x).reduce_min(); + let smin = x.iter().copied().fold(Scalar::NAN, Scalar::min); + // 0 and -0 are treated the same + if !(x.contains(&0.) && x.contains(&-0.) && vmin.abs() == 0. && smin.abs() == 0.) { + test_helpers::prop_assert_biteq!(vmin, smin); + } + Ok(()) + }); + } + } + + #[cfg(feature = "std")] + mod std { + use std_float::StdFloat; + + use super::*; + test_helpers::test_lanes! { + fn sqrt<const LANES: usize>() { + test_helpers::test_unary_elementwise( + &Vector::<LANES>::sqrt, + &Scalar::sqrt, + &|_| true, + ) + } + + fn mul_add<const LANES: usize>() { + test_helpers::test_ternary_elementwise( + &Vector::<LANES>::mul_add, + &Scalar::mul_add, + &|_, _, _| true, + ) + } + } + } + } + } +} diff --git a/library/portable-simd/crates/core_simd/tests/round.rs b/library/portable-simd/crates/core_simd/tests/round.rs new file mode 100644 index 000000000..484fd5bf4 --- /dev/null +++ b/library/portable-simd/crates/core_simd/tests/round.rs @@ -0,0 +1,85 @@ +#![feature(portable_simd)] + +macro_rules! float_rounding_test { + { $scalar:tt, $int_scalar:tt } => { + mod $scalar { + use std_float::StdFloat; + + type Vector<const LANES: usize> = core_simd::Simd<$scalar, LANES>; + type Scalar = $scalar; + type IntScalar = $int_scalar; + + test_helpers::test_lanes! 
{ + fn ceil<const LANES: usize>() { + test_helpers::test_unary_elementwise( + &Vector::<LANES>::ceil, + &Scalar::ceil, + &|_| true, + ) + } + + fn floor<const LANES: usize>() { + test_helpers::test_unary_elementwise( + &Vector::<LANES>::floor, + &Scalar::floor, + &|_| true, + ) + } + + fn round<const LANES: usize>() { + test_helpers::test_unary_elementwise( + &Vector::<LANES>::round, + &Scalar::round, + &|_| true, + ) + } + + fn trunc<const LANES: usize>() { + test_helpers::test_unary_elementwise( + &Vector::<LANES>::trunc, + &Scalar::trunc, + &|_| true, + ) + } + + fn fract<const LANES: usize>() { + test_helpers::test_unary_elementwise( + &Vector::<LANES>::fract, + &Scalar::fract, + &|_| true, + ) + } + } + + test_helpers::test_lanes! { + fn to_int_unchecked<const LANES: usize>() { + // The maximum integer that can be represented by the equivalently sized float has + // all of the mantissa digits set to 1, pushed up to the MSB. + const ALL_MANTISSA_BITS: IntScalar = ((1 << <Scalar>::MANTISSA_DIGITS) - 1); + const MAX_REPRESENTABLE_VALUE: Scalar = + (ALL_MANTISSA_BITS << (core::mem::size_of::<Scalar>() * 8 - <Scalar>::MANTISSA_DIGITS as usize - 1)) as Scalar; + + let mut runner = test_helpers::make_runner(); + runner.run( + &test_helpers::array::UniformArrayStrategy::new(-MAX_REPRESENTABLE_VALUE..MAX_REPRESENTABLE_VALUE), + |x| { + let result_1 = unsafe { Vector::from_array(x).to_int_unchecked::<IntScalar>().to_array() }; + let result_2 = { + let mut result: [IntScalar; LANES] = [0; LANES]; + for (i, o) in x.iter().zip(result.iter_mut()) { + *o = unsafe { i.to_int_unchecked::<IntScalar>() }; + } + result + }; + test_helpers::prop_assert_biteq!(result_1, result_2); + Ok(()) + }, + ).unwrap(); + } + } + } + } +} + +float_rounding_test! { f32, i32 } +float_rounding_test! 
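+// Worked example for the f32/i32 instantiation above: `MANTISSA_DIGITS` is 24,
+// so `ALL_MANTISSA_BITS = (1 << 24) - 1 = 0xFF_FFFF`; shifting left by
+// 32 - 24 - 1 = 7 bits gives `MAX_REPRESENTABLE_VALUE = 2147483520.0`
+// (`i32::MAX - 127`), the largest f32 that still converts to i32 without
+// overflow.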
{ f64, i64 } diff --git a/library/portable-simd/crates/core_simd/tests/swizzle.rs b/library/portable-simd/crates/core_simd/tests/swizzle.rs new file mode 100644 index 000000000..51c63611a --- /dev/null +++ b/library/portable-simd/crates/core_simd/tests/swizzle.rs @@ -0,0 +1,62 @@ +#![feature(portable_simd)] +use core_simd::{Simd, Swizzle}; + +#[cfg(target_arch = "wasm32")] +use wasm_bindgen_test::*; + +#[cfg(target_arch = "wasm32")] +wasm_bindgen_test_configure!(run_in_browser); + +#[test] +#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] +fn swizzle() { + struct Index; + impl Swizzle<4, 4> for Index { + const INDEX: [usize; 4] = [2, 1, 3, 0]; + } + impl Swizzle<4, 2> for Index { + const INDEX: [usize; 2] = [1, 1]; + } + + let vector = Simd::from_array([2, 4, 1, 9]); + assert_eq!(Index::swizzle(vector).to_array(), [1, 4, 9, 2]); + assert_eq!(Index::swizzle(vector).to_array(), [4, 4]); +} + +#[test] +#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] +fn reverse() { + let a = Simd::from_array([1, 2, 3, 4]); + assert_eq!(a.reverse().to_array(), [4, 3, 2, 1]); +} + +#[test] +#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] +fn rotate() { + let a = Simd::from_array([1, 2, 3, 4]); + assert_eq!(a.rotate_lanes_left::<0>().to_array(), [1, 2, 3, 4]); + assert_eq!(a.rotate_lanes_left::<1>().to_array(), [2, 3, 4, 1]); + assert_eq!(a.rotate_lanes_left::<2>().to_array(), [3, 4, 1, 2]); + assert_eq!(a.rotate_lanes_left::<3>().to_array(), [4, 1, 2, 3]); + assert_eq!(a.rotate_lanes_left::<4>().to_array(), [1, 2, 3, 4]); + assert_eq!(a.rotate_lanes_left::<5>().to_array(), [2, 3, 4, 1]); + assert_eq!(a.rotate_lanes_right::<0>().to_array(), [1, 2, 3, 4]); + assert_eq!(a.rotate_lanes_right::<1>().to_array(), [4, 1, 2, 3]); + assert_eq!(a.rotate_lanes_right::<2>().to_array(), [3, 4, 1, 2]); + assert_eq!(a.rotate_lanes_right::<3>().to_array(), [2, 3, 4, 1]); + assert_eq!(a.rotate_lanes_right::<4>().to_array(), [1, 2, 3, 4]); + assert_eq!(a.rotate_lanes_right::<5>().to_array(), [4, 1, 2, 3]); +} + +#[test] +#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] +fn interleave() { + let a = Simd::from_array([0, 1, 2, 3, 4, 5, 6, 7]); + let b = Simd::from_array([8, 9, 10, 11, 12, 13, 14, 15]); + let (lo, hi) = a.interleave(b); + assert_eq!(lo.to_array(), [0, 8, 1, 9, 2, 10, 3, 11]); + assert_eq!(hi.to_array(), [4, 12, 5, 13, 6, 14, 7, 15]); + let (even, odd) = lo.deinterleave(hi); + assert_eq!(even, a); + assert_eq!(odd, b); +} diff --git a/library/portable-simd/crates/core_simd/tests/to_bytes.rs b/library/portable-simd/crates/core_simd/tests/to_bytes.rs new file mode 100644 index 000000000..debb4335e --- /dev/null +++ b/library/portable-simd/crates/core_simd/tests/to_bytes.rs @@ -0,0 +1,14 @@ +#![feature(portable_simd, generic_const_exprs, adt_const_params)] +#![allow(incomplete_features)] +#![cfg(feature = "generic_const_exprs")] + +use core_simd::Simd; + +#[test] +fn byte_convert() { + let int = Simd::<u32, 2>::from_array([0xdeadbeef, 0x8badf00d]); + let bytes = int.to_ne_bytes(); + assert_eq!(int[0].to_ne_bytes(), bytes[..4]); + assert_eq!(int[1].to_ne_bytes(), bytes[4..]); + assert_eq!(Simd::<u32, 2>::from_ne_bytes(bytes), int); +} diff --git a/library/portable-simd/crates/core_simd/tests/u16_ops.rs b/library/portable-simd/crates/core_simd/tests/u16_ops.rs new file mode 100644 index 000000000..9ae3bd6a4 --- /dev/null +++ b/library/portable-simd/crates/core_simd/tests/u16_ops.rs @@ -0,0 +1,5 @@ +#![feature(portable_simd)] + +#[macro_use] +mod ops_macros; +impl_unsigned_tests! 
{ u16 } diff --git a/library/portable-simd/crates/core_simd/tests/u32_ops.rs b/library/portable-simd/crates/core_simd/tests/u32_ops.rs new file mode 100644 index 000000000..de34b73d6 --- /dev/null +++ b/library/portable-simd/crates/core_simd/tests/u32_ops.rs @@ -0,0 +1,5 @@ +#![feature(portable_simd)] + +#[macro_use] +mod ops_macros; +impl_unsigned_tests! { u32 } diff --git a/library/portable-simd/crates/core_simd/tests/u64_ops.rs b/library/portable-simd/crates/core_simd/tests/u64_ops.rs new file mode 100644 index 000000000..8ee5a318c --- /dev/null +++ b/library/portable-simd/crates/core_simd/tests/u64_ops.rs @@ -0,0 +1,5 @@ +#![feature(portable_simd)] + +#[macro_use] +mod ops_macros; +impl_unsigned_tests! { u64 } diff --git a/library/portable-simd/crates/core_simd/tests/u8_ops.rs b/library/portable-simd/crates/core_simd/tests/u8_ops.rs new file mode 100644 index 000000000..6d7211121 --- /dev/null +++ b/library/portable-simd/crates/core_simd/tests/u8_ops.rs @@ -0,0 +1,5 @@ +#![feature(portable_simd)] + +#[macro_use] +mod ops_macros; +impl_unsigned_tests! { u8 } diff --git a/library/portable-simd/crates/core_simd/tests/usize_ops.rs b/library/portable-simd/crates/core_simd/tests/usize_ops.rs new file mode 100644 index 000000000..9c7b1687a --- /dev/null +++ b/library/portable-simd/crates/core_simd/tests/usize_ops.rs @@ -0,0 +1,5 @@ +#![feature(portable_simd)] + +#[macro_use] +mod ops_macros; +impl_unsigned_tests! { usize } diff --git a/library/portable-simd/crates/core_simd/webdriver.json b/library/portable-simd/crates/core_simd/webdriver.json new file mode 100644 index 000000000..f1d5734f1 --- /dev/null +++ b/library/portable-simd/crates/core_simd/webdriver.json @@ -0,0 +1,7 @@ +{ + "goog:chromeOptions": { + "args": [ + "--enable-features=WebAssemblySimd" + ] + } +} diff --git a/library/portable-simd/crates/std_float/Cargo.toml b/library/portable-simd/crates/std_float/Cargo.toml new file mode 100644 index 000000000..84c69774c --- /dev/null +++ b/library/portable-simd/crates/std_float/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "std_float" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +core_simd = { path = "../core_simd", default-features = false } + +[features] +default = ["as_crate"] +as_crate = [] diff --git a/library/portable-simd/crates/std_float/src/lib.rs b/library/portable-simd/crates/std_float/src/lib.rs new file mode 100644 index 000000000..4bd4d4c05 --- /dev/null +++ b/library/portable-simd/crates/std_float/src/lib.rs @@ -0,0 +1,165 @@ +#![cfg_attr(feature = "as_crate", no_std)] // We are std! 
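+// (A sketch of the intended dual build: with the default `as_crate` feature
+// this file compiles as a standalone no_std crate on top of `core_simd`, and
+// users import `std_float::StdFloat`; compiled without the feature, as part of
+// std itself, the same trait is expected to surface through `std::simd`.)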
+#![cfg_attr( + feature = "as_crate", + feature(platform_intrinsics), + feature(portable_simd) +)] +#[cfg(not(feature = "as_crate"))] +use core::simd; +#[cfg(feature = "as_crate")] +use core_simd::simd; + +use simd::{LaneCount, Simd, SupportedLaneCount}; + +#[cfg(feature = "as_crate")] +mod experimental { + pub trait Sealed {} +} + +#[cfg(feature = "as_crate")] +use experimental as sealed; + +use crate::sealed::Sealed; + +// "platform intrinsics" are essentially "codegen intrinsics" +// each of these may be scalarized and lowered to a libm call +extern "platform-intrinsic" { + // ceil + fn simd_ceil<T>(x: T) -> T; + + // floor + fn simd_floor<T>(x: T) -> T; + + // round + fn simd_round<T>(x: T) -> T; + + // trunc + fn simd_trunc<T>(x: T) -> T; + + // fsqrt + fn simd_fsqrt<T>(x: T) -> T; + + // fma + fn simd_fma<T>(x: T, y: T, z: T) -> T; +} + +/// This trait provides a possibly-temporary implementation of float functions +/// that may, in the absence of hardware support, canonicalize to calling an +/// operating system's `math.h` dynamically-loaded library (also known as a +/// shared object). As these conditionally require runtime support, they +/// should only appear in binaries built assuming OS support: `std`. +/// +/// However, there is no reason SIMD types, in general, need OS support, +/// as for many architectures an embedded binary may simply configure that +/// support itself. This means these types must be visible in `core` +/// but have these functions available in `std`. +/// +/// [`f32`] and [`f64`] achieve a similar trick by using "lang items", but +/// due to compiler limitations, it is harder to implement this approach for +/// abstract data types like [`Simd`]. From that need, this trait is born. +/// +/// It is possible this trait will be replaced in some manner in the future, +/// when either the compiler or its supporting runtime functions are improved. +/// For now this trait is available to permit experimentation with SIMD float +/// operations that may lack hardware support, such as `mul_add`. +pub trait StdFloat: Sealed + Sized { + /// Fused multiply-add. Computes `(self * a) + b` with only one rounding error, + /// yielding a more accurate result than an unfused multiply-add. + /// + /// Using `mul_add` *may* be more performant than an unfused multiply-add if the target + /// architecture has a dedicated `fma` CPU instruction. However, this is not always + /// true, and will be heavily dependent on designing algorithms with specific target + /// hardware in mind. + #[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] + fn mul_add(self, a: Self, b: Self) -> Self { + unsafe { simd_fma(self, a, b) } + } + + /// Produces a vector where every lane has the square root value + /// of the equivalently-indexed lane in `self`. + #[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] + fn sqrt(self) -> Self { + unsafe { simd_fsqrt(self) } + } + + /// Returns the smallest integer greater than or equal to each lane. + #[must_use = "method returns a new vector and does not mutate the original value"] + #[inline] + fn ceil(self) -> Self { + unsafe { simd_ceil(self) } + } + + /// Returns the largest integer value less than or equal to each lane. + #[must_use = "method returns a new vector and does not mutate the original value"] + #[inline] + fn floor(self) -> Self { + unsafe { simd_floor(self) } + } + + /// Rounds to the nearest integer value. Ties round away from zero. 
+ #[must_use = "method returns a new vector and does not mutate the original value"] + #[inline] + fn round(self) -> Self { + unsafe { simd_round(self) } + } + + /// Returns the floating point's integer value, with its fractional part removed. + #[must_use = "method returns a new vector and does not mutate the original value"] + #[inline] + fn trunc(self) -> Self { + unsafe { simd_trunc(self) } + } + + /// Returns the floating point's fractional value, with its integer part removed. + #[must_use = "method returns a new vector and does not mutate the original value"] + fn fract(self) -> Self; +} + +impl<const N: usize> Sealed for Simd<f32, N> where LaneCount<N>: SupportedLaneCount {} +impl<const N: usize> Sealed for Simd<f64, N> where LaneCount<N>: SupportedLaneCount {} + +// We can safely just use all the defaults. +impl<const N: usize> StdFloat for Simd<f32, N> +where + LaneCount<N>: SupportedLaneCount, +{ + /// Returns the floating point's fractional value, with its integer part removed. + #[must_use = "method returns a new vector and does not mutate the original value"] + #[inline] + fn fract(self) -> Self { + self - self.trunc() + } +} + +impl<const N: usize> StdFloat for Simd<f64, N> +where + LaneCount<N>: SupportedLaneCount, +{ + /// Returns the floating point's fractional value, with its integer part removed. + #[must_use = "method returns a new vector and does not mutate the original value"] + #[inline] + fn fract(self) -> Self { + self - self.trunc() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use simd::*; + + #[test] + fn everything_works() { + let x = f32x4::from_array([0.1, 0.5, 0.6, -1.5]); + let x2 = x + x; + let _xc = x.ceil(); + let _xf = x.floor(); + let _xr = x.round(); + let _xt = x.trunc(); + let _xfma = x.mul_add(x, x); + let _xsqrt = x.sqrt(); + let _ = x2.abs() * x2; + } +} diff --git a/library/portable-simd/crates/test_helpers/Cargo.toml b/library/portable-simd/crates/test_helpers/Cargo.toml new file mode 100644 index 000000000..a04b0961d --- /dev/null +++ b/library/portable-simd/crates/test_helpers/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "test_helpers" +version = "0.1.0" +edition = "2021" +publish = false + +[dependencies.proptest] +version = "0.10" +default-features = false +features = ["alloc"] diff --git a/library/portable-simd/crates/test_helpers/src/array.rs b/library/portable-simd/crates/test_helpers/src/array.rs new file mode 100644 index 000000000..5ffc92269 --- /dev/null +++ b/library/portable-simd/crates/test_helpers/src/array.rs @@ -0,0 +1,97 @@ +//! Generic-length array strategy. 
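+//
+// A usage sketch (types as defined below): proptest 0.10 cannot provide a
+// strategy for `[T; LANES]` generically over a const parameter, so this module
+// builds one from any element strategy, roughly
+//
+//     let arrays = UniformArrayStrategy::<_, [f32; 16]>::new(proptest::num::f32::ANY);
+//
+// which a proptest `TestRunner` can then drive, as the rounding tests in
+// core_simd/tests/round.rs do.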
diff --git a/library/portable-simd/crates/test_helpers/Cargo.toml b/library/portable-simd/crates/test_helpers/Cargo.toml
new file mode 100644
index 000000000..a04b0961d
--- /dev/null
+++ b/library/portable-simd/crates/test_helpers/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+name = "test_helpers"
+version = "0.1.0"
+edition = "2021"
+publish = false
+
+[dependencies.proptest]
+version = "0.10"
+default-features = false
+features = ["alloc"]
diff --git a/library/portable-simd/crates/test_helpers/src/array.rs b/library/portable-simd/crates/test_helpers/src/array.rs
new file mode 100644
index 000000000..5ffc92269
--- /dev/null
+++ b/library/portable-simd/crates/test_helpers/src/array.rs
@@ -0,0 +1,97 @@
+//! Generic-length array strategy.
+
+// Adapted from proptest's array code
+// Copyright 2017 Jason Lingle
+
+use core::{marker::PhantomData, mem::MaybeUninit};
+use proptest::{
+    strategy::{NewTree, Strategy, ValueTree},
+    test_runner::TestRunner,
+};
+
+#[must_use = "strategies do nothing unless used"]
+#[derive(Clone, Copy, Debug)]
+pub struct UniformArrayStrategy<S, T> {
+    strategy: S,
+    _marker: PhantomData<T>,
+}
+
+impl<S, T> UniformArrayStrategy<S, T> {
+    pub const fn new(strategy: S) -> Self {
+        Self {
+            strategy,
+            _marker: PhantomData,
+        }
+    }
+}
+
+pub struct ArrayValueTree<T> {
+    tree: T,
+    shrinker: usize,
+    last_shrinker: Option<usize>,
+}
+
+impl<T, S, const LANES: usize> Strategy for UniformArrayStrategy<S, [T; LANES]>
+where
+    T: core::fmt::Debug,
+    S: Strategy<Value = T>,
+{
+    type Tree = ArrayValueTree<[S::Tree; LANES]>;
+    type Value = [T; LANES];
+
+    fn new_tree(&self, runner: &mut TestRunner) -> NewTree<Self> {
+        let tree: [S::Tree; LANES] = unsafe {
+            // SAFETY: an array of `MaybeUninit` needs no initialization, and
+            // every element is written before the array is reinterpreted.
+            // `transmute_copy` stands in for `transmute`, which cannot yet
+            // handle const-generic array lengths.
+            let mut tree: [MaybeUninit<S::Tree>; LANES] = MaybeUninit::uninit().assume_init();
+            for t in tree.iter_mut() {
+                *t = MaybeUninit::new(self.strategy.new_tree(runner)?)
+            }
+            core::mem::transmute_copy(&tree)
+        };
+        Ok(ArrayValueTree {
+            tree,
+            shrinker: 0,
+            last_shrinker: None,
+        })
+    }
+}
+
+impl<T: ValueTree, const LANES: usize> ValueTree for ArrayValueTree<[T; LANES]> {
+    type Value = [T::Value; LANES];
+
+    fn current(&self) -> Self::Value {
+        unsafe {
+            // SAFETY: as in `new_tree`, every element is initialized before
+            // the `MaybeUninit` array is reinterpreted via `transmute_copy`.
+            let mut value: [MaybeUninit<T::Value>; LANES] = MaybeUninit::uninit().assume_init();
+            for (tree_elem, value_elem) in self.tree.iter().zip(value.iter_mut()) {
+                *value_elem = MaybeUninit::new(tree_elem.current());
+            }
+            core::mem::transmute_copy(&value)
+        }
+    }
+
+    fn simplify(&mut self) -> bool {
+        // Simplify lanes left to right, remembering the last one that moved.
+        while self.shrinker < LANES {
+            if self.tree[self.shrinker].simplify() {
+                self.last_shrinker = Some(self.shrinker);
+                return true;
+            } else {
+                self.shrinker += 1;
+            }
+        }
+
+        false
+    }
+
+    fn complicate(&mut self) -> bool {
+        if let Some(shrinker) = self.last_shrinker {
+            self.shrinker = shrinker;
+            if self.tree[shrinker].complicate() {
+                true
+            } else {
+                self.last_shrinker = None;
+                false
+            }
+        } else {
+            false
+        }
+    }
+}
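A quick sketch of what this strategy provides (illustrative, not part of the patch): proptest's stock array strategies cover only a fixed set of lengths, while `UniformArrayStrategy` works for any const-generic length.

```rust
use proptest::strategy::{Strategy, ValueTree};
use proptest::test_runner::TestRunner;
use test_helpers::array::UniformArrayStrategy;

fn main() {
    // A strategy for `[i32; 64]`, a length proptest does not provide out of the box.
    let strategy = UniformArrayStrategy::<_, [i32; 64]>::new(proptest::num::i32::ANY);
    let mut runner = TestRunner::default();
    let value: [i32; 64] = strategy.new_tree(&mut runner).unwrap().current();
    assert_eq!(value.len(), 64);
}
```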
diff --git a/library/portable-simd/crates/test_helpers/src/biteq.rs b/library/portable-simd/crates/test_helpers/src/biteq.rs
new file mode 100644
index 000000000..00350e224
--- /dev/null
+++ b/library/portable-simd/crates/test_helpers/src/biteq.rs
@@ -0,0 +1,106 @@
+//! Compare numeric types by exact bit value.
+
+pub trait BitEq {
+    fn biteq(&self, other: &Self) -> bool;
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result;
+}
+
+impl BitEq for bool {
+    fn biteq(&self, other: &Self) -> bool {
+        self == other
+    }
+
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        write!(f, "{:?}", self)
+    }
+}
+
+macro_rules! impl_integer_biteq {
+    { $($type:ty),* } => {
+        $(
+            impl BitEq for $type {
+                fn biteq(&self, other: &Self) -> bool {
+                    self == other
+                }
+
+                fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+                    write!(f, "{:?} ({:x})", self, self)
+                }
+            }
+        )*
+    };
+}
+
+impl_integer_biteq! { u8, u16, u32, u64, u128, usize, i8, i16, i32, i64, i128, isize }
+
+macro_rules! impl_float_biteq {
+    { $($type:ty),* } => {
+        $(
+            impl BitEq for $type {
+                fn biteq(&self, other: &Self) -> bool {
+                    if self.is_nan() && other.is_nan() {
+                        true // exact NaN bits don't matter
+                    } else {
+                        self.to_bits() == other.to_bits()
+                    }
+                }
+
+                fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+                    write!(f, "{:?} ({:x})", self, self.to_bits())
+                }
+            }
+        )*
+    };
+}
+
+impl_float_biteq! { f32, f64 }
+
+impl<T: BitEq, const N: usize> BitEq for [T; N] {
+    fn biteq(&self, other: &Self) -> bool {
+        self.iter()
+            .zip(other.iter())
+            .fold(true, |value, (left, right)| value && left.biteq(right))
+    }
+
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        #[repr(transparent)]
+        struct Wrapper<'a, T: BitEq>(&'a T);
+
+        impl<T: BitEq> core::fmt::Debug for Wrapper<'_, T> {
+            fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+                self.0.fmt(f)
+            }
+        }
+
+        f.debug_list()
+            .entries(self.iter().map(|x| Wrapper(x)))
+            .finish()
+    }
+}
+
+#[doc(hidden)]
+pub struct BitEqWrapper<'a, T>(pub &'a T);
+
+impl<T: BitEq> PartialEq for BitEqWrapper<'_, T> {
+    fn eq(&self, other: &Self) -> bool {
+        self.0.biteq(other.0)
+    }
+}
+
+impl<T: BitEq> core::fmt::Debug for BitEqWrapper<'_, T> {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        self.0.fmt(f)
+    }
+}
+
+#[macro_export]
+macro_rules! prop_assert_biteq {
+    { $a:expr, $b:expr $(,)? } => {
+        {
+            use $crate::biteq::BitEqWrapper;
+            let a = $a;
+            let b = $b;
+            proptest::prop_assert_eq!(BitEqWrapper(&a), BitEqWrapper(&b));
+        }
+    }
+}
diff --git a/library/portable-simd/crates/test_helpers/src/lib.rs b/library/portable-simd/crates/test_helpers/src/lib.rs
new file mode 100644
index 000000000..141bee18a
--- /dev/null
+++ b/library/portable-simd/crates/test_helpers/src/lib.rs
@@ -0,0 +1,463 @@
+pub mod array;
+
+#[cfg(target_arch = "wasm32")]
+pub mod wasm;
+
+#[macro_use]
+pub mod biteq;
+
+/// Specifies the default strategy for testing a type.
+///
+/// This strategy should be what "makes sense" to test.
+pub trait DefaultStrategy {
+    type Strategy: proptest::strategy::Strategy<Value = Self>;
+    fn default_strategy() -> Self::Strategy;
+}
+
+macro_rules! impl_num {
+    { $type:tt } => {
+        impl DefaultStrategy for $type {
+            type Strategy = proptest::num::$type::Any;
+            fn default_strategy() -> Self::Strategy {
+                proptest::num::$type::ANY
+            }
+        }
+    }
+}
+
+impl_num! { i8 }
+impl_num! { i16 }
+impl_num! { i32 }
+impl_num! { i64 }
+impl_num! { isize }
+impl_num! { u8 }
+impl_num! { u16 }
+impl_num! { u32 }
+impl_num! { u64 }
+impl_num! { usize }
+impl_num! { f32 }
+impl_num! { f64 }
+
+#[cfg(not(target_arch = "wasm32"))]
+impl DefaultStrategy for u128 {
+    type Strategy = proptest::num::u128::Any;
+    fn default_strategy() -> Self::Strategy {
+        proptest::num::u128::ANY
+    }
+}
+
+#[cfg(not(target_arch = "wasm32"))]
+impl DefaultStrategy for i128 {
+    type Strategy = proptest::num::i128::Any;
+    fn default_strategy() -> Self::Strategy {
+        proptest::num::i128::ANY
+    }
+}
+
+#[cfg(target_arch = "wasm32")]
+impl DefaultStrategy for u128 {
+    type Strategy = crate::wasm::u128::Any;
+    fn default_strategy() -> Self::Strategy {
+        crate::wasm::u128::ANY
+    }
+}
+
+#[cfg(target_arch = "wasm32")]
+impl DefaultStrategy for i128 {
+    type Strategy = crate::wasm::i128::Any;
+    fn default_strategy() -> Self::Strategy {
+        crate::wasm::i128::ANY
+    }
+}
+
+impl<T: core::fmt::Debug + DefaultStrategy, const LANES: usize> DefaultStrategy for [T; LANES] {
+    type Strategy = crate::array::UniformArrayStrategy<T::Strategy, Self>;
+    fn default_strategy() -> Self::Strategy {
+        Self::Strategy::new(T::default_strategy())
+    }
+}
+
+#[cfg(not(miri))]
+pub fn make_runner() -> proptest::test_runner::TestRunner {
+    Default::default()
+}
+#[cfg(miri)]
+pub fn make_runner() -> proptest::test_runner::TestRunner {
+    // Only run a few tests on Miri
+    proptest::test_runner::TestRunner::new(proptest::test_runner::Config::with_cases(4))
+}
+
+/// Test a function that takes a single value.
+pub fn test_1<A: core::fmt::Debug + DefaultStrategy>(
+    f: &dyn Fn(A) -> proptest::test_runner::TestCaseResult,
+) {
+    let mut runner = make_runner();
+    runner.run(&A::default_strategy(), f).unwrap();
+}
+
+/// Test a function that takes two values.
+pub fn test_2<A: core::fmt::Debug + DefaultStrategy, B: core::fmt::Debug + DefaultStrategy>(
+    f: &dyn Fn(A, B) -> proptest::test_runner::TestCaseResult,
+) {
+    let mut runner = make_runner();
+    runner
+        .run(&(A::default_strategy(), B::default_strategy()), |(a, b)| {
+            f(a, b)
+        })
+        .unwrap();
+}
+
+/// Test a function that takes three values.
+pub fn test_3<
+    A: core::fmt::Debug + DefaultStrategy,
+    B: core::fmt::Debug + DefaultStrategy,
+    C: core::fmt::Debug + DefaultStrategy,
+>(
+    f: &dyn Fn(A, B, C) -> proptest::test_runner::TestCaseResult,
+) {
+    let mut runner = make_runner();
+    runner
+        .run(
+            &(
+                A::default_strategy(),
+                B::default_strategy(),
+                C::default_strategy(),
+            ),
+            |(a, b, c)| f(a, b, c),
+        )
+        .unwrap();
+}
+
+/// Test a unary vector function against a unary scalar function, applied elementwise.
+#[inline(never)]
+pub fn test_unary_elementwise<Scalar, ScalarResult, Vector, VectorResult, const LANES: usize>(
+    fv: &dyn Fn(Vector) -> VectorResult,
+    fs: &dyn Fn(Scalar) -> ScalarResult,
+    check: &dyn Fn([Scalar; LANES]) -> bool,
+) where
+    Scalar: Copy + Default + core::fmt::Debug + DefaultStrategy,
+    ScalarResult: Copy + Default + biteq::BitEq + core::fmt::Debug + DefaultStrategy,
+    Vector: Into<[Scalar; LANES]> + From<[Scalar; LANES]> + Copy,
+    VectorResult: Into<[ScalarResult; LANES]> + From<[ScalarResult; LANES]> + Copy,
+{
+    test_1(&|x: [Scalar; LANES]| {
+        proptest::prop_assume!(check(x));
+        let result_1: [ScalarResult; LANES] = fv(x.into()).into();
+        let result_2: [ScalarResult; LANES] = {
+            let mut result = [ScalarResult::default(); LANES];
+            for (i, o) in x.iter().zip(result.iter_mut()) {
+                *o = fs(*i);
+            }
+            result
+        };
+        crate::prop_assert_biteq!(result_1, result_2);
+        Ok(())
+    });
+}
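+
+// Illustrative example (not part of the patch): a typical caller checks a
+// vector operation against its scalar counterpart, using `check` to filter
+// out inputs the comparison should skip (here, nothing is filtered). The
+// `abs` pair below is hypothetical and stands in for any unary op:
+//
+//     test_unary_elementwise(
+//         &core_simd::Simd::<f32, 4>::abs,
+//         &f32::abs,
+//         &|_| true,
+//     );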
+
+/// Test a unary vector function that returns a mask against a unary scalar
+/// function that returns `bool`, applied elementwise.
+#[inline(never)]
+pub fn test_unary_mask_elementwise<Scalar, Vector, Mask, const LANES: usize>(
+    fv: &dyn Fn(Vector) -> Mask,
+    fs: &dyn Fn(Scalar) -> bool,
+    check: &dyn Fn([Scalar; LANES]) -> bool,
+) where
+    Scalar: Copy + Default + core::fmt::Debug + DefaultStrategy,
+    Vector: Into<[Scalar; LANES]> + From<[Scalar; LANES]> + Copy,
+    Mask: Into<[bool; LANES]> + From<[bool; LANES]> + Copy,
+{
+    test_1(&|x: [Scalar; LANES]| {
+        proptest::prop_assume!(check(x));
+        let result_1: [bool; LANES] = fv(x.into()).into();
+        let result_2: [bool; LANES] = {
+            let mut result = [false; LANES];
+            for (i, o) in x.iter().zip(result.iter_mut()) {
+                *o = fs(*i);
+            }
+            result
+        };
+        crate::prop_assert_biteq!(result_1, result_2);
+        Ok(())
+    });
+}
+
+/// Test a binary vector function against a binary scalar function, applied elementwise.
+#[inline(never)]
+pub fn test_binary_elementwise<
+    Scalar1,
+    Scalar2,
+    ScalarResult,
+    Vector1,
+    Vector2,
+    VectorResult,
+    const LANES: usize,
+>(
+    fv: &dyn Fn(Vector1, Vector2) -> VectorResult,
+    fs: &dyn Fn(Scalar1, Scalar2) -> ScalarResult,
+    check: &dyn Fn([Scalar1; LANES], [Scalar2; LANES]) -> bool,
+) where
+    Scalar1: Copy + Default + core::fmt::Debug + DefaultStrategy,
+    Scalar2: Copy + Default + core::fmt::Debug + DefaultStrategy,
+    ScalarResult: Copy + Default + biteq::BitEq + core::fmt::Debug + DefaultStrategy,
+    Vector1: Into<[Scalar1; LANES]> + From<[Scalar1; LANES]> + Copy,
+    Vector2: Into<[Scalar2; LANES]> + From<[Scalar2; LANES]> + Copy,
+    VectorResult: Into<[ScalarResult; LANES]> + From<[ScalarResult; LANES]> + Copy,
+{
+    test_2(&|x: [Scalar1; LANES], y: [Scalar2; LANES]| {
+        proptest::prop_assume!(check(x, y));
+        let result_1: [ScalarResult; LANES] = fv(x.into(), y.into()).into();
+        let result_2: [ScalarResult; LANES] = {
+            let mut result = [ScalarResult::default(); LANES];
+            for ((i1, i2), o) in x.iter().zip(y.iter()).zip(result.iter_mut()) {
+                *o = fs(*i1, *i2);
+            }
+            result
+        };
+        crate::prop_assert_biteq!(result_1, result_2);
+        Ok(())
+    });
+}
+
+/// Test a binary vector-scalar function against a binary scalar function, applied elementwise.
+#[inline(never)]
+pub fn test_binary_scalar_rhs_elementwise<
+    Scalar1,
+    Scalar2,
+    ScalarResult,
+    Vector,
+    VectorResult,
+    const LANES: usize,
+>(
+    fv: &dyn Fn(Vector, Scalar2) -> VectorResult,
+    fs: &dyn Fn(Scalar1, Scalar2) -> ScalarResult,
+    check: &dyn Fn([Scalar1; LANES], Scalar2) -> bool,
+) where
+    Scalar1: Copy + Default + core::fmt::Debug + DefaultStrategy,
+    Scalar2: Copy + Default + core::fmt::Debug + DefaultStrategy,
+    ScalarResult: Copy + Default + biteq::BitEq + core::fmt::Debug + DefaultStrategy,
+    Vector: Into<[Scalar1; LANES]> + From<[Scalar1; LANES]> + Copy,
+    VectorResult: Into<[ScalarResult; LANES]> + From<[ScalarResult; LANES]> + Copy,
+{
+    test_2(&|x: [Scalar1; LANES], y: Scalar2| {
+        proptest::prop_assume!(check(x, y));
+        let result_1: [ScalarResult; LANES] = fv(x.into(), y).into();
+        let result_2: [ScalarResult; LANES] = {
+            let mut result = [ScalarResult::default(); LANES];
+            for (i, o) in x.iter().zip(result.iter_mut()) {
+                *o = fs(*i, y);
+            }
+            result
+        };
+        crate::prop_assert_biteq!(result_1, result_2);
+        Ok(())
+    });
+}
+
+/// Test a binary scalar-vector function against a binary scalar function, applied elementwise.
+#[inline(never)] +pub fn test_binary_scalar_lhs_elementwise< + Scalar1, + Scalar2, + ScalarResult, + Vector, + VectorResult, + const LANES: usize, +>( + fv: &dyn Fn(Scalar1, Vector) -> VectorResult, + fs: &dyn Fn(Scalar1, Scalar2) -> ScalarResult, + check: &dyn Fn(Scalar1, [Scalar2; LANES]) -> bool, +) where + Scalar1: Copy + Default + core::fmt::Debug + DefaultStrategy, + Scalar2: Copy + Default + core::fmt::Debug + DefaultStrategy, + ScalarResult: Copy + Default + biteq::BitEq + core::fmt::Debug + DefaultStrategy, + Vector: Into<[Scalar2; LANES]> + From<[Scalar2; LANES]> + Copy, + VectorResult: Into<[ScalarResult; LANES]> + From<[ScalarResult; LANES]> + Copy, +{ + test_2(&|x: Scalar1, y: [Scalar2; LANES]| { + proptest::prop_assume!(check(x, y)); + let result_1: [ScalarResult; LANES] = fv(x, y.into()).into(); + let result_2: [ScalarResult; LANES] = { + let mut result = [ScalarResult::default(); LANES]; + for (i, o) in y.iter().zip(result.iter_mut()) { + *o = fs(x, *i); + } + result + }; + crate::prop_assert_biteq!(result_1, result_2); + Ok(()) + }); +} + +/// Test a ternary vector function against a ternary scalar function, applied elementwise. +#[inline(never)] +pub fn test_ternary_elementwise< + Scalar1, + Scalar2, + Scalar3, + ScalarResult, + Vector1, + Vector2, + Vector3, + VectorResult, + const LANES: usize, +>( + fv: &dyn Fn(Vector1, Vector2, Vector3) -> VectorResult, + fs: &dyn Fn(Scalar1, Scalar2, Scalar3) -> ScalarResult, + check: &dyn Fn([Scalar1; LANES], [Scalar2; LANES], [Scalar3; LANES]) -> bool, +) where + Scalar1: Copy + Default + core::fmt::Debug + DefaultStrategy, + Scalar2: Copy + Default + core::fmt::Debug + DefaultStrategy, + Scalar3: Copy + Default + core::fmt::Debug + DefaultStrategy, + ScalarResult: Copy + Default + biteq::BitEq + core::fmt::Debug + DefaultStrategy, + Vector1: Into<[Scalar1; LANES]> + From<[Scalar1; LANES]> + Copy, + Vector2: Into<[Scalar2; LANES]> + From<[Scalar2; LANES]> + Copy, + Vector3: Into<[Scalar3; LANES]> + From<[Scalar3; LANES]> + Copy, + VectorResult: Into<[ScalarResult; LANES]> + From<[ScalarResult; LANES]> + Copy, +{ + test_3( + &|x: [Scalar1; LANES], y: [Scalar2; LANES], z: [Scalar3; LANES]| { + proptest::prop_assume!(check(x, y, z)); + let result_1: [ScalarResult; LANES] = fv(x.into(), y.into(), z.into()).into(); + let result_2: [ScalarResult; LANES] = { + let mut result = [ScalarResult::default(); LANES]; + for ((i1, (i2, i3)), o) in + x.iter().zip(y.iter().zip(z.iter())).zip(result.iter_mut()) + { + *o = fs(*i1, *i2, *i3); + } + result + }; + crate::prop_assert_biteq!(result_1, result_2); + Ok(()) + }, + ); +} + +/// Expand a const-generic test into separate tests for each possible lane count. +#[macro_export] +macro_rules! 
test_lanes { + { + $(fn $test:ident<const $lanes:ident: usize>() $body:tt)* + } => { + $( + mod $test { + use super::*; + + fn implementation<const $lanes: usize>() + where + core_simd::LaneCount<$lanes>: core_simd::SupportedLaneCount, + $body + + #[cfg(target_arch = "wasm32")] + wasm_bindgen_test::wasm_bindgen_test_configure!(run_in_browser); + + #[test] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] + fn lanes_1() { + implementation::<1>(); + } + + #[test] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] + fn lanes_2() { + implementation::<2>(); + } + + #[test] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] + fn lanes_4() { + implementation::<4>(); + } + + #[test] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] + #[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow + fn lanes_8() { + implementation::<8>(); + } + + #[test] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] + #[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow + fn lanes_16() { + implementation::<16>(); + } + + #[test] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] + #[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow + fn lanes_32() { + implementation::<32>(); + } + + #[test] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] + #[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow + fn lanes_64() { + implementation::<64>(); + } + } + )* + } +} + +/// Expand a const-generic `#[should_panic]` test into separate tests for each possible lane count. +#[macro_export] +macro_rules! test_lanes_panic { + { + $(fn $test:ident<const $lanes:ident: usize>() $body:tt)* + } => { + $( + mod $test { + use super::*; + + fn implementation<const $lanes: usize>() + where + core_simd::LaneCount<$lanes>: core_simd::SupportedLaneCount, + $body + + #[test] + #[should_panic] + fn lanes_1() { + implementation::<1>(); + } + + #[test] + #[should_panic] + fn lanes_2() { + implementation::<2>(); + } + + #[test] + #[should_panic] + fn lanes_4() { + implementation::<4>(); + } + + #[test] + #[should_panic] + fn lanes_8() { + implementation::<8>(); + } + + #[test] + #[should_panic] + fn lanes_16() { + implementation::<16>(); + } + + #[test] + #[should_panic] + fn lanes_32() { + implementation::<32>(); + } + + #[test] + #[should_panic] + fn lanes_64() { + implementation::<64>(); + } + } + )* + } +} diff --git a/library/portable-simd/crates/test_helpers/src/wasm.rs b/library/portable-simd/crates/test_helpers/src/wasm.rs new file mode 100644 index 000000000..3f11d67cb --- /dev/null +++ b/library/portable-simd/crates/test_helpers/src/wasm.rs @@ -0,0 +1,51 @@ +//! Strategies for `u128` and `i128`, since proptest doesn't provide them for the wasm target. + +macro_rules! 
impl_num {
+    { $name:ident } => {
+        pub(crate) mod $name {
+            type InnerStrategy = crate::array::UniformArrayStrategy<proptest::num::u64::Any, [u64; 2]>;
+            use proptest::strategy::{Strategy, ValueTree, NewTree};
+
+            #[must_use = "strategies do nothing unless used"]
+            #[derive(Clone, Copy, Debug)]
+            pub struct Any {
+                strategy: InnerStrategy,
+            }
+
+            pub struct BinarySearch {
+                inner: <InnerStrategy as Strategy>::Tree,
+            }
+
+            impl ValueTree for BinarySearch {
+                type Value = $name;
+
+                fn current(&self) -> $name {
+                    // Reinterpret the two generated `u64` halves as a single
+                    // 128-bit integer of the same size.
+                    unsafe { core::mem::transmute(self.inner.current()) }
+                }
+
+                fn simplify(&mut self) -> bool {
+                    self.inner.simplify()
+                }
+
+                fn complicate(&mut self) -> bool {
+                    self.inner.complicate()
+                }
+            }
+
+            impl Strategy for Any {
+                type Tree = BinarySearch;
+                type Value = $name;
+
+                fn new_tree(&self, runner: &mut proptest::test_runner::TestRunner) -> NewTree<Self> {
+                    Ok(BinarySearch { inner: self.strategy.new_tree(runner)? })
+                }
+            }
+
+            pub const ANY: Any = Any { strategy: InnerStrategy::new(proptest::num::u64::ANY) };
+        }
+    }
+}
+
+impl_num! { u128 }
+impl_num! { i128 }
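Finally, a hedged sketch of how these helpers compose in practice: a hypothetical test file in the `core_simd` test suite. The real call sites elsewhere in this patch may differ in detail.

```rust
// Hypothetical core_simd/tests/example.rs; assumes the `core_simd` and
// `test_helpers` crates as configured by this patch.
#[macro_use]
extern crate test_helpers;

test_lanes! {
    // Expanded into one #[test] per supported lane count (1 through 64).
    fn abs_matches_scalar<const LANES: usize>() {
        test_helpers::test_unary_elementwise(
            &core_simd::Simd::<f32, LANES>::abs,
            &f32::abs,
            &|_| true,
        );
    }
}
```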