summaryrefslogtreecommitdiffstats
path: root/library/portable-simd/crates/core_simd/examples
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--library/portable-simd/crates/core_simd/examples/matrix_inversion.rs316
-rw-r--r--library/portable-simd/crates/core_simd/examples/nbody.rs193
-rw-r--r--library/portable-simd/crates/core_simd/examples/spectral_norm.rs77
3 files changed, 586 insertions, 0 deletions
diff --git a/library/portable-simd/crates/core_simd/examples/matrix_inversion.rs b/library/portable-simd/crates/core_simd/examples/matrix_inversion.rs
new file mode 100644
index 000000000..39f530f68
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/examples/matrix_inversion.rs
@@ -0,0 +1,316 @@
+//! 4x4 matrix inverse
+// Code ported from the `packed_simd` crate
+// Run this code with `cargo test --example matrix_inversion`
+#![feature(array_chunks, portable_simd)]
+use core_simd::simd::*;
+use Which::*;
+
+// Gotta define our own 4x4 matrix since Rust doesn't ship multidim arrays yet :^)
+#[derive(Copy, Clone, Debug, PartialEq, PartialOrd)]
+pub struct Matrix4x4([[f32; 4]; 4]);
+
+#[allow(clippy::too_many_lines)]
+pub fn scalar_inv4x4(m: Matrix4x4) -> Option<Matrix4x4> {
+ let m = m.0;
+
+ #[rustfmt::skip]
+ let mut inv = [
+ // row 0:
+ [
+ // 0,0:
+ m[1][1] * m[2][2] * m[3][3] -
+ m[1][1] * m[2][3] * m[3][2] -
+ m[2][1] * m[1][2] * m[3][3] +
+ m[2][1] * m[1][3] * m[3][2] +
+ m[3][1] * m[1][2] * m[2][3] -
+ m[3][1] * m[1][3] * m[2][2],
+ // 0,1:
+ -m[0][1] * m[2][2] * m[3][3] +
+ m[0][1] * m[2][3] * m[3][2] +
+ m[2][1] * m[0][2] * m[3][3] -
+ m[2][1] * m[0][3] * m[3][2] -
+ m[3][1] * m[0][2] * m[2][3] +
+ m[3][1] * m[0][3] * m[2][2],
+ // 0,2:
+ m[0][1] * m[1][2] * m[3][3] -
+ m[0][1] * m[1][3] * m[3][2] -
+ m[1][1] * m[0][2] * m[3][3] +
+ m[1][1] * m[0][3] * m[3][2] +
+ m[3][1] * m[0][2] * m[1][3] -
+ m[3][1] * m[0][3] * m[1][2],
+ // 0,3:
+ -m[0][1] * m[1][2] * m[2][3] +
+ m[0][1] * m[1][3] * m[2][2] +
+ m[1][1] * m[0][2] * m[2][3] -
+ m[1][1] * m[0][3] * m[2][2] -
+ m[2][1] * m[0][2] * m[1][3] +
+ m[2][1] * m[0][3] * m[1][2],
+ ],
+ // row 1
+ [
+ // 1,0:
+ -m[1][0] * m[2][2] * m[3][3] +
+ m[1][0] * m[2][3] * m[3][2] +
+ m[2][0] * m[1][2] * m[3][3] -
+ m[2][0] * m[1][3] * m[3][2] -
+ m[3][0] * m[1][2] * m[2][3] +
+ m[3][0] * m[1][3] * m[2][2],
+ // 1,1:
+ m[0][0] * m[2][2] * m[3][3] -
+ m[0][0] * m[2][3] * m[3][2] -
+ m[2][0] * m[0][2] * m[3][3] +
+ m[2][0] * m[0][3] * m[3][2] +
+ m[3][0] * m[0][2] * m[2][3] -
+ m[3][0] * m[0][3] * m[2][2],
+ // 1,2:
+ -m[0][0] * m[1][2] * m[3][3] +
+ m[0][0] * m[1][3] * m[3][2] +
+ m[1][0] * m[0][2] * m[3][3] -
+ m[1][0] * m[0][3] * m[3][2] -
+ m[3][0] * m[0][2] * m[1][3] +
+ m[3][0] * m[0][3] * m[1][2],
+ // 1,3:
+ m[0][0] * m[1][2] * m[2][3] -
+ m[0][0] * m[1][3] * m[2][2] -
+ m[1][0] * m[0][2] * m[2][3] +
+ m[1][0] * m[0][3] * m[2][2] +
+ m[2][0] * m[0][2] * m[1][3] -
+ m[2][0] * m[0][3] * m[1][2],
+ ],
+ // row 2
+ [
+ // 2,0:
+ m[1][0] * m[2][1] * m[3][3] -
+ m[1][0] * m[2][3] * m[3][1] -
+ m[2][0] * m[1][1] * m[3][3] +
+ m[2][0] * m[1][3] * m[3][1] +
+ m[3][0] * m[1][1] * m[2][3] -
+ m[3][0] * m[1][3] * m[2][1],
+ // 2,1:
+ -m[0][0] * m[2][1] * m[3][3] +
+ m[0][0] * m[2][3] * m[3][1] +
+ m[2][0] * m[0][1] * m[3][3] -
+ m[2][0] * m[0][3] * m[3][1] -
+ m[3][0] * m[0][1] * m[2][3] +
+ m[3][0] * m[0][3] * m[2][1],
+ // 2,2:
+ m[0][0] * m[1][1] * m[3][3] -
+ m[0][0] * m[1][3] * m[3][1] -
+ m[1][0] * m[0][1] * m[3][3] +
+ m[1][0] * m[0][3] * m[3][1] +
+ m[3][0] * m[0][1] * m[1][3] -
+ m[3][0] * m[0][3] * m[1][1],
+ // 2,3:
+ -m[0][0] * m[1][1] * m[2][3] +
+ m[0][0] * m[1][3] * m[2][1] +
+ m[1][0] * m[0][1] * m[2][3] -
+ m[1][0] * m[0][3] * m[2][1] -
+ m[2][0] * m[0][1] * m[1][3] +
+ m[2][0] * m[0][3] * m[1][1],
+ ],
+ // row 3
+ [
+ // 3,0:
+ -m[1][0] * m[2][1] * m[3][2] +
+ m[1][0] * m[2][2] * m[3][1] +
+ m[2][0] * m[1][1] * m[3][2] -
+ m[2][0] * m[1][2] * m[3][1] -
+ m[3][0] * m[1][1] * m[2][2] +
+ m[3][0] * m[1][2] * m[2][1],
+ // 3,1:
+ m[0][0] * m[2][1] * m[3][2] -
+ m[0][0] * m[2][2] * m[3][1] -
+ m[2][0] * m[0][1] * m[3][2] +
+ m[2][0] * m[0][2] * m[3][1] +
+ m[3][0] * m[0][1] * m[2][2] -
+ m[3][0] * m[0][2] * m[2][1],
+ // 3,2:
+ -m[0][0] * m[1][1] * m[3][2] +
+ m[0][0] * m[1][2] * m[3][1] +
+ m[1][0] * m[0][1] * m[3][2] -
+ m[1][0] * m[0][2] * m[3][1] -
+ m[3][0] * m[0][1] * m[1][2] +
+ m[3][0] * m[0][2] * m[1][1],
+ // 3,3:
+ m[0][0] * m[1][1] * m[2][2] -
+ m[0][0] * m[1][2] * m[2][1] -
+ m[1][0] * m[0][1] * m[2][2] +
+ m[1][0] * m[0][2] * m[2][1] +
+ m[2][0] * m[0][1] * m[1][2] -
+ m[2][0] * m[0][2] * m[1][1],
+ ],
+ ];
+
+ let det = m[0][0] * inv[0][0] + m[0][1] * inv[1][0] + m[0][2] * inv[2][0] + m[0][3] * inv[3][0];
+ if det == 0. {
+ return None;
+ }
+
+ let det_inv = 1. / det;
+
+ for row in &mut inv {
+ for elem in row.iter_mut() {
+ *elem *= det_inv;
+ }
+ }
+
+ Some(Matrix4x4(inv))
+}
+
+pub fn simd_inv4x4(m: Matrix4x4) -> Option<Matrix4x4> {
+ let m = m.0;
+ let m_0 = f32x4::from_array(m[0]);
+ let m_1 = f32x4::from_array(m[1]);
+ let m_2 = f32x4::from_array(m[2]);
+ let m_3 = f32x4::from_array(m[3]);
+
+ const SHUFFLE01: [Which; 4] = [First(0), First(1), Second(0), Second(1)];
+ const SHUFFLE02: [Which; 4] = [First(0), First(2), Second(0), Second(2)];
+ const SHUFFLE13: [Which; 4] = [First(1), First(3), Second(1), Second(3)];
+ const SHUFFLE23: [Which; 4] = [First(2), First(3), Second(2), Second(3)];
+
+ let tmp = simd_swizzle!(m_0, m_1, SHUFFLE01);
+ let row1 = simd_swizzle!(m_2, m_3, SHUFFLE01);
+
+ let row0 = simd_swizzle!(tmp, row1, SHUFFLE02);
+ let row1 = simd_swizzle!(row1, tmp, SHUFFLE13);
+
+ let tmp = simd_swizzle!(m_0, m_1, SHUFFLE23);
+ let row3 = simd_swizzle!(m_2, m_3, SHUFFLE23);
+ let row2 = simd_swizzle!(tmp, row3, SHUFFLE02);
+ let row3 = simd_swizzle!(row3, tmp, SHUFFLE13);
+
+ let tmp = (row2 * row3).reverse().rotate_lanes_right::<2>();
+ let minor0 = row1 * tmp;
+ let minor1 = row0 * tmp;
+ let tmp = tmp.rotate_lanes_right::<2>();
+ let minor0 = (row1 * tmp) - minor0;
+ let minor1 = (row0 * tmp) - minor1;
+ let minor1 = minor1.rotate_lanes_right::<2>();
+
+ let tmp = (row1 * row2).reverse().rotate_lanes_right::<2>();
+ let minor0 = (row3 * tmp) + minor0;
+ let minor3 = row0 * tmp;
+ let tmp = tmp.rotate_lanes_right::<2>();
+
+ let minor0 = minor0 - row3 * tmp;
+ let minor3 = row0 * tmp - minor3;
+ let minor3 = minor3.rotate_lanes_right::<2>();
+
+ let tmp = (row3 * row1.rotate_lanes_right::<2>())
+ .reverse()
+ .rotate_lanes_right::<2>();
+ let row2 = row2.rotate_lanes_right::<2>();
+ let minor0 = row2 * tmp + minor0;
+ let minor2 = row0 * tmp;
+ let tmp = tmp.rotate_lanes_right::<2>();
+ let minor0 = minor0 - row2 * tmp;
+ let minor2 = row0 * tmp - minor2;
+ let minor2 = minor2.rotate_lanes_right::<2>();
+
+ let tmp = (row0 * row1).reverse().rotate_lanes_right::<2>();
+ let minor2 = minor2 + row3 * tmp;
+ let minor3 = row2 * tmp - minor3;
+ let tmp = tmp.rotate_lanes_right::<2>();
+ let minor2 = row3 * tmp - minor2;
+ let minor3 = minor3 - row2 * tmp;
+
+ let tmp = (row0 * row3).reverse().rotate_lanes_right::<2>();
+ let minor1 = minor1 - row2 * tmp;
+ let minor2 = row1 * tmp + minor2;
+ let tmp = tmp.rotate_lanes_right::<2>();
+ let minor1 = row2 * tmp + minor1;
+ let minor2 = minor2 - row1 * tmp;
+
+ let tmp = (row0 * row2).reverse().rotate_lanes_right::<2>();
+ let minor1 = row3 * tmp + minor1;
+ let minor3 = minor3 - row1 * tmp;
+ let tmp = tmp.rotate_lanes_right::<2>();
+ let minor1 = minor1 - row3 * tmp;
+ let minor3 = row1 * tmp + minor3;
+
+ let det = row0 * minor0;
+ let det = det.rotate_lanes_right::<2>() + det;
+ let det = det.reverse().rotate_lanes_right::<2>() + det;
+
+ if det.reduce_sum() == 0. {
+ return None;
+ }
+ // calculate the reciprocal
+ let tmp = f32x4::splat(1.0) / det;
+ let det = tmp + tmp - det * tmp * tmp;
+
+ let res0 = minor0 * det;
+ let res1 = minor1 * det;
+ let res2 = minor2 * det;
+ let res3 = minor3 * det;
+
+ let mut m = m;
+
+ m[0] = res0.to_array();
+ m[1] = res1.to_array();
+ m[2] = res2.to_array();
+ m[3] = res3.to_array();
+
+ Some(Matrix4x4(m))
+}
+
+#[cfg(test)]
+#[rustfmt::skip]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test() {
+ let tests: &[(Matrix4x4, Option<Matrix4x4>)] = &[
+ // Identity:
+ (Matrix4x4([
+ [1., 0., 0., 0.],
+ [0., 1., 0., 0.],
+ [0., 0., 1., 0.],
+ [0., 0., 0., 1.],
+ ]),
+ Some(Matrix4x4([
+ [1., 0., 0., 0.],
+ [0., 1., 0., 0.],
+ [0., 0., 1., 0.],
+ [0., 0., 0., 1.],
+ ]))
+ ),
+ // None:
+ (Matrix4x4([
+ [1., 2., 3., 4.],
+ [12., 11., 10., 9.],
+ [5., 6., 7., 8.],
+ [16., 15., 14., 13.],
+ ]),
+ None
+ ),
+ // Other:
+ (Matrix4x4([
+ [1., 1., 1., 0.],
+ [0., 3., 1., 2.],
+ [2., 3., 1., 0.],
+ [1., 0., 2., 1.],
+ ]),
+ Some(Matrix4x4([
+ [-3., -0.5, 1.5, 1.0],
+ [ 1., 0.25, -0.25, -0.5],
+ [ 3., 0.25, -1.25, -0.5],
+ [-3., 0.0, 1.0, 1.0],
+ ]))
+ ),
+
+
+ ];
+
+ for &(input, output) in tests {
+ assert_eq!(scalar_inv4x4(input), output);
+ assert_eq!(simd_inv4x4(input), output);
+ }
+ }
+}
+
+fn main() {
+ // Empty main to make cargo happy
+}
diff --git a/library/portable-simd/crates/core_simd/examples/nbody.rs b/library/portable-simd/crates/core_simd/examples/nbody.rs
new file mode 100644
index 000000000..df38a0096
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/examples/nbody.rs
@@ -0,0 +1,193 @@
+#![feature(portable_simd)]
+extern crate std_float;
+
+/// Benchmarks game nbody code
+/// Taken from the `packed_simd` crate
+/// Run this benchmark with `cargo test --example nbody`
+mod nbody {
+ use core_simd::simd::*;
+ #[allow(unused)] // False positive?
+ use std_float::StdFloat;
+
+ use std::f64::consts::PI;
+ const SOLAR_MASS: f64 = 4.0 * PI * PI;
+ const DAYS_PER_YEAR: f64 = 365.24;
+
+ #[derive(Debug, Clone, Copy)]
+ struct Body {
+ pub x: f64x4,
+ pub v: f64x4,
+ pub mass: f64,
+ }
+
+ const N_BODIES: usize = 5;
+ const BODIES: [Body; N_BODIES] = [
+ // sun:
+ Body {
+ x: f64x4::from_array([0., 0., 0., 0.]),
+ v: f64x4::from_array([0., 0., 0., 0.]),
+ mass: SOLAR_MASS,
+ },
+ // jupiter:
+ Body {
+ x: f64x4::from_array([
+ 4.84143144246472090e+00,
+ -1.16032004402742839e+00,
+ -1.03622044471123109e-01,
+ 0.,
+ ]),
+ v: f64x4::from_array([
+ 1.66007664274403694e-03 * DAYS_PER_YEAR,
+ 7.69901118419740425e-03 * DAYS_PER_YEAR,
+ -6.90460016972063023e-05 * DAYS_PER_YEAR,
+ 0.,
+ ]),
+ mass: 9.54791938424326609e-04 * SOLAR_MASS,
+ },
+ // saturn:
+ Body {
+ x: f64x4::from_array([
+ 8.34336671824457987e+00,
+ 4.12479856412430479e+00,
+ -4.03523417114321381e-01,
+ 0.,
+ ]),
+ v: f64x4::from_array([
+ -2.76742510726862411e-03 * DAYS_PER_YEAR,
+ 4.99852801234917238e-03 * DAYS_PER_YEAR,
+ 2.30417297573763929e-05 * DAYS_PER_YEAR,
+ 0.,
+ ]),
+ mass: 2.85885980666130812e-04 * SOLAR_MASS,
+ },
+ // uranus:
+ Body {
+ x: f64x4::from_array([
+ 1.28943695621391310e+01,
+ -1.51111514016986312e+01,
+ -2.23307578892655734e-01,
+ 0.,
+ ]),
+ v: f64x4::from_array([
+ 2.96460137564761618e-03 * DAYS_PER_YEAR,
+ 2.37847173959480950e-03 * DAYS_PER_YEAR,
+ -2.96589568540237556e-05 * DAYS_PER_YEAR,
+ 0.,
+ ]),
+ mass: 4.36624404335156298e-05 * SOLAR_MASS,
+ },
+ // neptune:
+ Body {
+ x: f64x4::from_array([
+ 1.53796971148509165e+01,
+ -2.59193146099879641e+01,
+ 1.79258772950371181e-01,
+ 0.,
+ ]),
+ v: f64x4::from_array([
+ 2.68067772490389322e-03 * DAYS_PER_YEAR,
+ 1.62824170038242295e-03 * DAYS_PER_YEAR,
+ -9.51592254519715870e-05 * DAYS_PER_YEAR,
+ 0.,
+ ]),
+ mass: 5.15138902046611451e-05 * SOLAR_MASS,
+ },
+ ];
+
+ fn offset_momentum(bodies: &mut [Body; N_BODIES]) {
+ let (sun, rest) = bodies.split_at_mut(1);
+ let sun = &mut sun[0];
+ for body in rest {
+ let m_ratio = body.mass / SOLAR_MASS;
+ sun.v -= body.v * Simd::splat(m_ratio);
+ }
+ }
+
+ fn energy(bodies: &[Body; N_BODIES]) -> f64 {
+ let mut e = 0.;
+ for i in 0..N_BODIES {
+ let bi = &bodies[i];
+ e += bi.mass * (bi.v * bi.v).reduce_sum() * 0.5;
+ for bj in bodies.iter().take(N_BODIES).skip(i + 1) {
+ let dx = bi.x - bj.x;
+ e -= bi.mass * bj.mass / (dx * dx).reduce_sum().sqrt()
+ }
+ }
+ e
+ }
+
+ fn advance(bodies: &mut [Body; N_BODIES], dt: f64) {
+ const N: usize = N_BODIES * (N_BODIES - 1) / 2;
+
+ // compute distance between bodies:
+ let mut r = [f64x4::splat(0.); N];
+ {
+ let mut i = 0;
+ for j in 0..N_BODIES {
+ for k in j + 1..N_BODIES {
+ r[i] = bodies[j].x - bodies[k].x;
+ i += 1;
+ }
+ }
+ }
+
+ let mut mag = [0.0; N];
+ for i in (0..N).step_by(2) {
+ let d2s = f64x2::from_array([
+ (r[i] * r[i]).reduce_sum(),
+ (r[i + 1] * r[i + 1]).reduce_sum(),
+ ]);
+ let dmags = f64x2::splat(dt) / (d2s * d2s.sqrt());
+ mag[i] = dmags[0];
+ mag[i + 1] = dmags[1];
+ }
+
+ let mut i = 0;
+ for j in 0..N_BODIES {
+ for k in j + 1..N_BODIES {
+ let f = r[i] * Simd::splat(mag[i]);
+ bodies[j].v -= f * Simd::splat(bodies[k].mass);
+ bodies[k].v += f * Simd::splat(bodies[j].mass);
+ i += 1
+ }
+ }
+ for body in bodies {
+ body.x += Simd::splat(dt) * body.v
+ }
+ }
+
+ pub fn run(n: usize) -> (f64, f64) {
+ let mut bodies = BODIES;
+ offset_momentum(&mut bodies);
+ let energy_before = energy(&bodies);
+ for _ in 0..n {
+ advance(&mut bodies, 0.01);
+ }
+ let energy_after = energy(&bodies);
+
+ (energy_before, energy_after)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ // Good enough for demonstration purposes, not going for strictness here.
+ fn approx_eq_f64(a: f64, b: f64) -> bool {
+ (a - b).abs() < 0.00001
+ }
+ #[test]
+ fn test() {
+ const OUTPUT: [f64; 2] = [-0.169075164, -0.169087605];
+ let (energy_before, energy_after) = super::nbody::run(1000);
+ assert!(approx_eq_f64(energy_before, OUTPUT[0]));
+ assert!(approx_eq_f64(energy_after, OUTPUT[1]));
+ }
+}
+
+fn main() {
+ {
+ let (energy_before, energy_after) = nbody::run(1000);
+ println!("Energy before: {energy_before}");
+ println!("Energy after: {energy_after}");
+ }
+}
diff --git a/library/portable-simd/crates/core_simd/examples/spectral_norm.rs b/library/portable-simd/crates/core_simd/examples/spectral_norm.rs
new file mode 100644
index 000000000..012182e09
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/examples/spectral_norm.rs
@@ -0,0 +1,77 @@
+#![feature(portable_simd)]
+
+use core_simd::simd::*;
+
+fn a(i: usize, j: usize) -> f64 {
+ ((i + j) * (i + j + 1) / 2 + i + 1) as f64
+}
+
+fn mult_av(v: &[f64], out: &mut [f64]) {
+ assert!(v.len() == out.len());
+ assert!(v.len() % 2 == 0);
+
+ for (i, out) in out.iter_mut().enumerate() {
+ let mut sum = f64x2::splat(0.0);
+
+ let mut j = 0;
+ while j < v.len() {
+ let b = f64x2::from_slice(&v[j..]);
+ let a = f64x2::from_array([a(i, j), a(i, j + 1)]);
+ sum += b / a;
+ j += 2
+ }
+ *out = sum.reduce_sum();
+ }
+}
+
+fn mult_atv(v: &[f64], out: &mut [f64]) {
+ assert!(v.len() == out.len());
+ assert!(v.len() % 2 == 0);
+
+ for (i, out) in out.iter_mut().enumerate() {
+ let mut sum = f64x2::splat(0.0);
+
+ let mut j = 0;
+ while j < v.len() {
+ let b = f64x2::from_slice(&v[j..]);
+ let a = f64x2::from_array([a(j, i), a(j + 1, i)]);
+ sum += b / a;
+ j += 2
+ }
+ *out = sum.reduce_sum();
+ }
+}
+
+fn mult_atav(v: &[f64], out: &mut [f64], tmp: &mut [f64]) {
+ mult_av(v, tmp);
+ mult_atv(tmp, out);
+}
+
+pub fn spectral_norm(n: usize) -> f64 {
+ assert!(n % 2 == 0, "only even lengths are accepted");
+
+ let mut u = vec![1.0; n];
+ let mut v = u.clone();
+ let mut tmp = u.clone();
+
+ for _ in 0..10 {
+ mult_atav(&u, &mut v, &mut tmp);
+ mult_atav(&v, &mut u, &mut tmp);
+ }
+ (dot(&u, &v) / dot(&v, &v)).sqrt()
+}
+
+fn dot(x: &[f64], y: &[f64]) -> f64 {
+ // This is auto-vectorized:
+ x.iter().zip(y).map(|(&x, &y)| x * y).sum()
+}
+
+#[cfg(test)]
+#[test]
+fn test() {
+ assert_eq!(&format!("{:.9}", spectral_norm(100)), "1.274219991");
+}
+
+fn main() {
+ // Empty main to make cargo happy
+}