From ef24de24a82fe681581cc130f342363c47c0969a Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 7 Jun 2024 07:48:48 +0200 Subject: Merging upstream version 1.75.0+dfsg1. Signed-off-by: Daniel Baumann --- vendor/packed_simd/src/codegen/math/float.rs | 19 + vendor/packed_simd/src/codegen/math/float/abs.rs | 103 +++++ vendor/packed_simd/src/codegen/math/float/cos.rs | 103 +++++ .../packed_simd/src/codegen/math/float/cos_pi.rs | 87 ++++ vendor/packed_simd/src/codegen/math/float/exp.rs | 112 +++++ vendor/packed_simd/src/codegen/math/float/ln.rs | 112 +++++ .../packed_simd/src/codegen/math/float/macros.rs | 470 +++++++++++++++++++++ .../packed_simd/src/codegen/math/float/mul_add.rs | 109 +++++ .../packed_simd/src/codegen/math/float/mul_adde.rs | 60 +++ vendor/packed_simd/src/codegen/math/float/powf.rs | 112 +++++ vendor/packed_simd/src/codegen/math/float/sin.rs | 103 +++++ .../src/codegen/math/float/sin_cos_pi.rs | 188 +++++++++ .../packed_simd/src/codegen/math/float/sin_pi.rs | 87 ++++ vendor/packed_simd/src/codegen/math/float/sqrt.rs | 103 +++++ vendor/packed_simd/src/codegen/math/float/sqrte.rs | 67 +++ vendor/packed_simd/src/codegen/math/float/tanh.rs | 120 ++++++ 16 files changed, 1955 insertions(+) create mode 100644 vendor/packed_simd/src/codegen/math/float.rs create mode 100644 vendor/packed_simd/src/codegen/math/float/abs.rs create mode 100644 vendor/packed_simd/src/codegen/math/float/cos.rs create mode 100644 vendor/packed_simd/src/codegen/math/float/cos_pi.rs create mode 100644 vendor/packed_simd/src/codegen/math/float/exp.rs create mode 100644 vendor/packed_simd/src/codegen/math/float/ln.rs create mode 100644 vendor/packed_simd/src/codegen/math/float/macros.rs create mode 100644 vendor/packed_simd/src/codegen/math/float/mul_add.rs create mode 100644 vendor/packed_simd/src/codegen/math/float/mul_adde.rs create mode 100644 vendor/packed_simd/src/codegen/math/float/powf.rs create mode 100644 vendor/packed_simd/src/codegen/math/float/sin.rs create mode 100644 vendor/packed_simd/src/codegen/math/float/sin_cos_pi.rs create mode 100644 vendor/packed_simd/src/codegen/math/float/sin_pi.rs create mode 100644 vendor/packed_simd/src/codegen/math/float/sqrt.rs create mode 100644 vendor/packed_simd/src/codegen/math/float/sqrte.rs create mode 100644 vendor/packed_simd/src/codegen/math/float/tanh.rs (limited to 'vendor/packed_simd/src/codegen/math') diff --git a/vendor/packed_simd/src/codegen/math/float.rs b/vendor/packed_simd/src/codegen/math/float.rs new file mode 100644 index 000000000..10d21831f --- /dev/null +++ b/vendor/packed_simd/src/codegen/math/float.rs @@ -0,0 +1,19 @@ +//! Vertical floating-point math operations. +#![allow(clippy::useless_transmute)] + +#[macro_use] +pub(crate) mod macros; +pub(crate) mod abs; +pub(crate) mod cos; +pub(crate) mod cos_pi; +pub(crate) mod exp; +pub(crate) mod ln; +pub(crate) mod mul_add; +pub(crate) mod mul_adde; +pub(crate) mod powf; +pub(crate) mod sin; +pub(crate) mod sin_cos_pi; +pub(crate) mod sin_pi; +pub(crate) mod sqrt; +pub(crate) mod sqrte; +pub(crate) mod tanh; diff --git a/vendor/packed_simd/src/codegen/math/float/abs.rs b/vendor/packed_simd/src/codegen/math/float/abs.rs new file mode 100644 index 000000000..34aacc25b --- /dev/null +++ b/vendor/packed_simd/src/codegen/math/float/abs.rs @@ -0,0 +1,103 @@ +//! 
Vertical floating-point `fabs`
+#![allow(unused)]
+
+// FIXME 64-bit 1 elem vectors fabs
+
+use crate::*;
+
+pub(crate) trait Abs {
+    fn abs(self) -> Self;
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.fabs.v2f32"]
+    fn fabs_v2f32(x: f32x2) -> f32x2;
+    #[link_name = "llvm.fabs.v4f32"]
+    fn fabs_v4f32(x: f32x4) -> f32x4;
+    #[link_name = "llvm.fabs.v8f32"]
+    fn fabs_v8f32(x: f32x8) -> f32x8;
+    #[link_name = "llvm.fabs.v16f32"]
+    fn fabs_v16f32(x: f32x16) -> f32x16;
+    /* FIXME 64-bit single elem vectors
+    #[link_name = "llvm.fabs.v1f64"]
+    fn fabs_v1f64(x: f64x1) -> f64x1;
+    */
+    #[link_name = "llvm.fabs.v2f64"]
+    fn fabs_v2f64(x: f64x2) -> f64x2;
+    #[link_name = "llvm.fabs.v4f64"]
+    fn fabs_v4f64(x: f64x4) -> f64x4;
+    #[link_name = "llvm.fabs.v8f64"]
+    fn fabs_v8f64(x: f64x8) -> f64x8;
+
+    #[link_name = "llvm.fabs.f32"]
+    fn fabs_f32(x: f32) -> f32;
+    #[link_name = "llvm.fabs.f64"]
+    fn fabs_f64(x: f64) -> f64;
+}
+
+gen_unary_impl_table!(Abs, abs);
+
+cfg_if! {
+    if #[cfg(target_arch = "s390x")] {
+        // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+        impl_unary!(f32x2[f32; 2]: fabs_f32);
+        impl_unary!(f32x4[f32; 4]: fabs_f32);
+        impl_unary!(f32x8[f32; 8]: fabs_f32);
+        impl_unary!(f32x16[f32; 16]: fabs_f32);
+
+        impl_unary!(f64x2[f64; 2]: fabs_f64);
+        impl_unary!(f64x4[f64; 4]: fabs_f64);
+        impl_unary!(f64x8[f64; 8]: fabs_f64);
+    } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+        use sleef_sys::*;
+        cfg_if! {
+            if #[cfg(target_feature = "avx2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_fabsf4_avx2128);
+                impl_unary!(f32x16[h => f32x8]: Sleef_fabsf8_avx2);
+                impl_unary!(f64x8[h => f64x4]: Sleef_fabsd4_avx2);
+
+                impl_unary!(f32x4: Sleef_fabsf4_avx2128);
+                impl_unary!(f32x8: Sleef_fabsf8_avx2);
+                impl_unary!(f64x2: Sleef_fabsd2_avx2128);
+                impl_unary!(f64x4: Sleef_fabsd4_avx2);
+            } else if #[cfg(target_feature = "avx")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_fabsf4_sse4);
+                impl_unary!(f32x16[h => f32x8]: Sleef_fabsf8_avx);
+                impl_unary!(f64x8[h => f64x4]: Sleef_fabsd4_avx);
+
+                impl_unary!(f32x4: Sleef_fabsf4_sse4);
+                impl_unary!(f32x8: Sleef_fabsf8_avx);
+                impl_unary!(f64x2: Sleef_fabsd2_sse4);
+                impl_unary!(f64x4: Sleef_fabsd4_avx);
+            } else if #[cfg(target_feature = "sse4.2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_fabsf4_sse4);
+                impl_unary!(f32x16[q => f32x4]: Sleef_fabsf4_sse4);
+                impl_unary!(f64x8[q => f64x2]: Sleef_fabsd2_sse4);
+
+                impl_unary!(f32x4: Sleef_fabsf4_sse4);
+                impl_unary!(f32x8[h => f32x4]: Sleef_fabsf4_sse4);
+                impl_unary!(f64x2: Sleef_fabsd2_sse4);
+                impl_unary!(f64x4[h => f64x2]: Sleef_fabsd2_sse4);
+            } else {
+                impl_unary!(f32x2[f32; 2]: fabs_f32);
+                impl_unary!(f32x16: fabs_v16f32);
+                impl_unary!(f64x8: fabs_v8f64);
+
+                impl_unary!(f32x4: fabs_v4f32);
+                impl_unary!(f32x8: fabs_v8f32);
+                impl_unary!(f64x2: fabs_v2f64);
+                impl_unary!(f64x4: fabs_v4f64);
+            }
+        }
+    } else {
+        impl_unary!(f32x2[f32; 2]: fabs_f32);
+        impl_unary!(f32x4: fabs_v4f32);
+        impl_unary!(f32x8: fabs_v8f32);
+        impl_unary!(f32x16: fabs_v16f32);
+
+        impl_unary!(f64x2: fabs_v2f64);
+        impl_unary!(f64x4: fabs_v4f64);
+        impl_unary!(f64x8: fabs_v8f64);
+    }
+}
diff --git a/vendor/packed_simd/src/codegen/math/float/cos.rs b/vendor/packed_simd/src/codegen/math/float/cos.rs
new file mode 100644
index 000000000..dec390cb7
--- /dev/null
+++ b/vendor/packed_simd/src/codegen/math/float/cos.rs
@@ -0,0 +1,103 @@
+//! 
Vertical floating-point `cos`
+#![allow(unused)]
+
+// FIXME 64-bit 1 elem vector cos
+
+use crate::*;
+
+pub(crate) trait Cos {
+    fn cos(self) -> Self;
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.cos.v2f32"]
+    fn cos_v2f32(x: f32x2) -> f32x2;
+    #[link_name = "llvm.cos.v4f32"]
+    fn cos_v4f32(x: f32x4) -> f32x4;
+    #[link_name = "llvm.cos.v8f32"]
+    fn cos_v8f32(x: f32x8) -> f32x8;
+    #[link_name = "llvm.cos.v16f32"]
+    fn cos_v16f32(x: f32x16) -> f32x16;
+    /* FIXME 64-bit single elem vectors
+    #[link_name = "llvm.cos.v1f64"]
+    fn cos_v1f64(x: f64x1) -> f64x1;
+    */
+    #[link_name = "llvm.cos.v2f64"]
+    fn cos_v2f64(x: f64x2) -> f64x2;
+    #[link_name = "llvm.cos.v4f64"]
+    fn cos_v4f64(x: f64x4) -> f64x4;
+    #[link_name = "llvm.cos.v8f64"]
+    fn cos_v8f64(x: f64x8) -> f64x8;
+
+    #[link_name = "llvm.cos.f32"]
+    fn cos_f32(x: f32) -> f32;
+    #[link_name = "llvm.cos.f64"]
+    fn cos_f64(x: f64) -> f64;
+}
+
+gen_unary_impl_table!(Cos, cos);
+
+cfg_if! {
+    if #[cfg(target_arch = "s390x")] {
+        // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+        impl_unary!(f32x2[f32; 2]: cos_f32);
+        impl_unary!(f32x4[f32; 4]: cos_f32);
+        impl_unary!(f32x8[f32; 8]: cos_f32);
+        impl_unary!(f32x16[f32; 16]: cos_f32);
+
+        impl_unary!(f64x2[f64; 2]: cos_f64);
+        impl_unary!(f64x4[f64; 4]: cos_f64);
+        impl_unary!(f64x8[f64; 8]: cos_f64);
+    } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+        use sleef_sys::*;
+        cfg_if! {
+            if #[cfg(target_feature = "avx2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_cosf4_u10avx2128);
+                impl_unary!(f32x16[h => f32x8]: Sleef_cosf8_u10avx2);
+                impl_unary!(f64x8[h => f64x4]: Sleef_cosd4_u10avx2);
+
+                impl_unary!(f32x4: Sleef_cosf4_u10avx2128);
+                impl_unary!(f32x8: Sleef_cosf8_u10avx2);
+                impl_unary!(f64x2: Sleef_cosd2_u10avx2128);
+                impl_unary!(f64x4: Sleef_cosd4_u10avx2);
+            } else if #[cfg(target_feature = "avx")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_cosf4_u10sse4);
+                impl_unary!(f32x16[h => f32x8]: Sleef_cosf8_u10avx);
+                impl_unary!(f64x8[h => f64x4]: Sleef_cosd4_u10avx);
+
+                impl_unary!(f32x4: Sleef_cosf4_u10sse4);
+                impl_unary!(f32x8: Sleef_cosf8_u10avx);
+                impl_unary!(f64x2: Sleef_cosd2_u10sse4);
+                impl_unary!(f64x4: Sleef_cosd4_u10avx);
+            } else if #[cfg(target_feature = "sse4.2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_cosf4_u10sse4);
+                impl_unary!(f32x16[q => f32x4]: Sleef_cosf4_u10sse4);
+                impl_unary!(f64x8[q => f64x2]: Sleef_cosd2_u10sse4);
+
+                impl_unary!(f32x4: Sleef_cosf4_u10sse4);
+                impl_unary!(f32x8[h => f32x4]: Sleef_cosf4_u10sse4);
+                impl_unary!(f64x2: Sleef_cosd2_u10sse4);
+                impl_unary!(f64x4[h => f64x2]: Sleef_cosd2_u10sse4);
+            } else {
+                impl_unary!(f32x2[f32; 2]: cos_f32);
+                impl_unary!(f32x16: cos_v16f32);
+                impl_unary!(f64x8: cos_v8f64);
+
+                impl_unary!(f32x4: cos_v4f32);
+                impl_unary!(f32x8: cos_v8f32);
+                impl_unary!(f64x2: cos_v2f64);
+                impl_unary!(f64x4: cos_v4f64);
+            }
+        }
+    } else {
+        impl_unary!(f32x2[f32; 2]: cos_f32);
+        impl_unary!(f32x4: cos_v4f32);
+        impl_unary!(f32x8: cos_v8f32);
+        impl_unary!(f32x16: cos_v16f32);
+
+        impl_unary!(f64x2: cos_v2f64);
+        impl_unary!(f64x4: cos_v4f64);
+        impl_unary!(f64x8: cos_v8f64);
+    }
+}
diff --git a/vendor/packed_simd/src/codegen/math/float/cos_pi.rs b/vendor/packed_simd/src/codegen/math/float/cos_pi.rs
new file mode 100644
index 000000000..e283280ee
--- /dev/null
+++ b/vendor/packed_simd/src/codegen/math/float/cos_pi.rs
@@ -0,0 +1,87 @@
+//! 
Vertical floating-point `cos_pi`
+#![allow(unused)]
+
+// FIXME 64-bit 1 elem vectors cos_pi
+
+use crate::*;
+
+pub(crate) trait CosPi {
+    fn cos_pi(self) -> Self;
+}
+
+gen_unary_impl_table!(CosPi, cos_pi);
+
+macro_rules! impl_def {
+    ($vid:ident, $PI:path) => {
+        impl CosPi for $vid {
+            #[inline]
+            fn cos_pi(self) -> Self {
+                (self * Self::splat($PI)).cos()
+            }
+        }
+    };
+}
+macro_rules! impl_def32 {
+    ($vid:ident) => {
+        impl_def!($vid, crate::f32::consts::PI);
+    };
+}
+macro_rules! impl_def64 {
+    ($vid:ident) => {
+        impl_def!($vid, crate::f64::consts::PI);
+    };
+}
+
+cfg_if! {
+    if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+        use sleef_sys::*;
+        cfg_if! {
+            if #[cfg(target_feature = "avx2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_cospif4_u05avx2128);
+                impl_unary!(f32x16[h => f32x8]: Sleef_cospif8_u05avx2);
+                impl_unary!(f64x8[h => f64x4]: Sleef_cospid4_u05avx2);
+
+                impl_unary!(f32x4: Sleef_cospif4_u05avx2128);
+                impl_unary!(f32x8: Sleef_cospif8_u05avx2);
+                impl_unary!(f64x2: Sleef_cospid2_u05avx2128);
+                impl_unary!(f64x4: Sleef_cospid4_u05avx2);
+            } else if #[cfg(target_feature = "avx")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_cospif4_u05sse4);
+                impl_unary!(f32x16[h => f32x8]: Sleef_cospif8_u05avx);
+                impl_unary!(f64x8[h => f64x4]: Sleef_cospid4_u05avx);
+
+                impl_unary!(f32x4: Sleef_cospif4_u05sse4);
+                impl_unary!(f32x8: Sleef_cospif8_u05avx);
+                impl_unary!(f64x2: Sleef_cospid2_u05sse4);
+                impl_unary!(f64x4: Sleef_cospid4_u05avx);
+            } else if #[cfg(target_feature = "sse4.2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_cospif4_u05sse4);
+                impl_unary!(f32x16[q => f32x4]: Sleef_cospif4_u05sse4);
+                impl_unary!(f64x8[q => f64x2]: Sleef_cospid2_u05sse4);
+
+                impl_unary!(f32x4: Sleef_cospif4_u05sse4);
+                impl_unary!(f32x8[h => f32x4]: Sleef_cospif4_u05sse4);
+                impl_unary!(f64x2: Sleef_cospid2_u05sse4);
+                impl_unary!(f64x4[h => f64x2]: Sleef_cospid2_u05sse4);
+            } else {
+                impl_def32!(f32x2);
+                impl_def32!(f32x4);
+                impl_def32!(f32x8);
+                impl_def32!(f32x16);
+
+                impl_def64!(f64x2);
+                impl_def64!(f64x4);
+                impl_def64!(f64x8);
+            }
+        }
+    } else {
+        impl_def32!(f32x2);
+        impl_def32!(f32x4);
+        impl_def32!(f32x8);
+        impl_def32!(f32x16);
+
+        impl_def64!(f64x2);
+        impl_def64!(f64x4);
+        impl_def64!(f64x8);
+    }
+}
diff --git a/vendor/packed_simd/src/codegen/math/float/exp.rs b/vendor/packed_simd/src/codegen/math/float/exp.rs
new file mode 100644
index 000000000..a7b20580e
--- /dev/null
+++ b/vendor/packed_simd/src/codegen/math/float/exp.rs
@@ -0,0 +1,112 @@
+//! Vertical floating-point `exp`
+#![allow(unused)]
+
+// FIXME 64-bit single elem vectors missing
+
+use crate::*;
+
+pub(crate) trait Exp {
+    fn exp(self) -> Self;
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.exp.v2f32"]
+    fn exp_v2f32(x: f32x2) -> f32x2;
+    #[link_name = "llvm.exp.v4f32"]
+    fn exp_v4f32(x: f32x4) -> f32x4;
+    #[link_name = "llvm.exp.v8f32"]
+    fn exp_v8f32(x: f32x8) -> f32x8;
+    #[link_name = "llvm.exp.v16f32"]
+    fn exp_v16f32(x: f32x16) -> f32x16;
+    /* FIXME 64-bit single elem vectors
+    #[link_name = "llvm.exp.v1f64"]
+    fn exp_v1f64(x: f64x1) -> f64x1;
+    */
+    #[link_name = "llvm.exp.v2f64"]
+    fn exp_v2f64(x: f64x2) -> f64x2;
+    #[link_name = "llvm.exp.v4f64"]
+    fn exp_v4f64(x: f64x4) -> f64x4;
+    #[link_name = "llvm.exp.v8f64"]
+    fn exp_v8f64(x: f64x8) -> f64x8;
+
+    #[link_name = "llvm.exp.f32"]
+    fn exp_f32(x: f32) -> f32;
+    #[link_name = "llvm.exp.f64"]
+    fn exp_f64(x: f64) -> f64;
+}
+
+gen_unary_impl_table!(Exp, exp);
+
+cfg_if! 
{ + if #[cfg(target_arch = "s390x")] { + // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 + impl_unary!(f32x2[f32; 2]: exp_f32); + impl_unary!(f32x4[f32; 4]: exp_f32); + impl_unary!(f32x8[f32; 8]: exp_f32); + impl_unary!(f32x16[f32; 16]: exp_f32); + + impl_unary!(f64x2[f64; 2]: exp_f64); + impl_unary!(f64x4[f64; 4]: exp_f64); + impl_unary!(f64x8[f64; 8]: exp_f64); + } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { + use sleef_sys::*; + cfg_if! { + if #[cfg(target_feature = "avx2")] { + impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10avx2128); + impl_unary!(f32x16[h => f32x8]: Sleef_expf8_u10avx2); + impl_unary!(f64x8[h => f64x4]: Sleef_expd4_u10avx2); + + impl_unary!(f32x4: Sleef_expf4_u10avx2128); + impl_unary!(f32x8: Sleef_expf8_u10avx2); + impl_unary!(f64x2: Sleef_expd2_u10avx2128); + impl_unary!(f64x4: Sleef_expd4_u10avx2); + } else if #[cfg(target_feature = "avx")] { + impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10sse4); + impl_unary!(f32x16[h => f32x8]: Sleef_expf8_u10avx); + impl_unary!(f64x8[h => f64x4]: Sleef_expd4_u10avx); + + impl_unary!(f32x4: Sleef_expf4_u10sse4); + impl_unary!(f32x8: Sleef_expf8_u10avx); + impl_unary!(f64x2: Sleef_expd2_u10sse4); + impl_unary!(f64x4: Sleef_expd4_u10avx); + } else if #[cfg(target_feature = "sse4.2")] { + impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10sse4); + impl_unary!(f32x16[q => f32x4]: Sleef_expf4_u10sse4); + impl_unary!(f64x8[q => f64x2]: Sleef_expd2_u10sse4); + + impl_unary!(f32x4: Sleef_expf4_u10sse4); + impl_unary!(f32x8[h => f32x4]: Sleef_expf4_u10sse4); + impl_unary!(f64x2: Sleef_expd2_u10sse4); + impl_unary!(f64x4[h => f64x2]: Sleef_expd2_u10sse4); + } else if #[cfg(target_feature = "sse2")] { + impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10sse2); + impl_unary!(f32x16[q => f32x4]: Sleef_expf4_u10sse2); + impl_unary!(f64x8[q => f64x2]: Sleef_expd2_u10sse2); + + impl_unary!(f32x4: Sleef_expf4_u10sse2); + impl_unary!(f32x8[h => f32x4]: Sleef_expf4_u10sse2); + impl_unary!(f64x2: Sleef_expd2_u10sse2); + impl_unary!(f64x4[h => f64x2]: Sleef_expd2_u10sse2); + } else { + impl_unary!(f32x2[f32; 2]: exp_f32); + impl_unary!(f32x16: exp_v16f32); + impl_unary!(f64x8: exp_v8f64); + + impl_unary!(f32x4: exp_v4f32); + impl_unary!(f32x8: exp_v8f32); + impl_unary!(f64x2: exp_v2f64); + impl_unary!(f64x4: exp_v4f64); + } + } + } else { + impl_unary!(f32x2[f32; 2]: exp_f32); + impl_unary!(f32x4: exp_v4f32); + impl_unary!(f32x8: exp_v8f32); + impl_unary!(f32x16: exp_v16f32); + + impl_unary!(f64x2: exp_v2f64); + impl_unary!(f64x4: exp_v4f64); + impl_unary!(f64x8: exp_v8f64); + } +} diff --git a/vendor/packed_simd/src/codegen/math/float/ln.rs b/vendor/packed_simd/src/codegen/math/float/ln.rs new file mode 100644 index 000000000..a5e38cb40 --- /dev/null +++ b/vendor/packed_simd/src/codegen/math/float/ln.rs @@ -0,0 +1,112 @@ +//! 
Vertical floating-point `ln`
+#![allow(unused)]
+
+// FIXME 64-bit single elem vectors missing
+
+use crate::*;
+
+pub(crate) trait Ln {
+    fn ln(self) -> Self;
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.log.v2f32"]
+    fn ln_v2f32(x: f32x2) -> f32x2;
+    #[link_name = "llvm.log.v4f32"]
+    fn ln_v4f32(x: f32x4) -> f32x4;
+    #[link_name = "llvm.log.v8f32"]
+    fn ln_v8f32(x: f32x8) -> f32x8;
+    #[link_name = "llvm.log.v16f32"]
+    fn ln_v16f32(x: f32x16) -> f32x16;
+    /* FIXME 64-bit single elem vectors
+    #[link_name = "llvm.log.v1f64"]
+    fn ln_v1f64(x: f64x1) -> f64x1;
+    */
+    #[link_name = "llvm.log.v2f64"]
+    fn ln_v2f64(x: f64x2) -> f64x2;
+    #[link_name = "llvm.log.v4f64"]
+    fn ln_v4f64(x: f64x4) -> f64x4;
+    #[link_name = "llvm.log.v8f64"]
+    fn ln_v8f64(x: f64x8) -> f64x8;
+
+    #[link_name = "llvm.log.f32"]
+    fn ln_f32(x: f32) -> f32;
+    #[link_name = "llvm.log.f64"]
+    fn ln_f64(x: f64) -> f64;
+}
+
+gen_unary_impl_table!(Ln, ln);
+
+cfg_if! {
+    if #[cfg(target_arch = "s390x")] {
+        // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+        impl_unary!(f32x2[f32; 2]: ln_f32);
+        impl_unary!(f32x4[f32; 4]: ln_f32);
+        impl_unary!(f32x8[f32; 8]: ln_f32);
+        impl_unary!(f32x16[f32; 16]: ln_f32);
+
+        impl_unary!(f64x2[f64; 2]: ln_f64);
+        impl_unary!(f64x4[f64; 4]: ln_f64);
+        impl_unary!(f64x8[f64; 8]: ln_f64);
+    } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+        use sleef_sys::*;
+        cfg_if! {
+            if #[cfg(target_feature = "avx2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10avx2128);
+                impl_unary!(f32x16[h => f32x8]: Sleef_logf8_u10avx2);
+                impl_unary!(f64x8[h => f64x4]: Sleef_logd4_u10avx2);
+
+                impl_unary!(f32x4: Sleef_logf4_u10avx2128);
+                impl_unary!(f32x8: Sleef_logf8_u10avx2);
+                impl_unary!(f64x2: Sleef_logd2_u10avx2128);
+                impl_unary!(f64x4: Sleef_logd4_u10avx2);
+            } else if #[cfg(target_feature = "avx")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10sse4);
+                impl_unary!(f32x16[h => f32x8]: Sleef_logf8_u10avx);
+                impl_unary!(f64x8[h => f64x4]: Sleef_logd4_u10avx);
+
+                impl_unary!(f32x4: Sleef_logf4_u10sse4);
+                impl_unary!(f32x8: Sleef_logf8_u10avx);
+                impl_unary!(f64x2: Sleef_logd2_u10sse4);
+                impl_unary!(f64x4: Sleef_logd4_u10avx);
+            } else if #[cfg(target_feature = "sse4.2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10sse4);
+                impl_unary!(f32x16[q => f32x4]: Sleef_logf4_u10sse4);
+                impl_unary!(f64x8[q => f64x2]: Sleef_logd2_u10sse4);
+
+                impl_unary!(f32x4: Sleef_logf4_u10sse4);
+                impl_unary!(f32x8[h => f32x4]: Sleef_logf4_u10sse4);
+                impl_unary!(f64x2: Sleef_logd2_u10sse4);
+                impl_unary!(f64x4[h => f64x2]: Sleef_logd2_u10sse4);
+            } else if #[cfg(target_feature = "sse2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10sse2);
+                impl_unary!(f32x16[q => f32x4]: Sleef_logf4_u10sse2);
+                impl_unary!(f64x8[q => f64x2]: Sleef_logd2_u10sse2);
+
+                impl_unary!(f32x4: Sleef_logf4_u10sse2);
+                impl_unary!(f32x8[h => f32x4]: Sleef_logf4_u10sse2);
+                impl_unary!(f64x2: Sleef_logd2_u10sse2);
+                impl_unary!(f64x4[h => f64x2]: Sleef_logd2_u10sse2);
+            } else {
+                impl_unary!(f32x2[f32; 2]: ln_f32);
+                impl_unary!(f32x16: ln_v16f32);
+                impl_unary!(f64x8: ln_v8f64);
+
+                impl_unary!(f32x4: ln_v4f32);
+                impl_unary!(f32x8: ln_v8f32);
+                impl_unary!(f64x2: ln_v2f64);
+                impl_unary!(f64x4: ln_v4f64);
+            }
+        }
+    } else {
+        impl_unary!(f32x2[f32; 2]: ln_f32);
+        impl_unary!(f32x4: ln_v4f32);
+        impl_unary!(f32x8: ln_v8f32);
+        impl_unary!(f32x16: ln_v16f32);
+
+        impl_unary!(f64x2: ln_v2f64);
+        impl_unary!(f64x4: ln_v4f64);
+        impl_unary!(f64x8: ln_v8f64);
+    }
+}
diff 
--git a/vendor/packed_simd/src/codegen/math/float/macros.rs b/vendor/packed_simd/src/codegen/math/float/macros.rs new file mode 100644 index 000000000..8daee1afe --- /dev/null +++ b/vendor/packed_simd/src/codegen/math/float/macros.rs @@ -0,0 +1,470 @@ +//! Utility macros +#![allow(unused)] + +macro_rules! impl_unary_ { + // implementation mapping 1:1 + (vec | $trait_id:ident, $trait_method:ident, $vec_id:ident, + $fun:ident) => { + impl $trait_id for $vec_id { + #[inline] + fn $trait_method(self) -> Self { + unsafe { + use crate::mem::transmute; + transmute($fun(transmute(self))) + } + } + } + }; + // implementation mapping 1:1 for when `$fun` is a generic function + // like some of the fp math rustc intrinsics (e.g. `fn fun(x: T) -> T`). + (gen | $trait_id:ident, $trait_method:ident, $vec_id:ident, + $fun:ident) => { + impl $trait_id for $vec_id { + #[inline] + fn $trait_method(self) -> Self { + unsafe { + use crate::mem::transmute; + transmute($fun(self.0)) + } + } + } + }; + (scalar | $trait_id:ident, $trait_method:ident, + $vec_id:ident, [$sid:ident; $scount:expr], $fun:ident) => { + impl $trait_id for $vec_id { + #[inline] + fn $trait_method(self) -> Self { + unsafe { + union U { + vec: $vec_id, + scalars: [$sid; $scount], + } + let mut scalars = U { vec: self }.scalars; + for i in &mut scalars { + *i = $fun(*i); + } + U { scalars }.vec + } + } + } + }; + // implementation calling fun twice on each of the vector halves: + (halves | $trait_id:ident, $trait_method:ident, $vec_id:ident, + $vech_id:ident, $fun:ident) => { + impl $trait_id for $vec_id { + #[inline] + fn $trait_method(self) -> Self { + unsafe { + use crate::mem::transmute; + union U { + vec: $vec_id, + halves: [$vech_id; 2], + } + + let mut halves = U { vec: self }.halves; + + *halves.get_unchecked_mut(0) = transmute($fun(transmute(*halves.get_unchecked(0)))); + *halves.get_unchecked_mut(1) = transmute($fun(transmute(*halves.get_unchecked(1)))); + + U { halves }.vec + } + } + } + }; + // implementation calling fun four times on each of the vector quarters: + (quarter | $trait_id:ident, $trait_method:ident, $vec_id:ident, + $vecq_id:ident, $fun:ident) => { + impl $trait_id for $vec_id { + #[inline] + fn $trait_method(self) -> Self { + unsafe { + use crate::mem::transmute; + union U { + vec: $vec_id, + quarters: [$vecq_id; 4], + } + + let mut quarters = U { vec: self }.quarters; + + *quarters.get_unchecked_mut(0) = transmute($fun(transmute(*quarters.get_unchecked(0)))); + *quarters.get_unchecked_mut(1) = transmute($fun(transmute(*quarters.get_unchecked(1)))); + *quarters.get_unchecked_mut(2) = transmute($fun(transmute(*quarters.get_unchecked(2)))); + *quarters.get_unchecked_mut(3) = transmute($fun(transmute(*quarters.get_unchecked(3)))); + + U { quarters }.vec + } + } + } + }; + // implementation calling fun once on a vector twice as large: + (twice | $trait_id:ident, $trait_method:ident, $vec_id:ident, + $vect_id:ident, $fun:ident) => { + impl $trait_id for $vec_id { + #[inline] + fn $trait_method(self) -> Self { + unsafe { + use crate::mem::{transmute, uninitialized}; + + union U { + vec: [$vec_id; 2], + twice: $vect_id, + } + + let twice = U { vec: [self, uninitialized()] }.twice; + let twice = transmute($fun(transmute(twice))); + + *(U { twice }.vec.get_unchecked(0)) + } + } + } + }; +} + +macro_rules! gen_unary_impl_table { + ($trait_id:ident, $trait_method:ident) => { + macro_rules! 
impl_unary {
+            ($vid:ident: $fun:ident) => {
+                impl_unary_!(vec | $trait_id, $trait_method, $vid, $fun);
+            };
+            ($vid:ident[g]: $fun:ident) => {
+                impl_unary_!(gen | $trait_id, $trait_method, $vid, $fun);
+            };
+            ($vid:ident[$sid:ident; $sc:expr]: $fun:ident) => {
+                impl_unary_!(scalar | $trait_id, $trait_method, $vid, [$sid; $sc], $fun);
+            };
+            ($vid:ident[s]: $fun:ident) => {
+                impl_unary_!(scalar | $trait_id, $trait_method, $vid, $fun);
+            };
+            ($vid:ident[h => $vid_h:ident]: $fun:ident) => {
+                impl_unary_!(halves | $trait_id, $trait_method, $vid, $vid_h, $fun);
+            };
+            ($vid:ident[q => $vid_q:ident]: $fun:ident) => {
+                impl_unary_!(quarter | $trait_id, $trait_method, $vid, $vid_q, $fun);
+            };
+            ($vid:ident[t => $vid_t:ident]: $fun:ident) => {
+                impl_unary_!(twice | $trait_id, $trait_method, $vid, $vid_t, $fun);
+            };
+        }
+    };
+}
+
+macro_rules! impl_tertiary_ {
+    // implementation mapping 1:1
+    (vec | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+     $fun:ident) => {
+        impl $trait_id for $vec_id {
+            #[inline]
+            fn $trait_method(self, y: Self, z: Self) -> Self {
+                unsafe {
+                    use crate::mem::transmute;
+                    transmute($fun(transmute(self), transmute(y), transmute(z)))
+                }
+            }
+        }
+    };
+    (scalar | $trait_id:ident, $trait_method:ident,
+     $vec_id:ident, [$sid:ident; $scount:expr], $fun:ident) => {
+        impl $trait_id for $vec_id {
+            #[inline]
+            fn $trait_method(self, y: Self, z: Self) -> Self {
+                unsafe {
+                    union U {
+                        vec: $vec_id,
+                        scalars: [$sid; $scount],
+                    }
+                    let mut x = U { vec: self }.scalars;
+                    let y = U { vec: y }.scalars;
+                    let z = U { vec: z }.scalars;
+                    for (x, (y, z)) in x.iter_mut().zip(y.iter().zip(&z)) {
+                        *x = $fun(*x, *y, *z);
+                    }
+                    U { scalars: x }.vec
+                }
+            }
+        }
+    };
+    // implementation calling fun twice on each of the vector halves:
+    (halves | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+     $vech_id:ident, $fun:ident) => {
+        impl $trait_id for $vec_id {
+            #[inline]
+            fn $trait_method(self, y: Self, z: Self) -> Self {
+                unsafe {
+                    use crate::mem::transmute;
+                    union U {
+                        vec: $vec_id,
+                        halves: [$vech_id; 2],
+                    }
+
+                    let mut x_halves = U { vec: self }.halves;
+                    let y_halves = U { vec: y }.halves;
+                    let z_halves = U { vec: z }.halves;
+
+                    *x_halves.get_unchecked_mut(0) = transmute($fun(
+                        transmute(*x_halves.get_unchecked(0)),
+                        transmute(*y_halves.get_unchecked(0)),
+                        transmute(*z_halves.get_unchecked(0)),
+                    ));
+                    *x_halves.get_unchecked_mut(1) = transmute($fun(
+                        transmute(*x_halves.get_unchecked(1)),
+                        transmute(*y_halves.get_unchecked(1)),
+                        transmute(*z_halves.get_unchecked(1)),
+                    ));
+
+                    U { halves: x_halves }.vec
+                }
+            }
+        }
+    };
+    // implementation calling fun four times on each of the vector quarters:
+    (quarter | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+     $vecq_id:ident, $fun:ident) => {
+        impl $trait_id for $vec_id {
+            #[inline]
+            fn $trait_method(self, y: Self, z: Self) -> Self {
+                unsafe {
+                    use crate::mem::transmute;
+                    union U {
+                        vec: $vec_id,
+                        quarters: [$vecq_id; 4],
+                    }
+
+                    let mut x_quarters = U { vec: self }.quarters;
+                    let y_quarters = U { vec: y }.quarters;
+                    let z_quarters = U { vec: z }.quarters;
+
+                    *x_quarters.get_unchecked_mut(0) = transmute($fun(
+                        transmute(*x_quarters.get_unchecked(0)),
+                        transmute(*y_quarters.get_unchecked(0)),
+                        transmute(*z_quarters.get_unchecked(0)),
+                    ));
+
+                    *x_quarters.get_unchecked_mut(1) = transmute($fun(
+                        transmute(*x_quarters.get_unchecked(1)),
+                        transmute(*y_quarters.get_unchecked(1)),
+                        transmute(*z_quarters.get_unchecked(1)),
+                    ));
+
+                    *x_quarters.get_unchecked_mut(2) = transmute($fun(
transmute(*x_quarters.get_unchecked(2)), + transmute(*y_quarters.get_unchecked(2)), + transmute(*z_quarters.get_unchecked(2)), + )); + + *x_quarters.get_unchecked_mut(3) = transmute($fun( + transmute(*x_quarters.get_unchecked(3)), + transmute(*y_quarters.get_unchecked(3)), + transmute(*z_quarters.get_unchecked(3)), + )); + + U { quarters: x_quarters }.vec + } + } + } + }; + // implementation calling fun once on a vector twice as large: + (twice | $trait_id:ident, $trait_method:ident, $vec_id:ident, + $vect_id:ident, $fun:ident) => { + impl $trait_id for $vec_id { + #[inline] + fn $trait_method(self, y: Self, z: Self) -> Self { + unsafe { + use crate::mem::{transmute, uninitialized}; + + union U { + vec: [$vec_id; 2], + twice: $vect_id, + } + + let x_twice = U { vec: [self, uninitialized()] }.twice; + let y_twice = U { vec: [y, uninitialized()] }.twice; + let z_twice = U { vec: [z, uninitialized()] }.twice; + let twice: $vect_id = + transmute($fun(transmute(x_twice), transmute(y_twice), transmute(z_twice))); + + *(U { twice }.vec.get_unchecked(0)) + } + } + } + }; +} + +macro_rules! gen_tertiary_impl_table { + ($trait_id:ident, $trait_method:ident) => { + macro_rules! impl_tertiary { + ($vid:ident: $fun:ident) => { + impl_tertiary_!(vec | $trait_id, $trait_method, $vid, $fun); + }; + ($vid:ident[$sid:ident; $sc:expr]: $fun:ident) => { + impl_tertiary_!(scalar | $trait_id, $trait_method, $vid, [$sid; $sc], $fun); + }; + ($vid:ident[s]: $fun:ident) => { + impl_tertiary_!(scalar | $trait_id, $trait_method, $vid, $fun); + }; + ($vid:ident[h => $vid_h:ident]: $fun:ident) => { + impl_tertiary_!(halves | $trait_id, $trait_method, $vid, $vid_h, $fun); + }; + ($vid:ident[q => $vid_q:ident]: $fun:ident) => { + impl_tertiary_!(quarter | $trait_id, $trait_method, $vid, $vid_q, $fun); + }; + ($vid:ident[t => $vid_t:ident]: $fun:ident) => { + impl_tertiary_!(twice | $trait_id, $trait_method, $vid, $vid_t, $fun); + }; + } + }; +} + +macro_rules! 
impl_binary_ { + // implementation mapping 1:1 + (vec | $trait_id:ident, $trait_method:ident, $vec_id:ident, + $fun:ident) => { + impl $trait_id for $vec_id { + #[inline] + fn $trait_method(self, y: Self) -> Self { + unsafe { + use crate::mem::transmute; + transmute($fun(transmute(self), transmute(y))) + } + } + } + }; + (scalar | $trait_id:ident, $trait_method:ident, + $vec_id:ident, [$sid:ident; $scount:expr], $fun:ident) => { + impl $trait_id for $vec_id { + #[inline] + fn $trait_method(self, y: Self) -> Self { + unsafe { + union U { + vec: $vec_id, + scalars: [$sid; $scount], + } + let mut x = U { vec: self }.scalars; + let y = U { vec: y }.scalars; + for (x, y) in x.iter_mut().zip(&y) { + *x = $fun(*x, *y); + } + U { scalars: x }.vec + } + } + } + }; + // implementation calling fun twice on each of the vector halves: + (halves | $trait_id:ident, $trait_method:ident, $vec_id:ident, + $vech_id:ident, $fun:ident) => { + impl $trait_id for $vec_id { + #[inline] + fn $trait_method(self, y: Self) -> Self { + unsafe { + use crate::mem::transmute; + union U { + vec: $vec_id, + halves: [$vech_id; 2], + } + + let mut x_halves = U { vec: self }.halves; + let y_halves = U { vec: y }.halves; + + *x_halves.get_unchecked_mut(0) = transmute($fun( + transmute(*x_halves.get_unchecked(0)), + transmute(*y_halves.get_unchecked(0)), + )); + *x_halves.get_unchecked_mut(1) = transmute($fun( + transmute(*x_halves.get_unchecked(1)), + transmute(*y_halves.get_unchecked(1)), + )); + + U { halves: x_halves }.vec + } + } + } + }; + // implementation calling fun four times on each of the vector quarters: + (quarter | $trait_id:ident, $trait_method:ident, $vec_id:ident, + $vecq_id:ident, $fun:ident) => { + impl $trait_id for $vec_id { + #[inline] + fn $trait_method(self, y: Self) -> Self { + unsafe { + use crate::mem::transmute; + union U { + vec: $vec_id, + quarters: [$vecq_id; 4], + } + + let mut x_quarters = U { vec: self }.quarters; + let y_quarters = U { vec: y }.quarters; + + *x_quarters.get_unchecked_mut(0) = transmute($fun( + transmute(*x_quarters.get_unchecked(0)), + transmute(*y_quarters.get_unchecked(0)), + )); + + *x_quarters.get_unchecked_mut(1) = transmute($fun( + transmute(*x_quarters.get_unchecked(1)), + transmute(*y_quarters.get_unchecked(1)), + )); + + *x_quarters.get_unchecked_mut(2) = transmute($fun( + transmute(*x_quarters.get_unchecked(2)), + transmute(*y_quarters.get_unchecked(2)), + )); + + *x_quarters.get_unchecked_mut(3) = transmute($fun( + transmute(*x_quarters.get_unchecked(3)), + transmute(*y_quarters.get_unchecked(3)), + )); + + U { quarters: x_quarters }.vec + } + } + } + }; + // implementation calling fun once on a vector twice as large: + (twice | $trait_id:ident, $trait_method:ident, $vec_id:ident, + $vect_id:ident, $fun:ident) => { + impl $trait_id for $vec_id { + #[inline] + fn $trait_method(self, y: Self) -> Self { + unsafe { + use crate::mem::{transmute, uninitialized}; + + union U { + vec: [$vec_id; 2], + twice: $vect_id, + } + + let x_twice = U { vec: [self, uninitialized()] }.twice; + let y_twice = U { vec: [y, uninitialized()] }.twice; + let twice: $vect_id = transmute($fun(transmute(x_twice), transmute(y_twice))); + + *(U { twice }.vec.get_unchecked(0)) + } + } + } + }; +} + +macro_rules! gen_binary_impl_table { + ($trait_id:ident, $trait_method:ident) => { + macro_rules! 
impl_binary { + ($vid:ident: $fun:ident) => { + impl_binary_!(vec | $trait_id, $trait_method, $vid, $fun); + }; + ($vid:ident[$sid:ident; $sc:expr]: $fun:ident) => { + impl_binary_!(scalar | $trait_id, $trait_method, $vid, [$sid; $sc], $fun); + }; + ($vid:ident[s]: $fun:ident) => { + impl_binary_!(scalar | $trait_id, $trait_method, $vid, $fun); + }; + ($vid:ident[h => $vid_h:ident]: $fun:ident) => { + impl_binary_!(halves | $trait_id, $trait_method, $vid, $vid_h, $fun); + }; + ($vid:ident[q => $vid_q:ident]: $fun:ident) => { + impl_binary_!(quarter | $trait_id, $trait_method, $vid, $vid_q, $fun); + }; + ($vid:ident[t => $vid_t:ident]: $fun:ident) => { + impl_binary_!(twice | $trait_id, $trait_method, $vid, $vid_t, $fun); + }; + } + }; +} diff --git a/vendor/packed_simd/src/codegen/math/float/mul_add.rs b/vendor/packed_simd/src/codegen/math/float/mul_add.rs new file mode 100644 index 000000000..d37f30fa8 --- /dev/null +++ b/vendor/packed_simd/src/codegen/math/float/mul_add.rs @@ -0,0 +1,109 @@ +//! Vertical floating-point `mul_add` +#![allow(unused)] +use crate::*; + +// FIXME: 64-bit 1 element mul_add + +pub(crate) trait MulAdd { + fn mul_add(self, y: Self, z: Self) -> Self; +} + +#[cfg(not(target_arch = "s390x"))] +#[allow(improper_ctypes)] +extern "C" { + #[link_name = "llvm.fma.v2f32"] + fn fma_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2; + #[link_name = "llvm.fma.v4f32"] + fn fma_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4; + #[link_name = "llvm.fma.v8f32"] + fn fma_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8; + #[link_name = "llvm.fma.v16f32"] + fn fma_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16; + /* FIXME 64-bit single elem vectors + #[link_name = "llvm.fma.v1f64"] + fn fma_v1f64(x: f64x1, y: f64x1, z: f64x1) -> f64x1; + */ + #[link_name = "llvm.fma.v2f64"] + fn fma_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2; + #[link_name = "llvm.fma.v4f64"] + fn fma_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4; + #[link_name = "llvm.fma.v8f64"] + fn fma_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8; +} + +gen_tertiary_impl_table!(MulAdd, mul_add); + +cfg_if! { + if #[cfg(target_arch = "s390x")] { + // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 + macro_rules! impl_broken { + ($id:ident) => { + impl MulAdd for $id { + #[inline] + fn mul_add(self, y: Self, z: Self) -> Self { + self * y + z + } + } + }; + } + + impl_broken!(f32x2); + impl_broken!(f32x4); + impl_broken!(f32x8); + impl_broken!(f32x16); + + impl_broken!(f64x2); + impl_broken!(f64x4); + impl_broken!(f64x8); + } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { + use sleef_sys::*; + cfg_if! 
{
+            if #[cfg(target_feature = "avx2")] {
+                impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_avx2128);
+                impl_tertiary!(f32x16[h => f32x8]: Sleef_fmaf8_avx2);
+                impl_tertiary!(f64x8[h => f64x4]: Sleef_fmad4_avx2);
+
+                impl_tertiary!(f32x4: Sleef_fmaf4_avx2128);
+                impl_tertiary!(f32x8: Sleef_fmaf8_avx2);
+                impl_tertiary!(f64x2: Sleef_fmad2_avx2128);
+                impl_tertiary!(f64x4: Sleef_fmad4_avx2);
+            } else if #[cfg(target_feature = "avx")] {
+                impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_sse4);
+                impl_tertiary!(f32x16[h => f32x8]: Sleef_fmaf8_avx);
+                impl_tertiary!(f64x8[h => f64x4]: Sleef_fmad4_avx);
+
+                impl_tertiary!(f32x4: Sleef_fmaf4_sse4);
+                impl_tertiary!(f32x8: Sleef_fmaf8_avx);
+                impl_tertiary!(f64x2: Sleef_fmad2_sse4);
+                impl_tertiary!(f64x4: Sleef_fmad4_avx);
+            } else if #[cfg(target_feature = "sse4.2")] {
+                impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_sse4);
+                impl_tertiary!(f32x16[q => f32x4]: Sleef_fmaf4_sse4);
+                impl_tertiary!(f64x8[q => f64x2]: Sleef_fmad2_sse4);
+
+                impl_tertiary!(f32x4: Sleef_fmaf4_sse4);
+                impl_tertiary!(f32x8[h => f32x4]: Sleef_fmaf4_sse4);
+                impl_tertiary!(f64x2: Sleef_fmad2_sse4);
+                impl_tertiary!(f64x4[h => f64x2]: Sleef_fmad2_sse4);
+            } else {
+                impl_tertiary!(f32x2: fma_v2f32);
+                impl_tertiary!(f32x16: fma_v16f32);
+                impl_tertiary!(f64x8: fma_v8f64);
+
+                impl_tertiary!(f32x4: fma_v4f32);
+                impl_tertiary!(f32x8: fma_v8f32);
+                impl_tertiary!(f64x2: fma_v2f64);
+                impl_tertiary!(f64x4: fma_v4f64);
+            }
+        }
+    } else {
+        impl_tertiary!(f32x2: fma_v2f32);
+        impl_tertiary!(f32x4: fma_v4f32);
+        impl_tertiary!(f32x8: fma_v8f32);
+        impl_tertiary!(f32x16: fma_v16f32);
+        // impl_tertiary!(f64x1: fma_v1f64); // FIXME 64-bit single elem vectors
+        impl_tertiary!(f64x2: fma_v2f64);
+        impl_tertiary!(f64x4: fma_v4f64);
+        impl_tertiary!(f64x8: fma_v8f64);
+    }
+}
diff --git a/vendor/packed_simd/src/codegen/math/float/mul_adde.rs b/vendor/packed_simd/src/codegen/math/float/mul_adde.rs
new file mode 100644
index 000000000..c0baeacec
--- /dev/null
+++ b/vendor/packed_simd/src/codegen/math/float/mul_adde.rs
@@ -0,0 +1,60 @@
+//! Approximation for floating-point `mul_add`
+use crate::*;
+
+// FIXME: 64-bit 1 element mul_adde
+
+pub(crate) trait MulAddE {
+    fn mul_adde(self, y: Self, z: Self) -> Self;
+}
+
+#[cfg(not(target_arch = "s390x"))]
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.fmuladd.v2f32"]
+    fn fmuladd_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2;
+    #[link_name = "llvm.fmuladd.v4f32"]
+    fn fmuladd_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4;
+    #[link_name = "llvm.fmuladd.v8f32"]
+    fn fmuladd_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8;
+    #[link_name = "llvm.fmuladd.v16f32"]
+    fn fmuladd_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16;
+    /* FIXME 64-bit single elem vectors
+    #[link_name = "llvm.fmuladd.v1f64"]
+    fn fmuladd_v1f64(x: f64x1, y: f64x1, z: f64x1) -> f64x1;
+    */
+    #[link_name = "llvm.fmuladd.v2f64"]
+    fn fmuladd_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2;
+    #[link_name = "llvm.fmuladd.v4f64"]
+    fn fmuladd_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4;
+    #[link_name = "llvm.fmuladd.v8f64"]
+    fn fmuladd_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8;
+}
+
+macro_rules! 
impl_mul_adde {
+    ($id:ident : $fn:ident) => {
+        impl MulAddE for $id {
+            #[inline]
+            fn mul_adde(self, y: Self, z: Self) -> Self {
+                #[cfg(not(target_arch = "s390x"))]
+                {
+                    use crate::mem::transmute;
+                    unsafe { transmute($fn(transmute(self), transmute(y), transmute(z))) }
+                }
+                #[cfg(target_arch = "s390x")]
+                {
+                    // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+                    self * y + z
+                }
+            }
+        }
+    };
+}
+
+impl_mul_adde!(f32x2: fmuladd_v2f32);
+impl_mul_adde!(f32x4: fmuladd_v4f32);
+impl_mul_adde!(f32x8: fmuladd_v8f32);
+impl_mul_adde!(f32x16: fmuladd_v16f32);
+// impl_mul_adde!(f64x1: fmuladd_v1f64); // FIXME 64-bit single elem vectors
+impl_mul_adde!(f64x2: fmuladd_v2f64);
+impl_mul_adde!(f64x4: fmuladd_v4f64);
+impl_mul_adde!(f64x8: fmuladd_v8f64);
diff --git a/vendor/packed_simd/src/codegen/math/float/powf.rs b/vendor/packed_simd/src/codegen/math/float/powf.rs
new file mode 100644
index 000000000..89ca52e96
--- /dev/null
+++ b/vendor/packed_simd/src/codegen/math/float/powf.rs
@@ -0,0 +1,112 @@
+//! Vertical floating-point `powf`
+#![allow(unused)]
+
+// FIXME 64-bit single elem vectors missing
+
+use crate::*;
+
+pub(crate) trait Powf {
+    fn powf(self, x: Self) -> Self;
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.pow.v2f32"]
+    fn powf_v2f32(x: f32x2, y: f32x2) -> f32x2;
+    #[link_name = "llvm.pow.v4f32"]
+    fn powf_v4f32(x: f32x4, y: f32x4) -> f32x4;
+    #[link_name = "llvm.pow.v8f32"]
+    fn powf_v8f32(x: f32x8, y: f32x8) -> f32x8;
+    #[link_name = "llvm.pow.v16f32"]
+    fn powf_v16f32(x: f32x16, y: f32x16) -> f32x16;
+    /* FIXME 64-bit single elem vectors
+    #[link_name = "llvm.pow.v1f64"]
+    fn powf_v1f64(x: f64x1, y: f64x1) -> f64x1;
+    */
+    #[link_name = "llvm.pow.v2f64"]
+    fn powf_v2f64(x: f64x2, y: f64x2) -> f64x2;
+    #[link_name = "llvm.pow.v4f64"]
+    fn powf_v4f64(x: f64x4, y: f64x4) -> f64x4;
+    #[link_name = "llvm.pow.v8f64"]
+    fn powf_v8f64(x: f64x8, y: f64x8) -> f64x8;
+
+    #[link_name = "llvm.pow.f32"]
+    fn powf_f32(x: f32, y: f32) -> f32;
+    #[link_name = "llvm.pow.f64"]
+    fn powf_f64(x: f64, y: f64) -> f64;
+}
+
+gen_binary_impl_table!(Powf, powf);
+
+cfg_if! {
+    if #[cfg(target_arch = "s390x")] {
+        // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+        impl_binary!(f32x2[f32; 2]: powf_f32);
+        impl_binary!(f32x4[f32; 4]: powf_f32);
+        impl_binary!(f32x8[f32; 8]: powf_f32);
+        impl_binary!(f32x16[f32; 16]: powf_f32);
+
+        impl_binary!(f64x2[f64; 2]: powf_f64);
+        impl_binary!(f64x4[f64; 4]: powf_f64);
+        impl_binary!(f64x8[f64; 8]: powf_f64);
+    } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+        use sleef_sys::*;
+        cfg_if! 
{ + if #[cfg(target_feature = "avx2")] { + impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10avx2128); + impl_binary!(f32x16[h => f32x8]: Sleef_powf8_u10avx2); + impl_binary!(f64x8[h => f64x4]: Sleef_powd4_u10avx2); + + impl_binary!(f32x4: Sleef_powf4_u10avx2128); + impl_binary!(f32x8: Sleef_powf8_u10avx2); + impl_binary!(f64x2: Sleef_powd2_u10avx2128); + impl_binary!(f64x4: Sleef_powd4_u10avx2); + } else if #[cfg(target_feature = "avx")] { + impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10sse4); + impl_binary!(f32x16[h => f32x8]: Sleef_powf8_u10avx); + impl_binary!(f64x8[h => f64x4]: Sleef_powd4_u10avx); + + impl_binary!(f32x4: Sleef_powf4_u10sse4); + impl_binary!(f32x8: Sleef_powf8_u10avx); + impl_binary!(f64x2: Sleef_powd2_u10sse4); + impl_binary!(f64x4: Sleef_powd4_u10avx); + } else if #[cfg(target_feature = "sse4.2")] { + impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10sse4); + impl_binary!(f32x16[q => f32x4]: Sleef_powf4_u10sse4); + impl_binary!(f64x8[q => f64x2]: Sleef_powd2_u10sse4); + + impl_binary!(f32x4: Sleef_powf4_u10sse4); + impl_binary!(f32x8[h => f32x4]: Sleef_powf4_u10sse4); + impl_binary!(f64x2: Sleef_powd2_u10sse4); + impl_binary!(f64x4[h => f64x2]: Sleef_powd2_u10sse4); + } else if #[cfg(target_feature = "sse2")] { + impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10sse2); + impl_binary!(f32x16[q => f32x4]: Sleef_powf4_u10sse2); + impl_binary!(f64x8[q => f64x2]: Sleef_powd2_u10sse2); + + impl_binary!(f32x4: Sleef_powf4_u10sse2); + impl_binary!(f32x8[h => f32x4]: Sleef_powf4_u10sse2); + impl_binary!(f64x2: Sleef_powd2_u10sse2); + impl_binary!(f64x4[h => f64x2]: Sleef_powd2_u10sse2); + } else { + impl_binary!(f32x2[f32; 2]: powf_f32); + impl_binary!(f32x4: powf_v4f32); + impl_binary!(f32x8: powf_v8f32); + impl_binary!(f32x16: powf_v16f32); + + impl_binary!(f64x2: powf_v2f64); + impl_binary!(f64x4: powf_v4f64); + impl_binary!(f64x8: powf_v8f64); + } + } + } else { + impl_binary!(f32x2[f32; 2]: powf_f32); + impl_binary!(f32x4: powf_v4f32); + impl_binary!(f32x8: powf_v8f32); + impl_binary!(f32x16: powf_v16f32); + + impl_binary!(f64x2: powf_v2f64); + impl_binary!(f64x4: powf_v4f64); + impl_binary!(f64x8: powf_v8f64); + } +} diff --git a/vendor/packed_simd/src/codegen/math/float/sin.rs b/vendor/packed_simd/src/codegen/math/float/sin.rs new file mode 100644 index 000000000..d88141590 --- /dev/null +++ b/vendor/packed_simd/src/codegen/math/float/sin.rs @@ -0,0 +1,103 @@ +//! Vertical floating-point `sin` +#![allow(unused)] + +// FIXME 64-bit 1 elem vectors sin + +use crate::*; + +pub(crate) trait Sin { + fn sin(self) -> Self; +} + +#[allow(improper_ctypes)] +extern "C" { + #[link_name = "llvm.sin.v2f32"] + fn sin_v2f32(x: f32x2) -> f32x2; + #[link_name = "llvm.sin.v4f32"] + fn sin_v4f32(x: f32x4) -> f32x4; + #[link_name = "llvm.sin.v8f32"] + fn sin_v8f32(x: f32x8) -> f32x8; + #[link_name = "llvm.sin.v16f32"] + fn sin_v16f32(x: f32x16) -> f32x16; + /* FIXME 64-bit single elem vectors + #[link_name = "llvm.sin.v1f64"] + fn sin_v1f64(x: f64x1) -> f64x1; + */ + #[link_name = "llvm.sin.v2f64"] + fn sin_v2f64(x: f64x2) -> f64x2; + #[link_name = "llvm.sin.v4f64"] + fn sin_v4f64(x: f64x4) -> f64x4; + #[link_name = "llvm.sin.v8f64"] + fn sin_v8f64(x: f64x8) -> f64x8; + + #[link_name = "llvm.sin.f32"] + fn sin_f32(x: f32) -> f32; + #[link_name = "llvm.sin.f64"] + fn sin_f64(x: f64) -> f64; +} + +gen_unary_impl_table!(Sin, sin); + +cfg_if! 
{
+    if #[cfg(target_arch = "s390x")] {
+        // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+        impl_unary!(f32x2[f32; 2]: sin_f32);
+        impl_unary!(f32x4[f32; 4]: sin_f32);
+        impl_unary!(f32x8[f32; 8]: sin_f32);
+        impl_unary!(f32x16[f32; 16]: sin_f32);
+
+        impl_unary!(f64x2[f64; 2]: sin_f64);
+        impl_unary!(f64x4[f64; 4]: sin_f64);
+        impl_unary!(f64x8[f64; 8]: sin_f64);
+    } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+        use sleef_sys::*;
+        cfg_if! {
+            if #[cfg(target_feature = "avx2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_sinf4_u10avx2128);
+                impl_unary!(f32x16[h => f32x8]: Sleef_sinf8_u10avx2);
+                impl_unary!(f64x8[h => f64x4]: Sleef_sind4_u10avx2);
+
+                impl_unary!(f32x4: Sleef_sinf4_u10avx2128);
+                impl_unary!(f32x8: Sleef_sinf8_u10avx2);
+                impl_unary!(f64x2: Sleef_sind2_u10avx2128);
+                impl_unary!(f64x4: Sleef_sind4_u10avx2);
+            } else if #[cfg(target_feature = "avx")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_sinf4_u10sse4);
+                impl_unary!(f32x16[h => f32x8]: Sleef_sinf8_u10avx);
+                impl_unary!(f64x8[h => f64x4]: Sleef_sind4_u10avx);
+
+                impl_unary!(f32x4: Sleef_sinf4_u10sse4);
+                impl_unary!(f32x8: Sleef_sinf8_u10avx);
+                impl_unary!(f64x2: Sleef_sind2_u10sse4);
+                impl_unary!(f64x4: Sleef_sind4_u10avx);
+            } else if #[cfg(target_feature = "sse4.2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_sinf4_u10sse4);
+                impl_unary!(f32x16[q => f32x4]: Sleef_sinf4_u10sse4);
+                impl_unary!(f64x8[q => f64x2]: Sleef_sind2_u10sse4);
+
+                impl_unary!(f32x4: Sleef_sinf4_u10sse4);
+                impl_unary!(f32x8[h => f32x4]: Sleef_sinf4_u10sse4);
+                impl_unary!(f64x2: Sleef_sind2_u10sse4);
+                impl_unary!(f64x4[h => f64x2]: Sleef_sind2_u10sse4);
+            } else {
+                impl_unary!(f32x2[f32; 2]: sin_f32);
+                impl_unary!(f32x16: sin_v16f32);
+                impl_unary!(f64x8: sin_v8f64);
+
+                impl_unary!(f32x4: sin_v4f32);
+                impl_unary!(f32x8: sin_v8f32);
+                impl_unary!(f64x2: sin_v2f64);
+                impl_unary!(f64x4: sin_v4f64);
+            }
+        }
+    } else {
+        impl_unary!(f32x2[f32; 2]: sin_f32);
+        impl_unary!(f32x4: sin_v4f32);
+        impl_unary!(f32x8: sin_v8f32);
+        impl_unary!(f32x16: sin_v16f32);
+
+        impl_unary!(f64x2: sin_v2f64);
+        impl_unary!(f64x4: sin_v4f64);
+        impl_unary!(f64x8: sin_v8f64);
+    }
+}
diff --git a/vendor/packed_simd/src/codegen/math/float/sin_cos_pi.rs b/vendor/packed_simd/src/codegen/math/float/sin_cos_pi.rs
new file mode 100644
index 000000000..b283d1111
--- /dev/null
+++ b/vendor/packed_simd/src/codegen/math/float/sin_cos_pi.rs
@@ -0,0 +1,188 @@
+//! Vertical floating-point `sin_cos_pi`
+#![allow(unused)]
+
+// FIXME 64-bit 1 elem vectors sin_cos_pi
+
+use crate::*;
+
+pub(crate) trait SinCosPi: Sized {
+    type Output;
+    fn sin_cos_pi(self) -> Self::Output;
+}
+
+macro_rules! impl_def {
+    ($vid:ident, $PI:path) => {
+        impl SinCosPi for $vid {
+            type Output = (Self, Self);
+            #[inline]
+            fn sin_cos_pi(self) -> Self::Output {
+                let v = self * Self::splat($PI);
+                (v.sin(), v.cos())
+            }
+        }
+    };
+}
+
+macro_rules! impl_def32 {
+    ($vid:ident) => {
+        impl_def!($vid, crate::f32::consts::PI);
+    };
+}
+macro_rules! impl_def64 {
+    ($vid:ident) => {
+        impl_def!($vid, crate::f64::consts::PI);
+    };
+}
+
+macro_rules! 
impl_unary_t { + ($vid:ident: $fun:ident) => { + impl SinCosPi for $vid { + type Output = (Self, Self); + fn sin_cos_pi(self) -> Self::Output { + unsafe { + use crate::mem::transmute; + transmute($fun(transmute(self))) + } + } + } + }; + ($vid:ident[t => $vid_t:ident]: $fun:ident) => { + impl SinCosPi for $vid { + type Output = (Self, Self); + fn sin_cos_pi(self) -> Self::Output { + unsafe { + use crate::mem::{transmute, uninitialized}; + + union U { + vec: [$vid; 2], + twice: $vid_t, + } + + let twice = U { vec: [self, uninitialized()] }.twice; + let twice = transmute($fun(transmute(twice))); + + union R { + twice: ($vid_t, $vid_t), + vecs: ([$vid; 2], [$vid; 2]), + } + let r = R { twice }.vecs; + (*r.0.get_unchecked(0), *r.0.get_unchecked(1)) + } + } + } + }; + ($vid:ident[h => $vid_h:ident]: $fun:ident) => { + impl SinCosPi for $vid { + type Output = (Self, Self); + fn sin_cos_pi(self) -> Self::Output { + unsafe { + use crate::mem::transmute; + + union U { + vec: $vid, + halves: [$vid_h; 2], + } + + let halves = U { vec: self }.halves; + + let res_0: ($vid_h, $vid_h) = transmute($fun(transmute(*halves.get_unchecked(0)))); + let res_1: ($vid_h, $vid_h) = transmute($fun(transmute(*halves.get_unchecked(1)))); + + union R { + result: ($vid, $vid), + halves: ([$vid_h; 2], [$vid_h; 2]), + } + R { halves: ([res_0.0, res_1.0], [res_0.1, res_1.1]) }.result + } + } + } + }; + ($vid:ident[q => $vid_q:ident]: $fun:ident) => { + impl SinCosPi for $vid { + type Output = (Self, Self); + fn sin_cos_pi(self) -> Self::Output { + unsafe { + use crate::mem::transmute; + + union U { + vec: $vid, + quarters: [$vid_q; 4], + } + + let quarters = U { vec: self }.quarters; + + let res_0: ($vid_q, $vid_q) = transmute($fun(transmute(*quarters.get_unchecked(0)))); + let res_1: ($vid_q, $vid_q) = transmute($fun(transmute(*quarters.get_unchecked(1)))); + let res_2: ($vid_q, $vid_q) = transmute($fun(transmute(*quarters.get_unchecked(2)))); + let res_3: ($vid_q, $vid_q) = transmute($fun(transmute(*quarters.get_unchecked(3)))); + + union R { + result: ($vid, $vid), + quarters: ([$vid_q; 4], [$vid_q; 4]), + } + R { + quarters: ( + [res_0.0, res_1.0, res_2.0, res_3.0], + [res_0.1, res_1.1, res_2.1, res_3.1], + ), + } + .result + } + } + } + }; +} + +cfg_if! { + if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { + use sleef_sys::*; + cfg_if! 
{ + if #[cfg(target_feature = "avx2")] { + impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05avx2128); + impl_unary_t!(f32x16[h => f32x8]: Sleef_sincospif8_u05avx2); + impl_unary_t!(f64x8[h => f64x4]: Sleef_sincospid4_u05avx2); + + impl_unary_t!(f32x4: Sleef_sincospif4_u05avx2128); + impl_unary_t!(f32x8: Sleef_sincospif8_u05avx2); + impl_unary_t!(f64x2: Sleef_sincospid2_u05avx2128); + impl_unary_t!(f64x4: Sleef_sincospid4_u05avx2); + } else if #[cfg(target_feature = "avx")] { + impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05sse4); + impl_unary_t!(f32x16[h => f32x8]: Sleef_sincospif8_u05avx); + impl_unary_t!(f64x8[h => f64x4]: Sleef_sincospid4_u05avx); + + impl_unary_t!(f32x4: Sleef_sincospif4_u05sse4); + impl_unary_t!(f32x8: Sleef_sincospif8_u05avx); + impl_unary_t!(f64x2: Sleef_sincospid2_u05sse4); + impl_unary_t!(f64x4: Sleef_sincospid4_u05avx); + } else if #[cfg(target_feature = "sse4.2")] { + impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05sse4); + impl_unary_t!(f32x16[q => f32x4]: Sleef_sincospif4_u05sse4); + impl_unary_t!(f64x8[q => f64x2]: Sleef_sincospid2_u05sse4); + + impl_unary_t!(f32x4: Sleef_sincospif4_u05sse4); + impl_unary_t!(f32x8[h => f32x4]: Sleef_sincospif4_u05sse4); + impl_unary_t!(f64x2: Sleef_sincospid2_u05sse4); + impl_unary_t!(f64x4[h => f64x2]: Sleef_sincospid2_u05sse4); + } else { + impl_def32!(f32x2); + impl_def32!(f32x4); + impl_def32!(f32x8); + impl_def32!(f32x16); + + impl_def64!(f64x2); + impl_def64!(f64x4); + impl_def64!(f64x8); + } + } + } else { + impl_def32!(f32x2); + impl_def32!(f32x4); + impl_def32!(f32x8); + impl_def32!(f32x16); + + impl_def64!(f64x2); + impl_def64!(f64x4); + impl_def64!(f64x8); + } +} diff --git a/vendor/packed_simd/src/codegen/math/float/sin_pi.rs b/vendor/packed_simd/src/codegen/math/float/sin_pi.rs new file mode 100644 index 000000000..0c8f6bb12 --- /dev/null +++ b/vendor/packed_simd/src/codegen/math/float/sin_pi.rs @@ -0,0 +1,87 @@ +//! Vertical floating-point `sin_pi` +#![allow(unused)] + +// FIXME 64-bit 1 elem vectors sin_pi + +use crate::*; + +pub(crate) trait SinPi { + fn sin_pi(self) -> Self; +} + +gen_unary_impl_table!(SinPi, sin_pi); + +macro_rules! impl_def { + ($vid:ident, $PI:path) => { + impl SinPi for $vid { + #[inline] + fn sin_pi(self) -> Self { + (self * Self::splat($PI)).sin() + } + } + }; +} +macro_rules! impl_def32 { + ($vid:ident) => { + impl_def!($vid, crate::f32::consts::PI); + }; +} +macro_rules! impl_def64 { + ($vid:ident) => { + impl_def!($vid, crate::f64::consts::PI); + }; +} + +cfg_if! { + if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { + use sleef_sys::*; + cfg_if! 
{
+            if #[cfg(target_feature = "avx2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_sinpif4_u05avx2128);
+                impl_unary!(f32x16[h => f32x8]: Sleef_sinpif8_u05avx2);
+                impl_unary!(f64x8[h => f64x4]: Sleef_sinpid4_u05avx2);
+
+                impl_unary!(f32x4: Sleef_sinpif4_u05avx2128);
+                impl_unary!(f32x8: Sleef_sinpif8_u05avx2);
+                impl_unary!(f64x2: Sleef_sinpid2_u05avx2128);
+                impl_unary!(f64x4: Sleef_sinpid4_u05avx2);
+            } else if #[cfg(target_feature = "avx")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_sinpif4_u05sse4);
+                impl_unary!(f32x16[h => f32x8]: Sleef_sinpif8_u05avx);
+                impl_unary!(f64x8[h => f64x4]: Sleef_sinpid4_u05avx);
+
+                impl_unary!(f32x4: Sleef_sinpif4_u05sse4);
+                impl_unary!(f32x8: Sleef_sinpif8_u05avx);
+                impl_unary!(f64x2: Sleef_sinpid2_u05sse4);
+                impl_unary!(f64x4: Sleef_sinpid4_u05avx);
+            } else if #[cfg(target_feature = "sse4.2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_sinpif4_u05sse4);
+                impl_unary!(f32x16[q => f32x4]: Sleef_sinpif4_u05sse4);
+                impl_unary!(f64x8[q => f64x2]: Sleef_sinpid2_u05sse4);
+
+                impl_unary!(f32x4: Sleef_sinpif4_u05sse4);
+                impl_unary!(f32x8[h => f32x4]: Sleef_sinpif4_u05sse4);
+                impl_unary!(f64x2: Sleef_sinpid2_u05sse4);
+                impl_unary!(f64x4[h => f64x2]: Sleef_sinpid2_u05sse4);
+            } else {
+                impl_def32!(f32x2);
+                impl_def32!(f32x4);
+                impl_def32!(f32x8);
+                impl_def32!(f32x16);
+
+                impl_def64!(f64x2);
+                impl_def64!(f64x4);
+                impl_def64!(f64x8);
+            }
+        }
+    } else {
+        impl_def32!(f32x2);
+        impl_def32!(f32x4);
+        impl_def32!(f32x8);
+        impl_def32!(f32x16);
+
+        impl_def64!(f64x2);
+        impl_def64!(f64x4);
+        impl_def64!(f64x8);
+    }
+}
diff --git a/vendor/packed_simd/src/codegen/math/float/sqrt.rs b/vendor/packed_simd/src/codegen/math/float/sqrt.rs
new file mode 100644
index 000000000..67bb0a2a9
--- /dev/null
+++ b/vendor/packed_simd/src/codegen/math/float/sqrt.rs
@@ -0,0 +1,103 @@
+//! Vertical floating-point `sqrt`
+#![allow(unused)]
+
+// FIXME 64-bit 1 elem vectors sqrt
+
+use crate::*;
+
+pub(crate) trait Sqrt {
+    fn sqrt(self) -> Self;
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.sqrt.v2f32"]
+    fn sqrt_v2f32(x: f32x2) -> f32x2;
+    #[link_name = "llvm.sqrt.v4f32"]
+    fn sqrt_v4f32(x: f32x4) -> f32x4;
+    #[link_name = "llvm.sqrt.v8f32"]
+    fn sqrt_v8f32(x: f32x8) -> f32x8;
+    #[link_name = "llvm.sqrt.v16f32"]
+    fn sqrt_v16f32(x: f32x16) -> f32x16;
+    /* FIXME 64-bit single elem vectors
+    #[link_name = "llvm.sqrt.v1f64"]
+    fn sqrt_v1f64(x: f64x1) -> f64x1;
+    */
+    #[link_name = "llvm.sqrt.v2f64"]
+    fn sqrt_v2f64(x: f64x2) -> f64x2;
+    #[link_name = "llvm.sqrt.v4f64"]
+    fn sqrt_v4f64(x: f64x4) -> f64x4;
+    #[link_name = "llvm.sqrt.v8f64"]
+    fn sqrt_v8f64(x: f64x8) -> f64x8;
+
+    #[link_name = "llvm.sqrt.f32"]
+    fn sqrt_f32(x: f32) -> f32;
+    #[link_name = "llvm.sqrt.f64"]
+    fn sqrt_f64(x: f64) -> f64;
+}
+
+gen_unary_impl_table!(Sqrt, sqrt);
+
+cfg_if! {
+    if #[cfg(target_arch = "s390x")] {
+        // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+        impl_unary!(f32x2[f32; 2]: sqrt_f32);
+        impl_unary!(f32x4[f32; 4]: sqrt_f32);
+        impl_unary!(f32x8[f32; 8]: sqrt_f32);
+        impl_unary!(f32x16[f32; 16]: sqrt_f32);
+
+        impl_unary!(f64x2[f64; 2]: sqrt_f64);
+        impl_unary!(f64x4[f64; 4]: sqrt_f64);
+        impl_unary!(f64x8[f64; 8]: sqrt_f64);
+    } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+        use sleef_sys::*;
+        cfg_if! 
{
+            if #[cfg(target_feature = "avx2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_avx2128);
+                impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_avx2);
+                impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_avx2);
+
+                impl_unary!(f32x4: Sleef_sqrtf4_avx2128);
+                impl_unary!(f32x8: Sleef_sqrtf8_avx2);
+                impl_unary!(f64x2: Sleef_sqrtd2_avx2128);
+                impl_unary!(f64x4: Sleef_sqrtd4_avx2);
+            } else if #[cfg(target_feature = "avx")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_sse4);
+                impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_avx);
+                impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_avx);
+
+                impl_unary!(f32x4: Sleef_sqrtf4_sse4);
+                impl_unary!(f32x8: Sleef_sqrtf8_avx);
+                impl_unary!(f64x2: Sleef_sqrtd2_sse4);
+                impl_unary!(f64x4: Sleef_sqrtd4_avx);
+            } else if #[cfg(target_feature = "sse4.2")] {
+                impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_sse4);
+                impl_unary!(f32x16[q => f32x4]: Sleef_sqrtf4_sse4);
+                impl_unary!(f64x8[q => f64x2]: Sleef_sqrtd2_sse4);
+
+                impl_unary!(f32x4: Sleef_sqrtf4_sse4);
+                impl_unary!(f32x8[h => f32x4]: Sleef_sqrtf4_sse4);
+                impl_unary!(f64x2: Sleef_sqrtd2_sse4);
+                impl_unary!(f64x4[h => f64x2]: Sleef_sqrtd2_sse4);
+            } else {
+                impl_unary!(f32x2[f32; 2]: sqrt_f32);
+                impl_unary!(f32x16: sqrt_v16f32);
+                impl_unary!(f64x8: sqrt_v8f64);
+
+                impl_unary!(f32x4: sqrt_v4f32);
+                impl_unary!(f32x8: sqrt_v8f32);
+                impl_unary!(f64x2: sqrt_v2f64);
+                impl_unary!(f64x4: sqrt_v4f64);
+            }
+        }
+    } else {
+        impl_unary!(f32x2[f32; 2]: sqrt_f32);
+        impl_unary!(f32x4: sqrt_v4f32);
+        impl_unary!(f32x8: sqrt_v8f32);
+        impl_unary!(f32x16: sqrt_v16f32);
+
+        impl_unary!(f64x2: sqrt_v2f64);
+        impl_unary!(f64x4: sqrt_v4f64);
+        impl_unary!(f64x8: sqrt_v8f64);
+    }
+}
diff --git a/vendor/packed_simd/src/codegen/math/float/sqrte.rs b/vendor/packed_simd/src/codegen/math/float/sqrte.rs
new file mode 100644
index 000000000..58a1de1f4
--- /dev/null
+++ b/vendor/packed_simd/src/codegen/math/float/sqrte.rs
@@ -0,0 +1,67 @@
+//! Vertical floating-point `sqrte`
+#![allow(unused)]
+
+// FIXME 64-bit 1 elem vectors sqrte
+
+use crate::llvm::simd_fsqrt;
+use crate::*;
+
+pub(crate) trait Sqrte {
+    fn sqrte(self) -> Self;
+}
+
+gen_unary_impl_table!(Sqrte, sqrte);
+
+cfg_if! {
+    if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+        use sleef_sys::*;
+        cfg_if! 
{ + if #[cfg(target_feature = "avx2")] { + impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_u35avx2128); + impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_u35avx2); + impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_u35avx2); + + impl_unary!(f32x4: Sleef_sqrtf4_u35avx2128); + impl_unary!(f32x8: Sleef_sqrtf8_u35avx2); + impl_unary!(f64x2: Sleef_sqrtd2_u35avx2128); + impl_unary!(f64x4: Sleef_sqrtd4_u35avx2); + } else if #[cfg(target_feature = "avx")] { + impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_u35sse4); + impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_u35avx); + impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_u35avx); + + impl_unary!(f32x4: Sleef_sqrtf4_u35sse4); + impl_unary!(f32x8: Sleef_sqrtf8_u35avx); + impl_unary!(f64x2: Sleef_sqrtd2_u35sse4); + impl_unary!(f64x4: Sleef_sqrtd4_u35avx); + } else if #[cfg(target_feature = "sse4.2")] { + impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_u35sse4); + impl_unary!(f32x16[q => f32x4]: Sleef_sqrtf4_u35sse4); + impl_unary!(f64x8[q => f64x2]: Sleef_sqrtd2_u35sse4); + + impl_unary!(f32x4: Sleef_sqrtf4_u35sse4); + impl_unary!(f32x8[h => f32x4]: Sleef_sqrtf4_u35sse4); + impl_unary!(f64x2: Sleef_sqrtd2_u35sse4); + impl_unary!(f64x4[h => f64x2]: Sleef_sqrtd2_u35sse4); + } else { + impl_unary!(f32x2[g]: simd_fsqrt); + impl_unary!(f32x16[g]: simd_fsqrt); + impl_unary!(f64x8[g]: simd_fsqrt); + + impl_unary!(f32x4[g]: simd_fsqrt); + impl_unary!(f32x8[g]: simd_fsqrt); + impl_unary!(f64x2[g]: simd_fsqrt); + impl_unary!(f64x4[g]: simd_fsqrt); + } + } + } else { + impl_unary!(f32x2[g]: simd_fsqrt); + impl_unary!(f32x4[g]: simd_fsqrt); + impl_unary!(f32x8[g]: simd_fsqrt); + impl_unary!(f32x16[g]: simd_fsqrt); + + impl_unary!(f64x2[g]: simd_fsqrt); + impl_unary!(f64x4[g]: simd_fsqrt); + impl_unary!(f64x8[g]: simd_fsqrt); + } +} diff --git a/vendor/packed_simd/src/codegen/math/float/tanh.rs b/vendor/packed_simd/src/codegen/math/float/tanh.rs new file mode 100644 index 000000000..4243b0d88 --- /dev/null +++ b/vendor/packed_simd/src/codegen/math/float/tanh.rs @@ -0,0 +1,120 @@ +//! Vertical floating-point `tanh` +#![allow(unused)] + +// FIXME 64-bit 1 elem vectors tanh + +#[cfg(not(feature = "std"))] +use num_traits::Float; + +use crate::*; + +pub(crate) trait Tanh { + fn tanh(self) -> Self; +} + +macro_rules! define_tanh { + ($name:ident, $basetype:ty, $simdtype:ty, $lanes:expr, $trait:path) => { + fn $name(x: $simdtype) -> $simdtype { + use core::intrinsics::transmute; + let mut buf: [$basetype; $lanes] = unsafe { transmute(x) }; + for elem in &mut buf { + *elem = <$basetype as $trait>::tanh(*elem); + } + unsafe { transmute(buf) } + } + }; + + (f32 => $name:ident, $type:ty, $lanes:expr) => { + define_tanh!($name, f32, $type, $lanes, Float); + }; + + (f64 => $name:ident, $type:ty, $lanes:expr) => { + define_tanh!($name, f64, $type, $lanes, Float); + }; +} + +// llvm does not seem to expose the hyperbolic versions of trigonometric +// functions; we thus call the classical rust versions on all of them (which +// stem from cmath). +define_tanh!(f32 => tanh_v2f32, f32x2, 2); +define_tanh!(f32 => tanh_v4f32, f32x4, 4); +define_tanh!(f32 => tanh_v8f32, f32x8, 8); +define_tanh!(f32 => tanh_v16f32, f32x16, 16); + +define_tanh!(f64 => tanh_v2f64, f64x2, 2); +define_tanh!(f64 => tanh_v4f64, f64x4, 4); +define_tanh!(f64 => tanh_v8f64, f64x8, 8); + +fn tanh_f32(x: f32) -> f32 { + Float::tanh(x) +} + +fn tanh_f64(x: f64) -> f64 { + Float::tanh(x) +} + +gen_unary_impl_table!(Tanh, tanh); + +cfg_if! 
{ + if #[cfg(target_arch = "s390x")] { + // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 + impl_unary!(f32x2[f32; 2]: tanh_f32); + impl_unary!(f32x4[f32; 4]: tanh_f32); + impl_unary!(f32x8[f32; 8]: tanh_f32); + impl_unary!(f32x16[f32; 16]: tanh_f32); + + impl_unary!(f64x2[f64; 2]: tanh_f64); + impl_unary!(f64x4[f64; 4]: tanh_f64); + impl_unary!(f64x8[f64; 8]: tanh_f64); + } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { + use sleef_sys::*; + cfg_if! { + if #[cfg(target_feature = "avx2")] { + impl_unary!(f32x2[t => f32x4]: Sleef_tanhf4_u10avx2128); + impl_unary!(f32x16[h => f32x8]: Sleef_tanhf8_u10avx2); + impl_unary!(f64x8[h => f64x4]: Sleef_tanhd4_u10avx2); + + impl_unary!(f32x4: Sleef_tanhf4_u10avx2128); + impl_unary!(f32x8: Sleef_tanhf8_u10avx2); + impl_unary!(f64x2: Sleef_tanhd2_u10avx2128); + impl_unary!(f64x4: Sleef_tanhd4_u10avx2); + } else if #[cfg(target_feature = "avx")] { + impl_unary!(f32x2[t => f32x4]: Sleef_tanhf4_u10sse4); + impl_unary!(f32x16[h => f32x8]: Sleef_tanhf8_u10avx); + impl_unary!(f64x8[h => f64x4]: Sleef_tanhd4_u10avx); + + impl_unary!(f32x4: Sleef_tanhf4_u10sse4); + impl_unary!(f32x8: Sleef_tanhf8_u10avx); + impl_unary!(f64x2: Sleef_tanhd2_u10sse4); + impl_unary!(f64x4: Sleef_tanhd4_u10avx); + } else if #[cfg(target_feature = "sse4.2")] { + impl_unary!(f32x2[t => f32x4]: Sleef_tanhf4_u10sse4); + impl_unary!(f32x16[q => f32x4]: Sleef_tanhf4_u10sse4); + impl_unary!(f64x8[q => f64x2]: Sleef_tanhd2_u10sse4); + + impl_unary!(f32x4: Sleef_tanhf4_u10sse4); + impl_unary!(f32x8[h => f32x4]: Sleef_tanhf4_u10sse4); + impl_unary!(f64x2: Sleef_tanhd2_u10sse4); + impl_unary!(f64x4[h => f64x2]: Sleef_tanhd2_u10sse4); + } else { + impl_unary!(f32x2[f32; 2]: tanh_f32); + impl_unary!(f32x16: tanh_v16f32); + impl_unary!(f64x8: tanh_v8f64); + + impl_unary!(f32x4: tanh_v4f32); + impl_unary!(f32x8: tanh_v8f32); + impl_unary!(f64x2: tanh_v2f64); + impl_unary!(f64x4: tanh_v4f64); + } + } + } else { + impl_unary!(f32x2[f32; 2]: tanh_f32); + impl_unary!(f32x4: tanh_v4f32); + impl_unary!(f32x8: tanh_v8f32); + impl_unary!(f32x16: tanh_v16f32); + + impl_unary!(f64x2: tanh_v2f64); + impl_unary!(f64x4: tanh_v4f64); + impl_unary!(f64x8: tanh_v8f64); + } +} -- cgit v1.2.3
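
The impl_unary!/impl_binary!/impl_tertiary! tables above all dispatch through
the same small set of strategies: map a vector 1:1 onto a backend routine,
process it lane by lane ([f32; N]), split it into halves (h =>) or quarters
(q =>) that fit a narrower routine, or pad it into a vector twice as wide
(t =>) and keep the low half of the result. A minimal standalone sketch of the
h => strategy follows; F32x4/F32x8 are illustrative array newtypes standing in
for packed_simd's generated vector types, and f32::abs stands in for a real
backend kernel:

// Illustrative stand-ins for packed_simd's generated vector types.
#[derive(Clone, Copy, PartialEq, Debug)]
#[repr(C)]
struct F32x4([f32; 4]);
#[derive(Clone, Copy, PartialEq, Debug)]
#[repr(C)]
struct F32x8([f32; 8]);

// Stand-in for a 4-lane backend kernel (an LLVM intrinsic or SLEEF call
// in the real code); plain f32::abs here.
fn abs_v4f32(x: F32x4) -> F32x4 {
    let mut out = x.0;
    for v in &mut out {
        *v = v.abs();
    }
    F32x4(out)
}

// The `h =>` strategy: reinterpret an 8-lane vector as two 4-lane halves
// via a union, run the narrow kernel on each half, and reassemble.
fn abs_v8f32_via_halves(x: F32x8) -> F32x8 {
    union U {
        vec: F32x8,
        halves: [F32x4; 2],
    }
    unsafe {
        let mut halves = U { vec: x }.halves;
        halves[0] = abs_v4f32(halves[0]);
        halves[1] = abs_v4f32(halves[1]);
        U { halves }.vec
    }
}

fn main() {
    let x = F32x8([-1.0, 2.0, -3.0, 4.0, -5.0, 6.0, -7.0, 8.0]);
    assert_eq!(abs_v8f32_via_halves(x).0, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
}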
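
mul_add.rs and mul_adde.rs deliberately bind different intrinsics: llvm.fma.*
always fuses the multiply and add into a single correctly-rounded operation,
while llvm.fmuladd.* permits the backend to fuse only when that is at least as
efficient as a separate multiply and add. The difference is observable
whenever the intermediate product would round. A scalar sketch, using std's
f64::mul_add for the fused form:

fn main() {
    let eps = 2f64.powi(-30);
    let (x, y, z) = (1.0 + eps, 1.0 - eps, -1.0);

    // Fused: x*y + z with a single rounding, like llvm.fma / mul_add().
    let fused = x.mul_add(y, z);
    // Unfused: x*y rounds to 1.0 first, like a plain mul-then-add sequence.
    let unfused = x * y + z;

    assert_eq!(fused, -(eps * eps)); // exact: (1 + eps)(1 - eps) - 1
    assert_eq!(unfused, 0.0);
}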
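
cos_pi.rs, sin_pi.rs, and sin_cos_pi.rs fall back to scaling by pi and calling
the plain trigonometric function when no SLEEF routine is available. Because
the multiplication by an already-rounded pi introduces error before the
function is even evaluated, this fallback is less accurate than a dedicated
cospi/sinpi kernel such as the SLEEF _u05 entry points used above. A sketch of
the fallback's shape, with a scalar loop standing in for the vector
splat-and-multiply:

use std::f32::consts::PI;

// Portable fallback: cos_pi(x) ~= cos(x * pi), computed per lane.
fn cos_pi_fallback(x: [f32; 4]) -> [f32; 4] {
    let mut out = x;
    for v in &mut out {
        *v = (*v * PI).cos();
    }
    out
}

fn main() {
    let y = cos_pi_fallback([0.0, 0.5, 1.0, 2.0]);
    assert_eq!(y[0], 1.0); // cos(0)
    assert!(y[1].abs() < 1e-6); // cos(pi/2): small residue from rounding pi
    assert!((y[2] + 1.0).abs() < 1e-6); // cos(pi)
    assert!((y[3] - 1.0).abs() < 1e-6); // cos(2*pi)
}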