Diffstat (limited to 'vendor/packed_simd/src/codegen/math/float')
-rw-r--r--  vendor/packed_simd/src/codegen/math/float/abs.rs          103
-rw-r--r--  vendor/packed_simd/src/codegen/math/float/cos.rs          103
-rw-r--r--  vendor/packed_simd/src/codegen/math/float/cos_pi.rs        87
-rw-r--r--  vendor/packed_simd/src/codegen/math/float/exp.rs          112
-rw-r--r--  vendor/packed_simd/src/codegen/math/float/ln.rs           112
-rw-r--r--  vendor/packed_simd/src/codegen/math/float/macros.rs       470
-rw-r--r--  vendor/packed_simd/src/codegen/math/float/mul_add.rs      109
-rw-r--r--  vendor/packed_simd/src/codegen/math/float/mul_adde.rs      60
-rw-r--r--  vendor/packed_simd/src/codegen/math/float/powf.rs         112
-rw-r--r--  vendor/packed_simd/src/codegen/math/float/sin.rs          103
-rw-r--r--  vendor/packed_simd/src/codegen/math/float/sin_cos_pi.rs   188
-rw-r--r--  vendor/packed_simd/src/codegen/math/float/sin_pi.rs        87
-rw-r--r--  vendor/packed_simd/src/codegen/math/float/sqrt.rs         103
-rw-r--r--  vendor/packed_simd/src/codegen/math/float/sqrte.rs         67
-rw-r--r--  vendor/packed_simd/src/codegen/math/float/tanh.rs         120
15 files changed, 1936 insertions, 0 deletions
diff --git a/vendor/packed_simd/src/codegen/math/float/abs.rs b/vendor/packed_simd/src/codegen/math/float/abs.rs
new file mode 100644
index 000000000..34aacc25b
--- /dev/null
+++ b/vendor/packed_simd/src/codegen/math/float/abs.rs
@@ -0,0 +1,103 @@
+//! Vertical floating-point `fabs`
+#![allow(unused)]
+
+// FIXME 64-bit 1 elem vectors fabs
+
+use crate::*;
+
+pub(crate) trait Abs {
+ fn abs(self) -> Self;
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+ #[link_name = "llvm.fabs.v2f32"]
+ fn fabs_v2f32(x: f32x2) -> f32x2;
+ #[link_name = "llvm.fabs.v4f32"]
+ fn fabs_v4f32(x: f32x4) -> f32x4;
+ #[link_name = "llvm.fabs.v8f32"]
+ fn fabs_v8f32(x: f32x8) -> f32x8;
+ #[link_name = "llvm.fabs.v16f32"]
+ fn fabs_v16f32(x: f32x16) -> f32x16;
+ /* FIXME 64-bit single elem vectors
+ #[link_name = "llvm.fabs.v1f64"]
+ fn fabs_v1f64(x: f64x1) -> f64x1;
+ */
+ #[link_name = "llvm.fabs.v2f64"]
+ fn fabs_v2f64(x: f64x2) -> f64x2;
+ #[link_name = "llvm.fabs.v4f64"]
+ fn fabs_v4f64(x: f64x4) -> f64x4;
+ #[link_name = "llvm.fabs.v8f64"]
+ fn fabs_v8f64(x: f64x8) -> f64x8;
+
+ #[link_name = "llvm.fabs.f32"]
+ fn fabs_f32(x: f32) -> f32;
+ #[link_name = "llvm.fabs.f64"]
+ fn fabs_f64(x: f64) -> f64;
+}
+
+gen_unary_impl_table!(Abs, abs);
+
+cfg_if! {
+ if #[cfg(target_arch = "s390x")] {
+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+ impl_unary!(f32x2[f32; 2]: fabs_f32);
+ impl_unary!(f32x4[f32; 4]: fabs_f32);
+ impl_unary!(f32x8[f32; 8]: fabs_f32);
+ impl_unary!(f32x16[f32; 16]: fabs_f32);
+
+ impl_unary!(f64x2[f64; 2]: fabs_f64);
+ impl_unary!(f64x4[f64; 4]: fabs_f64);
+ impl_unary!(f64x8[f64; 8]: fabs_f64);
+ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+ use sleef_sys::*;
+ cfg_if! {
+ if #[cfg(target_feature = "avx2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_fabsf4_avx2128);
+ impl_unary!(f32x16[h => f32x8]: Sleef_fabsf8_avx2);
+ impl_unary!(f64x8[h => f64x4]: Sleef_fabsd4_avx2);
+
+ impl_unary!(f32x4: Sleef_fabsf4_avx2128);
+ impl_unary!(f32x8: Sleef_fabsf8_avx2);
+ impl_unary!(f64x2: Sleef_fabsd2_avx2128);
+ impl_unary!(f64x4: Sleef_fabsd4_avx2);
+ } else if #[cfg(target_feature = "avx")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_fabsf4_sse4);
+ impl_unary!(f32x16[h => f32x8]: Sleef_fabsf8_avx);
+ impl_unary!(f64x8[h => f64x4]: Sleef_fabsd4_avx);
+
+ impl_unary!(f32x4: Sleef_fabsf4_sse4);
+ impl_unary!(f32x8: Sleef_fabsf8_avx);
+ impl_unary!(f64x2: Sleef_fabsd2_sse4);
+ impl_unary!(f64x4: Sleef_fabsd4_avx);
+ } else if #[cfg(target_feature = "sse4.2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_fabsf4_sse4);
+ impl_unary!(f32x16[q => f32x4]: Sleef_fabsf4_sse4);
+ impl_unary!(f64x8[q => f64x2]: Sleef_fabsd2_sse4);
+
+ impl_unary!(f32x4: Sleef_fabsf4_sse4);
+ impl_unary!(f32x8[h => f32x4]: Sleef_fabsf4_sse4);
+ impl_unary!(f64x2: Sleef_fabsd2_sse4);
+ impl_unary!(f64x4[h => f64x2]: Sleef_fabsd2_sse4);
+ } else {
+ impl_unary!(f32x2[f32; 2]: fabs_f32);
+ impl_unary!(f32x16: fabs_v16f32);
+ impl_unary!(f64x8: fabs_v8f64);
+
+ impl_unary!(f32x4: fabs_v4f32);
+ impl_unary!(f32x8: fabs_v8f32);
+ impl_unary!(f64x2: fabs_v2f64);
+ impl_unary!(f64x4: fabs_v4f64);
+ }
+ }
+ } else {
+ impl_unary!(f32x2[f32; 2]: fabs_f32);
+ impl_unary!(f32x4: fabs_v4f32);
+ impl_unary!(f32x8: fabs_v8f32);
+ impl_unary!(f32x16: fabs_v16f32);
+
+ impl_unary!(f64x2: fabs_v2f64);
+ impl_unary!(f64x4: fabs_v4f64);
+ impl_unary!(f64x8: fabs_v8f64);
+ }
+}
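For orientation, each `impl_unary!` invocation above bottoms out in one of the `impl_unary_!` arms defined in macros.rs (added later in this diff). A minimal sketch, paraphrased from the macro body, of what the 1:1 `vec` arm `impl_unary!(f32x4: fabs_v4f32)` expands to:

impl Abs for f32x4 {
    #[inline]
    fn abs(self) -> Self {
        // Reinterpret the vector as the intrinsic's argument type, call the
        // `llvm.fabs.v4f32` intrinsic declared above, and reinterpret back.
        unsafe {
            use crate::mem::transmute;
            transmute(fabs_v4f32(transmute(self)))
        }
    }
}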
diff --git a/vendor/packed_simd/src/codegen/math/float/cos.rs b/vendor/packed_simd/src/codegen/math/float/cos.rs
new file mode 100644
index 000000000..dec390cb7
--- /dev/null
+++ b/vendor/packed_simd/src/codegen/math/float/cos.rs
@@ -0,0 +1,103 @@
+//! Vertical floating-point `cos`
+#![allow(unused)]
+
+// FIXME 64-bit 1 elem vector cos
+
+use crate::*;
+
+pub(crate) trait Cos {
+ fn cos(self) -> Self;
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+ #[link_name = "llvm.cos.v2f32"]
+ fn cos_v2f32(x: f32x2) -> f32x2;
+ #[link_name = "llvm.cos.v4f32"]
+ fn cos_v4f32(x: f32x4) -> f32x4;
+ #[link_name = "llvm.cos.v8f32"]
+ fn cos_v8f32(x: f32x8) -> f32x8;
+ #[link_name = "llvm.cos.v16f32"]
+ fn cos_v16f32(x: f32x16) -> f32x16;
+ /* FIXME 64-bit single elem vectors
+ #[link_name = "llvm.cos.v1f64"]
+ fn cos_v1f64(x: f64x1) -> f64x1;
+ */
+ #[link_name = "llvm.cos.v2f64"]
+ fn cos_v2f64(x: f64x2) -> f64x2;
+ #[link_name = "llvm.cos.v4f64"]
+ fn cos_v4f64(x: f64x4) -> f64x4;
+ #[link_name = "llvm.cos.v8f64"]
+ fn cos_v8f64(x: f64x8) -> f64x8;
+
+ #[link_name = "llvm.cos.f32"]
+ fn cos_f32(x: f32) -> f32;
+ #[link_name = "llvm.cos.f64"]
+ fn cos_f64(x: f64) -> f64;
+}
+
+gen_unary_impl_table!(Cos, cos);
+
+cfg_if! {
+ if #[cfg(target_arch = "s390x")] {
+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+ impl_unary!(f32x2[f32; 2]: cos_f32);
+ impl_unary!(f32x4[f32; 4]: cos_f32);
+ impl_unary!(f32x8[f32; 8]: cos_f32);
+ impl_unary!(f32x16[f32; 16]: cos_f32);
+
+ impl_unary!(f64x2[f64; 2]: cos_f64);
+ impl_unary!(f64x4[f64; 4]: cos_f64);
+ impl_unary!(f64x8[f64; 8]: cos_f64);
+ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+ use sleef_sys::*;
+ cfg_if! {
+ if #[cfg(target_feature = "avx2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_cosf4_u10avx2128);
+ impl_unary!(f32x16[h => f32x8]: Sleef_cosf8_u10avx2);
+ impl_unary!(f64x8[h => f64x4]: Sleef_cosd4_u10avx2);
+
+ impl_unary!(f32x4: Sleef_cosf4_u10avx2128);
+ impl_unary!(f32x8: Sleef_cosf8_u10avx2);
+ impl_unary!(f64x2: Sleef_cosd2_u10avx2128);
+ impl_unary!(f64x4: Sleef_cosd4_u10avx2);
+ } else if #[cfg(target_feature = "avx")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_cosf4_u10sse4);
+ impl_unary!(f32x16[h => f32x8]: Sleef_cosf8_u10avx);
+ impl_unary!(f64x8[h => f64x4]: Sleef_cosd4_u10avx);
+
+ impl_unary!(f32x4: Sleef_cosf4_u10sse4);
+ impl_unary!(f32x8: Sleef_cosf8_u10avx);
+ impl_unary!(f64x2: Sleef_cosd2_u10sse4);
+ impl_unary!(f64x4: Sleef_cosd4_u10avx);
+ } else if #[cfg(target_feature = "sse4.2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_cosf4_u10sse4);
+ impl_unary!(f32x16[q => f32x4]: Sleef_cosf4_u10sse4);
+ impl_unary!(f64x8[q => f64x2]: Sleef_cosd2_u10sse4);
+
+ impl_unary!(f32x4: Sleef_cosf4_u10sse4);
+ impl_unary!(f32x8[h => f32x4]: Sleef_cosf4_u10sse4);
+ impl_unary!(f64x2: Sleef_cosd2_u10sse4);
+ impl_unary!(f64x4[h => f64x2]: Sleef_cosd2_u10sse4);
+ } else {
+ impl_unary!(f32x2[f32; 2]: cos_f32);
+ impl_unary!(f32x16: cos_v16f32);
+ impl_unary!(f64x8: cos_v8f64);
+
+ impl_unary!(f32x4: cos_v4f32);
+ impl_unary!(f32x8: cos_v8f32);
+ impl_unary!(f64x2: cos_v2f64);
+ impl_unary!(f64x4: cos_v4f64);
+ }
+ }
+ } else {
+ impl_unary!(f32x2[f32; 2]: cos_f32);
+ impl_unary!(f32x4: cos_v4f32);
+ impl_unary!(f32x8: cos_v8f32);
+ impl_unary!(f32x16: cos_v16f32);
+
+ impl_unary!(f64x2: cos_v2f64);
+ impl_unary!(f64x4: cos_v4f64);
+ impl_unary!(f64x8: cos_v8f64);
+ }
+}
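A note on the SLEEF symbols used in this and the following files: in SLEEF's naming scheme the `_u10`, `_u05` and `_u35` infixes bound the maximum error at 1.0, 0.5 and 3.5 ULP respectively, suffix-free entry points (such as the `Sleef_fabsf4_*` family above) are exact, and `avx2128` names the 128-bit-wide AVX2 variant.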
diff --git a/vendor/packed_simd/src/codegen/math/float/cos_pi.rs b/vendor/packed_simd/src/codegen/math/float/cos_pi.rs
new file mode 100644
index 000000000..e283280ee
--- /dev/null
+++ b/vendor/packed_simd/src/codegen/math/float/cos_pi.rs
@@ -0,0 +1,87 @@
+//! Vertical floating-point `cos_pi`
+#![allow(unused)]
+
+// FIXME 64-bit 1 elem vectors cos_pi
+
+use crate::*;
+
+pub(crate) trait CosPi {
+ fn cos_pi(self) -> Self;
+}
+
+gen_unary_impl_table!(CosPi, cos_pi);
+
+macro_rules! impl_def {
+ ($vid:ident, $PI:path) => {
+ impl CosPi for $vid {
+ #[inline]
+ fn cos_pi(self) -> Self {
+ (self * Self::splat($PI)).cos()
+ }
+ }
+ };
+}
+macro_rules! impl_def32 {
+ ($vid:ident) => {
+ impl_def!($vid, crate::f32::consts::PI);
+ };
+}
+macro_rules! impl_def64 {
+ ($vid:ident) => {
+ impl_def!($vid, crate::f64::consts::PI);
+ };
+}
+
+cfg_if! {
+ if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+ use sleef_sys::*;
+ cfg_if! {
+ if #[cfg(target_feature = "avx2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_cospif4_u05avx2128);
+ impl_unary!(f32x16[h => f32x8]: Sleef_cospif8_u05avx2);
+ impl_unary!(f64x8[h => f64x4]: Sleef_cospid4_u05avx2);
+
+ impl_unary!(f32x4: Sleef_cospif4_u05avx2128);
+ impl_unary!(f32x8: Sleef_cospif8_u05avx2);
+ impl_unary!(f64x2: Sleef_cospid2_u05avx2128);
+ impl_unary!(f64x4: Sleef_cospid4_u05avx2);
+ } else if #[cfg(target_feature = "avx")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_cospif4_u05sse4);
+ impl_unary!(f32x16[h => f32x8]: Sleef_cospif8_u05avx);
+ impl_unary!(f64x8[h => f64x4]: Sleef_cospid4_u05avx);
+
+ impl_unary!(f32x4: Sleef_cospif4_u05sse4);
+ impl_unary!(f32x8: Sleef_cospif8_u05avx);
+ impl_unary!(f64x2: Sleef_cospid2_u05sse4);
+ impl_unary!(f64x4: Sleef_cospid4_u05avx);
+ } else if #[cfg(target_feature = "sse4.2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_cospif4_u05sse4);
+ impl_unary!(f32x16[q => f32x4]: Sleef_cospif4_u05sse4);
+ impl_unary!(f64x8[q => f64x2]: Sleef_cospid2_u05sse4);
+
+ impl_unary!(f32x4: Sleef_cospif4_u05sse4);
+ impl_unary!(f32x8[h => f32x4]: Sleef_cospif4_u05sse4);
+ impl_unary!(f64x2: Sleef_cospid2_u05sse4);
+ impl_unary!(f64x4[h => f64x2]: Sleef_cospid2_u05sse4);
+ } else {
+ impl_def32!(f32x2);
+ impl_def32!(f32x4);
+ impl_def32!(f32x8);
+ impl_def32!(f32x16);
+
+ impl_def64!(f64x2);
+ impl_def64!(f64x4);
+ impl_def64!(f64x8);
+ }
+ }
+ } else {
+ impl_def32!(f32x2);
+ impl_def32!(f32x4);
+ impl_def32!(f32x8);
+ impl_def32!(f32x16);
+
+ impl_def64!(f64x2);
+ impl_def64!(f64x4);
+ impl_def64!(f64x8);
+ }
+}
diff --git a/vendor/packed_simd/src/codegen/math/float/exp.rs b/vendor/packed_simd/src/codegen/math/float/exp.rs
new file mode 100644
index 000000000..a7b20580e
--- /dev/null
+++ b/vendor/packed_simd/src/codegen/math/float/exp.rs
@@ -0,0 +1,112 @@
+//! Vertical floating-point `exp`
+#![allow(unused)]
+
+// FIXME 64-bit single elem vectors missing
+
+use crate::*;
+
+pub(crate) trait Exp {
+ fn exp(self) -> Self;
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+ #[link_name = "llvm.exp.v2f32"]
+ fn exp_v2f32(x: f32x2) -> f32x2;
+ #[link_name = "llvm.exp.v4f32"]
+ fn exp_v4f32(x: f32x4) -> f32x4;
+ #[link_name = "llvm.exp.v8f32"]
+ fn exp_v8f32(x: f32x8) -> f32x8;
+ #[link_name = "llvm.exp.v16f32"]
+ fn exp_v16f32(x: f32x16) -> f32x16;
+ /* FIXME 64-bit single elem vectors
+ #[link_name = "llvm.exp.v1f64"]
+ fn exp_v1f64(x: f64x1) -> f64x1;
+ */
+ #[link_name = "llvm.exp.v2f64"]
+ fn exp_v2f64(x: f64x2) -> f64x2;
+ #[link_name = "llvm.exp.v4f64"]
+ fn exp_v4f64(x: f64x4) -> f64x4;
+ #[link_name = "llvm.exp.v8f64"]
+ fn exp_v8f64(x: f64x8) -> f64x8;
+
+ #[link_name = "llvm.exp.f32"]
+ fn exp_f32(x: f32) -> f32;
+ #[link_name = "llvm.exp.f64"]
+ fn exp_f64(x: f64) -> f64;
+}
+
+gen_unary_impl_table!(Exp, exp);
+
+cfg_if! {
+ if #[cfg(target_arch = "s390x")] {
+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+ impl_unary!(f32x2[f32; 2]: exp_f32);
+ impl_unary!(f32x4[f32; 4]: exp_f32);
+ impl_unary!(f32x8[f32; 8]: exp_f32);
+ impl_unary!(f32x16[f32; 16]: exp_f32);
+
+ impl_unary!(f64x2[f64; 2]: exp_f64);
+ impl_unary!(f64x4[f64; 4]: exp_f64);
+ impl_unary!(f64x8[f64; 8]: exp_f64);
+ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+ use sleef_sys::*;
+ cfg_if! {
+ if #[cfg(target_feature = "avx2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10avx2128);
+ impl_unary!(f32x16[h => f32x8]: Sleef_expf8_u10avx2);
+ impl_unary!(f64x8[h => f64x4]: Sleef_expd4_u10avx2);
+
+ impl_unary!(f32x4: Sleef_expf4_u10avx2128);
+ impl_unary!(f32x8: Sleef_expf8_u10avx2);
+ impl_unary!(f64x2: Sleef_expd2_u10avx2128);
+ impl_unary!(f64x4: Sleef_expd4_u10avx2);
+ } else if #[cfg(target_feature = "avx")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10sse4);
+ impl_unary!(f32x16[h => f32x8]: Sleef_expf8_u10avx);
+ impl_unary!(f64x8[h => f64x4]: Sleef_expd4_u10avx);
+
+ impl_unary!(f32x4: Sleef_expf4_u10sse4);
+ impl_unary!(f32x8: Sleef_expf8_u10avx);
+ impl_unary!(f64x2: Sleef_expd2_u10sse4);
+ impl_unary!(f64x4: Sleef_expd4_u10avx);
+ } else if #[cfg(target_feature = "sse4.2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10sse4);
+ impl_unary!(f32x16[q => f32x4]: Sleef_expf4_u10sse4);
+ impl_unary!(f64x8[q => f64x2]: Sleef_expd2_u10sse4);
+
+ impl_unary!(f32x4: Sleef_expf4_u10sse4);
+ impl_unary!(f32x8[h => f32x4]: Sleef_expf4_u10sse4);
+ impl_unary!(f64x2: Sleef_expd2_u10sse4);
+ impl_unary!(f64x4[h => f64x2]: Sleef_expd2_u10sse4);
+ } else if #[cfg(target_feature = "sse2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10sse2);
+ impl_unary!(f32x16[q => f32x4]: Sleef_expf4_u10sse2);
+ impl_unary!(f64x8[q => f64x2]: Sleef_expd2_u10sse2);
+
+ impl_unary!(f32x4: Sleef_expf4_u10sse2);
+ impl_unary!(f32x8[h => f32x4]: Sleef_expf4_u10sse2);
+ impl_unary!(f64x2: Sleef_expd2_u10sse2);
+ impl_unary!(f64x4[h => f64x2]: Sleef_expd2_u10sse2);
+ } else {
+ impl_unary!(f32x2[f32; 2]: exp_f32);
+ impl_unary!(f32x16: exp_v16f32);
+ impl_unary!(f64x8: exp_v8f64);
+
+ impl_unary!(f32x4: exp_v4f32);
+ impl_unary!(f32x8: exp_v8f32);
+ impl_unary!(f64x2: exp_v2f64);
+ impl_unary!(f64x4: exp_v4f64);
+ }
+ }
+ } else {
+ impl_unary!(f32x2[f32; 2]: exp_f32);
+ impl_unary!(f32x4: exp_v4f32);
+ impl_unary!(f32x8: exp_v8f32);
+ impl_unary!(f32x16: exp_v16f32);
+
+ impl_unary!(f64x2: exp_v2f64);
+ impl_unary!(f64x4: exp_v4f64);
+ impl_unary!(f64x8: exp_v8f64);
+ }
+}
diff --git a/vendor/packed_simd/src/codegen/math/float/ln.rs b/vendor/packed_simd/src/codegen/math/float/ln.rs
new file mode 100644
index 000000000..a5e38cb40
--- /dev/null
+++ b/vendor/packed_simd/src/codegen/math/float/ln.rs
@@ -0,0 +1,112 @@
+//! Vertical floating-point `ln`
+#![allow(unused)]
+
+// FIXME 64-bit single elem vectors missing
+
+use crate::*;
+
+pub(crate) trait Ln {
+ fn ln(self) -> Self;
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+ #[link_name = "llvm.log.v2f32"]
+ fn ln_v2f32(x: f32x2) -> f32x2;
+ #[link_name = "llvm.log.v4f32"]
+ fn ln_v4f32(x: f32x4) -> f32x4;
+ #[link_name = "llvm.log.v8f32"]
+ fn ln_v8f32(x: f32x8) -> f32x8;
+ #[link_name = "llvm.log.v16f32"]
+ fn ln_v16f32(x: f32x16) -> f32x16;
+ /* FIXME 64-bit single elem vectors
+ #[link_name = "llvm.log.v1f64"]
+ fn ln_v1f64(x: f64x1) -> f64x1;
+ */
+ #[link_name = "llvm.log.v2f64"]
+ fn ln_v2f64(x: f64x2) -> f64x2;
+ #[link_name = "llvm.log.v4f64"]
+ fn ln_v4f64(x: f64x4) -> f64x4;
+ #[link_name = "llvm.log.v8f64"]
+ fn ln_v8f64(x: f64x8) -> f64x8;
+
+ #[link_name = "llvm.log.f32"]
+ fn ln_f32(x: f32) -> f32;
+ #[link_name = "llvm.log.f64"]
+ fn ln_f64(x: f64) -> f64;
+}
+
+gen_unary_impl_table!(Ln, ln);
+
+cfg_if! {
+ if #[cfg(target_arch = "s390x")] {
+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+ impl_unary!(f32x2[f32; 2]: ln_f32);
+ impl_unary!(f32x4[f32; 4]: ln_f32);
+ impl_unary!(f32x8[f32; 8]: ln_f32);
+ impl_unary!(f32x16[f32; 16]: ln_f32);
+
+ impl_unary!(f64x2[f64; 2]: ln_f64);
+ impl_unary!(f64x4[f64; 4]: ln_f64);
+ impl_unary!(f64x8[f64; 8]: ln_f64);
+ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+ use sleef_sys::*;
+ cfg_if! {
+ if #[cfg(target_feature = "avx2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10avx2128);
+ impl_unary!(f32x16[h => f32x8]: Sleef_logf8_u10avx2);
+ impl_unary!(f64x8[h => f64x4]: Sleef_logd4_u10avx2);
+
+ impl_unary!(f32x4: Sleef_logf4_u10avx2128);
+ impl_unary!(f32x8: Sleef_logf8_u10avx2);
+ impl_unary!(f64x2: Sleef_logd2_u10avx2128);
+ impl_unary!(f64x4: Sleef_logd4_u10avx2);
+ } else if #[cfg(target_feature = "avx")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10sse4);
+ impl_unary!(f32x16[h => f32x8]: Sleef_logf8_u10avx);
+ impl_unary!(f64x8[h => f64x4]: Sleef_logd4_u10avx);
+
+ impl_unary!(f32x4: Sleef_logf4_u10sse4);
+ impl_unary!(f32x8: Sleef_logf8_u10avx);
+ impl_unary!(f64x2: Sleef_logd2_u10sse4);
+ impl_unary!(f64x4: Sleef_logd4_u10avx);
+ } else if #[cfg(target_feature = "sse4.2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10sse4);
+ impl_unary!(f32x16[q => f32x4]: Sleef_logf4_u10sse4);
+ impl_unary!(f64x8[q => f64x2]: Sleef_logd2_u10sse4);
+
+ impl_unary!(f32x4: Sleef_logf4_u10sse4);
+ impl_unary!(f32x8[h => f32x4]: Sleef_logf4_u10sse4);
+ impl_unary!(f64x2: Sleef_logd2_u10sse4);
+ impl_unary!(f64x4[h => f64x2]: Sleef_logd2_u10sse4);
+ } else if #[cfg(target_feature = "sse2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10sse2);
+ impl_unary!(f32x16[q => f32x4]: Sleef_logf4_u10sse2);
+ impl_unary!(f64x8[q => f64x2]: Sleef_logd2_u10sse2);
+
+ impl_unary!(f32x4: Sleef_logf4_u10sse2);
+ impl_unary!(f32x8[h => f32x4]: Sleef_logf4_u10sse2);
+ impl_unary!(f64x2: Sleef_logd2_u10sse2);
+ impl_unary!(f64x4[h => f64x2]: Sleef_logd2_u10sse2);
+ } else {
+ impl_unary!(f32x2[f32; 2]: ln_f32);
+ impl_unary!(f32x16: ln_v16f32);
+ impl_unary!(f64x8: ln_v8f64);
+
+ impl_unary!(f32x4: ln_v4f32);
+ impl_unary!(f32x8: ln_v8f32);
+ impl_unary!(f64x2: ln_v2f64);
+ impl_unary!(f64x4: ln_v4f64);
+ }
+ }
+ } else {
+ impl_unary!(f32x2[f32; 2]: ln_f32);
+ impl_unary!(f32x4: ln_v4f32);
+ impl_unary!(f32x8: ln_v8f32);
+ impl_unary!(f32x16: ln_v16f32);
+
+ impl_unary!(f64x2: ln_v2f64);
+ impl_unary!(f64x4: ln_v4f64);
+ impl_unary!(f64x8: ln_v8f64);
+ }
+}
diff --git a/vendor/packed_simd/src/codegen/math/float/macros.rs b/vendor/packed_simd/src/codegen/math/float/macros.rs
new file mode 100644
index 000000000..8daee1afe
--- /dev/null
+++ b/vendor/packed_simd/src/codegen/math/float/macros.rs
@@ -0,0 +1,470 @@
+//! Utility macros
+#![allow(unused)]
+
+macro_rules! impl_unary_ {
+ // implementation mapping 1:1
+ (vec | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+ $fun:ident) => {
+ impl $trait_id for $vec_id {
+ #[inline]
+ fn $trait_method(self) -> Self {
+ unsafe {
+ use crate::mem::transmute;
+ transmute($fun(transmute(self)))
+ }
+ }
+ }
+ };
+ // implementation mapping 1:1 for when `$fun` is a generic function
+ // like some of the fp math rustc intrinsics (e.g. `fn fun<T>(x: T) -> T`).
+ (gen | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+ $fun:ident) => {
+ impl $trait_id for $vec_id {
+ #[inline]
+ fn $trait_method(self) -> Self {
+ unsafe {
+ use crate::mem::transmute;
+ transmute($fun(self.0))
+ }
+ }
+ }
+ };
+ (scalar | $trait_id:ident, $trait_method:ident,
+ $vec_id:ident, [$sid:ident; $scount:expr], $fun:ident) => {
+ impl $trait_id for $vec_id {
+ #[inline]
+ fn $trait_method(self) -> Self {
+ unsafe {
+ union U {
+ vec: $vec_id,
+ scalars: [$sid; $scount],
+ }
+ let mut scalars = U { vec: self }.scalars;
+ for i in &mut scalars {
+ *i = $fun(*i);
+ }
+ U { scalars }.vec
+ }
+ }
+ }
+ };
+ // implementation calling fun twice on each of the vector halves:
+ (halves | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+ $vech_id:ident, $fun:ident) => {
+ impl $trait_id for $vec_id {
+ #[inline]
+ fn $trait_method(self) -> Self {
+ unsafe {
+ use crate::mem::transmute;
+ union U {
+ vec: $vec_id,
+ halves: [$vech_id; 2],
+ }
+
+ let mut halves = U { vec: self }.halves;
+
+ *halves.get_unchecked_mut(0) = transmute($fun(transmute(*halves.get_unchecked(0))));
+ *halves.get_unchecked_mut(1) = transmute($fun(transmute(*halves.get_unchecked(1))));
+
+ U { halves }.vec
+ }
+ }
+ }
+ };
+ // implementation calling fun four times on each of the vector quarters:
+ (quarter | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+ $vecq_id:ident, $fun:ident) => {
+ impl $trait_id for $vec_id {
+ #[inline]
+ fn $trait_method(self) -> Self {
+ unsafe {
+ use crate::mem::transmute;
+ union U {
+ vec: $vec_id,
+ quarters: [$vecq_id; 4],
+ }
+
+ let mut quarters = U { vec: self }.quarters;
+
+ *quarters.get_unchecked_mut(0) = transmute($fun(transmute(*quarters.get_unchecked(0))));
+ *quarters.get_unchecked_mut(1) = transmute($fun(transmute(*quarters.get_unchecked(1))));
+ *quarters.get_unchecked_mut(2) = transmute($fun(transmute(*quarters.get_unchecked(2))));
+ *quarters.get_unchecked_mut(3) = transmute($fun(transmute(*quarters.get_unchecked(3))));
+
+ U { quarters }.vec
+ }
+ }
+ }
+ };
+ // implementation calling fun once on a vector twice as large:
+ (twice | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+ $vect_id:ident, $fun:ident) => {
+ impl $trait_id for $vec_id {
+ #[inline]
+ fn $trait_method(self) -> Self {
+ unsafe {
+ use crate::mem::{transmute, uninitialized};
+
+ union U {
+ vec: [$vec_id; 2],
+ twice: $vect_id,
+ }
+
+ let twice = U { vec: [self, uninitialized()] }.twice;
+ let twice = transmute($fun(transmute(twice)));
+
+ *(U { twice }.vec.get_unchecked(0))
+ }
+ }
+ }
+ };
+}
+
+macro_rules! gen_unary_impl_table {
+ ($trait_id:ident, $trait_method:ident) => {
+ macro_rules! impl_unary {
+ ($vid:ident: $fun:ident) => {
+ impl_unary_!(vec | $trait_id, $trait_method, $vid, $fun);
+ };
+ ($vid:ident[g]: $fun:ident) => {
+ impl_unary_!(gen | $trait_id, $trait_method, $vid, $fun);
+ };
+ ($vid:ident[$sid:ident; $sc:expr]: $fun:ident) => {
+ impl_unary_!(scalar | $trait_id, $trait_method, $vid, [$sid; $sc], $fun);
+ };
+ ($vid:ident[s]: $fun:ident) => {
+ impl_unary_!(scalar | $trait_id, $trait_method, $vid, $fun);
+ };
+ ($vid:ident[h => $vid_h:ident]: $fun:ident) => {
+ impl_unary_!(halves | $trait_id, $trait_method, $vid, $vid_h, $fun);
+ };
+ ($vid:ident[q => $vid_q:ident]: $fun:ident) => {
+ impl_unary_!(quarter | $trait_id, $trait_method, $vid, $vid_q, $fun);
+ };
+ ($vid:ident[t => $vid_t:ident]: $fun:ident) => {
+ impl_unary_!(twice | $trait_id, $trait_method, $vid, $vid_t, $fun);
+ };
+ }
+ };
+}
+
+macro_rules! impl_tertiary_ {
+ // implementation mapping 1:1
+ (vec | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+ $fun:ident) => {
+ impl $trait_id for $vec_id {
+ #[inline]
+ fn $trait_method(self, y: Self, z: Self) -> Self {
+ unsafe {
+ use crate::mem::transmute;
+ transmute($fun(transmute(self), transmute(y), transmute(z)))
+ }
+ }
+ }
+ };
+ (scalar | $trait_id:ident, $trait_method:ident,
+ $vec_id:ident, [$sid:ident; $scount:expr], $fun:ident) => {
+ impl $trait_id for $vec_id {
+ #[inline]
+ fn $trait_method(self, y: Self, z: Self) -> Self {
+ unsafe {
+ union U {
+ vec: $vec_id,
+ scalars: [$sid; $scount],
+ }
+ let mut x = U { vec: self }.scalars;
+ let y = U { vec: y }.scalars;
+ let z = U { vec: z }.scalars;
+ for ((x, y), z) in x.iter_mut().zip(&y).zip(&z) {
+ *x = $fun(*x, *y, *z);
+ }
+ U { scalars: x }.vec
+ }
+ }
+ }
+ };
+ // implementation calling fun twice on each of the vector halves:
+ (halves | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+ $vech_id:ident, $fun:ident) => {
+ impl $trait_id for $vec_id {
+ #[inline]
+ fn $trait_method(self, y: Self, z: Self) -> Self {
+ unsafe {
+ use crate::mem::transmute;
+ union U {
+ vec: $vec_id,
+ halves: [$vech_id; 2],
+ }
+
+ let mut x_halves = U { vec: self }.halves;
+ let y_halves = U { vec: y }.halves;
+ let z_halves = U { vec: z }.halves;
+
+ *x_halves.get_unchecked_mut(0) = transmute($fun(
+ transmute(*x_halves.get_unchecked(0)),
+ transmute(*y_halves.get_unchecked(0)),
+ transmute(*z_halves.get_unchecked(0)),
+ ));
+ *x_halves.get_unchecked_mut(1) = transmute($fun(
+ transmute(*x_halves.get_unchecked(1)),
+ transmute(*y_halves.get_unchecked(1)),
+ transmute(*z_halves.get_unchecked(1)),
+ ));
+
+ U { halves: x_halves }.vec
+ }
+ }
+ }
+ };
+ // implementation calling fun four times on each of the vector quarters:
+ (quarter | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+ $vecq_id:ident, $fun:ident) => {
+ impl $trait_id for $vec_id {
+ #[inline]
+ fn $trait_method(self, y: Self, z: Self) -> Self {
+ unsafe {
+ use crate::mem::transmute;
+ union U {
+ vec: $vec_id,
+ quarters: [$vecq_id; 4],
+ }
+
+ let mut x_quarters = U { vec: self }.quarters;
+ let y_quarters = U { vec: y }.quarters;
+ let z_quarters = U { vec: z }.quarters;
+
+ *x_quarters.get_unchecked_mut(0) = transmute($fun(
+ transmute(*x_quarters.get_unchecked(0)),
+ transmute(*y_quarters.get_unchecked(0)),
+ transmute(*z_quarters.get_unchecked(0)),
+ ));
+
+ *x_quarters.get_unchecked_mut(1) = transmute($fun(
+ transmute(*x_quarters.get_unchecked(1)),
+ transmute(*y_quarters.get_unchecked(1)),
+ transmute(*z_quarters.get_unchecked(1)),
+ ));
+
+ *x_quarters.get_unchecked_mut(2) = transmute($fun(
+ transmute(*x_quarters.get_unchecked(2)),
+ transmute(*y_quarters.get_unchecked(2)),
+ transmute(*z_quarters.get_unchecked(2)),
+ ));
+
+ *x_quarters.get_unchecked_mut(3) = transmute($fun(
+ transmute(*x_quarters.get_unchecked(3)),
+ transmute(*y_quarters.get_unchecked(3)),
+ transmute(*z_quarters.get_unchecked(3)),
+ ));
+
+ U { quarters: x_quarters }.vec
+ }
+ }
+ }
+ };
+ // implementation calling fun once on a vector twice as large:
+ (twice | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+ $vect_id:ident, $fun:ident) => {
+ impl $trait_id for $vec_id {
+ #[inline]
+ fn $trait_method(self, y: Self, z: Self) -> Self {
+ unsafe {
+ use crate::mem::{transmute, uninitialized};
+
+ union U {
+ vec: [$vec_id; 2],
+ twice: $vect_id,
+ }
+
+ let x_twice = U { vec: [self, uninitialized()] }.twice;
+ let y_twice = U { vec: [y, uninitialized()] }.twice;
+ let z_twice = U { vec: [z, uninitialized()] }.twice;
+ let twice: $vect_id =
+ transmute($fun(transmute(x_twice), transmute(y_twice), transmute(z_twice)));
+
+ *(U { twice }.vec.get_unchecked(0))
+ }
+ }
+ }
+ };
+}
+
+macro_rules! gen_tertiary_impl_table {
+ ($trait_id:ident, $trait_method:ident) => {
+ macro_rules! impl_tertiary {
+ ($vid:ident: $fun:ident) => {
+ impl_tertiary_!(vec | $trait_id, $trait_method, $vid, $fun);
+ };
+ ($vid:ident[$sid:ident; $sc:expr]: $fun:ident) => {
+ impl_tertiary_!(scalar | $trait_id, $trait_method, $vid, [$sid; $sc], $fun);
+ };
+ ($vid:ident[s]: $fun:ident) => {
+ impl_tertiary_!(scalar | $trait_id, $trait_method, $vid, $fun);
+ };
+ ($vid:ident[h => $vid_h:ident]: $fun:ident) => {
+ impl_tertiary_!(halves | $trait_id, $trait_method, $vid, $vid_h, $fun);
+ };
+ ($vid:ident[q => $vid_q:ident]: $fun:ident) => {
+ impl_tertiary_!(quarter | $trait_id, $trait_method, $vid, $vid_q, $fun);
+ };
+ ($vid:ident[t => $vid_t:ident]: $fun:ident) => {
+ impl_tertiary_!(twice | $trait_id, $trait_method, $vid, $vid_t, $fun);
+ };
+ }
+ };
+}
+
+macro_rules! impl_binary_ {
+ // implementation mapping 1:1
+ (vec | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+ $fun:ident) => {
+ impl $trait_id for $vec_id {
+ #[inline]
+ fn $trait_method(self, y: Self) -> Self {
+ unsafe {
+ use crate::mem::transmute;
+ transmute($fun(transmute(self), transmute(y)))
+ }
+ }
+ }
+ };
+ (scalar | $trait_id:ident, $trait_method:ident,
+ $vec_id:ident, [$sid:ident; $scount:expr], $fun:ident) => {
+ impl $trait_id for $vec_id {
+ #[inline]
+ fn $trait_method(self, y: Self) -> Self {
+ unsafe {
+ union U {
+ vec: $vec_id,
+ scalars: [$sid; $scount],
+ }
+ let mut x = U { vec: self }.scalars;
+ let y = U { vec: y }.scalars;
+ for (x, y) in x.iter_mut().zip(&y) {
+ *x = $fun(*x, *y);
+ }
+ U { scalars: x }.vec
+ }
+ }
+ }
+ };
+ // implementation calling fun twice on each of the vector halves:
+ (halves | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+ $vech_id:ident, $fun:ident) => {
+ impl $trait_id for $vec_id {
+ #[inline]
+ fn $trait_method(self, y: Self) -> Self {
+ unsafe {
+ use crate::mem::transmute;
+ union U {
+ vec: $vec_id,
+ halves: [$vech_id; 2],
+ }
+
+ let mut x_halves = U { vec: self }.halves;
+ let y_halves = U { vec: y }.halves;
+
+ *x_halves.get_unchecked_mut(0) = transmute($fun(
+ transmute(*x_halves.get_unchecked(0)),
+ transmute(*y_halves.get_unchecked(0)),
+ ));
+ *x_halves.get_unchecked_mut(1) = transmute($fun(
+ transmute(*x_halves.get_unchecked(1)),
+ transmute(*y_halves.get_unchecked(1)),
+ ));
+
+ U { halves: x_halves }.vec
+ }
+ }
+ }
+ };
+ // implementation calling fun four times on each of the vector quarters:
+ (quarter | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+ $vecq_id:ident, $fun:ident) => {
+ impl $trait_id for $vec_id {
+ #[inline]
+ fn $trait_method(self, y: Self) -> Self {
+ unsafe {
+ use crate::mem::transmute;
+ union U {
+ vec: $vec_id,
+ quarters: [$vecq_id; 4],
+ }
+
+ let mut x_quarters = U { vec: self }.quarters;
+ let y_quarters = U { vec: y }.quarters;
+
+ *x_quarters.get_unchecked_mut(0) = transmute($fun(
+ transmute(*x_quarters.get_unchecked(0)),
+ transmute(*y_quarters.get_unchecked(0)),
+ ));
+
+ *x_quarters.get_unchecked_mut(1) = transmute($fun(
+ transmute(*x_quarters.get_unchecked(1)),
+ transmute(*y_quarters.get_unchecked(1)),
+ ));
+
+ *x_quarters.get_unchecked_mut(2) = transmute($fun(
+ transmute(*x_quarters.get_unchecked(2)),
+ transmute(*y_quarters.get_unchecked(2)),
+ ));
+
+ *x_quarters.get_unchecked_mut(3) = transmute($fun(
+ transmute(*x_quarters.get_unchecked(3)),
+ transmute(*y_quarters.get_unchecked(3)),
+ ));
+
+ U { quarters: x_quarters }.vec
+ }
+ }
+ }
+ };
+ // implementation calling fun once on a vector twice as large:
+ (twice | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+ $vect_id:ident, $fun:ident) => {
+ impl $trait_id for $vec_id {
+ #[inline]
+ fn $trait_method(self, y: Self) -> Self {
+ unsafe {
+ use crate::mem::{transmute, uninitialized};
+
+ union U {
+ vec: [$vec_id; 2],
+ twice: $vect_id,
+ }
+
+ let x_twice = U { vec: [self, uninitialized()] }.twice;
+ let y_twice = U { vec: [y, uninitialized()] }.twice;
+ let twice: $vect_id = transmute($fun(transmute(x_twice), transmute(y_twice)));
+
+ *(U { twice }.vec.get_unchecked(0))
+ }
+ }
+ }
+ };
+}
+
+macro_rules! gen_binary_impl_table {
+ ($trait_id:ident, $trait_method:ident) => {
+ macro_rules! impl_binary {
+ ($vid:ident: $fun:ident) => {
+ impl_binary_!(vec | $trait_id, $trait_method, $vid, $fun);
+ };
+ ($vid:ident[$sid:ident; $sc:expr]: $fun:ident) => {
+ impl_binary_!(scalar | $trait_id, $trait_method, $vid, [$sid; $sc], $fun);
+ };
+ ($vid:ident[s]: $fun:ident) => {
+ impl_binary_!(scalar | $trait_id, $trait_method, $vid, $fun);
+ };
+ ($vid:ident[h => $vid_h:ident]: $fun:ident) => {
+ impl_binary_!(halves | $trait_id, $trait_method, $vid, $vid_h, $fun);
+ };
+ ($vid:ident[q => $vid_q:ident]: $fun:ident) => {
+ impl_binary_!(quarter | $trait_id, $trait_method, $vid, $vid_q, $fun);
+ };
+ ($vid:ident[t => $vid_t:ident]: $fun:ident) => {
+ impl_binary_!(twice | $trait_id, $trait_method, $vid, $vid_t, $fun);
+ };
+ }
+ };
+}
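Taken together, the arms above give every generated dispatch macro a compact vocabulary. A sketch of the correspondence, using `impl_unary!` as the example (the same bracket forms apply to `impl_binary!` and `impl_tertiary!`, which only lack the `[g]` arm):

impl_unary!(f32x4: fun);               // vec: single 1:1 call on the whole vector
impl_unary!(f32x4[g]: fun);            // gen: generic rustc intrinsic, called on self.0
impl_unary!(f32x4[f32; 4]: fun);       // scalar: apply `fun` to each lane in turn
impl_unary!(f32x16[h => f32x8]: fun);  // halves: two calls on the 8-lane halves
impl_unary!(f32x16[q => f32x4]: fun);  // quarter: four calls on the 4-lane quarters
impl_unary!(f32x2[t => f32x4]: fun);   // twice: widen to 4 lanes, one call, keep the low half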
diff --git a/vendor/packed_simd/src/codegen/math/float/mul_add.rs b/vendor/packed_simd/src/codegen/math/float/mul_add.rs
new file mode 100644
index 000000000..d37f30fa8
--- /dev/null
+++ b/vendor/packed_simd/src/codegen/math/float/mul_add.rs
@@ -0,0 +1,109 @@
+//! Vertical floating-point `mul_add`
+#![allow(unused)]
+use crate::*;
+
+// FIXME: 64-bit 1 element mul_add
+
+pub(crate) trait MulAdd {
+ fn mul_add(self, y: Self, z: Self) -> Self;
+}
+
+#[cfg(not(target_arch = "s390x"))]
+#[allow(improper_ctypes)]
+extern "C" {
+ #[link_name = "llvm.fma.v2f32"]
+ fn fma_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2;
+ #[link_name = "llvm.fma.v4f32"]
+ fn fma_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4;
+ #[link_name = "llvm.fma.v8f32"]
+ fn fma_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8;
+ #[link_name = "llvm.fma.v16f32"]
+ fn fma_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16;
+ /* FIXME 64-bit single elem vectors
+ #[link_name = "llvm.fma.v1f64"]
+ fn fma_v1f64(x: f64x1, y: f64x1, z: f64x1) -> f64x1;
+ */
+ #[link_name = "llvm.fma.v2f64"]
+ fn fma_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2;
+ #[link_name = "llvm.fma.v4f64"]
+ fn fma_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4;
+ #[link_name = "llvm.fma.v8f64"]
+ fn fma_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8;
+}
+
+gen_tertiary_impl_table!(MulAdd, mul_add);
+
+cfg_if! {
+ if #[cfg(target_arch = "s390x")] {
+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+ macro_rules! impl_broken {
+ ($id:ident) => {
+ impl MulAdd for $id {
+ #[inline]
+ fn mul_add(self, y: Self, z: Self) -> Self {
+ self * y + z
+ }
+ }
+ };
+ }
+
+ impl_broken!(f32x2);
+ impl_broken!(f32x4);
+ impl_broken!(f32x8);
+ impl_broken!(f32x16);
+
+ impl_broken!(f64x2);
+ impl_broken!(f64x4);
+ impl_broken!(f64x8);
+ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+ use sleef_sys::*;
+ cfg_if! {
+ if #[cfg(target_feature = "avx2")] {
+ impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_avx2128);
+ impl_tertiary!(f32x16[h => f32x8]: Sleef_fmaf8_avx2);
+ impl_tertiary!(f64x8[h => f64x4]: Sleef_fmad4_avx2);
+
+ impl_tertiary!(f32x4: Sleef_fmaf4_avx2128);
+ impl_tertiary!(f32x8: Sleef_fmaf8_avx2);
+ impl_tertiary!(f64x2: Sleef_fmad2_avx2128);
+ impl_tertiary!(f64x4: Sleef_fmad4_avx2);
+ } else if #[cfg(target_feature = "avx")] {
+ impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_sse4);
+ impl_tertiary!(f32x16[h => f32x8]: Sleef_fmaf8_avx);
+ impl_tertiary!(f64x8[h => f64x4]: Sleef_fmad4_avx);
+
+ impl_tertiary!(f32x4: Sleef_fmaf4_sse4);
+ impl_tertiary!(f32x8: Sleef_fmaf8_avx);
+ impl_tertiary!(f64x2: Sleef_fmad2_sse4);
+ impl_tertiary!(f64x4: Sleef_fmad4_avx);
+ } else if #[cfg(target_feature = "sse4.2")] {
+ impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_sse4);
+ impl_tertiary!(f32x16[q => f32x4]: Sleef_fmaf4_sse4);
+ impl_tertiary!(f64x8[q => f64x2]: Sleef_fmad2_sse4);
+
+ impl_tertiary!(f32x4: Sleef_fmaf4_sse4);
+ impl_tertiary!(f32x8[h => f32x4]: Sleef_fmaf4_sse4);
+ impl_tertiary!(f64x2: Sleef_fmad2_sse4);
+ impl_tertiary!(f64x4[h => f64x2]: Sleef_fmad2_sse4);
+ } else {
+ impl_tertiary!(f32x2: fma_v2f32);
+ impl_tertiary!(f32x16: fma_v16f32);
+ impl_tertiary!(f64x8: fma_v8f64);
+
+ impl_tertiary!(f32x4: fma_v4f32);
+ impl_tertiary!(f32x8: fma_v8f32);
+ impl_tertiary!(f64x2: fma_v2f64);
+ impl_tertiary!(f64x4: fma_v4f64);
+ }
+ }
+ } else {
+ impl_tertiary!(f32x2: fma_v2f32);
+ impl_tertiary!(f32x4: fma_v4f32);
+ impl_tertiary!(f32x8: fma_v8f32);
+ impl_tertiary!(f32x16: fma_v16f32);
+ // impl_tertiary!(f64x1: fma_v1f64); // FIXME 64-bit single elem vectors
+ impl_tertiary!(f64x2: fma_v2f64);
+ impl_tertiary!(f64x4: fma_v4f64);
+ impl_tertiary!(f64x8: fma_v8f64);
+ }
+}
diff --git a/vendor/packed_simd/src/codegen/math/float/mul_adde.rs b/vendor/packed_simd/src/codegen/math/float/mul_adde.rs
new file mode 100644
index 000000000..c0baeacec
--- /dev/null
+++ b/vendor/packed_simd/src/codegen/math/float/mul_adde.rs
@@ -0,0 +1,60 @@
+//! Approximation for floating-point `mul_add`
+use crate::*;
+
+// FIXME: 64-bit 1 element mul_adde
+
+pub(crate) trait MulAddE {
+ fn mul_adde(self, y: Self, z: Self) -> Self;
+}
+
+#[cfg(not(target_arch = "s390x"))]
+#[allow(improper_ctypes)]
+extern "C" {
+ #[link_name = "llvm.fmuladd.v2f32"]
+ fn fmuladd_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2;
+ #[link_name = "llvm.fmuladd.v4f32"]
+ fn fmuladd_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4;
+ #[link_name = "llvm.fmuladd.v8f32"]
+ fn fmuladd_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8;
+ #[link_name = "llvm.fmuladd.v16f32"]
+ fn fmuladd_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16;
+ /* FIXME 64-bit single elem vectors
+ #[link_name = "llvm.fmuladd.v1f64"]
+ fn fmuladd_v1f64(x: f64x1, y: f64x1, z: f64x1) -> f64x1;
+ */
+ #[link_name = "llvm.fmuladd.v2f64"]
+ fn fmuladd_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2;
+ #[link_name = "llvm.fmuladd.v4f64"]
+ fn fmuladd_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4;
+ #[link_name = "llvm.fmuladd.v8f64"]
+ fn fmuladd_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8;
+}
+
+macro_rules! impl_mul_adde {
+ ($id:ident : $fn:ident) => {
+ impl MulAddE for $id {
+ #[inline]
+ fn mul_adde(self, y: Self, z: Self) -> Self {
+ #[cfg(not(target_arch = "s390x"))]
+ {
+ use crate::mem::transmute;
+ unsafe { transmute($fn(transmute(self), transmute(y), transmute(z))) }
+ }
+ #[cfg(target_arch = "s390x")]
+ {
+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+ self * y + z
+ }
+ }
+ }
+ };
+}
+
+impl_mul_adde!(f32x2: fmuladd_v2f32);
+impl_mul_adde!(f32x4: fmuladd_v4f32);
+impl_mul_adde!(f32x8: fmuladd_v8f32);
+impl_mul_adde!(f32x16: fmuladd_v16f32);
+// impl_mul_adde!(f64x1: fmuladd_v1f64); // FIXME 64-bit single elem vectors
+impl_mul_adde!(f64x2: fmuladd_v2f64);
+impl_mul_adde!(f64x4: fmuladd_v4f64);
+impl_mul_adde!(f64x8: fmuladd_v8f64);
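The difference between this file and mul_add.rs is the intrinsic family: `llvm.fma.*` guarantees a single fused multiply-add with one rounding step, while `llvm.fmuladd.*` only permits fusion when the target supports it efficiently and otherwise lowers to a separate multiply and add. A usage sketch, assuming the `MulAdd` and `MulAddE` traits are in scope (lane values chosen for illustration):

let x = f32x4::splat(1.5);
let y = f32x4::splat(2.0);
let z = f32x4::splat(0.25);
let fused = x.mul_add(y, z);  // llvm.fma: always one fused op, one rounding per lane
let eager = x.mul_adde(y, z); // llvm.fmuladd: fused if cheap on the target, else (x * y) + z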
diff --git a/vendor/packed_simd/src/codegen/math/float/powf.rs b/vendor/packed_simd/src/codegen/math/float/powf.rs
new file mode 100644
index 000000000..89ca52e96
--- /dev/null
+++ b/vendor/packed_simd/src/codegen/math/float/powf.rs
@@ -0,0 +1,112 @@
+//! Vertical floating-point `powf`
+#![allow(unused)]
+
+// FIXME 64-bit single elem vectors missing
+
+use crate::*;
+
+pub(crate) trait Powf {
+ fn powf(self, x: Self) -> Self;
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+ #[link_name = "llvm.pow.v2f32"]
+ fn powf_v2f32(x: f32x2, y: f32x2) -> f32x2;
+ #[link_name = "llvm.pow.v4f32"]
+ fn powf_v4f32(x: f32x4, y: f32x4) -> f32x4;
+ #[link_name = "llvm.pow.v8f32"]
+ fn powf_v8f32(x: f32x8, y: f32x8) -> f32x8;
+ #[link_name = "llvm.pow.v16f32"]
+ fn powf_v16f32(x: f32x16, y: f32x16) -> f32x16;
+ /* FIXME 64-bit single elem vectors
+ #[link_name = "llvm.pow.v1f64"]
+ fn powf_v1f64(x: f64x1, y: f64x1) -> f64x1;
+ */
+ #[link_name = "llvm.pow.v2f64"]
+ fn powf_v2f64(x: f64x2, y: f64x2) -> f64x2;
+ #[link_name = "llvm.pow.v4f64"]
+ fn powf_v4f64(x: f64x4, y: f64x4) -> f64x4;
+ #[link_name = "llvm.pow.v8f64"]
+ fn powf_v8f64(x: f64x8, y: f64x8) -> f64x8;
+
+ #[link_name = "llvm.pow.f32"]
+ fn powf_f32(x: f32, y: f32) -> f32;
+ #[link_name = "llvm.pow.f64"]
+ fn powf_f64(x: f64, y: f64) -> f64;
+}
+
+gen_binary_impl_table!(Powf, powf);
+
+cfg_if! {
+ if #[cfg(target_arch = "s390x")] {
+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+ impl_binary!(f32x2[f32; 2]: powf_f32);
+ impl_binary!(f32x4[f32; 4]: powf_f32);
+ impl_binary!(f32x8[f32; 8]: powf_f32);
+ impl_binary!(f32x16[f32; 16]: powf_f32);
+
+ impl_binary!(f64x2[f64; 2]: powf_f64);
+ impl_binary!(f64x4[f64; 4]: powf_f64);
+ impl_binary!(f64x8[f64; 8]: powf_f64);
+ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+ use sleef_sys::*;
+ cfg_if! {
+ if #[cfg(target_feature = "avx2")] {
+ impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10avx2128);
+ impl_binary!(f32x16[h => f32x8]: Sleef_powf8_u10avx2);
+ impl_binary!(f64x8[h => f64x4]: Sleef_powd4_u10avx2);
+
+ impl_binary!(f32x4: Sleef_powf4_u10avx2128);
+ impl_binary!(f32x8: Sleef_powf8_u10avx2);
+ impl_binary!(f64x2: Sleef_powd2_u10avx2128);
+ impl_binary!(f64x4: Sleef_powd4_u10avx2);
+ } else if #[cfg(target_feature = "avx")] {
+ impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10sse4);
+ impl_binary!(f32x16[h => f32x8]: Sleef_powf8_u10avx);
+ impl_binary!(f64x8[h => f64x4]: Sleef_powd4_u10avx);
+
+ impl_binary!(f32x4: Sleef_powf4_u10sse4);
+ impl_binary!(f32x8: Sleef_powf8_u10avx);
+ impl_binary!(f64x2: Sleef_powd2_u10sse4);
+ impl_binary!(f64x4: Sleef_powd4_u10avx);
+ } else if #[cfg(target_feature = "sse4.2")] {
+ impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10sse4);
+ impl_binary!(f32x16[q => f32x4]: Sleef_powf4_u10sse4);
+ impl_binary!(f64x8[q => f64x2]: Sleef_powd2_u10sse4);
+
+ impl_binary!(f32x4: Sleef_powf4_u10sse4);
+ impl_binary!(f32x8[h => f32x4]: Sleef_powf4_u10sse4);
+ impl_binary!(f64x2: Sleef_powd2_u10sse4);
+ impl_binary!(f64x4[h => f64x2]: Sleef_powd2_u10sse4);
+ } else if #[cfg(target_feature = "sse2")] {
+ impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10sse2);
+ impl_binary!(f32x16[q => f32x4]: Sleef_powf4_u10sse2);
+ impl_binary!(f64x8[q => f64x2]: Sleef_powd2_u10sse2);
+
+ impl_binary!(f32x4: Sleef_powf4_u10sse2);
+ impl_binary!(f32x8[h => f32x4]: Sleef_powf4_u10sse2);
+ impl_binary!(f64x2: Sleef_powd2_u10sse2);
+ impl_binary!(f64x4[h => f64x2]: Sleef_powd2_u10sse2);
+ } else {
+ impl_binary!(f32x2[f32; 2]: powf_f32);
+ impl_binary!(f32x4: powf_v4f32);
+ impl_binary!(f32x8: powf_v8f32);
+ impl_binary!(f32x16: powf_v16f32);
+
+ impl_binary!(f64x2: powf_v2f64);
+ impl_binary!(f64x4: powf_v4f64);
+ impl_binary!(f64x8: powf_v8f64);
+ }
+ }
+ } else {
+ impl_binary!(f32x2[f32; 2]: powf_f32);
+ impl_binary!(f32x4: powf_v4f32);
+ impl_binary!(f32x8: powf_v8f32);
+ impl_binary!(f32x16: powf_v16f32);
+
+ impl_binary!(f64x2: powf_v2f64);
+ impl_binary!(f64x4: powf_v4f64);
+ impl_binary!(f64x8: powf_v8f64);
+ }
+}
diff --git a/vendor/packed_simd/src/codegen/math/float/sin.rs b/vendor/packed_simd/src/codegen/math/float/sin.rs
new file mode 100644
index 000000000..d88141590
--- /dev/null
+++ b/vendor/packed_simd/src/codegen/math/float/sin.rs
@@ -0,0 +1,103 @@
+//! Vertical floating-point `sin`
+#![allow(unused)]
+
+// FIXME 64-bit 1 elem vectors sin
+
+use crate::*;
+
+pub(crate) trait Sin {
+ fn sin(self) -> Self;
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+ #[link_name = "llvm.sin.v2f32"]
+ fn sin_v2f32(x: f32x2) -> f32x2;
+ #[link_name = "llvm.sin.v4f32"]
+ fn sin_v4f32(x: f32x4) -> f32x4;
+ #[link_name = "llvm.sin.v8f32"]
+ fn sin_v8f32(x: f32x8) -> f32x8;
+ #[link_name = "llvm.sin.v16f32"]
+ fn sin_v16f32(x: f32x16) -> f32x16;
+ /* FIXME 64-bit single elem vectors
+ #[link_name = "llvm.sin.v1f64"]
+ fn sin_v1f64(x: f64x1) -> f64x1;
+ */
+ #[link_name = "llvm.sin.v2f64"]
+ fn sin_v2f64(x: f64x2) -> f64x2;
+ #[link_name = "llvm.sin.v4f64"]
+ fn sin_v4f64(x: f64x4) -> f64x4;
+ #[link_name = "llvm.sin.v8f64"]
+ fn sin_v8f64(x: f64x8) -> f64x8;
+
+ #[link_name = "llvm.sin.f32"]
+ fn sin_f32(x: f32) -> f32;
+ #[link_name = "llvm.sin.f64"]
+ fn sin_f64(x: f64) -> f64;
+}
+
+gen_unary_impl_table!(Sin, sin);
+
+cfg_if! {
+ if #[cfg(target_arch = "s390x")] {
+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+ impl_unary!(f32x2[f32; 2]: sin_f32);
+ impl_unary!(f32x4[f32; 4]: sin_f32);
+ impl_unary!(f32x8[f32; 8]: sin_f32);
+ impl_unary!(f32x16[f32; 16]: sin_f32);
+
+ impl_unary!(f64x2[f64; 2]: sin_f64);
+ impl_unary!(f64x4[f64; 4]: sin_f64);
+ impl_unary!(f64x8[f64; 8]: sin_f64);
+ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+ use sleef_sys::*;
+ cfg_if! {
+ if #[cfg(target_feature = "avx2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_sinf4_u10avx2128);
+ impl_unary!(f32x16[h => f32x8]: Sleef_sinf8_u10avx2);
+ impl_unary!(f64x8[h => f64x4]: Sleef_sind4_u10avx2);
+
+ impl_unary!(f32x4: Sleef_sinf4_u10avx2128);
+ impl_unary!(f32x8: Sleef_sinf8_u10avx2);
+ impl_unary!(f64x2: Sleef_sind2_u10avx2128);
+ impl_unary!(f64x4: Sleef_sind4_u10avx2);
+ } else if #[cfg(target_feature = "avx")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_sinf4_u10sse4);
+ impl_unary!(f32x16[h => f32x8]: Sleef_sinf8_u10avx);
+ impl_unary!(f64x8[h => f64x4]: Sleef_sind4_u10avx);
+
+ impl_unary!(f32x4: Sleef_sinf4_u10sse4);
+ impl_unary!(f32x8: Sleef_sinf8_u10avx);
+ impl_unary!(f64x2: Sleef_sind2_u10sse4);
+ impl_unary!(f64x4: Sleef_sind4_u10avx);
+ } else if #[cfg(target_feature = "sse4.2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_sinf4_u10sse4);
+ impl_unary!(f32x16[q => f32x4]: Sleef_sinf4_u10sse4);
+ impl_unary!(f64x8[q => f64x2]: Sleef_sind2_u10sse4);
+
+ impl_unary!(f32x4: Sleef_sinf4_u10sse4);
+ impl_unary!(f32x8[h => f32x4]: Sleef_sinf4_u10sse4);
+ impl_unary!(f64x2: Sleef_sind2_u10sse4);
+ impl_unary!(f64x4[h => f64x2]: Sleef_sind2_u10sse4);
+ } else {
+ impl_unary!(f32x2[f32; 2]: sin_f32);
+ impl_unary!(f32x16: sin_v16f32);
+ impl_unary!(f64x8: sin_v8f64);
+
+ impl_unary!(f32x4: sin_v4f32);
+ impl_unary!(f32x8: sin_v8f32);
+ impl_unary!(f64x2: sin_v2f64);
+ impl_unary!(f64x4: sin_v4f64);
+ }
+ }
+ } else {
+ impl_unary!(f32x2[f32; 2]: sin_f32);
+ impl_unary!(f32x4: sin_v4f32);
+ impl_unary!(f32x8: sin_v8f32);
+ impl_unary!(f32x16: sin_v16f32);
+
+ impl_unary!(f64x2: sin_v2f64);
+ impl_unary!(f64x4: sin_v4f64);
+ impl_unary!(f64x8: sin_v8f64);
+ }
+}
diff --git a/vendor/packed_simd/src/codegen/math/float/sin_cos_pi.rs b/vendor/packed_simd/src/codegen/math/float/sin_cos_pi.rs
new file mode 100644
index 000000000..b283d1111
--- /dev/null
+++ b/vendor/packed_simd/src/codegen/math/float/sin_cos_pi.rs
@@ -0,0 +1,188 @@
+//! Vertical floating-point `sin_cos_pi`
+#![allow(unused)]
+
+// FIXME 64-bit 1 elem vectors sin_cos_pi
+
+use crate::*;
+
+pub(crate) trait SinCosPi: Sized {
+ type Output;
+ fn sin_cos_pi(self) -> Self::Output;
+}
+
+macro_rules! impl_def {
+ ($vid:ident, $PI:path) => {
+ impl SinCosPi for $vid {
+ type Output = (Self, Self);
+ #[inline]
+ fn sin_cos_pi(self) -> Self::Output {
+ let v = self * Self::splat($PI);
+ (v.sin(), v.cos())
+ }
+ }
+ };
+}
+
+macro_rules! impl_def32 {
+ ($vid:ident) => {
+ impl_def!($vid, crate::f32::consts::PI);
+ };
+}
+macro_rules! impl_def64 {
+ ($vid:ident) => {
+ impl_def!($vid, crate::f64::consts::PI);
+ };
+}
+
+macro_rules! impl_unary_t {
+ ($vid:ident: $fun:ident) => {
+ impl SinCosPi for $vid {
+ type Output = (Self, Self);
+ fn sin_cos_pi(self) -> Self::Output {
+ unsafe {
+ use crate::mem::transmute;
+ transmute($fun(transmute(self)))
+ }
+ }
+ }
+ };
+ ($vid:ident[t => $vid_t:ident]: $fun:ident) => {
+ impl SinCosPi for $vid {
+ type Output = (Self, Self);
+ fn sin_cos_pi(self) -> Self::Output {
+ unsafe {
+ use crate::mem::{transmute, uninitialized};
+
+ union U {
+ vec: [$vid; 2],
+ twice: $vid_t,
+ }
+
+ let twice = U { vec: [self, uninitialized()] }.twice;
+ let twice = transmute($fun(transmute(twice)));
+
+ union R {
+ twice: ($vid_t, $vid_t),
+ vecs: ([$vid; 2], [$vid; 2]),
+ }
+ let r = R { twice }.vecs;
+ (*r.0.get_unchecked(0), *r.0.get_unchecked(1))
+ }
+ }
+ }
+ };
+ ($vid:ident[h => $vid_h:ident]: $fun:ident) => {
+ impl SinCosPi for $vid {
+ type Output = (Self, Self);
+ fn sin_cos_pi(self) -> Self::Output {
+ unsafe {
+ use crate::mem::transmute;
+
+ union U {
+ vec: $vid,
+ halves: [$vid_h; 2],
+ }
+
+ let halves = U { vec: self }.halves;
+
+ let res_0: ($vid_h, $vid_h) = transmute($fun(transmute(*halves.get_unchecked(0))));
+ let res_1: ($vid_h, $vid_h) = transmute($fun(transmute(*halves.get_unchecked(1))));
+
+ union R {
+ result: ($vid, $vid),
+ halves: ([$vid_h; 2], [$vid_h; 2]),
+ }
+ R { halves: ([res_0.0, res_1.0], [res_0.1, res_1.1]) }.result
+ }
+ }
+ }
+ };
+ ($vid:ident[q => $vid_q:ident]: $fun:ident) => {
+ impl SinCosPi for $vid {
+ type Output = (Self, Self);
+ fn sin_cos_pi(self) -> Self::Output {
+ unsafe {
+ use crate::mem::transmute;
+
+ union U {
+ vec: $vid,
+ quarters: [$vid_q; 4],
+ }
+
+ let quarters = U { vec: self }.quarters;
+
+ let res_0: ($vid_q, $vid_q) = transmute($fun(transmute(*quarters.get_unchecked(0))));
+ let res_1: ($vid_q, $vid_q) = transmute($fun(transmute(*quarters.get_unchecked(1))));
+ let res_2: ($vid_q, $vid_q) = transmute($fun(transmute(*quarters.get_unchecked(2))));
+ let res_3: ($vid_q, $vid_q) = transmute($fun(transmute(*quarters.get_unchecked(3))));
+
+ union R {
+ result: ($vid, $vid),
+ quarters: ([$vid_q; 4], [$vid_q; 4]),
+ }
+ R {
+ quarters: (
+ [res_0.0, res_1.0, res_2.0, res_3.0],
+ [res_0.1, res_1.1, res_2.1, res_3.1],
+ ),
+ }
+ .result
+ }
+ }
+ }
+ };
+}
+
+cfg_if! {
+ if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+ use sleef_sys::*;
+ cfg_if! {
+ if #[cfg(target_feature = "avx2")] {
+ impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05avx2128);
+ impl_unary_t!(f32x16[h => f32x8]: Sleef_sincospif8_u05avx2);
+ impl_unary_t!(f64x8[h => f64x4]: Sleef_sincospid4_u05avx2);
+
+ impl_unary_t!(f32x4: Sleef_sincospif4_u05avx2128);
+ impl_unary_t!(f32x8: Sleef_sincospif8_u05avx2);
+ impl_unary_t!(f64x2: Sleef_sincospid2_u05avx2128);
+ impl_unary_t!(f64x4: Sleef_sincospid4_u05avx2);
+ } else if #[cfg(target_feature = "avx")] {
+ impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05sse4);
+ impl_unary_t!(f32x16[h => f32x8]: Sleef_sincospif8_u05avx);
+ impl_unary_t!(f64x8[h => f64x4]: Sleef_sincospid4_u05avx);
+
+ impl_unary_t!(f32x4: Sleef_sincospif4_u05sse4);
+ impl_unary_t!(f32x8: Sleef_sincospif8_u05avx);
+ impl_unary_t!(f64x2: Sleef_sincospid2_u05sse4);
+ impl_unary_t!(f64x4: Sleef_sincospid4_u05avx);
+ } else if #[cfg(target_feature = "sse4.2")] {
+ impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05sse4);
+ impl_unary_t!(f32x16[q => f32x4]: Sleef_sincospif4_u05sse4);
+ impl_unary_t!(f64x8[q => f64x2]: Sleef_sincospid2_u05sse4);
+
+ impl_unary_t!(f32x4: Sleef_sincospif4_u05sse4);
+ impl_unary_t!(f32x8[h => f32x4]: Sleef_sincospif4_u05sse4);
+ impl_unary_t!(f64x2: Sleef_sincospid2_u05sse4);
+ impl_unary_t!(f64x4[h => f64x2]: Sleef_sincospid2_u05sse4);
+ } else {
+ impl_def32!(f32x2);
+ impl_def32!(f32x4);
+ impl_def32!(f32x8);
+ impl_def32!(f32x16);
+
+ impl_def64!(f64x2);
+ impl_def64!(f64x4);
+ impl_def64!(f64x8);
+ }
+ }
+ } else {
+ impl_def32!(f32x2);
+ impl_def32!(f32x4);
+ impl_def32!(f32x8);
+ impl_def32!(f32x16);
+
+ impl_def64!(f64x2);
+ impl_def64!(f64x4);
+ impl_def64!(f64x8);
+ }
+}
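Since `sin_cos_pi` is the one operation in this directory that returns a pair, a short usage sketch (lane values follow from sin(πx) and cos(πx)):

let v = f32x4::splat(0.5);
let (s, c) = v.sin_cos_pi();
// every lane of s is sin(0.5 * pi) = 1.0 and every lane of c is
// cos(0.5 * pi) = 0.0, up to the implementation's error bound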
diff --git a/vendor/packed_simd/src/codegen/math/float/sin_pi.rs b/vendor/packed_simd/src/codegen/math/float/sin_pi.rs
new file mode 100644
index 000000000..0c8f6bb12
--- /dev/null
+++ b/vendor/packed_simd/src/codegen/math/float/sin_pi.rs
@@ -0,0 +1,87 @@
+//! Vertical floating-point `sin_pi`
+#![allow(unused)]
+
+// FIXME 64-bit 1 elem vectors sin_pi
+
+use crate::*;
+
+pub(crate) trait SinPi {
+ fn sin_pi(self) -> Self;
+}
+
+gen_unary_impl_table!(SinPi, sin_pi);
+
+macro_rules! impl_def {
+ ($vid:ident, $PI:path) => {
+ impl SinPi for $vid {
+ #[inline]
+ fn sin_pi(self) -> Self {
+ (self * Self::splat($PI)).sin()
+ }
+ }
+ };
+}
+macro_rules! impl_def32 {
+ ($vid:ident) => {
+ impl_def!($vid, crate::f32::consts::PI);
+ };
+}
+macro_rules! impl_def64 {
+ ($vid:ident) => {
+ impl_def!($vid, crate::f64::consts::PI);
+ };
+}
+
+cfg_if! {
+ if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+ use sleef_sys::*;
+ cfg_if! {
+ if #[cfg(target_feature = "avx2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_sinpif4_u05avx2128);
+ impl_unary!(f32x16[h => f32x8]: Sleef_sinpif8_u05avx2);
+ impl_unary!(f64x8[h => f64x4]: Sleef_sinpid4_u05avx2);
+
+ impl_unary!(f32x4: Sleef_sinpif4_u05avx2128);
+ impl_unary!(f32x8: Sleef_sinpif8_u05avx2);
+ impl_unary!(f64x2: Sleef_sinpid2_u05avx2128);
+ impl_unary!(f64x4: Sleef_sinpid4_u05avx2);
+ } else if #[cfg(target_feature = "avx")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_sinpif4_u05sse4);
+ impl_unary!(f32x16[h => f32x8]: Sleef_sinpif8_u05avx);
+ impl_unary!(f64x8[h => f64x4]: Sleef_sinpid4_u05avx);
+
+ impl_unary!(f32x4: Sleef_sinpif4_u05sse4);
+ impl_unary!(f32x8: Sleef_sinpif8_u05avx);
+ impl_unary!(f64x2: Sleef_sinpid2_u05sse4);
+ impl_unary!(f64x4: Sleef_sinpid4_u05avx);
+ } else if #[cfg(target_feature = "sse4.2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_sinpif4_u05sse4);
+ impl_unary!(f32x16[q => f32x4]: Sleef_sinpif4_u05sse4);
+ impl_unary!(f64x8[q => f64x2]: Sleef_sinpid2_u05sse4);
+
+ impl_unary!(f32x4: Sleef_sinpif4_u05sse4);
+ impl_unary!(f32x8[h => f32x4]: Sleef_sinpif4_u05sse4);
+ impl_unary!(f64x2: Sleef_sinpid2_u05sse4);
+ impl_unary!(f64x4[h => f64x2]: Sleef_sinpid2_u05sse4);
+ } else {
+ impl_def32!(f32x2);
+ impl_def32!(f32x4);
+ impl_def32!(f32x8);
+ impl_def32!(f32x16);
+
+ impl_def64!(f64x2);
+ impl_def64!(f64x4);
+ impl_def64!(f64x8);
+ }
+ }
+ } else {
+ impl_def32!(f32x2);
+ impl_def32!(f32x4);
+ impl_def32!(f32x8);
+ impl_def32!(f32x16);
+
+ impl_def64!(f64x2);
+ impl_def64!(f64x4);
+ impl_def64!(f64x8);
+ }
+}
diff --git a/vendor/packed_simd/src/codegen/math/float/sqrt.rs b/vendor/packed_simd/src/codegen/math/float/sqrt.rs
new file mode 100644
index 000000000..67bb0a2a9
--- /dev/null
+++ b/vendor/packed_simd/src/codegen/math/float/sqrt.rs
@@ -0,0 +1,103 @@
+//! Vertical floating-point `sqrt`
+#![allow(unused)]
+
+// FIXME 64-bit 1 elem vectors sqrt
+
+use crate::*;
+
+pub(crate) trait Sqrt {
+ fn sqrt(self) -> Self;
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+ #[link_name = "llvm.sqrt.v2f32"]
+ fn sqrt_v2f32(x: f32x2) -> f32x2;
+ #[link_name = "llvm.sqrt.v4f32"]
+ fn sqrt_v4f32(x: f32x4) -> f32x4;
+ #[link_name = "llvm.sqrt.v8f32"]
+ fn sqrt_v8f32(x: f32x8) -> f32x8;
+ #[link_name = "llvm.sqrt.v16f32"]
+ fn sqrt_v16f32(x: f32x16) -> f32x16;
+ /* FIXME 64-bit single elem vectors
+ #[link_name = "llvm.sqrt.v1f64"]
+ fn sqrt_v1f64(x: f64x1) -> f64x1;
+ */
+ #[link_name = "llvm.sqrt.v2f64"]
+ fn sqrt_v2f64(x: f64x2) -> f64x2;
+ #[link_name = "llvm.sqrt.v4f64"]
+ fn sqrt_v4f64(x: f64x4) -> f64x4;
+ #[link_name = "llvm.sqrt.v8f64"]
+ fn sqrt_v8f64(x: f64x8) -> f64x8;
+
+ #[link_name = "llvm.sqrt.f32"]
+ fn sqrt_f32(x: f32) -> f32;
+ #[link_name = "llvm.sqrt.f64"]
+ fn sqrt_f64(x: f64) -> f64;
+}
+
+gen_unary_impl_table!(Sqrt, sqrt);
+
+cfg_if! {
+ if #[cfg(target_arch = "s390x")] {
+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+ impl_unary!(f32x2[f32; 2]: sqrt_f32);
+ impl_unary!(f32x4[f32; 4]: sqrt_f32);
+ impl_unary!(f32x8[f32; 8]: sqrt_f32);
+ impl_unary!(f32x16[f32; 16]: sqrt_f32);
+
+ impl_unary!(f64x2[f64; 2]: sqrt_f64);
+ impl_unary!(f64x4[f64; 4]: sqrt_f64);
+ impl_unary!(f64x8[f64; 8]: sqrt_f64);
+ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+ use sleef_sys::*;
+ cfg_if! {
+ if #[cfg(target_feature = "avx2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_avx2128);
+ impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_avx2);
+ impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_avx2);
+
+ impl_unary!(f32x4: Sleef_sqrtf4_avx2128);
+ impl_unary!(f32x8: Sleef_sqrtf8_avx2);
+ impl_unary!(f64x2: Sleef_sqrtd2_avx2128);
+ impl_unary!(f64x4: Sleef_sqrtd4_avx2);
+ } else if #[cfg(target_feature = "avx")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_sse4);
+ impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_avx);
+ impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_avx);
+
+ impl_unary!(f32x4: Sleef_sqrtf4_sse4);
+ impl_unary!(f32x8: Sleef_sqrtf8_avx);
+ impl_unary!(f64x2: Sleef_sqrtd2_sse4);
+ impl_unary!(f64x4: Sleef_sqrtd4_avx);
+ } else if #[cfg(target_feature = "sse4.2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_sse4);
+ impl_unary!(f32x16[q => f32x4]: Sleef_sqrtf4_sse4);
+ impl_unary!(f64x8[q => f64x2]: Sleef_sqrtd2_sse4);
+
+ impl_unary!(f32x4: Sleef_sqrtf4_sse4);
+ impl_unary!(f32x8[h => f32x4]: Sleef_sqrtf4_sse4);
+ impl_unary!(f64x2: Sleef_sqrtd2_sse4);
+ impl_unary!(f64x4[h => f64x2]: Sleef_sqrtd2_sse4);
+ } else {
+ impl_unary!(f32x2[f32; 2]: sqrt_f32);
+ impl_unary!(f32x16: sqrt_v16f32);
+ impl_unary!(f64x8: sqrt_v8f64);
+
+ impl_unary!(f32x4: sqrt_v4f32);
+ impl_unary!(f32x8: sqrt_v8f32);
+ impl_unary!(f64x2: sqrt_v2f64);
+ impl_unary!(f64x4: sqrt_v4f64);
+ }
+ }
+ } else {
+ impl_unary!(f32x2[f32; 2]: sqrt_f32);
+ impl_unary!(f32x4: sqrt_v4f32);
+ impl_unary!(f32x8: sqrt_v8f32);
+ impl_unary!(f32x16: sqrt_v16f32);
+
+ impl_unary!(f64x2: sqrt_v2f64);
+ impl_unary!(f64x4: sqrt_v4f64);
+ impl_unary!(f64x8: sqrt_v8f64);
+ }
+}
diff --git a/vendor/packed_simd/src/codegen/math/float/sqrte.rs b/vendor/packed_simd/src/codegen/math/float/sqrte.rs
new file mode 100644
index 000000000..58a1de1f4
--- /dev/null
+++ b/vendor/packed_simd/src/codegen/math/float/sqrte.rs
@@ -0,0 +1,67 @@
+//! Vertical floating-point `sqrte`
+#![allow(unused)]
+
+// FIXME 64-bit 1 elem vectors sqrte
+
+use crate::llvm::simd_fsqrt;
+use crate::*;
+
+pub(crate) trait Sqrte {
+ fn sqrte(self) -> Self;
+}
+
+gen_unary_impl_table!(Sqrte, sqrte);
+
+cfg_if! {
+ if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+ use sleef_sys::*;
+ cfg_if! {
+ if #[cfg(target_feature = "avx2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_u35avx2128);
+ impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_u35avx2);
+ impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_u35avx2);
+
+ impl_unary!(f32x4: Sleef_sqrtf4_u35avx2128);
+ impl_unary!(f32x8: Sleef_sqrtf8_u35avx2);
+ impl_unary!(f64x2: Sleef_sqrtd2_u35avx2128);
+ impl_unary!(f64x4: Sleef_sqrtd4_u35avx2);
+ } else if #[cfg(target_feature = "avx")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_u35sse4);
+ impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_u35avx);
+ impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_u35avx);
+
+ impl_unary!(f32x4: Sleef_sqrtf4_u35sse4);
+ impl_unary!(f32x8: Sleef_sqrtf8_u35avx);
+ impl_unary!(f64x2: Sleef_sqrtd2_u35sse4);
+ impl_unary!(f64x4: Sleef_sqrtd4_u35avx);
+ } else if #[cfg(target_feature = "sse4.2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_u35sse4);
+ impl_unary!(f32x16[q => f32x4]: Sleef_sqrtf4_u35sse4);
+ impl_unary!(f64x8[q => f64x2]: Sleef_sqrtd2_u35sse4);
+
+ impl_unary!(f32x4: Sleef_sqrtf4_u35sse4);
+ impl_unary!(f32x8[h => f32x4]: Sleef_sqrtf4_u35sse4);
+ impl_unary!(f64x2: Sleef_sqrtd2_u35sse4);
+ impl_unary!(f64x4[h => f64x2]: Sleef_sqrtd2_u35sse4);
+ } else {
+ impl_unary!(f32x2[g]: simd_fsqrt);
+ impl_unary!(f32x16[g]: simd_fsqrt);
+ impl_unary!(f64x8[g]: simd_fsqrt);
+
+ impl_unary!(f32x4[g]: simd_fsqrt);
+ impl_unary!(f32x8[g]: simd_fsqrt);
+ impl_unary!(f64x2[g]: simd_fsqrt);
+ impl_unary!(f64x4[g]: simd_fsqrt);
+ }
+ }
+ } else {
+ impl_unary!(f32x2[g]: simd_fsqrt);
+ impl_unary!(f32x4[g]: simd_fsqrt);
+ impl_unary!(f32x8[g]: simd_fsqrt);
+ impl_unary!(f32x16[g]: simd_fsqrt);
+
+ impl_unary!(f64x2[g]: simd_fsqrt);
+ impl_unary!(f64x4[g]: simd_fsqrt);
+ impl_unary!(f64x8[g]: simd_fsqrt);
+ }
+}
diff --git a/vendor/packed_simd/src/codegen/math/float/tanh.rs b/vendor/packed_simd/src/codegen/math/float/tanh.rs
new file mode 100644
index 000000000..4243b0d88
--- /dev/null
+++ b/vendor/packed_simd/src/codegen/math/float/tanh.rs
@@ -0,0 +1,120 @@
+//! Vertical floating-point `tanh`
+#![allow(unused)]
+
+// FIXME 64-bit 1 elem vectors tanh
+
+#[cfg(not(feature = "std"))]
+use num_traits::Float;
+
+use crate::*;
+
+pub(crate) trait Tanh {
+ fn tanh(self) -> Self;
+}
+
+macro_rules! define_tanh {
+ ($name:ident, $basetype:ty, $simdtype:ty, $lanes:expr, $trait:path) => {
+ fn $name(x: $simdtype) -> $simdtype {
+ use core::intrinsics::transmute;
+ let mut buf: [$basetype; $lanes] = unsafe { transmute(x) };
+ for elem in &mut buf {
+ *elem = <$basetype as $trait>::tanh(*elem);
+ }
+ unsafe { transmute(buf) }
+ }
+ };
+
+ (f32 => $name:ident, $type:ty, $lanes:expr) => {
+ define_tanh!($name, f32, $type, $lanes, Float);
+ };
+
+ (f64 => $name:ident, $type:ty, $lanes:expr) => {
+ define_tanh!($name, f64, $type, $lanes, Float);
+ };
+}
+
+// LLVM does not expose intrinsics for the hyperbolic functions, so we fall
+// back to the scalar Rust implementations (which stem from cmath) on every
+// lane.
+define_tanh!(f32 => tanh_v2f32, f32x2, 2);
+define_tanh!(f32 => tanh_v4f32, f32x4, 4);
+define_tanh!(f32 => tanh_v8f32, f32x8, 8);
+define_tanh!(f32 => tanh_v16f32, f32x16, 16);
+
+define_tanh!(f64 => tanh_v2f64, f64x2, 2);
+define_tanh!(f64 => tanh_v4f64, f64x4, 4);
+define_tanh!(f64 => tanh_v8f64, f64x8, 8);
+
+fn tanh_f32(x: f32) -> f32 {
+ Float::tanh(x)
+}
+
+fn tanh_f64(x: f64) -> f64 {
+ Float::tanh(x)
+}
+
+gen_unary_impl_table!(Tanh, tanh);
+
+cfg_if! {
+ if #[cfg(target_arch = "s390x")] {
+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+ impl_unary!(f32x2[f32; 2]: tanh_f32);
+ impl_unary!(f32x4[f32; 4]: tanh_f32);
+ impl_unary!(f32x8[f32; 8]: tanh_f32);
+ impl_unary!(f32x16[f32; 16]: tanh_f32);
+
+ impl_unary!(f64x2[f64; 2]: tanh_f64);
+ impl_unary!(f64x4[f64; 4]: tanh_f64);
+ impl_unary!(f64x8[f64; 8]: tanh_f64);
+ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+ use sleef_sys::*;
+ cfg_if! {
+ if #[cfg(target_feature = "avx2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_tanhf4_u10avx2128);
+ impl_unary!(f32x16[h => f32x8]: Sleef_tanhf8_u10avx2);
+ impl_unary!(f64x8[h => f64x4]: Sleef_tanhd4_u10avx2);
+
+ impl_unary!(f32x4: Sleef_tanhf4_u10avx2128);
+ impl_unary!(f32x8: Sleef_tanhf8_u10avx2);
+ impl_unary!(f64x2: Sleef_tanhd2_u10avx2128);
+ impl_unary!(f64x4: Sleef_tanhd4_u10avx2);
+ } else if #[cfg(target_feature = "avx")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_tanhf4_u10sse4);
+ impl_unary!(f32x16[h => f32x8]: Sleef_tanhf8_u10avx);
+ impl_unary!(f64x8[h => f64x4]: Sleef_tanhd4_u10avx);
+
+ impl_unary!(f32x4: Sleef_tanhf4_u10sse4);
+ impl_unary!(f32x8: Sleef_tanhf8_u10avx);
+ impl_unary!(f64x2: Sleef_tanhd2_u10sse4);
+ impl_unary!(f64x4: Sleef_tanhd4_u10avx);
+ } else if #[cfg(target_feature = "sse4.2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_tanhf4_u10sse4);
+ impl_unary!(f32x16[q => f32x4]: Sleef_tanhf4_u10sse4);
+ impl_unary!(f64x8[q => f64x2]: Sleef_tanhd2_u10sse4);
+
+ impl_unary!(f32x4: Sleef_tanhf4_u10sse4);
+ impl_unary!(f32x8[h => f32x4]: Sleef_tanhf4_u10sse4);
+ impl_unary!(f64x2: Sleef_tanhd2_u10sse4);
+ impl_unary!(f64x4[h => f64x2]: Sleef_tanhd2_u10sse4);
+ } else {
+ impl_unary!(f32x2[f32; 2]: tanh_f32);
+ impl_unary!(f32x16: tanh_v16f32);
+ impl_unary!(f64x8: tanh_v8f64);
+
+ impl_unary!(f32x4: tanh_v4f32);
+ impl_unary!(f32x8: tanh_v8f32);
+ impl_unary!(f64x2: tanh_v2f64);
+ impl_unary!(f64x4: tanh_v4f64);
+ }
+ }
+ } else {
+ impl_unary!(f32x2[f32; 2]: tanh_f32);
+ impl_unary!(f32x4: tanh_v4f32);
+ impl_unary!(f32x8: tanh_v8f32);
+ impl_unary!(f32x16: tanh_v16f32);
+
+ impl_unary!(f64x2: tanh_v2f64);
+ impl_unary!(f64x4: tanh_v4f64);
+ impl_unary!(f64x8: tanh_v8f64);
+ }
+}