summaryrefslogtreecommitdiffstats
path: root/vendor/packed_simd_2/src/codegen/math/float/sin_cos_pi.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/packed_simd_2/src/codegen/math/float/sin_cos_pi.rs')
-rw-r--r--vendor/packed_simd_2/src/codegen/math/float/sin_cos_pi.rs195
1 files changed, 195 insertions, 0 deletions
diff --git a/vendor/packed_simd_2/src/codegen/math/float/sin_cos_pi.rs b/vendor/packed_simd_2/src/codegen/math/float/sin_cos_pi.rs
new file mode 100644
index 000000000..0f1249ec8
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/math/float/sin_cos_pi.rs
@@ -0,0 +1,195 @@
+//! Vertical (lane-wise) floating-point `sin_cos_pi`
+#![allow(unused)]
+
+// FIXME 64-bit 1 elem vectors sin_cos
+
+use crate::*;
+
+// Crate-internal abstraction over "sine and cosine of `self * PI`".
+//
+// Declared `pub(crate)` rather than with the bare `crate` visibility keyword:
+// both give crate-wide visibility, but the bare keyword was an unstable
+// syntax that newer compilers no longer accept.
+pub(crate) trait SinCosPi: Sized {
+    // Result type of `sin_cos_pi`; every implementation in this file uses
+    // `(Self, Self)`, ordered as (sine, cosine).
+    type Output;
+
+    // Consumes `self` and returns the sine and the cosine of `self * PI`.
+    fn sin_cos_pi(self) -> Self::Output;
+}
+
+// Portable implementation of `SinCosPi` for the vector type `$vec_ty`.
+//
+// The input is scaled by a splat of the constant at path `$pi_const`, then
+// the type's own `sin` and `cos` methods are applied to the scaled value.
+macro_rules! impl_def {
+    ($vec_ty:ident, $pi_const:path) => {
+        impl SinCosPi for $vec_ty {
+            type Output = (Self, Self);
+
+            #[inline]
+            fn sin_cos_pi(self) -> Self::Output {
+                // Scale once and reuse the product for both results.
+                let scaled = self * Self::splat($pi_const);
+                let sine = scaled.sin();
+                let cosine = scaled.cos();
+                (sine, cosine)
+            }
+        }
+    };
+}
+
+// Single-precision shorthand: forwards to `impl_def!` with the `f32` PI
+// constant reachable through the crate root.
+macro_rules! impl_def32 {
+    ($vec_ty:ident) => {
+        impl_def!($vec_ty, crate::f32::consts::PI);
+    };
+}
+// Double-precision shorthand: forwards to `impl_def!` with the `f64` PI
+// constant reachable through the crate root.
+macro_rules! impl_def64 {
+    ($vec_ty:ident) => {
+        impl_def!($vec_ty, crate::f64::consts::PI);
+    };
+}
+
+// Implements `SinCosPi` for `$vid` on top of an external function `$fun`
+// that maps one vector to a (sine, cosine) pair of vectors.
+//
+// Four shapes are supported:
+//   * `$vid: $fun`              - `$fun` works on a vector as wide as `$vid`.
+//   * `$vid[t => $vid_t]: $fun` - `$fun` works on `$vid_t`, twice as wide.
+//   * `$vid[h => $vid_h]: $fun` - `$fun` works on `$vid_h`, half as wide.
+//   * `$vid[q => $vid_q]: $fun` - `$fun` works on `$vid_q`, a quarter as wide.
+//
+// Requirements the caller of this macro must uphold (they are what make the
+// `unsafe` blocks below sound):
+//   * `$fun`'s argument has the same size and layout as the vector it is
+//     handed, and its return value has the same size and layout as a pair of
+//     such vectors, sine first and cosine second.
+//   * the width relation named by `t` / `h` / `q` actually holds, so both
+//     fields of each local union cover exactly the same bytes.
+// NOTE(review): the code also relies on a two-element tuple of identical
+// vector types being laid out as two consecutive vectors; the language does
+// not formally promise a tuple layout - confirm this is acceptable here.
+macro_rules! impl_unary_t {
+    // Same width: reinterpret, call, reinterpret back.
+    ($vid:ident: $fun:ident) => {
+        impl SinCosPi for $vid {
+            type Output = (Self, Self);
+            #[inline]
+            fn sin_cos_pi(self) -> Self::Output {
+                use crate::mem::transmute;
+                // SAFETY: see the layout requirements on `impl_unary_t!`.
+                unsafe { transmute($fun(transmute(self))) }
+            }
+        }
+    };
+    // `$fun` is twice as wide: pad the input, keep only the low half of each
+    // result.
+    ($vid:ident[t => $vid_t:ident]: $fun:ident) => {
+        impl SinCosPi for $vid {
+            type Output = (Self, Self);
+            #[inline]
+            fn sin_cos_pi(self) -> Self::Output {
+                use crate::mem::transmute;
+
+                union U {
+                    vec: [$vid; 2],
+                    twice: $vid_t,
+                }
+                union R {
+                    twice: ($vid_t, $vid_t),
+                    vecs: ([$vid; 2], [$vid; 2]),
+                }
+
+                // SAFETY: see the layout requirements on `impl_unary_t!`.
+                unsafe {
+                    // The upper half is padding whose results are discarded.
+                    // It is filled with a copy of `self` so that no
+                    // uninitialized memory is ever read as floating-point
+                    // data.
+                    let twice = U { vec: [self, self] }.twice;
+                    let twice: ($vid_t, $vid_t) =
+                        transmute($fun(transmute(twice)));
+
+                    let r = R { twice }.vecs;
+                    // `r.0` holds the sines and `r.1` the cosines; the lanes
+                    // that belong to `self` are the first half of each.
+                    (r.0[0], r.1[0])
+                }
+            }
+        }
+    };
+    // `$fun` is half as wide: process both halves and stitch the results.
+    ($vid:ident[h => $vid_h:ident]: $fun:ident) => {
+        impl SinCosPi for $vid {
+            type Output = (Self, Self);
+            #[inline]
+            fn sin_cos_pi(self) -> Self::Output {
+                use crate::mem::transmute;
+
+                union U {
+                    vec: $vid,
+                    halves: [$vid_h; 2],
+                }
+                union R {
+                    result: ($vid, $vid),
+                    halves: ([$vid_h; 2], [$vid_h; 2]),
+                }
+
+                // SAFETY: see the layout requirements on `impl_unary_t!`.
+                unsafe {
+                    let halves = U { vec: self }.halves;
+
+                    let res_0: ($vid_h, $vid_h) =
+                        transmute($fun(transmute(halves[0])));
+                    let res_1: ($vid_h, $vid_h) =
+                        transmute($fun(transmute(halves[1])));
+
+                    // Gather all sines into the first vector and all cosines
+                    // into the second, preserving lane order.
+                    R { halves: ([res_0.0, res_1.0], [res_0.1, res_1.1]) }
+                        .result
+                }
+            }
+        }
+    };
+    // `$fun` is a quarter as wide: process four quarters and stitch the
+    // results.
+    ($vid:ident[q => $vid_q:ident]: $fun:ident) => {
+        impl SinCosPi for $vid {
+            type Output = (Self, Self);
+            #[inline]
+            fn sin_cos_pi(self) -> Self::Output {
+                use crate::mem::transmute;
+
+                union U {
+                    vec: $vid,
+                    quarters: [$vid_q; 4],
+                }
+                union R {
+                    result: ($vid, $vid),
+                    quarters: ([$vid_q; 4], [$vid_q; 4]),
+                }
+
+                // SAFETY: see the layout requirements on `impl_unary_t!`.
+                unsafe {
+                    let quarters = U { vec: self }.quarters;
+
+                    let res_0: ($vid_q, $vid_q) =
+                        transmute($fun(transmute(quarters[0])));
+                    let res_1: ($vid_q, $vid_q) =
+                        transmute($fun(transmute(quarters[1])));
+                    let res_2: ($vid_q, $vid_q) =
+                        transmute($fun(transmute(quarters[2])));
+                    let res_3: ($vid_q, $vid_q) =
+                        transmute($fun(transmute(quarters[3])));
+
+                    // Gather all sines into the first vector and all cosines
+                    // into the second, preserving lane order.
+                    R {
+                        quarters: (
+                            [res_0.0, res_1.0, res_2.0, res_3.0],
+                            [res_0.1, res_1.1, res_2.1, res_3.1],
+                        ),
+                    }
+                    .result
+                }
+            }
+        }
+    };
+}
+
+// Backend selection for the `SinCosPi` implementations.
+//
+// On x86_64 with the `sleef-sys` feature enabled, the `Sleef_sincospi*`
+// routines are used, chosen by the best enabled target feature
+// (avx2, then avx, then sse4.2). Every other configuration uses the portable
+// `impl_def32!` / `impl_def64!` implementations.
+cfg_if! {
+    if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+        use sleef_sys::*;
+        cfg_if! {
+            if #[cfg(target_feature = "avx2")] {
+                // f32 vectors, narrowest to widest.
+                impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05avx2128);
+                impl_unary_t!(f32x4: Sleef_sincospif4_u05avx2128);
+                impl_unary_t!(f32x8: Sleef_sincospif8_u05avx2);
+                impl_unary_t!(f32x16[h => f32x8]: Sleef_sincospif8_u05avx2);
+
+                // f64 vectors, narrowest to widest.
+                impl_unary_t!(f64x2: Sleef_sincospid2_u05avx2128);
+                impl_unary_t!(f64x4: Sleef_sincospid4_u05avx2);
+                impl_unary_t!(f64x8[h => f64x4]: Sleef_sincospid4_u05avx2);
+            } else if #[cfg(target_feature = "avx")] {
+                // f32 vectors, narrowest to widest.
+                impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05sse4);
+                impl_unary_t!(f32x4: Sleef_sincospif4_u05sse4);
+                impl_unary_t!(f32x8: Sleef_sincospif8_u05avx);
+                impl_unary_t!(f32x16[h => f32x8]: Sleef_sincospif8_u05avx);
+
+                // f64 vectors, narrowest to widest.
+                impl_unary_t!(f64x2: Sleef_sincospid2_u05sse4);
+                impl_unary_t!(f64x4: Sleef_sincospid4_u05avx);
+                impl_unary_t!(f64x8[h => f64x4]: Sleef_sincospid4_u05avx);
+            } else if #[cfg(target_feature = "sse4.2")] {
+                // f32 vectors, narrowest to widest; wider vectors are split
+                // into pieces that the 4-lane routine can take.
+                impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05sse4);
+                impl_unary_t!(f32x4: Sleef_sincospif4_u05sse4);
+                impl_unary_t!(f32x8[h => f32x4]: Sleef_sincospif4_u05sse4);
+                impl_unary_t!(f32x16[q => f32x4]: Sleef_sincospif4_u05sse4);
+
+                // f64 vectors, narrowest to widest; wider vectors are split
+                // into pieces that the 2-lane routine can take.
+                impl_unary_t!(f64x2: Sleef_sincospid2_u05sse4);
+                impl_unary_t!(f64x4[h => f64x2]: Sleef_sincospid2_u05sse4);
+                impl_unary_t!(f64x8[q => f64x2]: Sleef_sincospid2_u05sse4);
+            } else {
+                // None of the target features above is enabled: portable
+                // path.
+                impl_def32!(f32x2);
+                impl_def32!(f32x4);
+                impl_def32!(f32x8);
+                impl_def32!(f32x16);
+
+                impl_def64!(f64x2);
+                impl_def64!(f64x4);
+                impl_def64!(f64x8);
+            }
+        }
+    } else {
+        // Not x86_64, or built without `sleef-sys`: portable path.
+        impl_def32!(f32x2);
+        impl_def32!(f32x4);
+        impl_def32!(f32x8);
+        impl_def32!(f32x16);
+
+        impl_def64!(f64x2);
+        impl_def64!(f64x4);
+        impl_def64!(f64x8);
+    }
+}