//! Vertical floating-point `sin_cos` #![allow(unused)] // FIXME 64-bit 1 elem vectors sin_cos use crate::*; crate trait SinCosPi: Sized { type Output; fn sin_cos_pi(self) -> Self::Output; } macro_rules! impl_def { ($vid:ident, $PI:path) => { impl SinCosPi for $vid { type Output = (Self, Self); #[inline] fn sin_cos_pi(self) -> Self::Output { let v = self * Self::splat($PI); (v.sin(), v.cos()) } } }; } macro_rules! impl_def32 { ($vid:ident) => { impl_def!($vid, crate::f32::consts::PI); }; } macro_rules! impl_def64 { ($vid:ident) => { impl_def!($vid, crate::f64::consts::PI); }; } macro_rules! impl_unary_t { ($vid:ident: $fun:ident) => { impl SinCosPi for $vid { type Output = (Self, Self); fn sin_cos_pi(self) -> Self::Output { unsafe { use crate::mem::transmute; transmute($fun(transmute(self))) } } } }; ($vid:ident[t => $vid_t:ident]: $fun:ident) => { impl SinCosPi for $vid { type Output = (Self, Self); fn sin_cos_pi(self) -> Self::Output { unsafe { use crate::mem::{transmute, uninitialized}; union U { vec: [$vid; 2], twice: $vid_t, } let twice = U { vec: [self, uninitialized()] }.twice; let twice = transmute($fun(transmute(twice))); union R { twice: ($vid_t, $vid_t), vecs: ([$vid; 2], [$vid; 2]), } let r = R { twice }.vecs; (*r.0.get_unchecked(0), *r.0.get_unchecked(1)) } } } }; ($vid:ident[h => $vid_h:ident]: $fun:ident) => { impl SinCosPi for $vid { type Output = (Self, Self); fn sin_cos_pi(self) -> Self::Output { unsafe { use crate::mem::transmute; union U { vec: $vid, halves: [$vid_h; 2], } let halves = U { vec: self }.halves; let res_0: ($vid_h, $vid_h) = transmute($fun(transmute(*halves.get_unchecked(0)))); let res_1: ($vid_h, $vid_h) = transmute($fun(transmute(*halves.get_unchecked(1)))); union R { result: ($vid, $vid), halves: ([$vid_h; 2], [$vid_h; 2]), } R { halves: ([res_0.0, res_1.0], [res_0.1, res_1.1]) } .result } } } }; ($vid:ident[q => $vid_q:ident]: $fun:ident) => { impl SinCosPi for $vid { type Output = (Self, Self); fn sin_cos_pi(self) -> Self::Output { unsafe { use crate::mem::transmute; union U { vec: $vid, quarters: [$vid_q; 4], } let quarters = U { vec: self }.quarters; let res_0: ($vid_q, $vid_q) = transmute($fun(transmute(*quarters.get_unchecked(0)))); let res_1: ($vid_q, $vid_q) = transmute($fun(transmute(*quarters.get_unchecked(1)))); let res_2: ($vid_q, $vid_q) = transmute($fun(transmute(*quarters.get_unchecked(2)))); let res_3: ($vid_q, $vid_q) = transmute($fun(transmute(*quarters.get_unchecked(3)))); union R { result: ($vid, $vid), quarters: ([$vid_q; 4], [$vid_q; 4]), } R { quarters: ( [res_0.0, res_1.0, res_2.0, res_3.0], [res_0.1, res_1.1, res_2.1, res_3.1], ), } .result } } } }; } cfg_if! { if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { use sleef_sys::*; cfg_if! { if #[cfg(target_feature = "avx2")] { impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05avx2128); impl_unary_t!(f32x16[h => f32x8]: Sleef_sincospif8_u05avx2); impl_unary_t!(f64x8[h => f64x4]: Sleef_sincospid4_u05avx2); impl_unary_t!(f32x4: Sleef_sincospif4_u05avx2128); impl_unary_t!(f32x8: Sleef_sincospif8_u05avx2); impl_unary_t!(f64x2: Sleef_sincospid2_u05avx2128); impl_unary_t!(f64x4: Sleef_sincospid4_u05avx2); } else if #[cfg(target_feature = "avx")] { impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05sse4); impl_unary_t!(f32x16[h => f32x8]: Sleef_sincospif8_u05avx); impl_unary_t!(f64x8[h => f64x4]: Sleef_sincospid4_u05avx); impl_unary_t!(f32x4: Sleef_sincospif4_u05sse4); impl_unary_t!(f32x8: Sleef_sincospif8_u05avx); impl_unary_t!(f64x2: Sleef_sincospid2_u05sse4); impl_unary_t!(f64x4: Sleef_sincospid4_u05avx); } else if #[cfg(target_feature = "sse4.2")] { impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05sse4); impl_unary_t!(f32x16[q => f32x4]: Sleef_sincospif4_u05sse4); impl_unary_t!(f64x8[q => f64x2]: Sleef_sincospid2_u05sse4); impl_unary_t!(f32x4: Sleef_sincospif4_u05sse4); impl_unary_t!(f32x8[h => f32x4]: Sleef_sincospif4_u05sse4); impl_unary_t!(f64x2: Sleef_sincospid2_u05sse4); impl_unary_t!(f64x4[h => f64x2]: Sleef_sincospid2_u05sse4); } else { impl_def32!(f32x2); impl_def32!(f32x4); impl_def32!(f32x8); impl_def32!(f32x16); impl_def64!(f64x2); impl_def64!(f64x4); impl_def64!(f64x8); } } } else { impl_def32!(f32x2); impl_def32!(f32x4); impl_def32!(f32x8); impl_def32!(f32x16); impl_def64!(f64x2); impl_def64!(f64x4); impl_def64!(f64x8); } }