//! Implement 256- and 512- bit in terms of 128-bit, for machines without native wide SIMD. use core::marker::PhantomData; use core::ops::*; use crate::types::*; use crate::{vec128_storage, vec256_storage, vec512_storage}; #[derive(Copy, Clone, Default)] #[allow(non_camel_case_types)] pub struct x2(pub [W; 2], PhantomData); impl x2 { #[inline(always)] pub fn new(xs: [W; 2]) -> Self { x2(xs, PhantomData) } } macro_rules! fwd_binop_x2 { ($trait:ident, $fn:ident) => { impl $trait for x2 { type Output = x2; #[inline(always)] fn $fn(self, rhs: Self) -> Self::Output { x2::new([self.0[0].$fn(rhs.0[0]), self.0[1].$fn(rhs.0[1])]) } } }; } macro_rules! fwd_binop_assign_x2 { ($trait:ident, $fn_assign:ident) => { impl $trait for x2 { #[inline(always)] fn $fn_assign(&mut self, rhs: Self) { (self.0[0]).$fn_assign(rhs.0[0]); (self.0[1]).$fn_assign(rhs.0[1]); } } }; } macro_rules! fwd_unop_x2 { ($fn:ident) => { #[inline(always)] fn $fn(self) -> Self { x2::new([self.0[0].$fn(), self.0[1].$fn()]) } }; } impl RotateEachWord32 for x2 where W: Copy + RotateEachWord32, { fwd_unop_x2!(rotate_each_word_right7); fwd_unop_x2!(rotate_each_word_right8); fwd_unop_x2!(rotate_each_word_right11); fwd_unop_x2!(rotate_each_word_right12); fwd_unop_x2!(rotate_each_word_right16); fwd_unop_x2!(rotate_each_word_right20); fwd_unop_x2!(rotate_each_word_right24); fwd_unop_x2!(rotate_each_word_right25); } impl RotateEachWord64 for x2 where W: Copy + RotateEachWord64, { fwd_unop_x2!(rotate_each_word_right32); } impl RotateEachWord128 for x2 where W: RotateEachWord128 {} impl BitOps0 for x2 where W: BitOps0, G: Copy, { } impl BitOps32 for x2 where W: BitOps32 + BitOps0, G: Copy, { } impl BitOps64 for x2 where W: BitOps64 + BitOps0, G: Copy, { } impl BitOps128 for x2 where W: BitOps128 + BitOps0, G: Copy, { } fwd_binop_x2!(BitAnd, bitand); fwd_binop_x2!(BitOr, bitor); fwd_binop_x2!(BitXor, bitxor); fwd_binop_x2!(AndNot, andnot); fwd_binop_assign_x2!(BitAndAssign, bitand_assign); fwd_binop_assign_x2!(BitOrAssign, bitor_assign); fwd_binop_assign_x2!(BitXorAssign, bitxor_assign); impl ArithOps for x2 where W: ArithOps, G: Copy, { } fwd_binop_x2!(Add, add); fwd_binop_assign_x2!(AddAssign, add_assign); impl Not for x2 { type Output = x2; #[inline(always)] fn not(self) -> Self::Output { x2::new([self.0[0].not(), self.0[1].not()]) } } impl UnsafeFrom<[W; 2]> for x2 { #[inline(always)] unsafe fn unsafe_from(xs: [W; 2]) -> Self { x2::new(xs) } } impl Vec2 for x2 { #[inline(always)] fn extract(self, i: u32) -> W { self.0[i as usize] } #[inline(always)] fn insert(mut self, w: W, i: u32) -> Self { self.0[i as usize] = w; self } } impl, G> Store for x2 { #[inline(always)] unsafe fn unpack(p: vec256_storage) -> Self { let p = p.split128(); x2::new([W::unpack(p[0]), W::unpack(p[1])]) } } impl From> for vec256_storage where W: Copy, vec128_storage: From, { #[inline(always)] fn from(x: x2) -> Self { vec256_storage::new128([x.0[0].into(), x.0[1].into()]) } } impl Swap64 for x2 where W: Swap64 + Copy, { fwd_unop_x2!(swap1); fwd_unop_x2!(swap2); fwd_unop_x2!(swap4); fwd_unop_x2!(swap8); fwd_unop_x2!(swap16); fwd_unop_x2!(swap32); fwd_unop_x2!(swap64); } impl MultiLane<[W; 2]> for x2 { #[inline(always)] fn to_lanes(self) -> [W; 2] { self.0 } #[inline(always)] fn from_lanes(lanes: [W; 2]) -> Self { x2::new(lanes) } } impl BSwap for x2 { #[inline(always)] fn bswap(self) -> Self { x2::new([self.0[0].bswap(), self.0[1].bswap()]) } } impl StoreBytes for x2 { #[inline(always)] unsafe fn unsafe_read_le(input: &[u8]) -> Self { let input = input.split_at(16); x2::new([W::unsafe_read_le(input.0), W::unsafe_read_le(input.1)]) } #[inline(always)] unsafe fn unsafe_read_be(input: &[u8]) -> Self { x2::unsafe_read_le(input).bswap() } #[inline(always)] fn write_le(self, out: &mut [u8]) { let out = out.split_at_mut(16); self.0[0].write_le(out.0); self.0[1].write_le(out.1); } #[inline(always)] fn write_be(self, out: &mut [u8]) { let out = out.split_at_mut(16); self.0[0].write_be(out.0); self.0[1].write_be(out.1); } } #[derive(Copy, Clone, Default)] #[allow(non_camel_case_types)] pub struct x4(pub [W; 4]); impl x4 { #[inline(always)] pub fn new(xs: [W; 4]) -> Self { x4(xs) } } macro_rules! fwd_binop_x4 { ($trait:ident, $fn:ident) => { impl $trait for x4 { type Output = x4; #[inline(always)] fn $fn(self, rhs: Self) -> Self::Output { x4([ self.0[0].$fn(rhs.0[0]), self.0[1].$fn(rhs.0[1]), self.0[2].$fn(rhs.0[2]), self.0[3].$fn(rhs.0[3]), ]) } } }; } macro_rules! fwd_binop_assign_x4 { ($trait:ident, $fn_assign:ident) => { impl $trait for x4 { #[inline(always)] fn $fn_assign(&mut self, rhs: Self) { self.0[0].$fn_assign(rhs.0[0]); self.0[1].$fn_assign(rhs.0[1]); self.0[2].$fn_assign(rhs.0[2]); self.0[3].$fn_assign(rhs.0[3]); } } }; } macro_rules! fwd_unop_x4 { ($fn:ident) => { #[inline(always)] fn $fn(self) -> Self { x4([self.0[0].$fn(), self.0[1].$fn(), self.0[2].$fn(), self.0[3].$fn()]) } }; } impl RotateEachWord32 for x4 where W: Copy + RotateEachWord32, { fwd_unop_x4!(rotate_each_word_right7); fwd_unop_x4!(rotate_each_word_right8); fwd_unop_x4!(rotate_each_word_right11); fwd_unop_x4!(rotate_each_word_right12); fwd_unop_x4!(rotate_each_word_right16); fwd_unop_x4!(rotate_each_word_right20); fwd_unop_x4!(rotate_each_word_right24); fwd_unop_x4!(rotate_each_word_right25); } impl RotateEachWord64 for x4 where W: Copy + RotateEachWord64, { fwd_unop_x4!(rotate_each_word_right32); } impl RotateEachWord128 for x4 where W: RotateEachWord128 {} impl BitOps0 for x4 where W: BitOps0 {} impl BitOps32 for x4 where W: BitOps32 + BitOps0 {} impl BitOps64 for x4 where W: BitOps64 + BitOps0 {} impl BitOps128 for x4 where W: BitOps128 + BitOps0 {} fwd_binop_x4!(BitAnd, bitand); fwd_binop_x4!(BitOr, bitor); fwd_binop_x4!(BitXor, bitxor); fwd_binop_x4!(AndNot, andnot); fwd_binop_assign_x4!(BitAndAssign, bitand_assign); fwd_binop_assign_x4!(BitOrAssign, bitor_assign); fwd_binop_assign_x4!(BitXorAssign, bitxor_assign); impl ArithOps for x4 where W: ArithOps {} fwd_binop_x4!(Add, add); fwd_binop_assign_x4!(AddAssign, add_assign); impl Not for x4 { type Output = x4; #[inline(always)] fn not(self) -> Self::Output { x4([ self.0[0].not(), self.0[1].not(), self.0[2].not(), self.0[3].not(), ]) } } impl UnsafeFrom<[W; 4]> for x4 { #[inline(always)] unsafe fn unsafe_from(xs: [W; 4]) -> Self { x4(xs) } } impl Vec4 for x4 { #[inline(always)] fn extract(self, i: u32) -> W { self.0[i as usize] } #[inline(always)] fn insert(mut self, w: W, i: u32) -> Self { self.0[i as usize] = w; self } } impl> Store for x4 { #[inline(always)] unsafe fn unpack(p: vec512_storage) -> Self { let p = p.split128(); x4([ W::unpack(p[0]), W::unpack(p[1]), W::unpack(p[2]), W::unpack(p[3]), ]) } } impl From> for vec512_storage where W: Copy, vec128_storage: From, { #[inline(always)] fn from(x: x4) -> Self { vec512_storage::new128([x.0[0].into(), x.0[1].into(), x.0[2].into(), x.0[3].into()]) } } impl Swap64 for x4 where W: Swap64 + Copy, { fwd_unop_x4!(swap1); fwd_unop_x4!(swap2); fwd_unop_x4!(swap4); fwd_unop_x4!(swap8); fwd_unop_x4!(swap16); fwd_unop_x4!(swap32); fwd_unop_x4!(swap64); } impl MultiLane<[W; 4]> for x4 { #[inline(always)] fn to_lanes(self) -> [W; 4] { self.0 } #[inline(always)] fn from_lanes(lanes: [W; 4]) -> Self { x4(lanes) } } impl BSwap for x4 { #[inline(always)] fn bswap(self) -> Self { x4([ self.0[0].bswap(), self.0[1].bswap(), self.0[2].bswap(), self.0[3].bswap(), ]) } } impl StoreBytes for x4 { #[inline(always)] unsafe fn unsafe_read_le(input: &[u8]) -> Self { x4([ W::unsafe_read_le(&input[0..16]), W::unsafe_read_le(&input[16..32]), W::unsafe_read_le(&input[32..48]), W::unsafe_read_le(&input[48..64]), ]) } #[inline(always)] unsafe fn unsafe_read_be(input: &[u8]) -> Self { x4::unsafe_read_le(input).bswap() } #[inline(always)] fn write_le(self, out: &mut [u8]) { self.0[0].write_le(&mut out[0..16]); self.0[1].write_le(&mut out[16..32]); self.0[2].write_le(&mut out[32..48]); self.0[3].write_le(&mut out[48..64]); } #[inline(always)] fn write_be(self, out: &mut [u8]) { self.0[0].write_be(&mut out[0..16]); self.0[1].write_be(&mut out[16..32]); self.0[2].write_be(&mut out[32..48]); self.0[3].write_be(&mut out[48..64]); } } impl LaneWords4 for x4 { #[inline(always)] fn shuffle_lane_words2301(self) -> Self { x4([ self.0[0].shuffle_lane_words2301(), self.0[1].shuffle_lane_words2301(), self.0[2].shuffle_lane_words2301(), self.0[3].shuffle_lane_words2301(), ]) } #[inline(always)] fn shuffle_lane_words1230(self) -> Self { x4([ self.0[0].shuffle_lane_words1230(), self.0[1].shuffle_lane_words1230(), self.0[2].shuffle_lane_words1230(), self.0[3].shuffle_lane_words1230(), ]) } #[inline(always)] fn shuffle_lane_words3012(self) -> Self { x4([ self.0[0].shuffle_lane_words3012(), self.0[1].shuffle_lane_words3012(), self.0[2].shuffle_lane_words3012(), self.0[3].shuffle_lane_words3012(), ]) } }