diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 02:49:50 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 02:49:50 +0000 |
commit | 9835e2ae736235810b4ea1c162ca5e65c547e770 (patch) | |
tree | 3fcebf40ed70e581d776a8a4c65923e8ec20e026 /vendor/ppv-lite86/src/soft.rs | |
parent | Releasing progress-linux version 1.70.0+dfsg2-1~progress7.99u1. (diff) | |
download | rustc-9835e2ae736235810b4ea1c162ca5e65c547e770.tar.xz rustc-9835e2ae736235810b4ea1c162ca5e65c547e770.zip |
Merging upstream version 1.71.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/ppv-lite86/src/soft.rs')
-rw-r--r-- | vendor/ppv-lite86/src/soft.rs | 92 |
1 files changed, 72 insertions, 20 deletions
diff --git a/vendor/ppv-lite86/src/soft.rs b/vendor/ppv-lite86/src/soft.rs index d12dac528..0ae390c44 100644 --- a/vendor/ppv-lite86/src/soft.rs +++ b/vendor/ppv-lite86/src/soft.rs @@ -1,9 +1,9 @@ //! Implement 256- and 512- bit in terms of 128-bit, for machines without native wide SIMD. -use core::marker::PhantomData; -use core::ops::*; use crate::types::*; use crate::{vec128_storage, vec256_storage, vec512_storage}; +use core::marker::PhantomData; +use core::ops::*; #[derive(Copy, Clone, Default)] #[allow(non_camel_case_types)] @@ -175,26 +175,50 @@ impl<W: BSwap + Copy, G> BSwap for x2<W, G> { impl<W: StoreBytes + BSwap + Copy, G> StoreBytes for x2<W, G> { #[inline(always)] unsafe fn unsafe_read_le(input: &[u8]) -> Self { - let input = input.split_at(16); + let input = input.split_at(input.len() / 2); x2::new([W::unsafe_read_le(input.0), W::unsafe_read_le(input.1)]) } #[inline(always)] unsafe fn unsafe_read_be(input: &[u8]) -> Self { - x2::unsafe_read_le(input).bswap() + let input = input.split_at(input.len() / 2); + x2::new([W::unsafe_read_be(input.0), W::unsafe_read_be(input.1)]) } #[inline(always)] fn write_le(self, out: &mut [u8]) { - let out = out.split_at_mut(16); + let out = out.split_at_mut(out.len() / 2); self.0[0].write_le(out.0); self.0[1].write_le(out.1); } #[inline(always)] fn write_be(self, out: &mut [u8]) { - let out = out.split_at_mut(16); + let out = out.split_at_mut(out.len() / 2); self.0[0].write_be(out.0); self.0[1].write_be(out.1); } } +impl<W: Copy + LaneWords4, G: Copy> LaneWords4 for x2<W, G> { + #[inline(always)] + fn shuffle_lane_words2301(self) -> Self { + Self::new([ + self.0[0].shuffle_lane_words2301(), + self.0[1].shuffle_lane_words2301(), + ]) + } + #[inline(always)] + fn shuffle_lane_words1230(self) -> Self { + Self::new([ + self.0[0].shuffle_lane_words1230(), + self.0[1].shuffle_lane_words1230(), + ]) + } + #[inline(always)] + fn shuffle_lane_words3012(self) -> Self { + Self::new([ + self.0[0].shuffle_lane_words3012(), + self.0[1].shuffle_lane_words3012(), + ]) + } +} #[derive(Copy, Clone, Default)] #[allow(non_camel_case_types)] @@ -238,7 +262,12 @@ macro_rules! fwd_unop_x4 { ($fn:ident) => { #[inline(always)] fn $fn(self) -> Self { - x4([self.0[0].$fn(), self.0[1].$fn(), self.0[2].$fn(), self.0[3].$fn()]) + x4([ + self.0[0].$fn(), + self.0[1].$fn(), + self.0[2].$fn(), + self.0[3].$fn(), + ]) } }; } @@ -305,6 +334,20 @@ impl<W: Copy> Vec4<W> for x4<W> { self } } +impl<W: Copy> Vec4Ext<W> for x4<W> { + #[inline(always)] + fn transpose4(a: Self, b: Self, c: Self, d: Self) -> (Self, Self, Self, Self) + where + Self: Sized, + { + ( + x4([a.0[0], b.0[0], c.0[0], d.0[0]]), + x4([a.0[1], b.0[1], c.0[1], d.0[1]]), + x4([a.0[2], b.0[2], c.0[2], d.0[2]]), + x4([a.0[3], b.0[3], c.0[3], d.0[3]]), + ) + } +} impl<W: Copy + Store<vec128_storage>> Store<vec512_storage> for x4<W> { #[inline(always)] unsafe fn unpack(p: vec512_storage) -> Self { @@ -363,30 +406,39 @@ impl<W: BSwap + Copy> BSwap for x4<W> { impl<W: StoreBytes + BSwap + Copy> StoreBytes for x4<W> { #[inline(always)] unsafe fn unsafe_read_le(input: &[u8]) -> Self { + let n = input.len() / 4; x4([ - W::unsafe_read_le(&input[0..16]), - W::unsafe_read_le(&input[16..32]), - W::unsafe_read_le(&input[32..48]), - W::unsafe_read_le(&input[48..64]), + W::unsafe_read_le(&input[..n]), + W::unsafe_read_le(&input[n..n * 2]), + W::unsafe_read_le(&input[n * 2..n * 3]), + W::unsafe_read_le(&input[n * 3..]), ]) } #[inline(always)] unsafe fn unsafe_read_be(input: &[u8]) -> Self { - x4::unsafe_read_le(input).bswap() + let n = input.len() / 4; + x4([ + W::unsafe_read_be(&input[..n]), + W::unsafe_read_be(&input[n..n * 2]), + W::unsafe_read_be(&input[n * 2..n * 3]), + W::unsafe_read_be(&input[n * 3..]), + ]) } #[inline(always)] fn write_le(self, out: &mut [u8]) { - self.0[0].write_le(&mut out[0..16]); - self.0[1].write_le(&mut out[16..32]); - self.0[2].write_le(&mut out[32..48]); - self.0[3].write_le(&mut out[48..64]); + let n = out.len() / 4; + self.0[0].write_le(&mut out[..n]); + self.0[1].write_le(&mut out[n..n * 2]); + self.0[2].write_le(&mut out[n * 2..n * 3]); + self.0[3].write_le(&mut out[n * 3..]); } #[inline(always)] fn write_be(self, out: &mut [u8]) { - self.0[0].write_be(&mut out[0..16]); - self.0[1].write_be(&mut out[16..32]); - self.0[2].write_be(&mut out[32..48]); - self.0[3].write_be(&mut out[48..64]); + let n = out.len() / 4; + self.0[0].write_be(&mut out[..n]); + self.0[1].write_be(&mut out[n..n * 2]); + self.0[2].write_be(&mut out[n * 2..n * 3]); + self.0[3].write_be(&mut out[n * 3..]); } } impl<W: Copy + LaneWords4> LaneWords4 for x4<W> { |