summaryrefslogtreecommitdiffstats
path: root/vendor/ppv-lite86/src/generic.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/ppv-lite86/src/generic.rs')
-rw-r--r--vendor/ppv-lite86/src/generic.rs158
1 files changed, 96 insertions, 62 deletions
diff --git a/vendor/ppv-lite86/src/generic.rs b/vendor/ppv-lite86/src/generic.rs
index 4f4113fc3..add6c4856 100644
--- a/vendor/ppv-lite86/src/generic.rs
+++ b/vendor/ppv-lite86/src/generic.rs
@@ -1,50 +1,50 @@
#![allow(non_camel_case_types)]
-use core::ops::*;
use crate::soft::{x2, x4};
use crate::types::*;
+use core::ops::*;
+#[repr(C)]
#[derive(Clone, Copy)]
pub union vec128_storage {
d: [u32; 4],
q: [u64; 2],
- o: [u128; 1],
}
impl From<[u32; 4]> for vec128_storage {
- #[inline]
+ #[inline(always)]
fn from(d: [u32; 4]) -> Self {
Self { d }
}
}
impl From<vec128_storage> for [u32; 4] {
- #[inline]
+ #[inline(always)]
fn from(d: vec128_storage) -> Self {
unsafe { d.d }
}
}
impl From<[u64; 2]> for vec128_storage {
- #[inline]
+ #[inline(always)]
fn from(q: [u64; 2]) -> Self {
Self { q }
}
}
impl From<vec128_storage> for [u64; 2] {
- #[inline]
+ #[inline(always)]
fn from(q: vec128_storage) -> Self {
unsafe { q.q }
}
}
impl Default for vec128_storage {
- #[inline]
+ #[inline(always)]
fn default() -> Self {
- Self { o: [0] }
+ Self { q: [0, 0] }
}
}
impl Eq for vec128_storage {}
impl PartialEq<vec128_storage> for vec128_storage {
- #[inline]
+ #[inline(always)]
fn eq(&self, rhs: &Self) -> bool {
- unsafe { self.o == rhs.o }
+ unsafe { self.q == rhs.q }
}
}
#[derive(Clone, Copy, PartialEq, Eq, Default)]
@@ -61,20 +61,22 @@ impl vec256_storage {
self.v128
}
}
-impl From<[u64; 4]> for vec256_storage {
- #[inline]
- fn from(q: [u64; 4]) -> Self {
- Self { v128: [[0, 1].into(), [2, 3].into()] }
- }
-}
impl From<vec256_storage> for [u64; 4] {
- #[inline]
+ #[inline(always)]
fn from(q: vec256_storage) -> Self {
let [a, b]: [u64; 2] = q.v128[0].into();
let [c, d]: [u64; 2] = q.v128[1].into();
[a, b, c, d]
}
}
+impl From<[u64; 4]> for vec256_storage {
+ #[inline(always)]
+ fn from([a, b, c, d]: [u64; 4]) -> Self {
+ Self {
+ v128: [[a, b].into(), [c, d].into()],
+ }
+ }
+}
#[derive(Clone, Copy, PartialEq, Eq, Default)]
pub struct vec512_storage {
v128: [vec128_storage; 4],
@@ -90,6 +92,7 @@ impl vec512_storage {
}
}
+#[inline(always)]
fn dmap<T, F>(t: T, f: F) -> T
where
T: Store<vec128_storage> + Into<vec128_storage>,
@@ -123,6 +126,7 @@ where
unsafe { T::unpack(d) }
}
+#[inline(always)]
fn qmap<T, F>(t: T, f: F) -> T
where
T: Store<vec128_storage> + Into<vec128_storage>,
@@ -136,6 +140,7 @@ where
unsafe { T::unpack(q) }
}
+#[inline(always)]
fn qmap2<T, F>(a: T, b: T, f: F) -> T
where
T: Store<vec128_storage> + Into<vec128_storage>,
@@ -151,17 +156,29 @@ where
unsafe { T::unpack(q) }
}
+#[inline(always)]
+fn o_of_q(q: [u64; 2]) -> u128 {
+ u128::from(q[0]) | (u128::from(q[1]) << 64)
+}
+
+#[inline(always)]
+fn q_of_o(o: u128) -> [u64; 2] {
+ [o as u64, (o >> 64) as u64]
+}
+
+#[inline(always)]
fn omap<T, F>(a: T, f: F) -> T
where
T: Store<vec128_storage> + Into<vec128_storage>,
F: Fn(u128) -> u128,
{
let a: vec128_storage = a.into();
- let ao = unsafe { a.o };
- let o = vec128_storage { o: [f(ao[0])] };
+ let ao = o_of_q(unsafe { a.q });
+ let o = vec128_storage { q: q_of_o(f(ao)) };
unsafe { T::unpack(o) }
}
+#[inline(always)]
fn omap2<T, F>(a: T, b: T, f: F) -> T
where
T: Store<vec128_storage> + Into<vec128_storage>,
@@ -169,10 +186,10 @@ where
{
let a: vec128_storage = a.into();
let b: vec128_storage = b.into();
- let ao = unsafe { a.o };
- let bo = unsafe { b.o };
+ let ao = o_of_q(unsafe { a.q });
+ let bo = o_of_q(unsafe { b.q });
let o = vec128_storage {
- o: [f(ao[0], bo[0])],
+ q: q_of_o(f(ao, bo)),
};
unsafe { T::unpack(o) }
}
@@ -245,39 +262,39 @@ macro_rules! impl_bitops {
}
impl Swap64 for $vec {
- #[inline]
+ #[inline(always)]
fn swap1(self) -> Self {
qmap(self, |x| {
((x & 0x5555555555555555) << 1) | ((x & 0xaaaaaaaaaaaaaaaa) >> 1)
})
}
- #[inline]
+ #[inline(always)]
fn swap2(self) -> Self {
qmap(self, |x| {
((x & 0x3333333333333333) << 2) | ((x & 0xcccccccccccccccc) >> 2)
})
}
- #[inline]
+ #[inline(always)]
fn swap4(self) -> Self {
qmap(self, |x| {
((x & 0x0f0f0f0f0f0f0f0f) << 4) | ((x & 0xf0f0f0f0f0f0f0f0) >> 4)
})
}
- #[inline]
+ #[inline(always)]
fn swap8(self) -> Self {
qmap(self, |x| {
((x & 0x00ff00ff00ff00ff) << 8) | ((x & 0xff00ff00ff00ff00) >> 8)
})
}
- #[inline]
+ #[inline(always)]
fn swap16(self) -> Self {
dmap(self, |x| x.rotate_left(16))
}
- #[inline]
+ #[inline(always)]
fn swap32(self) -> Self {
qmap(self, |x| x.rotate_left(32))
}
- #[inline]
+ #[inline(always)]
fn swap64(self) -> Self {
omap(self, |x| (x << 64) | (x >> 64))
}
@@ -289,82 +306,83 @@ impl_bitops!(u64x2_generic);
impl_bitops!(u128x1_generic);
impl RotateEachWord32 for u32x4_generic {
- #[inline]
+ #[inline(always)]
fn rotate_each_word_right7(self) -> Self {
dmap(self, |x| x.rotate_right(7))
}
- #[inline]
+ #[inline(always)]
fn rotate_each_word_right8(self) -> Self {
dmap(self, |x| x.rotate_right(8))
}
- #[inline]
+ #[inline(always)]
fn rotate_each_word_right11(self) -> Self {
dmap(self, |x| x.rotate_right(11))
}
- #[inline]
+ #[inline(always)]
fn rotate_each_word_right12(self) -> Self {
dmap(self, |x| x.rotate_right(12))
}
- #[inline]
+ #[inline(always)]
fn rotate_each_word_right16(self) -> Self {
dmap(self, |x| x.rotate_right(16))
}
- #[inline]
+ #[inline(always)]
fn rotate_each_word_right20(self) -> Self {
dmap(self, |x| x.rotate_right(20))
}
- #[inline]
+ #[inline(always)]
fn rotate_each_word_right24(self) -> Self {
dmap(self, |x| x.rotate_right(24))
}
- #[inline]
+ #[inline(always)]
fn rotate_each_word_right25(self) -> Self {
dmap(self, |x| x.rotate_right(25))
}
}
impl RotateEachWord32 for u64x2_generic {
- #[inline]
+ #[inline(always)]
fn rotate_each_word_right7(self) -> Self {
qmap(self, |x| x.rotate_right(7))
}
- #[inline]
+ #[inline(always)]
fn rotate_each_word_right8(self) -> Self {
qmap(self, |x| x.rotate_right(8))
}
- #[inline]
+ #[inline(always)]
fn rotate_each_word_right11(self) -> Self {
qmap(self, |x| x.rotate_right(11))
}
- #[inline]
+ #[inline(always)]
fn rotate_each_word_right12(self) -> Self {
qmap(self, |x| x.rotate_right(12))
}
- #[inline]
+ #[inline(always)]
fn rotate_each_word_right16(self) -> Self {
qmap(self, |x| x.rotate_right(16))
}
- #[inline]
+ #[inline(always)]
fn rotate_each_word_right20(self) -> Self {
qmap(self, |x| x.rotate_right(20))
}
- #[inline]
+ #[inline(always)]
fn rotate_each_word_right24(self) -> Self {
qmap(self, |x| x.rotate_right(24))
}
- #[inline]
+ #[inline(always)]
fn rotate_each_word_right25(self) -> Self {
qmap(self, |x| x.rotate_right(25))
}
}
impl RotateEachWord64 for u64x2_generic {
- #[inline]
+ #[inline(always)]
fn rotate_each_word_right32(self) -> Self {
qmap(self, |x| x.rotate_right(32))
}
}
// workaround for koute/cargo-web#52 (u128::rotate_* broken with cargo web)
+#[inline(always)]
fn rotate_u128_right(x: u128, i: u32) -> u128 {
(x >> i) | (x << (128 - i))
}
@@ -375,41 +393,41 @@ fn test_rotate_u128() {
}
impl RotateEachWord32 for u128x1_generic {
- #[inline]
+ #[inline(always)]
fn rotate_each_word_right7(self) -> Self {
Self([rotate_u128_right(self.0[0], 7)])
}
- #[inline]
+ #[inline(always)]
fn rotate_each_word_right8(self) -> Self {
Self([rotate_u128_right(self.0[0], 8)])
}
- #[inline]
+ #[inline(always)]
fn rotate_each_word_right11(self) -> Self {
Self([rotate_u128_right(self.0[0], 11)])
}
- #[inline]
+ #[inline(always)]
fn rotate_each_word_right12(self) -> Self {
Self([rotate_u128_right(self.0[0], 12)])
}
- #[inline]
+ #[inline(always)]
fn rotate_each_word_right16(self) -> Self {
Self([rotate_u128_right(self.0[0], 16)])
}
- #[inline]
+ #[inline(always)]
fn rotate_each_word_right20(self) -> Self {
Self([rotate_u128_right(self.0[0], 20)])
}
- #[inline]
+ #[inline(always)]
fn rotate_each_word_right24(self) -> Self {
Self([rotate_u128_right(self.0[0], 24)])
}
- #[inline]
+ #[inline(always)]
fn rotate_each_word_right25(self) -> Self {
Self([rotate_u128_right(self.0[0], 25)])
}
}
impl RotateEachWord64 for u128x1_generic {
- #[inline]
+ #[inline(always)]
fn rotate_each_word_right32(self) -> Self {
Self([rotate_u128_right(self.0[0], 32)])
}
@@ -428,7 +446,7 @@ impl Machine for GenericMachine {
type u32x4x4 = u32x4x4_generic;
type u64x2x4 = u64x2x4_generic;
type u128x4 = u128x4_generic;
- #[inline]
+ #[inline(always)]
unsafe fn instance() -> Self {
Self
}
@@ -456,7 +474,7 @@ impl From<u64x2_generic> for vec128_storage {
impl From<u128x1_generic> for vec128_storage {
#[inline(always)]
fn from(o: u128x1_generic) -> Self {
- Self { o: o.0 }
+ Self { q: q_of_o(o.0[0]) }
}
}
@@ -475,7 +493,7 @@ impl Store<vec128_storage> for u64x2_generic {
impl Store<vec128_storage> for u128x1_generic {
#[inline(always)]
unsafe fn unpack(s: vec128_storage) -> Self {
- Self(s.o)
+ Self([o_of_q(s.q); 1])
}
}
@@ -605,6 +623,22 @@ pub type u32x4x4_generic = x4<u32x4_generic>;
pub type u64x2x4_generic = x4<u64x2_generic>;
pub type u128x4_generic = x4<u128x1_generic>;
+impl Vector<[u32; 16]> for u32x4x4_generic {
+ fn to_scalars(self) -> [u32; 16] {
+ let [a, b, c, d] = self.0;
+ let a = a.0;
+ let b = b.0;
+ let c = c.0;
+ let d = d.0;
+ [
+ a[0], a[1], a[2], a[3], //
+ b[0], b[1], b[2], b[3], //
+ c[0], c[1], c[2], c[3], //
+ d[0], d[1], d[2], d[3], //
+ ]
+ }
+}
+
impl MultiLane<[u32; 4]> for u32x4_generic {
#[inline(always)]
fn to_lanes(self) -> [u32; 4] {
@@ -745,7 +779,7 @@ impl u128x4<GenericMachine> for u128x4_generic {}
#[macro_export]
macro_rules! dispatch {
($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
- #[inline]
+ #[inline(always)]
$($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
let $mach = unsafe { $crate::generic::GenericMachine::instance() };
#[inline(always)]
@@ -762,7 +796,7 @@ macro_rules! dispatch {
#[macro_export]
macro_rules! dispatch_light128 {
($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
- #[inline]
+ #[inline(always)]
$($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
let $mach = unsafe { $crate::generic::GenericMachine::instance() };
#[inline(always)]
@@ -779,7 +813,7 @@ macro_rules! dispatch_light128 {
#[macro_export]
macro_rules! dispatch_light256 {
($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
- #[inline]
+ #[inline(always)]
$($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
let $mach = unsafe { $crate::generic::GenericMachine::instance() };
#[inline(always)]
@@ -796,7 +830,7 @@ macro_rules! dispatch_light256 {
#[macro_export]
macro_rules! dispatch_light512 {
($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
- #[inline]
+ #[inline(always)]
$($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
let $mach = unsafe { $crate::generic::GenericMachine::instance() };
#[inline(always)]