865 lines
24 KiB
Rust
865 lines
24 KiB
Rust
#![allow(non_camel_case_types)]
|
|
|
|
use crate::soft::{x2, x4};
|
|
use crate::types::*;
|
|
use core::ops::*;
|
|
|
|
/// 128 bits of plain storage that can be viewed either as four `u32` words
/// or as two `u64` words.
///
/// Both views cover the same 16 bytes, so writing one field fully
/// initialises the other.
#[repr(C)]
#[derive(Clone, Copy)]
pub union vec128_storage {
    d: [u32; 4],
    q: [u64; 2],
}

impl From<[u32; 4]> for vec128_storage {
    /// Stores the four words through the `u32` view.
    #[inline(always)]
    fn from(words: [u32; 4]) -> Self {
        vec128_storage { d: words }
    }
}

impl From<vec128_storage> for [u32; 4] {
    /// Reads the storage back through the `u32` view.
    #[inline(always)]
    fn from(storage: vec128_storage) -> Self {
        // SAFETY: both union fields are 16 bytes of plain integers, so any
        // initialised `vec128_storage` is a valid `[u32; 4]`.
        unsafe { storage.d }
    }
}

impl From<[u64; 2]> for vec128_storage {
    /// Stores the two words through the `u64` view.
    #[inline(always)]
    fn from(words: [u64; 2]) -> Self {
        vec128_storage { q: words }
    }
}

impl From<vec128_storage> for [u64; 2] {
    /// Reads the storage back through the `u64` view.
    #[inline(always)]
    fn from(storage: vec128_storage) -> Self {
        // SAFETY: both union fields are 16 bytes of plain integers, so any
        // initialised `vec128_storage` is a valid `[u64; 2]`.
        unsafe { storage.q }
    }
}

impl Default for vec128_storage {
    /// All-zero storage.
    #[inline(always)]
    fn default() -> Self {
        vec128_storage { q: [0, 0] }
    }
}

impl Eq for vec128_storage {}

impl PartialEq<vec128_storage> for vec128_storage {
    /// Two storages are equal when their `u64` views are equal.
    #[inline(always)]
    fn eq(&self, rhs: &Self) -> bool {
        // SAFETY: the `u64` view is valid for every initialised value.
        let (left, right) = unsafe { (self.q, rhs.q) };
        left == right
    }
}
|
|
#[derive(Clone, Copy, PartialEq, Eq, Default)]
|
|
pub struct vec256_storage {
|
|
v128: [vec128_storage; 2],
|
|
}
|
|
impl vec256_storage {
|
|
#[inline(always)]
|
|
pub fn new128(v128: [vec128_storage; 2]) -> Self {
|
|
Self { v128 }
|
|
}
|
|
#[inline(always)]
|
|
pub fn split128(self) -> [vec128_storage; 2] {
|
|
self.v128
|
|
}
|
|
}
|
|
impl From<vec256_storage> for [u64; 4] {
|
|
#[inline(always)]
|
|
fn from(q: vec256_storage) -> Self {
|
|
let [a, b]: [u64; 2] = q.v128[0].into();
|
|
let [c, d]: [u64; 2] = q.v128[1].into();
|
|
[a, b, c, d]
|
|
}
|
|
}
|
|
impl From<[u64; 4]> for vec256_storage {
|
|
#[inline(always)]
|
|
fn from([a, b, c, d]: [u64; 4]) -> Self {
|
|
Self {
|
|
v128: [[a, b].into(), [c, d].into()],
|
|
}
|
|
}
|
|
}
|
|
#[derive(Clone, Copy, PartialEq, Eq, Default)]
|
|
pub struct vec512_storage {
|
|
v128: [vec128_storage; 4],
|
|
}
|
|
impl vec512_storage {
|
|
#[inline(always)]
|
|
pub fn new128(v128: [vec128_storage; 4]) -> Self {
|
|
Self { v128 }
|
|
}
|
|
#[inline(always)]
|
|
pub fn split128(self) -> [vec128_storage; 4] {
|
|
self.v128
|
|
}
|
|
}
|
|
|
|
#[inline(always)]
|
|
fn dmap<T, F>(t: T, f: F) -> T
|
|
where
|
|
T: Store<vec128_storage> + Into<vec128_storage>,
|
|
F: Fn(u32) -> u32,
|
|
{
|
|
let t: vec128_storage = t.into();
|
|
let d = unsafe { t.d };
|
|
let d = vec128_storage {
|
|
d: [f(d[0]), f(d[1]), f(d[2]), f(d[3])],
|
|
};
|
|
unsafe { T::unpack(d) }
|
|
}
|
|
|
|
fn dmap2<T, F>(a: T, b: T, f: F) -> T
|
|
where
|
|
T: Store<vec128_storage> + Into<vec128_storage>,
|
|
F: Fn(u32, u32) -> u32,
|
|
{
|
|
let a: vec128_storage = a.into();
|
|
let b: vec128_storage = b.into();
|
|
let ao = unsafe { a.d };
|
|
let bo = unsafe { b.d };
|
|
let d = vec128_storage {
|
|
d: [
|
|
f(ao[0], bo[0]),
|
|
f(ao[1], bo[1]),
|
|
f(ao[2], bo[2]),
|
|
f(ao[3], bo[3]),
|
|
],
|
|
};
|
|
unsafe { T::unpack(d) }
|
|
}
|
|
|
|
#[inline(always)]
|
|
fn qmap<T, F>(t: T, f: F) -> T
|
|
where
|
|
T: Store<vec128_storage> + Into<vec128_storage>,
|
|
F: Fn(u64) -> u64,
|
|
{
|
|
let t: vec128_storage = t.into();
|
|
let q = unsafe { t.q };
|
|
let q = vec128_storage {
|
|
q: [f(q[0]), f(q[1])],
|
|
};
|
|
unsafe { T::unpack(q) }
|
|
}
|
|
|
|
#[inline(always)]
|
|
fn qmap2<T, F>(a: T, b: T, f: F) -> T
|
|
where
|
|
T: Store<vec128_storage> + Into<vec128_storage>,
|
|
F: Fn(u64, u64) -> u64,
|
|
{
|
|
let a: vec128_storage = a.into();
|
|
let b: vec128_storage = b.into();
|
|
let ao = unsafe { a.q };
|
|
let bo = unsafe { b.q };
|
|
let q = vec128_storage {
|
|
q: [f(ao[0], bo[0]), f(ao[1], bo[1])],
|
|
};
|
|
unsafe { T::unpack(q) }
|
|
}
|
|
|
|
/// Joins two `u64` words into one `u128`: `q[0]` becomes the low 64 bits and
/// `q[1]` the high 64 bits.
#[inline(always)]
fn o_of_q(q: [u64; 2]) -> u128 {
    let [low, high] = q;
    (u128::from(high) << 64) | u128::from(low)
}
|
|
|
|
/// Splits a `u128` into two `u64` words: index 0 holds the low 64 bits and
/// index 1 the high 64 bits.
#[inline(always)]
fn q_of_o(o: u128) -> [u64; 2] {
    // Truncating cast keeps exactly the low 64 bits.
    let low = o as u64;
    let high = (o >> 64) as u64;
    [low, high]
}
|
|
|
|
#[inline(always)]
|
|
fn omap<T, F>(a: T, f: F) -> T
|
|
where
|
|
T: Store<vec128_storage> + Into<vec128_storage>,
|
|
F: Fn(u128) -> u128,
|
|
{
|
|
let a: vec128_storage = a.into();
|
|
let ao = o_of_q(unsafe { a.q });
|
|
let o = vec128_storage { q: q_of_o(f(ao)) };
|
|
unsafe { T::unpack(o) }
|
|
}
|
|
|
|
#[inline(always)]
|
|
fn omap2<T, F>(a: T, b: T, f: F) -> T
|
|
where
|
|
T: Store<vec128_storage> + Into<vec128_storage>,
|
|
F: Fn(u128, u128) -> u128,
|
|
{
|
|
let a: vec128_storage = a.into();
|
|
let b: vec128_storage = b.into();
|
|
let ao = o_of_q(unsafe { a.q });
|
|
let bo = o_of_q(unsafe { b.q });
|
|
let o = vec128_storage {
|
|
q: q_of_o(f(ao, bo)),
|
|
};
|
|
unsafe { T::unpack(o) }
|
|
}
|
|
|
|
impl RotateEachWord128 for u128x1_generic {}
|
|
impl BitOps128 for u128x1_generic {}
|
|
impl BitOps64 for u128x1_generic {}
|
|
impl BitOps64 for u64x2_generic {}
|
|
impl BitOps32 for u128x1_generic {}
|
|
impl BitOps32 for u64x2_generic {}
|
|
impl BitOps32 for u32x4_generic {}
|
|
impl BitOps0 for u128x1_generic {}
|
|
impl BitOps0 for u64x2_generic {}
|
|
impl BitOps0 for u32x4_generic {}
|
|
|
|
macro_rules! impl_bitops {
|
|
($vec:ident) => {
|
|
impl Not for $vec {
|
|
type Output = Self;
|
|
#[inline(always)]
|
|
fn not(self) -> Self::Output {
|
|
omap(self, |x| !x)
|
|
}
|
|
}
|
|
impl BitAnd for $vec {
|
|
type Output = Self;
|
|
#[inline(always)]
|
|
fn bitand(self, rhs: Self) -> Self::Output {
|
|
omap2(self, rhs, |x, y| x & y)
|
|
}
|
|
}
|
|
impl BitOr for $vec {
|
|
type Output = Self;
|
|
#[inline(always)]
|
|
fn bitor(self, rhs: Self) -> Self::Output {
|
|
omap2(self, rhs, |x, y| x | y)
|
|
}
|
|
}
|
|
impl BitXor for $vec {
|
|
type Output = Self;
|
|
#[inline(always)]
|
|
fn bitxor(self, rhs: Self) -> Self::Output {
|
|
omap2(self, rhs, |x, y| x ^ y)
|
|
}
|
|
}
|
|
impl AndNot for $vec {
|
|
type Output = Self;
|
|
#[inline(always)]
|
|
fn andnot(self, rhs: Self) -> Self::Output {
|
|
omap2(self, rhs, |x, y| !x & y)
|
|
}
|
|
}
|
|
impl BitAndAssign for $vec {
|
|
#[inline(always)]
|
|
fn bitand_assign(&mut self, rhs: Self) {
|
|
*self = *self & rhs
|
|
}
|
|
}
|
|
impl BitOrAssign for $vec {
|
|
#[inline(always)]
|
|
fn bitor_assign(&mut self, rhs: Self) {
|
|
*self = *self | rhs
|
|
}
|
|
}
|
|
impl BitXorAssign for $vec {
|
|
#[inline(always)]
|
|
fn bitxor_assign(&mut self, rhs: Self) {
|
|
*self = *self ^ rhs
|
|
}
|
|
}
|
|
|
|
impl Swap64 for $vec {
|
|
#[inline(always)]
|
|
fn swap1(self) -> Self {
|
|
qmap(self, |x| {
|
|
((x & 0x5555555555555555) << 1) | ((x & 0xaaaaaaaaaaaaaaaa) >> 1)
|
|
})
|
|
}
|
|
#[inline(always)]
|
|
fn swap2(self) -> Self {
|
|
qmap(self, |x| {
|
|
((x & 0x3333333333333333) << 2) | ((x & 0xcccccccccccccccc) >> 2)
|
|
})
|
|
}
|
|
#[inline(always)]
|
|
fn swap4(self) -> Self {
|
|
qmap(self, |x| {
|
|
((x & 0x0f0f0f0f0f0f0f0f) << 4) | ((x & 0xf0f0f0f0f0f0f0f0) >> 4)
|
|
})
|
|
}
|
|
#[inline(always)]
|
|
fn swap8(self) -> Self {
|
|
qmap(self, |x| {
|
|
((x & 0x00ff00ff00ff00ff) << 8) | ((x & 0xff00ff00ff00ff00) >> 8)
|
|
})
|
|
}
|
|
#[inline(always)]
|
|
fn swap16(self) -> Self {
|
|
dmap(self, |x| x.rotate_left(16))
|
|
}
|
|
#[inline(always)]
|
|
fn swap32(self) -> Self {
|
|
qmap(self, |x| x.rotate_left(32))
|
|
}
|
|
#[inline(always)]
|
|
fn swap64(self) -> Self {
|
|
omap(self, |x| (x << 64) | (x >> 64))
|
|
}
|
|
}
|
|
};
|
|
}
|
|
impl_bitops!(u32x4_generic);
|
|
impl_bitops!(u64x2_generic);
|
|
impl_bitops!(u128x1_generic);
|
|
|
|
impl RotateEachWord32 for u32x4_generic {
|
|
#[inline(always)]
|
|
fn rotate_each_word_right7(self) -> Self {
|
|
dmap(self, |x| x.rotate_right(7))
|
|
}
|
|
#[inline(always)]
|
|
fn rotate_each_word_right8(self) -> Self {
|
|
dmap(self, |x| x.rotate_right(8))
|
|
}
|
|
#[inline(always)]
|
|
fn rotate_each_word_right11(self) -> Self {
|
|
dmap(self, |x| x.rotate_right(11))
|
|
}
|
|
#[inline(always)]
|
|
fn rotate_each_word_right12(self) -> Self {
|
|
dmap(self, |x| x.rotate_right(12))
|
|
}
|
|
#[inline(always)]
|
|
fn rotate_each_word_right16(self) -> Self {
|
|
dmap(self, |x| x.rotate_right(16))
|
|
}
|
|
#[inline(always)]
|
|
fn rotate_each_word_right20(self) -> Self {
|
|
dmap(self, |x| x.rotate_right(20))
|
|
}
|
|
#[inline(always)]
|
|
fn rotate_each_word_right24(self) -> Self {
|
|
dmap(self, |x| x.rotate_right(24))
|
|
}
|
|
#[inline(always)]
|
|
fn rotate_each_word_right25(self) -> Self {
|
|
dmap(self, |x| x.rotate_right(25))
|
|
}
|
|
}
|
|
|
|
impl RotateEachWord32 for u64x2_generic {
|
|
#[inline(always)]
|
|
fn rotate_each_word_right7(self) -> Self {
|
|
qmap(self, |x| x.rotate_right(7))
|
|
}
|
|
#[inline(always)]
|
|
fn rotate_each_word_right8(self) -> Self {
|
|
qmap(self, |x| x.rotate_right(8))
|
|
}
|
|
#[inline(always)]
|
|
fn rotate_each_word_right11(self) -> Self {
|
|
qmap(self, |x| x.rotate_right(11))
|
|
}
|
|
#[inline(always)]
|
|
fn rotate_each_word_right12(self) -> Self {
|
|
qmap(self, |x| x.rotate_right(12))
|
|
}
|
|
#[inline(always)]
|
|
fn rotate_each_word_right16(self) -> Self {
|
|
qmap(self, |x| x.rotate_right(16))
|
|
}
|
|
#[inline(always)]
|
|
fn rotate_each_word_right20(self) -> Self {
|
|
qmap(self, |x| x.rotate_right(20))
|
|
}
|
|
#[inline(always)]
|
|
fn rotate_each_word_right24(self) -> Self {
|
|
qmap(self, |x| x.rotate_right(24))
|
|
}
|
|
#[inline(always)]
|
|
fn rotate_each_word_right25(self) -> Self {
|
|
qmap(self, |x| x.rotate_right(25))
|
|
}
|
|
}
|
|
impl RotateEachWord64 for u64x2_generic {
|
|
#[inline(always)]
|
|
fn rotate_each_word_right32(self) -> Self {
|
|
qmap(self, |x| x.rotate_right(32))
|
|
}
|
|
}
|
|
|
|
/// Rotates `x` right by `i` bits.
///
/// Hand-rolled as a workaround for koute/cargo-web#52 (`u128::rotate_*`
/// broken with cargo web).
///
/// The rotation amount is reduced modulo 128, so `i == 0` and `i >= 128` are
/// well defined. Without the reduction, `i == 0` would evaluate `x << 128`,
/// which is a shift overflow and panics in debug builds.
#[inline(always)]
fn rotate_u128_right(x: u128, i: u32) -> u128 {
    let right = i % 128;
    // `(128 - 0) % 128 == 0`, so a zero rotation shifts by 0 on both sides.
    let left = (128 - right) % 128;
    (x >> right) | (x << left)
}

#[test]
fn test_rotate_u128() {
    const X: u128 = 0x0001_0203_0405_0607_0809_0a0b_0c0d_0e0f;
    assert_eq!(rotate_u128_right(X, 17), X.rotate_right(17));
    // Boundary amounts must neither panic nor change the value.
    assert_eq!(rotate_u128_right(X, 0), X);
    assert_eq!(rotate_u128_right(X, 128), X);
}
|
|
|
|
impl RotateEachWord32 for u128x1_generic {
|
|
#[inline(always)]
|
|
fn rotate_each_word_right7(self) -> Self {
|
|
Self([rotate_u128_right(self.0[0], 7)])
|
|
}
|
|
#[inline(always)]
|
|
fn rotate_each_word_right8(self) -> Self {
|
|
Self([rotate_u128_right(self.0[0], 8)])
|
|
}
|
|
#[inline(always)]
|
|
fn rotate_each_word_right11(self) -> Self {
|
|
Self([rotate_u128_right(self.0[0], 11)])
|
|
}
|
|
#[inline(always)]
|
|
fn rotate_each_word_right12(self) -> Self {
|
|
Self([rotate_u128_right(self.0[0], 12)])
|
|
}
|
|
#[inline(always)]
|
|
fn rotate_each_word_right16(self) -> Self {
|
|
Self([rotate_u128_right(self.0[0], 16)])
|
|
}
|
|
#[inline(always)]
|
|
fn rotate_each_word_right20(self) -> Self {
|
|
Self([rotate_u128_right(self.0[0], 20)])
|
|
}
|
|
#[inline(always)]
|
|
fn rotate_each_word_right24(self) -> Self {
|
|
Self([rotate_u128_right(self.0[0], 24)])
|
|
}
|
|
#[inline(always)]
|
|
fn rotate_each_word_right25(self) -> Self {
|
|
Self([rotate_u128_right(self.0[0], 25)])
|
|
}
|
|
}
|
|
impl RotateEachWord64 for u128x1_generic {
|
|
#[inline(always)]
|
|
fn rotate_each_word_right32(self) -> Self {
|
|
Self([rotate_u128_right(self.0[0], 32)])
|
|
}
|
|
}
|
|
|
|
#[derive(Copy, Clone)]
|
|
pub struct GenericMachine;
|
|
impl Machine for GenericMachine {
|
|
type u32x4 = u32x4_generic;
|
|
type u64x2 = u64x2_generic;
|
|
type u128x1 = u128x1_generic;
|
|
type u32x4x2 = u32x4x2_generic;
|
|
type u64x2x2 = u64x2x2_generic;
|
|
type u64x4 = u64x4_generic;
|
|
type u128x2 = u128x2_generic;
|
|
type u32x4x4 = u32x4x4_generic;
|
|
type u64x2x4 = u64x2x4_generic;
|
|
type u128x4 = u128x4_generic;
|
|
#[inline(always)]
|
|
unsafe fn instance() -> Self {
|
|
Self
|
|
}
|
|
}
|
|
|
|
/// Four `u32` lanes held in a plain array.
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct u32x4_generic([u32; 4]);

/// Two `u64` lanes held in a plain array.
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct u64x2_generic([u64; 2]);

/// A single `u128` lane held in a one-element array.
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct u128x1_generic([u128; 1]);
|
|
|
|
impl From<u32x4_generic> for vec128_storage {
|
|
#[inline(always)]
|
|
fn from(d: u32x4_generic) -> Self {
|
|
Self { d: d.0 }
|
|
}
|
|
}
|
|
impl From<u64x2_generic> for vec128_storage {
|
|
#[inline(always)]
|
|
fn from(q: u64x2_generic) -> Self {
|
|
Self { q: q.0 }
|
|
}
|
|
}
|
|
impl From<u128x1_generic> for vec128_storage {
|
|
#[inline(always)]
|
|
fn from(o: u128x1_generic) -> Self {
|
|
Self { q: q_of_o(o.0[0]) }
|
|
}
|
|
}
|
|
|
|
impl Store<vec128_storage> for u32x4_generic {
|
|
#[inline(always)]
|
|
unsafe fn unpack(s: vec128_storage) -> Self {
|
|
Self(s.d)
|
|
}
|
|
}
|
|
impl Store<vec128_storage> for u64x2_generic {
|
|
#[inline(always)]
|
|
unsafe fn unpack(s: vec128_storage) -> Self {
|
|
Self(s.q)
|
|
}
|
|
}
|
|
impl Store<vec128_storage> for u128x1_generic {
|
|
#[inline(always)]
|
|
unsafe fn unpack(s: vec128_storage) -> Self {
|
|
Self([o_of_q(s.q); 1])
|
|
}
|
|
}
|
|
|
|
impl ArithOps for u32x4_generic {}
|
|
impl ArithOps for u64x2_generic {}
|
|
impl ArithOps for u128x1_generic {}
|
|
|
|
impl Add for u32x4_generic {
|
|
type Output = Self;
|
|
#[inline(always)]
|
|
fn add(self, rhs: Self) -> Self::Output {
|
|
dmap2(self, rhs, |x, y| x.wrapping_add(y))
|
|
}
|
|
}
|
|
impl Add for u64x2_generic {
|
|
type Output = Self;
|
|
#[inline(always)]
|
|
fn add(self, rhs: Self) -> Self::Output {
|
|
qmap2(self, rhs, |x, y| x.wrapping_add(y))
|
|
}
|
|
}
|
|
impl Add for u128x1_generic {
|
|
type Output = Self;
|
|
#[inline(always)]
|
|
fn add(self, rhs: Self) -> Self::Output {
|
|
omap2(self, rhs, |x, y| x.wrapping_add(y))
|
|
}
|
|
}
|
|
impl AddAssign for u32x4_generic {
|
|
#[inline(always)]
|
|
fn add_assign(&mut self, rhs: Self) {
|
|
*self = *self + rhs
|
|
}
|
|
}
|
|
impl AddAssign for u64x2_generic {
|
|
#[inline(always)]
|
|
fn add_assign(&mut self, rhs: Self) {
|
|
*self = *self + rhs
|
|
}
|
|
}
|
|
impl AddAssign for u128x1_generic {
|
|
#[inline(always)]
|
|
fn add_assign(&mut self, rhs: Self) {
|
|
*self = *self + rhs
|
|
}
|
|
}
|
|
impl BSwap for u32x4_generic {
|
|
#[inline(always)]
|
|
fn bswap(self) -> Self {
|
|
dmap(self, |x| x.swap_bytes())
|
|
}
|
|
}
|
|
impl BSwap for u64x2_generic {
|
|
#[inline(always)]
|
|
fn bswap(self) -> Self {
|
|
qmap(self, |x| x.swap_bytes())
|
|
}
|
|
}
|
|
impl BSwap for u128x1_generic {
|
|
#[inline(always)]
|
|
fn bswap(self) -> Self {
|
|
omap(self, |x| x.swap_bytes())
|
|
}
|
|
}
|
|
/// Byte (de)serialisation of four `u32` lanes, 4 bytes per lane, lane 0 first.
///
/// Uses the safe `from_*_bytes` / `to_*_bytes` conversions rather than a raw
/// pointer cast plus `transmute`, so nothing here depends on the in-memory
/// layout of the tuple struct.
impl StoreBytes for u32x4_generic {
    /// Decodes 16 bytes as four little-endian `u32` lanes.
    ///
    /// # Panics
    /// Panics if `input.len() != 16`.
    ///
    /// # Safety
    /// This implementation performs no unsafe operation; the `unsafe`
    /// qualifier comes from the trait signature.
    #[inline(always)]
    unsafe fn unsafe_read_le(input: &[u8]) -> Self {
        assert_eq!(input.len(), 16);
        let mut lanes = [0u32; 4];
        for (lane, bytes) in lanes.iter_mut().zip(input.chunks_exact(4)) {
            let mut raw = [0u8; 4];
            raw.copy_from_slice(bytes);
            *lane = u32::from_le_bytes(raw);
        }
        Self(lanes)
    }

    /// Decodes 16 bytes as four big-endian `u32` lanes.
    ///
    /// # Panics
    /// Panics if `input.len() != 16`.
    ///
    /// # Safety
    /// This implementation performs no unsafe operation; the `unsafe`
    /// qualifier comes from the trait signature.
    #[inline(always)]
    unsafe fn unsafe_read_be(input: &[u8]) -> Self {
        assert_eq!(input.len(), 16);
        let mut lanes = [0u32; 4];
        for (lane, bytes) in lanes.iter_mut().zip(input.chunks_exact(4)) {
            let mut raw = [0u8; 4];
            raw.copy_from_slice(bytes);
            *lane = u32::from_be_bytes(raw);
        }
        Self(lanes)
    }

    /// Encodes the four lanes as 16 little-endian bytes.
    ///
    /// # Panics
    /// Panics if `out.len() != 16`.
    #[inline(always)]
    fn write_le(self, out: &mut [u8]) {
        assert_eq!(out.len(), 16);
        for (chunk, lane) in out.chunks_exact_mut(4).zip(self.0.iter()) {
            chunk.copy_from_slice(&lane.to_le_bytes());
        }
    }

    /// Encodes the four lanes as 16 big-endian bytes.
    ///
    /// # Panics
    /// Panics if `out.len() != 16`.
    #[inline(always)]
    fn write_be(self, out: &mut [u8]) {
        assert_eq!(out.len(), 16);
        for (chunk, lane) in out.chunks_exact_mut(4).zip(self.0.iter()) {
            chunk.copy_from_slice(&lane.to_be_bytes());
        }
    }
}
|
|
/// Byte (de)serialisation of two `u64` lanes, 8 bytes per lane, lane 0 first.
///
/// Uses the safe `from_*_bytes` / `to_*_bytes` conversions rather than a raw
/// pointer cast plus `transmute`, so nothing here depends on the in-memory
/// layout of the tuple struct.
impl StoreBytes for u64x2_generic {
    /// Decodes 16 bytes as two little-endian `u64` lanes.
    ///
    /// # Panics
    /// Panics if `input.len() != 16`.
    ///
    /// # Safety
    /// This implementation performs no unsafe operation; the `unsafe`
    /// qualifier comes from the trait signature.
    #[inline(always)]
    unsafe fn unsafe_read_le(input: &[u8]) -> Self {
        assert_eq!(input.len(), 16);
        let mut lanes = [0u64; 2];
        for (lane, bytes) in lanes.iter_mut().zip(input.chunks_exact(8)) {
            let mut raw = [0u8; 8];
            raw.copy_from_slice(bytes);
            *lane = u64::from_le_bytes(raw);
        }
        Self(lanes)
    }

    /// Decodes 16 bytes as two big-endian `u64` lanes.
    ///
    /// # Panics
    /// Panics if `input.len() != 16`.
    ///
    /// # Safety
    /// This implementation performs no unsafe operation; the `unsafe`
    /// qualifier comes from the trait signature.
    #[inline(always)]
    unsafe fn unsafe_read_be(input: &[u8]) -> Self {
        assert_eq!(input.len(), 16);
        let mut lanes = [0u64; 2];
        for (lane, bytes) in lanes.iter_mut().zip(input.chunks_exact(8)) {
            let mut raw = [0u8; 8];
            raw.copy_from_slice(bytes);
            *lane = u64::from_be_bytes(raw);
        }
        Self(lanes)
    }

    /// Encodes the two lanes as 16 little-endian bytes.
    ///
    /// # Panics
    /// Panics if `out.len() != 16`.
    #[inline(always)]
    fn write_le(self, out: &mut [u8]) {
        assert_eq!(out.len(), 16);
        for (chunk, lane) in out.chunks_exact_mut(8).zip(self.0.iter()) {
            chunk.copy_from_slice(&lane.to_le_bytes());
        }
    }

    /// Encodes the two lanes as 16 big-endian bytes.
    ///
    /// # Panics
    /// Panics if `out.len() != 16`.
    #[inline(always)]
    fn write_be(self, out: &mut [u8]) {
        assert_eq!(out.len(), 16);
        for (chunk, lane) in out.chunks_exact_mut(8).zip(self.0.iter()) {
            chunk.copy_from_slice(&lane.to_be_bytes());
        }
    }
}
|
|
|
|
#[derive(Copy, Clone)]
|
|
pub struct G0;
|
|
#[derive(Copy, Clone)]
|
|
pub struct G1;
|
|
pub type u32x4x2_generic = x2<u32x4_generic, G0>;
|
|
pub type u64x2x2_generic = x2<u64x2_generic, G0>;
|
|
pub type u64x4_generic = x2<u64x2_generic, G1>;
|
|
pub type u128x2_generic = x2<u128x1_generic, G0>;
|
|
pub type u32x4x4_generic = x4<u32x4_generic>;
|
|
pub type u64x2x4_generic = x4<u64x2_generic>;
|
|
pub type u128x4_generic = x4<u128x1_generic>;
|
|
|
|
impl Vector<[u32; 16]> for u32x4x4_generic {
    /// Flattens the four `u32x4` parts into sixteen scalars: part 0's lanes
    /// first, then part 1's, and so on.
    fn to_scalars(self) -> [u32; 16] {
        let [p0, p1, p2, p3] = self.0;
        let mut scalars = [0u32; 16];
        scalars[0..4].copy_from_slice(&p0.0);
        scalars[4..8].copy_from_slice(&p1.0);
        scalars[8..12].copy_from_slice(&p2.0);
        scalars[12..16].copy_from_slice(&p3.0);
        scalars
    }
}
|
|
|
|
impl MultiLane<[u32; 4]> for u32x4_generic {
|
|
#[inline(always)]
|
|
fn to_lanes(self) -> [u32; 4] {
|
|
self.0
|
|
}
|
|
#[inline(always)]
|
|
fn from_lanes(xs: [u32; 4]) -> Self {
|
|
Self(xs)
|
|
}
|
|
}
|
|
impl MultiLane<[u64; 2]> for u64x2_generic {
|
|
#[inline(always)]
|
|
fn to_lanes(self) -> [u64; 2] {
|
|
self.0
|
|
}
|
|
#[inline(always)]
|
|
fn from_lanes(xs: [u64; 2]) -> Self {
|
|
Self(xs)
|
|
}
|
|
}
|
|
impl MultiLane<[u64; 4]> for u64x4_generic {
|
|
#[inline(always)]
|
|
fn to_lanes(self) -> [u64; 4] {
|
|
let (a, b) = (self.0[0].to_lanes(), self.0[1].to_lanes());
|
|
[a[0], a[1], b[0], b[1]]
|
|
}
|
|
#[inline(always)]
|
|
fn from_lanes(xs: [u64; 4]) -> Self {
|
|
let (a, b) = (
|
|
u64x2_generic::from_lanes([xs[0], xs[1]]),
|
|
u64x2_generic::from_lanes([xs[2], xs[3]]),
|
|
);
|
|
x2::new([a, b])
|
|
}
|
|
}
|
|
impl MultiLane<[u128; 1]> for u128x1_generic {
|
|
#[inline(always)]
|
|
fn to_lanes(self) -> [u128; 1] {
|
|
self.0
|
|
}
|
|
#[inline(always)]
|
|
fn from_lanes(xs: [u128; 1]) -> Self {
|
|
Self(xs)
|
|
}
|
|
}
|
|
impl Vec4<u32> for u32x4_generic {
|
|
#[inline(always)]
|
|
fn extract(self, i: u32) -> u32 {
|
|
self.0[i as usize]
|
|
}
|
|
#[inline(always)]
|
|
fn insert(mut self, v: u32, i: u32) -> Self {
|
|
self.0[i as usize] = v;
|
|
self
|
|
}
|
|
}
|
|
impl Vec4<u64> for u64x4_generic {
    /// Returns lane `i` of the four `u64` lanes.
    ///
    /// # Panics
    /// Panics if `i >= 4` (array index out of bounds).
    #[inline(always)]
    fn extract(self, i: u32) -> u64 {
        let lanes: [u64; 4] = self.to_lanes();
        lanes[i as usize]
    }

    /// Returns a copy of `self` with lane `i` replaced by `v`.
    ///
    /// Lane `i` lives in half `i / 2` at position `i % 2`. The inner
    /// `insert` takes its receiver by value and returns the updated half, so
    /// the result must be written back; discarding it would leave `self`
    /// unchanged.
    ///
    /// # Panics
    /// Panics if `i >= 4` (array index out of bounds).
    #[inline(always)]
    fn insert(mut self, v: u64, i: u32) -> Self {
        let half = (i / 2) as usize;
        self.0[half] = self.0[half].insert(v, i % 2);
        self
    }
}
|
|
impl Vec2<u64> for u64x2_generic {
|
|
#[inline(always)]
|
|
fn extract(self, i: u32) -> u64 {
|
|
self.0[i as usize]
|
|
}
|
|
#[inline(always)]
|
|
fn insert(mut self, v: u64, i: u32) -> Self {
|
|
self.0[i as usize] = v;
|
|
self
|
|
}
|
|
}
|
|
|
|
impl Words4 for u32x4_generic {
|
|
#[inline(always)]
|
|
fn shuffle2301(self) -> Self {
|
|
self.swap64()
|
|
}
|
|
#[inline(always)]
|
|
fn shuffle1230(self) -> Self {
|
|
let x = self.0;
|
|
Self([x[3], x[0], x[1], x[2]])
|
|
}
|
|
#[inline(always)]
|
|
fn shuffle3012(self) -> Self {
|
|
let x = self.0;
|
|
Self([x[1], x[2], x[3], x[0]])
|
|
}
|
|
}
|
|
impl LaneWords4 for u32x4_generic {
|
|
#[inline(always)]
|
|
fn shuffle_lane_words2301(self) -> Self {
|
|
self.shuffle2301()
|
|
}
|
|
#[inline(always)]
|
|
fn shuffle_lane_words1230(self) -> Self {
|
|
self.shuffle1230()
|
|
}
|
|
#[inline(always)]
|
|
fn shuffle_lane_words3012(self) -> Self {
|
|
self.shuffle3012()
|
|
}
|
|
}
|
|
|
|
impl Words4 for u64x4_generic {
|
|
#[inline(always)]
|
|
fn shuffle2301(self) -> Self {
|
|
x2::new([self.0[1], self.0[0]])
|
|
}
|
|
#[inline(always)]
|
|
fn shuffle1230(self) -> Self {
|
|
unimplemented!()
|
|
}
|
|
#[inline(always)]
|
|
fn shuffle3012(self) -> Self {
|
|
unimplemented!()
|
|
}
|
|
}
|
|
|
|
impl u32x4<GenericMachine> for u32x4_generic {}
|
|
impl u64x2<GenericMachine> for u64x2_generic {}
|
|
impl u128x1<GenericMachine> for u128x1_generic {}
|
|
impl u32x4x2<GenericMachine> for u32x4x2_generic {}
|
|
impl u64x2x2<GenericMachine> for u64x2x2_generic {}
|
|
impl u64x4<GenericMachine> for u64x4_generic {}
|
|
impl u128x2<GenericMachine> for u128x2_generic {}
|
|
impl u32x4x4<GenericMachine> for u32x4x4_generic {}
|
|
impl u64x2x4<GenericMachine> for u64x2x4_generic {}
|
|
impl u128x4<GenericMachine> for u128x4_generic {}
|
|
|
|
/// Defines a function `$name` that obtains a `GenericMachine` instance and
/// forwards its arguments to an inner `fn_impl` that is generic over
/// `$MTy: Machine` and contains the supplied body.
///
/// The second arm accepts a function without a return type and re-invokes
/// the macro with `-> ()`. It recurses through `$crate::dispatch!` so the
/// expansion resolves even when the caller imported this macro by path
/// rather than with `#[macro_use]`.
#[macro_export]
macro_rules! dispatch {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        $crate::dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
|
|
/// Same expansion as `dispatch!` in this module: defines `$name`, obtains a
/// `GenericMachine` instance and forwards to a generic inner `fn_impl`.
///
/// The arm without a return type delegates to `$crate::dispatch!` with
/// `-> ()`. The `$crate::` path lets the expansion resolve even when the
/// caller imported only this macro by path.
#[macro_export]
macro_rules! dispatch_light128 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        $crate::dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
|
|
/// Same expansion as `dispatch!` in this module: defines `$name`, obtains a
/// `GenericMachine` instance and forwards to a generic inner `fn_impl`.
///
/// The arm without a return type delegates to `$crate::dispatch!` with
/// `-> ()`. The `$crate::` path lets the expansion resolve even when the
/// caller imported only this macro by path.
#[macro_export]
macro_rules! dispatch_light256 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        $crate::dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
|
|
/// Same expansion as `dispatch!` in this module: defines `$name`, obtains a
/// `GenericMachine` instance and forwards to a generic inner `fn_impl`.
///
/// The arm without a return type delegates to `$crate::dispatch!` with
/// `-> ()`. The `$crate::` path lets the expansion resolve even when the
/// caller imported only this macro by path.
#[macro_export]
macro_rules! dispatch_light512 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        $crate::dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
|
|
|
|
#[cfg(test)]
mod test {
    use super::*;

    /// `bswap` on a `u32x4` must reverse the bytes inside each 32-bit lane.
    #[test]
    fn test_bswap32() {
        let input = [0x0f0e_0d0c, 0x0b0a_0908, 0x0706_0504, 0x0302_0100];
        let expected = [0x0c0d_0e0f, 0x0809_0a0b, 0x0405_0607, 0x0001_0203];

        let machine = unsafe { GenericMachine::instance() };

        let loaded: <GenericMachine as Machine>::u32x4 = machine.vec(input);
        let swapped = loaded.bswap();

        let want = machine.vec(expected);
        assert_eq!(swapped, want);
    }
}
|