#![allow(non_camel_case_types)]
use core::ops::{Add, AddAssign, BitAnd, BitOr, BitXor, BitXorAssign, Not};

/// Combined "and-not" of two values of the same type.
///
/// NOTE(review): which operand gets complemented is decided by each implementation and is not
/// visible from this declaration; confirm before relying on operand order.
pub trait AndNot {
    /// Type produced by [`AndNot::andnot`].
    type Output;
    fn andnot(self, rhs: Self) -> Self::Output;
}

/// Byte-swap operation.
///
/// NOTE(review): presumably reverses the byte order of each word; verify against the
/// implementations.
pub trait BSwap {
    fn bswap(self) -> Self;
}

/// Ops that depend on word size
///
/// `Add` is pinned to `Output = Self` so a sum stays in the same vector type and can be chained
/// with further operations in generic code.
pub trait ArithOps: Add<Output = Self> + AddAssign + Sized + Copy + Clone + BSwap {}

/// Ops that are independent of word size and endian
///
/// Every binary/unary operator is pinned to `Output = Self` so results can be fed straight into
/// the next operation without conversions.
pub trait BitOps0:
    BitAnd<Output = Self>
    + BitOr<Output = Self>
    + BitXor<Output = Self>
    + BitXorAssign
    + Not<Output = Self>
    + AndNot<Output = Self>
    + Sized
    + Copy
    + Clone
{
}

/// Bit operations available on vectors whose words are at least 32 bits wide.
pub trait BitOps32: BitOps0 + RotateEachWord32 {}

/// Bit operations available on vectors whose words are at least 64 bits wide.
pub trait BitOps64: BitOps32 + RotateEachWord64 {}

/// Bit operations available on vectors whose words are 128 bits wide.
pub trait BitOps128: BitOps64 + RotateEachWord128 {}

/// Fixed-distance rotations applied to every word of the vector.
///
/// NOTE(review): going by the method names, each one rotates every word right by the number of
/// bits in its suffix.
pub trait RotateEachWord32 {
    fn rotate_each_word_right7(self) -> Self;
    fn rotate_each_word_right8(self) -> Self;
    fn rotate_each_word_right11(self) -> Self;
    fn rotate_each_word_right12(self) -> Self;
    fn rotate_each_word_right16(self) -> Self;
    fn rotate_each_word_right20(self) -> Self;
    fn rotate_each_word_right24(self) -> Self;
    fn rotate_each_word_right25(self) -> Self;
}

/// Additional rotation that only makes sense once words are at least 64 bits wide.
pub trait RotateEachWord64 {
    fn rotate_each_word_right32(self) -> Self;
}

/// Marker for 128-bit-word rotations; it currently declares no methods.
pub trait RotateEachWord128 {}

// Vector type naming scheme:
// uN[xP]xL
// Unsigned; N-bit words * P bits per lane * L lanes
//
// A lane is always 128-bits, chosen because common SIMD architectures treat 128-bit units of
// wide vectors specially (supporting e.g. intra-lane shuffles), and tend to have limited and
// slow inter-lane operations.

use crate::arch::{vec128_storage, vec256_storage, vec512_storage};

/// Unchecked conversion from `T`.
///
/// NOTE(review): the safety contract is not stated anywhere in this file (hence the lint
/// allowance below); implementors should document what the caller must guarantee.
#[allow(clippy::missing_safety_doc)]
pub trait UnsafeFrom<T> {
    unsafe fn unsafe_from(t: T) -> Self;
}

/// A vector composed of two elements, which may be words or themselves vectors.
pub trait Vec2<W> {
    /// Returns element `i`.
    fn extract(self, i: u32) -> W;
    /// Returns the vector with element `i` replaced by `w`.
    fn insert(self, w: W, i: u32) -> Self;
}

/// A vector composed of four elements, which may be words or themselves vectors.
pub trait Vec4 { fn extract(self, i: u32) -> W; fn insert(self, w: W, i: u32) -> Self; } /// Vec4 functions which may not be implemented yet for all Vec4 types. /// NOTE: functions in this trait may be moved to Vec4 in any patch release. To avoid breakage, /// import Vec4Ext only together with Vec4, and don't qualify its methods. pub trait Vec4Ext { fn transpose4(a: Self, b: Self, c: Self, d: Self) -> (Self, Self, Self, Self) where Self: Sized; } pub trait Vector { fn to_scalars(self) -> T; } // TODO: multiples of 4 should inherit this /// A vector composed of four words; depending on their size, operations may cross lanes. pub trait Words4 { fn shuffle1230(self) -> Self; fn shuffle2301(self) -> Self; fn shuffle3012(self) -> Self; } /// A vector composed one or more lanes each composed of four words. pub trait LaneWords4 { fn shuffle_lane_words1230(self) -> Self; fn shuffle_lane_words2301(self) -> Self; fn shuffle_lane_words3012(self) -> Self; } // TODO: make this a part of BitOps /// Exchange neigboring ranges of bits of the specified size pub trait Swap64 { fn swap1(self) -> Self; fn swap2(self) -> Self; fn swap4(self) -> Self; fn swap8(self) -> Self; fn swap16(self) -> Self; fn swap32(self) -> Self; fn swap64(self) -> Self; } pub trait u32x4: BitOps32 + Store + ArithOps + Vec4 + Words4 + LaneWords4 + StoreBytes + MultiLane<[u32; 4]> + Into { } pub trait u64x2: BitOps64 + Store + ArithOps + Vec2 + MultiLane<[u64; 2]> + Into { } pub trait u128x1: BitOps128 + Store + Swap64 + MultiLane<[u128; 1]> + Into { } pub trait u32x4x2: BitOps32 + Store + Vec2 + MultiLane<[M::u32x4; 2]> + ArithOps + Into + StoreBytes { } pub trait u64x2x2: BitOps64 + Store + Vec2 + MultiLane<[M::u64x2; 2]> + ArithOps + StoreBytes + Into { } pub trait u64x4: BitOps64 + Store + Vec4 + MultiLane<[u64; 4]> + ArithOps + Words4 + StoreBytes + Into { } pub trait u128x2: BitOps128 + Store + Vec2 + MultiLane<[M::u128x1; 2]> + Swap64 + Into { } pub trait u32x4x4: BitOps32 + Store + Vec4 + Vec4Ext + 
Vector<[u32; 16]> + MultiLane<[M::u32x4; 4]> + ArithOps + LaneWords4 + Into + StoreBytes { } pub trait u64x2x4: BitOps64 + Store + Vec4 + MultiLane<[M::u64x2; 4]> + ArithOps + Into { } // TODO: Words4 pub trait u128x4: BitOps128 + Store + Vec4 + MultiLane<[M::u128x1; 4]> + Swap64 + Into { } /// A vector composed of multiple 128-bit lanes. pub trait MultiLane { /// Split a multi-lane vector into single-lane vectors. fn to_lanes(self) -> Lanes; /// Build a multi-lane vector from individual lanes. fn from_lanes(lanes: Lanes) -> Self; } /// Combine single vectors into a multi-lane vector. pub trait VZip { fn vzip(self) -> V; } impl VZip for T where V: MultiLane, { #[inline(always)] fn vzip(self) -> V { V::from_lanes(self) } } pub trait Machine: Sized + Copy { type u32x4: u32x4; type u64x2: u64x2; type u128x1: u128x1; type u32x4x2: u32x4x2; type u64x2x2: u64x2x2; type u64x4: u64x4; type u128x2: u128x2; type u32x4x4: u32x4x4; type u64x2x4: u64x2x4; type u128x4: u128x4; #[inline(always)] fn unpack>(self, s: S) -> V { unsafe { V::unpack(s) } } #[inline(always)] fn vec(self, a: A) -> V where V: MultiLane, { V::from_lanes(a) } #[inline(always)] fn read_le(self, input: &[u8]) -> V where V: StoreBytes, { unsafe { V::unsafe_read_le(input) } } #[inline(always)] fn read_be(self, input: &[u8]) -> V where V: StoreBytes, { unsafe { V::unsafe_read_be(input) } } /// # Safety /// Caller must ensure the type of Self is appropriate for the hardware of the execution /// environment. unsafe fn instance() -> Self; } pub trait Store { /// # Safety /// Caller must ensure the type of Self is appropriate for the hardware of the execution /// environment. unsafe fn unpack(p: S) -> Self; } pub trait StoreBytes { /// # Safety /// Caller must ensure the type of Self is appropriate for the hardware of the execution /// environment. unsafe fn unsafe_read_le(input: &[u8]) -> Self; /// # Safety /// Caller must ensure the type of Self is appropriate for the hardware of the execution /// environment. 
unsafe fn unsafe_read_be(input: &[u8]) -> Self; fn write_le(self, out: &mut [u8]); fn write_be(self, out: &mut [u8]); }