diff options
Diffstat (limited to 'rust/vendor/polyval/src/backend/pmull.rs')
-rw-r--r-- | rust/vendor/polyval/src/backend/pmull.rs | 116 |
1 files changed, 116 insertions, 0 deletions
diff --git a/rust/vendor/polyval/src/backend/pmull.rs b/rust/vendor/polyval/src/backend/pmull.rs new file mode 100644 index 0000000..01d3626 --- /dev/null +++ b/rust/vendor/polyval/src/backend/pmull.rs @@ -0,0 +1,116 @@ +//! ARMv8 `PMULL`-accelerated implementation of POLYVAL. +//! +//! Based on this C intrinsics implementation: +//! <https://github.com/noloader/AES-Intrinsics/blob/master/clmul-arm.c> +//! +//! Original C written and placed in public domain by Jeffrey Walton. +//! Based on code from ARM, and by Johannes Schneiders, Skip Hovsmith and +//! Barry O'Rourke for the mbedTLS project. +//! +//! For more information about PMULL, see: +//! - <https://developer.arm.com/documentation/100069/0608/A64-SIMD-Vector-Instructions/PMULL--PMULL2--vector-> +//! - <https://eprint.iacr.org/2015/688.pdf> + +use crate::{Block, Key}; +use core::{arch::aarch64::*, mem}; +use universal_hash::{consts::U16, NewUniversalHash, Output, UniversalHash}; + +/// **POLYVAL**: GHASH-like universal hash over GF(2^128). +#[derive(Clone)] +pub struct Polyval { + h: uint8x16_t, + y: uint8x16_t, +} + +impl NewUniversalHash for Polyval { + type KeySize = U16; + + /// Initialize POLYVAL with the given `H` field element + fn new(h: &Key) -> Self { + unsafe { + Self { + h: vld1q_u8(h.as_ptr()), + y: vdupq_n_u8(0), // all zeroes + } + } + } +} + +impl UniversalHash for Polyval { + type BlockSize = U16; + + #[inline] + fn update(&mut self, x: &Block) { + unsafe { + self.mul(x); + } + } + + /// Reset internal state + fn reset(&mut self) { + unsafe { + self.y = vdupq_n_u8(0); + } + } + + /// Get GHASH output + fn finalize(self) -> Output<Self> { + unsafe { mem::transmute(self.y) } + } +} + +impl Polyval { + /// Mask value used when performing reduction. + /// This corresponds to POLYVAL's polynomial with the highest bit unset. + const MASK: u128 = 1 << 127 | 1 << 126 | 1 << 121 | 1; + + /// POLYVAL carryless multiplication. + // TODO(tarcieri): investigate ordering optimizations and fusions e.g.`fuse-crypto-eor` + #[inline] + #[target_feature(enable = "neon")] + unsafe fn mul(&mut self, x: &Block) { + let h = self.h; + let y = veorq_u8(self.y, vld1q_u8(x.as_ptr())); + + // polynomial multiply + let z = vdupq_n_u8(0); + let r0 = pmull::<0, 0>(h, y); + let r1 = pmull::<1, 1>(h, y); + let t0 = pmull::<0, 1>(h, y); + let t1 = pmull::<1, 0>(h, y); + let t0 = veorq_u8(t0, t1); + let t1 = vextq_u8(z, t0, 8); + let r0 = veorq_u8(r0, t1); + let t1 = vextq_u8(t0, z, 8); + let r1 = veorq_u8(r1, t1); + + // polynomial reduction + let p = mem::transmute(Self::MASK); + let t0 = pmull::<0, 1>(r0, p); + let t1 = vextq_u8(t0, t0, 8); + let r0 = veorq_u8(r0, t1); + let t1 = pmull::<1, 1>(r0, p); + let r0 = veorq_u8(r0, t1); + + self.y = veorq_u8(r0, r1); + } +} + +/// Wrapper for the ARM64 `PMULL` instruction. +#[inline(always)] +unsafe fn pmull<const A_LANE: i32, const B_LANE: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + mem::transmute(vmull_p64( + vgetq_lane_u64(vreinterpretq_u64_u8(a), A_LANE), + vgetq_lane_u64(vreinterpretq_u64_u8(b), B_LANE), + )) +} + +// TODO(tarcieri): zeroize support +// #[cfg(feature = "zeroize")] +// impl Drop for Polyval { +// fn drop(&mut self) { +// use zeroize::Zeroize; +// self.h.zeroize(); +// self.y.zeroize(); +// } +// } |