summaryrefslogtreecommitdiffstats
path: root/rust/vendor/polyval/src/backend/pmull.rs
diff options
context:
space:
mode:
Diffstat (limited to 'rust/vendor/polyval/src/backend/pmull.rs')
-rw-r--r--rust/vendor/polyval/src/backend/pmull.rs116
1 files changed, 116 insertions, 0 deletions
diff --git a/rust/vendor/polyval/src/backend/pmull.rs b/rust/vendor/polyval/src/backend/pmull.rs
new file mode 100644
index 0000000..01d3626
--- /dev/null
+++ b/rust/vendor/polyval/src/backend/pmull.rs
@@ -0,0 +1,116 @@
+//! ARMv8 `PMULL`-accelerated implementation of POLYVAL.
+//!
+//! Based on this C intrinsics implementation:
+//! <https://github.com/noloader/AES-Intrinsics/blob/master/clmul-arm.c>
+//!
+//! Original C written and placed in public domain by Jeffrey Walton.
+//! Based on code from ARM, and by Johannes Schneiders, Skip Hovsmith and
+//! Barry O'Rourke for the mbedTLS project.
+//!
+//! For more information about PMULL, see:
+//! - <https://developer.arm.com/documentation/100069/0608/A64-SIMD-Vector-Instructions/PMULL--PMULL2--vector->
+//! - <https://eprint.iacr.org/2015/688.pdf>
+
+use crate::{Block, Key};
+use core::{arch::aarch64::*, mem};
+use universal_hash::{consts::U16, NewUniversalHash, Output, UniversalHash};
+
+/// **POLYVAL**: GHASH-like universal hash over GF(2^128).
+#[derive(Clone)]
+pub struct Polyval {
+ h: uint8x16_t,
+ y: uint8x16_t,
+}
+
+impl NewUniversalHash for Polyval {
+ type KeySize = U16;
+
+ /// Initialize POLYVAL with the given `H` field element
+ fn new(h: &Key) -> Self {
+ unsafe {
+ Self {
+ h: vld1q_u8(h.as_ptr()),
+ y: vdupq_n_u8(0), // all zeroes
+ }
+ }
+ }
+}
+
+impl UniversalHash for Polyval {
+ type BlockSize = U16;
+
+ #[inline]
+ fn update(&mut self, x: &Block) {
+ unsafe {
+ self.mul(x);
+ }
+ }
+
+ /// Reset internal state
+ fn reset(&mut self) {
+ unsafe {
+ self.y = vdupq_n_u8(0);
+ }
+ }
+
+ /// Get GHASH output
+ fn finalize(self) -> Output<Self> {
+ unsafe { mem::transmute(self.y) }
+ }
+}
+
+impl Polyval {
+ /// Mask value used when performing reduction.
+ /// This corresponds to POLYVAL's polynomial with the highest bit unset.
+ const MASK: u128 = 1 << 127 | 1 << 126 | 1 << 121 | 1;
+
+ /// POLYVAL carryless multiplication.
+ // TODO(tarcieri): investigate ordering optimizations and fusions e.g.`fuse-crypto-eor`
+ #[inline]
+ #[target_feature(enable = "neon")]
+ unsafe fn mul(&mut self, x: &Block) {
+ let h = self.h;
+ let y = veorq_u8(self.y, vld1q_u8(x.as_ptr()));
+
+ // polynomial multiply
+ let z = vdupq_n_u8(0);
+ let r0 = pmull::<0, 0>(h, y);
+ let r1 = pmull::<1, 1>(h, y);
+ let t0 = pmull::<0, 1>(h, y);
+ let t1 = pmull::<1, 0>(h, y);
+ let t0 = veorq_u8(t0, t1);
+ let t1 = vextq_u8(z, t0, 8);
+ let r0 = veorq_u8(r0, t1);
+ let t1 = vextq_u8(t0, z, 8);
+ let r1 = veorq_u8(r1, t1);
+
+ // polynomial reduction
+ let p = mem::transmute(Self::MASK);
+ let t0 = pmull::<0, 1>(r0, p);
+ let t1 = vextq_u8(t0, t0, 8);
+ let r0 = veorq_u8(r0, t1);
+ let t1 = pmull::<1, 1>(r0, p);
+ let r0 = veorq_u8(r0, t1);
+
+ self.y = veorq_u8(r0, r1);
+ }
+}
+
+/// Wrapper for the ARM64 `PMULL` instruction.
+#[inline(always)]
+unsafe fn pmull<const A_LANE: i32, const B_LANE: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+ mem::transmute(vmull_p64(
+ vgetq_lane_u64(vreinterpretq_u64_u8(a), A_LANE),
+ vgetq_lane_u64(vreinterpretq_u64_u8(b), B_LANE),
+ ))
+}
+
+// TODO(tarcieri): zeroize support
+// #[cfg(feature = "zeroize")]
+// impl Drop for Polyval {
+// fn drop(&mut self) {
+// use zeroize::Zeroize;
+// self.h.zeroize();
+// self.y.zeroize();
+// }
+// }