author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-19 17:39:49 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-19 17:39:49 +0000
commit     a0aa2307322cd47bbf416810ac0292925e03be87 (patch)
tree       37076262a026c4b48c8a0e84f44ff9187556ca35 /rust/vendor/polyval/src
parent     Initial commit. (diff)
Adding upstream version 1:7.0.3.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'rust/vendor/polyval/src')
-rw-r--r--  rust/vendor/polyval/src/backend.rs             |  24
-rw-r--r--  rust/vendor/polyval/src/backend/autodetect.rs  | 111
-rw-r--r--  rust/vendor/polyval/src/backend/clmul.rs       | 142
-rw-r--r--  rust/vendor/polyval/src/backend/pmull.rs       | 116
-rw-r--r--  rust/vendor/polyval/src/backend/soft32.rs      | 281
-rw-r--r--  rust/vendor/polyval/src/backend/soft64.rs      | 205
-rw-r--r--  rust/vendor/polyval/src/lib.rs                 | 110
-rw-r--r--  rust/vendor/polyval/src/mulx.rs                | 188
8 files changed, 1177 insertions, 0 deletions
diff --git a/rust/vendor/polyval/src/backend.rs b/rust/vendor/polyval/src/backend.rs
new file mode 100644
index 0000000..2bd0c28
--- /dev/null
+++ b/rust/vendor/polyval/src/backend.rs
@@ -0,0 +1,24 @@
+//! POLYVAL backends
+
+#[cfg_attr(not(target_pointer_width = "64"), path = "backend/soft32.rs")]
+#[cfg_attr(target_pointer_width = "64", path = "backend/soft64.rs")]
+mod soft;
+
+use cfg_if::cfg_if;
+
+cfg_if! {
+    if #[cfg(all(target_arch = "aarch64", feature = "armv8", not(feature = "force-soft")))] {
+        mod autodetect;
+        mod pmull;
+        pub use crate::backend::autodetect::Polyval;
+    } else if #[cfg(all(
+        any(target_arch = "x86_64", target_arch = "x86"),
+        not(feature = "force-soft")
+    ))] {
+        mod autodetect;
+        mod clmul;
+        pub use crate::backend::autodetect::Polyval;
+    } else {
+        pub use crate::backend::soft::Polyval;
+    }
+}
diff --git a/rust/vendor/polyval/src/backend/autodetect.rs b/rust/vendor/polyval/src/backend/autodetect.rs
new file mode 100644
index 0000000..4d4ee97
--- /dev/null
+++ b/rust/vendor/polyval/src/backend/autodetect.rs
@@ -0,0 +1,111 @@
+//! Autodetection for CPU intrinsics, with fallback to the "soft" backend when
+//! they are unavailable.
+
+use crate::{backend::soft, Block, Key};
+use core::mem::ManuallyDrop;
+use universal_hash::{consts::U16, NewUniversalHash, Output, UniversalHash};
+
+#[cfg(all(target_arch = "aarch64", feature = "armv8"))]
+use super::pmull as intrinsics;
+
+#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
+use super::clmul as intrinsics;
+
+#[cfg(all(target_arch = "aarch64", feature = "armv8"))]
+cpufeatures::new!(mul_intrinsics, "aes"); // `aes` implies PMULL
+
+#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
+cpufeatures::new!(mul_intrinsics, "pclmulqdq", "sse4.1");
+
+/// **POLYVAL**: GHASH-like universal hash over GF(2^128).
+pub struct Polyval {
+    inner: Inner,
+    token: mul_intrinsics::InitToken,
+}
+
+union Inner {
+    intrinsics: ManuallyDrop<intrinsics::Polyval>,
+    soft: ManuallyDrop<soft::Polyval>,
+}
+
+impl NewUniversalHash for Polyval {
+    type KeySize = U16;
+
+    /// Initialize POLYVAL with the given `H` field element
+    fn new(h: &Key) -> Self {
+        let (token, has_intrinsics) = mul_intrinsics::init_get();
+
+        let inner = if has_intrinsics {
+            Inner {
+                intrinsics: ManuallyDrop::new(intrinsics::Polyval::new(h)),
+            }
+        } else {
+            Inner {
+                soft: ManuallyDrop::new(soft::Polyval::new(h)),
+            }
+        };
+
+        Self { inner, token }
+    }
+}
+
+impl UniversalHash for Polyval {
+    type BlockSize = U16;
+
+    /// Input a field element `X` to be authenticated
+    #[inline]
+    fn update(&mut self, x: &Block) {
+        if self.token.get() {
+            unsafe { (*self.inner.intrinsics).update(x) }
+        } else {
+            unsafe { (*self.inner.soft).update(x) }
+        }
+    }
+
+    /// Reset internal state
+    fn reset(&mut self) {
+        if self.token.get() {
+            unsafe { (*self.inner.intrinsics).reset() }
+        } else {
+            unsafe { (*self.inner.soft).reset() }
+        }
+    }
+
+    /// Get POLYVAL result (i.e. computed `S` field element)
+    fn finalize(self) -> Output<Self> {
+        let output_bytes = if self.token.get() {
+            unsafe {
+                ManuallyDrop::into_inner(self.inner.intrinsics)
+                    .finalize()
+                    .into_bytes()
+            }
+        } else {
+            unsafe {
+                ManuallyDrop::into_inner(self.inner.soft)
+                    .finalize()
+                    .into_bytes()
+            }
+        };
+
+        Output::new(output_bytes)
+    }
+}
+
+impl Clone for Polyval {
+    fn clone(&self) -> Self {
+        let inner = if self.token.get() {
+            Inner {
+                intrinsics: ManuallyDrop::new(unsafe { (*self.inner.intrinsics).clone() }),
+            }
+        } else {
+            Inner {
+                soft: ManuallyDrop::new(unsafe { (*self.inner.soft).clone() }),
+            }
+        };
+
+        Self {
+            inner,
+            token: self.token,
+        }
+    }
+}
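The autodetect backend above pairs a `union` with `ManuallyDrop` so both backends share one wrapper without boxing: union fields may not have drop glue, and the CPU-feature token records which variant is live. A minimal standalone sketch of that dispatch pattern, with hypothetical `Fast`/`Slow` types and a plain `bool` standing in for the `cpufeatures` token (not the crate's names):

```rust
use core::mem::ManuallyDrop;

struct Fast(u32);
struct Slow(u32);

// Union fields must not need dropping, hence the `ManuallyDrop` wrappers.
union Inner {
    fast: ManuallyDrop<Fast>,
    slow: ManuallyDrop<Slow>,
}

struct Dispatch {
    inner: Inner,
    has_fast: bool, // stands in for `mul_intrinsics::InitToken`
}

impl Dispatch {
    fn new(has_fast: bool) -> Self {
        let inner = if has_fast {
            Inner { fast: ManuallyDrop::new(Fast(1)) }
        } else {
            Inner { slow: ManuallyDrop::new(Slow(2)) }
        };
        Self { inner, has_fast }
    }

    fn get(&self) -> u32 {
        // SAFETY: `has_fast` tracks which union field was initialized in `new`.
        if self.has_fast {
            unsafe { self.inner.fast.0 }
        } else {
            unsafe { self.inner.slow.0 }
        }
    }
}

fn main() {
    assert_eq!(Dispatch::new(true).get(), 1);
    assert_eq!(Dispatch::new(false).get(), 2);
}
```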
+#[cfg(feature = "zeroize")] +impl Drop for Polyval { + fn drop(&mut self) { + use zeroize::Zeroize; + self.h.zeroize(); + self.y.zeroize(); + } +} + +#[inline(always)] +unsafe fn xor4(e1: __m128i, e2: __m128i, e3: __m128i, e4: __m128i) -> __m128i { + _mm_xor_si128(_mm_xor_si128(e1, e2), _mm_xor_si128(e3, e4)) +} + +#[inline(always)] +unsafe fn xor5(e1: __m128i, e2: __m128i, e3: __m128i, e4: __m128i, e5: __m128i) -> __m128i { + _mm_xor_si128( + e1, + _mm_xor_si128(_mm_xor_si128(e2, e3), _mm_xor_si128(e4, e5)), + ) +} diff --git a/rust/vendor/polyval/src/backend/pmull.rs b/rust/vendor/polyval/src/backend/pmull.rs new file mode 100644 index 0000000..01d3626 --- /dev/null +++ b/rust/vendor/polyval/src/backend/pmull.rs @@ -0,0 +1,116 @@ +//! ARMv8 `PMULL`-accelerated implementation of POLYVAL. +//! +//! Based on this C intrinsics implementation: +//! <https://github.com/noloader/AES-Intrinsics/blob/master/clmul-arm.c> +//! +//! Original C written and placed in public domain by Jeffrey Walton. +//! Based on code from ARM, and by Johannes Schneiders, Skip Hovsmith and +//! Barry O'Rourke for the mbedTLS project. +//! +//! For more information about PMULL, see: +//! - <https://developer.arm.com/documentation/100069/0608/A64-SIMD-Vector-Instructions/PMULL--PMULL2--vector-> +//! - <https://eprint.iacr.org/2015/688.pdf> + +use crate::{Block, Key}; +use core::{arch::aarch64::*, mem}; +use universal_hash::{consts::U16, NewUniversalHash, Output, UniversalHash}; + +/// **POLYVAL**: GHASH-like universal hash over GF(2^128). +#[derive(Clone)] +pub struct Polyval { + h: uint8x16_t, + y: uint8x16_t, +} + +impl NewUniversalHash for Polyval { + type KeySize = U16; + + /// Initialize POLYVAL with the given `H` field element + fn new(h: &Key) -> Self { + unsafe { + Self { + h: vld1q_u8(h.as_ptr()), + y: vdupq_n_u8(0), // all zeroes + } + } + } +} + +impl UniversalHash for Polyval { + type BlockSize = U16; + + #[inline] + fn update(&mut self, x: &Block) { + unsafe { + self.mul(x); + } + } + + /// Reset internal state + fn reset(&mut self) { + unsafe { + self.y = vdupq_n_u8(0); + } + } + + /// Get GHASH output + fn finalize(self) -> Output<Self> { + unsafe { mem::transmute(self.y) } + } +} + +impl Polyval { + /// Mask value used when performing reduction. + /// This corresponds to POLYVAL's polynomial with the highest bit unset. + const MASK: u128 = 1 << 127 | 1 << 126 | 1 << 121 | 1; + + /// POLYVAL carryless multiplication. + // TODO(tarcieri): investigate ordering optimizations and fusions e.g.`fuse-crypto-eor` + #[inline] + #[target_feature(enable = "neon")] + unsafe fn mul(&mut self, x: &Block) { + let h = self.h; + let y = veorq_u8(self.y, vld1q_u8(x.as_ptr())); + + // polynomial multiply + let z = vdupq_n_u8(0); + let r0 = pmull::<0, 0>(h, y); + let r1 = pmull::<1, 1>(h, y); + let t0 = pmull::<0, 1>(h, y); + let t1 = pmull::<1, 0>(h, y); + let t0 = veorq_u8(t0, t1); + let t1 = vextq_u8(z, t0, 8); + let r0 = veorq_u8(r0, t1); + let t1 = vextq_u8(t0, z, 8); + let r1 = veorq_u8(r1, t1); + + // polynomial reduction + let p = mem::transmute(Self::MASK); + let t0 = pmull::<0, 1>(r0, p); + let t1 = vextq_u8(t0, t0, 8); + let r0 = veorq_u8(r0, t1); + let t1 = pmull::<1, 1>(r0, p); + let r0 = veorq_u8(r0, t1); + + self.y = veorq_u8(r0, r1); + } +} + +/// Wrapper for the ARM64 `PMULL` instruction. 
diff --git a/rust/vendor/polyval/src/backend/pmull.rs b/rust/vendor/polyval/src/backend/pmull.rs
new file mode 100644
index 0000000..01d3626
--- /dev/null
+++ b/rust/vendor/polyval/src/backend/pmull.rs
@@ -0,0 +1,116 @@
+//! ARMv8 `PMULL`-accelerated implementation of POLYVAL.
+//!
+//! Based on this C intrinsics implementation:
+//! <https://github.com/noloader/AES-Intrinsics/blob/master/clmul-arm.c>
+//!
+//! Original C written and placed in public domain by Jeffrey Walton.
+//! Based on code from ARM, and by Johannes Schneiders, Skip Hovsmith and
+//! Barry O'Rourke for the mbedTLS project.
+//!
+//! For more information about PMULL, see:
+//! - <https://developer.arm.com/documentation/100069/0608/A64-SIMD-Vector-Instructions/PMULL--PMULL2--vector->
+//! - <https://eprint.iacr.org/2015/688.pdf>
+
+use crate::{Block, Key};
+use core::{arch::aarch64::*, mem};
+use universal_hash::{consts::U16, NewUniversalHash, Output, UniversalHash};
+
+/// **POLYVAL**: GHASH-like universal hash over GF(2^128).
+#[derive(Clone)]
+pub struct Polyval {
+    h: uint8x16_t,
+    y: uint8x16_t,
+}
+
+impl NewUniversalHash for Polyval {
+    type KeySize = U16;
+
+    /// Initialize POLYVAL with the given `H` field element
+    fn new(h: &Key) -> Self {
+        unsafe {
+            Self {
+                h: vld1q_u8(h.as_ptr()),
+                y: vdupq_n_u8(0), // all zeroes
+            }
+        }
+    }
+}
+
+impl UniversalHash for Polyval {
+    type BlockSize = U16;
+
+    #[inline]
+    fn update(&mut self, x: &Block) {
+        unsafe {
+            self.mul(x);
+        }
+    }
+
+    /// Reset internal state
+    fn reset(&mut self) {
+        unsafe {
+            self.y = vdupq_n_u8(0);
+        }
+    }
+
+    /// Get GHASH output
+    fn finalize(self) -> Output<Self> {
+        unsafe { mem::transmute(self.y) }
+    }
+}
+
+impl Polyval {
+    /// Mask value used when performing reduction.
+    /// This corresponds to POLYVAL's polynomial with the highest bit unset.
+    const MASK: u128 = 1 << 127 | 1 << 126 | 1 << 121 | 1;
+
+    /// POLYVAL carryless multiplication.
+    // TODO(tarcieri): investigate ordering optimizations and fusions e.g. `fuse-crypto-eor`
+    #[inline]
+    #[target_feature(enable = "neon")]
+    unsafe fn mul(&mut self, x: &Block) {
+        let h = self.h;
+        let y = veorq_u8(self.y, vld1q_u8(x.as_ptr()));
+
+        // polynomial multiply
+        let z = vdupq_n_u8(0);
+        let r0 = pmull::<0, 0>(h, y);
+        let r1 = pmull::<1, 1>(h, y);
+        let t0 = pmull::<0, 1>(h, y);
+        let t1 = pmull::<1, 0>(h, y);
+        let t0 = veorq_u8(t0, t1);
+        let t1 = vextq_u8(z, t0, 8);
+        let r0 = veorq_u8(r0, t1);
+        let t1 = vextq_u8(t0, z, 8);
+        let r1 = veorq_u8(r1, t1);
+
+        // polynomial reduction
+        let p = mem::transmute(Self::MASK);
+        let t0 = pmull::<0, 1>(r0, p);
+        let t1 = vextq_u8(t0, t0, 8);
+        let r0 = veorq_u8(r0, t1);
+        let t1 = pmull::<1, 1>(r0, p);
+        let r0 = veorq_u8(r0, t1);
+
+        self.y = veorq_u8(r0, r1);
+    }
+}
+
+/// Wrapper for the ARM64 `PMULL` instruction.
+#[inline(always)]
+unsafe fn pmull<const A_LANE: i32, const B_LANE: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    mem::transmute(vmull_p64(
+        vgetq_lane_u64(vreinterpretq_u64_u8(a), A_LANE),
+        vgetq_lane_u64(vreinterpretq_u64_u8(b), B_LANE),
+    ))
+}
+
+// TODO(tarcieri): zeroize support
+// #[cfg(feature = "zeroize")]
+// impl Drop for Polyval {
+//     fn drop(&mut self) {
+//         use zeroize::Zeroize;
+//         self.h.zeroize();
+//         self.y.zeroize();
+//     }
+// }
diff --git a/rust/vendor/polyval/src/backend/soft32.rs b/rust/vendor/polyval/src/backend/soft32.rs
new file mode 100644
index 0000000..6a09c58
--- /dev/null
+++ b/rust/vendor/polyval/src/backend/soft32.rs
@@ -0,0 +1,281 @@
+//! Constant-time software implementation of POLYVAL for 32-bit architectures
+//! Adapted from BearSSL's `ghash_ctmul32.c`:
+//!
+//! <https://bearssl.org/gitweb/?p=BearSSL;a=blob;f=src/hash/ghash_ctmul32.c;hb=4b6046412>
+//!
+//! Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+//!
+//! This implementation uses 32-bit multiplications, and only the low
+//! 32 bits for each multiplication result. This is meant primarily for
+//! the ARM Cortex M0 and M0+, whose multiplication opcode does not yield
+//! the upper 32 bits; but it might also be useful on architectures where
+//! access to the upper 32 bits requires use of specific registers that
+//! create contention (e.g. on i386, "mul" necessarily outputs the result
+//! in edx:eax, while "imul" can use any registers but is limited to the
+//! low 32 bits).
+//!
+//! The implementation trick that is used here is bit-reversing (bit 0
+//! is swapped with bit 31, bit 1 with bit 30, and so on). In GF(2)[X],
+//! for all values x and y, we have:
+//!
+//! ```text
+//! rev32(x) * rev32(y) = rev64(x * y)
+//! ```
+//!
+//! In other words, if we bit-reverse (over 32 bits) the operands, then we
+//! bit-reverse (over 64 bits) the result.
+
+use crate::{Block, Key};
+use core::{
+    convert::TryInto,
+    num::Wrapping,
+    ops::{Add, Mul},
+};
+use universal_hash::{consts::U16, NewUniversalHash, Output, UniversalHash};
+
+#[cfg(feature = "zeroize")]
+use zeroize::Zeroize;
+
+/// **POLYVAL**: GHASH-like universal hash over GF(2^128).
+#[derive(Clone)]
+pub struct Polyval {
+    /// GF(2^128) field element input blocks are multiplied by
+    h: U32x4,
+
+    /// Field element representing the computed universal hash
+    s: U32x4,
+}
+
+impl NewUniversalHash for Polyval {
+    type KeySize = U16;
+
+    /// Initialize POLYVAL with the given `H` field element
+    fn new(h: &Key) -> Self {
+        Self {
+            h: h.into(),
+            s: U32x4::default(),
+        }
+    }
+}
+
+impl UniversalHash for Polyval {
+    type BlockSize = U16;
+
+    /// Input a field element `X` to be authenticated
+    fn update(&mut self, x: &Block) {
+        let x = U32x4::from(x);
+        self.s = (self.s + x) * self.h;
+    }
+
+    /// Reset internal state
+    fn reset(&mut self) {
+        self.s = U32x4::default();
+    }
+
+    /// Get POLYVAL result (i.e. computed `S` field element)
+    fn finalize(self) -> Output<Self> {
+        let mut block = Block::default();
+
+        for (chunk, i) in block
+            .chunks_mut(4)
+            .zip(&[self.s.0, self.s.1, self.s.2, self.s.3])
+        {
+            chunk.copy_from_slice(&i.to_le_bytes());
+        }
+
+        Output::new(block)
+    }
+}
+
+#[cfg(feature = "zeroize")]
+impl Drop for Polyval {
+    fn drop(&mut self) {
+        self.h.zeroize();
+        self.s.zeroize();
+    }
+}
+
+/// 4 x `u32` values
+#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)]
+struct U32x4(u32, u32, u32, u32);
+
+impl From<&Block> for U32x4 {
+    fn from(bytes: &Block) -> U32x4 {
+        U32x4(
+            u32::from_le_bytes(bytes[..4].try_into().unwrap()),
+            u32::from_le_bytes(bytes[4..8].try_into().unwrap()),
+            u32::from_le_bytes(bytes[8..12].try_into().unwrap()),
+            u32::from_le_bytes(bytes[12..].try_into().unwrap()),
+        )
+    }
+}
+
+#[allow(clippy::suspicious_arithmetic_impl)]
+impl Add for U32x4 {
+    type Output = Self;
+
+    /// Adds two POLYVAL field elements.
+    fn add(self, rhs: Self) -> Self::Output {
+        U32x4(
+            self.0 ^ rhs.0,
+            self.1 ^ rhs.1,
+            self.2 ^ rhs.2,
+            self.3 ^ rhs.3,
+        )
+    }
+}
+
+#[allow(clippy::suspicious_arithmetic_impl)]
+impl Mul for U32x4 {
+    type Output = Self;
+
+    /// Computes carryless POLYVAL multiplication over GF(2^128) in constant time.
+    ///
+    /// Method described at:
+    /// <https://www.bearssl.org/constanttime.html#ghash-for-gcm>
+    ///
+    /// POLYVAL multiplication is effectively the little endian equivalent of
+    /// GHASH multiplication, aside from one small detail described here:
+    ///
+    /// <https://crypto.stackexchange.com/questions/66448/how-does-bearssls-gcm-modular-reduction-work/66462#66462>
+    ///
+    /// > The product of two bit-reversed 128-bit polynomials yields the
+    /// > bit-reversed result over 255 bits, not 256. The BearSSL code ends up
+    /// > with a 256-bit result in zw[], and that value is shifted by one bit,
+    /// > because of that reversed convention issue. Thus, the code must
+    /// > include a shifting step to put it back where it should
+    ///
+    /// This shift is unnecessary for POLYVAL and has been removed.
+    fn mul(self, rhs: Self) -> Self {
+        let hw = [self.0, self.1, self.2, self.3];
+        let yw = [rhs.0, rhs.1, rhs.2, rhs.3];
+        let hwr = [rev32(hw[0]), rev32(hw[1]), rev32(hw[2]), rev32(hw[3])];
+
+        // We are using Karatsuba: the 128x128 multiplication is
+        // reduced to three 64x64 multiplications, hence nine
+        // 32x32 multiplications. With the bit-reversal trick,
+        // we have to perform 18 32x32 multiplications.
+
+        let mut a = [0u32; 18];
+
+        a[0] = yw[0];
+        a[1] = yw[1];
+        a[2] = yw[2];
+        a[3] = yw[3];
+        a[4] = a[0] ^ a[1];
+        a[5] = a[2] ^ a[3];
+        a[6] = a[0] ^ a[2];
+        a[7] = a[1] ^ a[3];
+        a[8] = a[6] ^ a[7];
+        a[9] = rev32(yw[0]);
+        a[10] = rev32(yw[1]);
+        a[11] = rev32(yw[2]);
+        a[12] = rev32(yw[3]);
+        a[13] = a[9] ^ a[10];
+        a[14] = a[11] ^ a[12];
+        a[15] = a[9] ^ a[11];
+        a[16] = a[10] ^ a[12];
+        a[17] = a[15] ^ a[16];
+
+        let mut b = [0u32; 18];
+
+        b[0] = hw[0];
+        b[1] = hw[1];
+        b[2] = hw[2];
+        b[3] = hw[3];
+        b[4] = b[0] ^ b[1];
+        b[5] = b[2] ^ b[3];
+        b[6] = b[0] ^ b[2];
+        b[7] = b[1] ^ b[3];
+        b[8] = b[6] ^ b[7];
+        b[9] = hwr[0];
+        b[10] = hwr[1];
+        b[11] = hwr[2];
+        b[12] = hwr[3];
+        b[13] = b[9] ^ b[10];
+        b[14] = b[11] ^ b[12];
+        b[15] = b[9] ^ b[11];
+        b[16] = b[10] ^ b[12];
+        b[17] = b[15] ^ b[16];
+
+        let mut c = [0u32; 18];
+
+        for i in 0..18 {
+            c[i] = bmul32(a[i], b[i]);
+        }
+
+        c[4] ^= c[0] ^ c[1];
+        c[5] ^= c[2] ^ c[3];
+        c[8] ^= c[6] ^ c[7];
+
+        c[13] ^= c[9] ^ c[10];
+        c[14] ^= c[11] ^ c[12];
+        c[17] ^= c[15] ^ c[16];
+
+        let mut zw = [0u32; 8];
+
+        zw[0] = c[0];
+        zw[1] = c[4] ^ rev32(c[9]) >> 1;
+        zw[2] = c[1] ^ c[0] ^ c[2] ^ c[6] ^ rev32(c[13]) >> 1;
+        zw[3] = c[4] ^ c[5] ^ c[8] ^ rev32(c[10] ^ c[9] ^ c[11] ^ c[15]) >> 1;
+        zw[4] = c[2] ^ c[1] ^ c[3] ^ c[7] ^ rev32(c[13] ^ c[14] ^ c[17]) >> 1;
+        zw[5] = c[5] ^ rev32(c[11] ^ c[10] ^ c[12] ^ c[16]) >> 1;
+        zw[6] = c[3] ^ rev32(c[14]) >> 1;
+        zw[7] = rev32(c[12]) >> 1;
+
+        for i in 0..4 {
+            let lw = zw[i];
+            zw[i + 4] ^= lw ^ (lw >> 1) ^ (lw >> 2) ^ (lw >> 7);
+            zw[i + 3] ^= (lw << 31) ^ (lw << 30) ^ (lw << 25);
+        }
+
+        U32x4(zw[4], zw[5], zw[6], zw[7])
+    }
+}
+
+#[cfg(feature = "zeroize")]
+impl Zeroize for U32x4 {
+    fn zeroize(&mut self) {
+        self.0.zeroize();
+        self.1.zeroize();
+        self.2.zeroize();
+        self.3.zeroize();
+    }
+}
+
+/// Multiplication in GF(2)[X], truncated to the low 32-bits, with "holes"
+/// (sequences of zeroes) to avoid carry spilling.
+///
+/// When carries do occur, they wind up in a "hole" and are subsequently masked
+/// out of the result.
+fn bmul32(x: u32, y: u32) -> u32 {
+    let x0 = Wrapping(x & 0x1111_1111);
+    let x1 = Wrapping(x & 0x2222_2222);
+    let x2 = Wrapping(x & 0x4444_4444);
+    let x3 = Wrapping(x & 0x8888_8888);
+    let y0 = Wrapping(y & 0x1111_1111);
+    let y1 = Wrapping(y & 0x2222_2222);
+    let y2 = Wrapping(y & 0x4444_4444);
+    let y3 = Wrapping(y & 0x8888_8888);
+
+    let mut z0 = ((x0 * y0) ^ (x1 * y3) ^ (x2 * y2) ^ (x3 * y1)).0;
+    let mut z1 = ((x0 * y1) ^ (x1 * y0) ^ (x2 * y3) ^ (x3 * y2)).0;
+    let mut z2 = ((x0 * y2) ^ (x1 * y1) ^ (x2 * y0) ^ (x3 * y3)).0;
+    let mut z3 = ((x0 * y3) ^ (x1 * y2) ^ (x2 * y1) ^ (x3 * y0)).0;
+
+    z0 &= 0x1111_1111;
+    z1 &= 0x2222_2222;
+    z2 &= 0x4444_4444;
+    z3 &= 0x8888_8888;
+
+    z0 | z1 | z2 | z3
+}
+
+/// Bit-reverse a 32-bit word in constant time.
+fn rev32(mut x: u32) -> u32 {
+    x = ((x & 0x5555_5555) << 1) | (x >> 1 & 0x5555_5555);
+    x = ((x & 0x3333_3333) << 2) | (x >> 2 & 0x3333_3333);
+    x = ((x & 0x0f0f_0f0f) << 4) | (x >> 4 & 0x0f0f_0f0f);
+    x = ((x & 0x00ff_00ff) << 8) | (x >> 8 & 0x00ff_00ff);
+    (x << 16) | (x >> 16)
+}
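The bit-reversal identity quoted in the header above holds up to a one-bit shift: the reversed product of two degree-below-32 polynomials occupies bits 0..=62 of a 64-bit word, so `rev64` of the plain product lands one position higher. That is exactly what the `rev32(...) >> 1` terms in `mul` compensate for. A quick standalone check (illustrative only, not the crate's code):

```rust
/// Naive carryless multiplication over GF(2)[X], 32x32 -> 64 bits.
fn clmul32(x: u32, y: u32) -> u64 {
    let mut r = 0u64;
    for i in 0..32 {
        if (x >> i) & 1 == 1 {
            r ^= (y as u64) << i;
        }
    }
    r
}

fn main() {
    let (x, y) = (0xdead_beefu32, 0x0123_4567u32);

    // rev32(x) * rev32(y) == rev64(x * y) >> 1: the carryless product of the
    // reversed operands has degree < 63, hence the compensating shift.
    let lhs = clmul32(x.reverse_bits(), y.reverse_bits());
    let rhs = clmul32(x, y).reverse_bits() >> 1;
    assert_eq!(lhs, rhs);
}
```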
diff --git a/rust/vendor/polyval/src/backend/soft64.rs b/rust/vendor/polyval/src/backend/soft64.rs
new file mode 100644
index 0000000..fe159a4
--- /dev/null
+++ b/rust/vendor/polyval/src/backend/soft64.rs
@@ -0,0 +1,205 @@
+//! Constant-time software implementation of POLYVAL for 64-bit architectures.
+//! Adapted from BearSSL's `ghash_ctmul64.c`:
+//!
+//! <https://bearssl.org/gitweb/?p=BearSSL;a=blob;f=src/hash/ghash_ctmul64.c;hb=4b6046412>
+//!
+//! Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+
+use crate::{Block, Key};
+use core::{
+    convert::TryInto,
+    num::Wrapping,
+    ops::{Add, Mul},
+};
+use universal_hash::{consts::U16, NewUniversalHash, Output, UniversalHash};
+
+#[cfg(feature = "zeroize")]
+use zeroize::Zeroize;
+
+/// **POLYVAL**: GHASH-like universal hash over GF(2^128).
+#[derive(Clone)]
+pub struct Polyval {
+    /// GF(2^128) field element input blocks are multiplied by
+    h: U64x2,
+
+    /// Field element representing the computed universal hash
+    s: U64x2,
+}
+
+impl NewUniversalHash for Polyval {
+    type KeySize = U16;
+
+    /// Initialize POLYVAL with the given `H` field element
+    fn new(h: &Key) -> Self {
+        Self {
+            h: h.into(),
+            s: U64x2::default(),
+        }
+    }
+}
+
+impl UniversalHash for Polyval {
+    type BlockSize = U16;
+
+    /// Input a field element `X` to be authenticated
+    fn update(&mut self, x: &Block) {
+        let x = U64x2::from(x);
+        self.s = (self.s + x) * self.h;
+    }
+
+    /// Reset internal state
+    fn reset(&mut self) {
+        self.s = U64x2::default();
+    }
+
+    /// Get POLYVAL result (i.e. computed `S` field element)
+    fn finalize(self) -> Output<Self> {
+        let mut block = Block::default();
+
+        for (chunk, i) in block.chunks_mut(8).zip(&[self.s.0, self.s.1]) {
+            chunk.copy_from_slice(&i.to_le_bytes());
+        }
+
+        Output::new(block)
+    }
+}
+
+#[cfg(feature = "zeroize")]
+impl Drop for Polyval {
+    fn drop(&mut self) {
+        self.h.zeroize();
+        self.s.zeroize();
+    }
+}
+
+/// 2 x `u64` values
+#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)]
+struct U64x2(u64, u64);
+
+impl From<&Block> for U64x2 {
+    fn from(bytes: &Block) -> U64x2 {
+        U64x2(
+            u64::from_le_bytes(bytes[..8].try_into().unwrap()),
+            u64::from_le_bytes(bytes[8..].try_into().unwrap()),
+        )
+    }
+}
+
+#[allow(clippy::suspicious_arithmetic_impl)]
+impl Add for U64x2 {
+    type Output = Self;
+
+    /// Adds two POLYVAL field elements.
+    fn add(self, rhs: Self) -> Self::Output {
+        U64x2(self.0 ^ rhs.0, self.1 ^ rhs.1)
+    }
+}
+
+#[allow(clippy::suspicious_arithmetic_impl)]
+impl Mul for U64x2 {
+    type Output = Self;
+
+    /// Computes carryless POLYVAL multiplication over GF(2^128) in constant time.
+    ///
+    /// Method described at:
+    /// <https://www.bearssl.org/constanttime.html#ghash-for-gcm>
+    ///
+    /// POLYVAL multiplication is effectively the little endian equivalent of
+    /// GHASH multiplication, aside from one small detail described here:
+    ///
+    /// <https://crypto.stackexchange.com/questions/66448/how-does-bearssls-gcm-modular-reduction-work/66462#66462>
+    ///
+    /// > The product of two bit-reversed 128-bit polynomials yields the
+    /// > bit-reversed result over 255 bits, not 256. The BearSSL code ends up
+    /// > with a 256-bit result in zw[], and that value is shifted by one bit,
+    /// > because of that reversed convention issue. Thus, the code must
+    /// > include a shifting step to put it back where it should
+    ///
+    /// This shift is unnecessary for POLYVAL and has been removed.
+    fn mul(self, rhs: Self) -> Self {
+        let h0 = self.0;
+        let h1 = self.1;
+        let h0r = rev64(h0);
+        let h1r = rev64(h1);
+        let h2 = h0 ^ h1;
+        let h2r = h0r ^ h1r;
+
+        let y0 = rhs.0;
+        let y1 = rhs.1;
+        let y0r = rev64(y0);
+        let y1r = rev64(y1);
+        let y2 = y0 ^ y1;
+        let y2r = y0r ^ y1r;
+        let z0 = bmul64(y0, h0);
+        let z1 = bmul64(y1, h1);
+
+        let mut z2 = bmul64(y2, h2);
+        let mut z0h = bmul64(y0r, h0r);
+        let mut z1h = bmul64(y1r, h1r);
+        let mut z2h = bmul64(y2r, h2r);
+
+        z2 ^= z0 ^ z1;
+        z2h ^= z0h ^ z1h;
+        z0h = rev64(z0h) >> 1;
+        z1h = rev64(z1h) >> 1;
+        z2h = rev64(z2h) >> 1;
+
+        let v0 = z0;
+        let mut v1 = z0h ^ z2;
+        let mut v2 = z1 ^ z2h;
+        let mut v3 = z1h;
+
+        v2 ^= v0 ^ (v0 >> 1) ^ (v0 >> 2) ^ (v0 >> 7);
+        v1 ^= (v0 << 63) ^ (v0 << 62) ^ (v0 << 57);
+        v3 ^= v1 ^ (v1 >> 1) ^ (v1 >> 2) ^ (v1 >> 7);
+        v2 ^= (v1 << 63) ^ (v1 << 62) ^ (v1 << 57);
+
+        U64x2(v2, v3)
+    }
+}
+
+#[cfg(feature = "zeroize")]
+impl Zeroize for U64x2 {
+    fn zeroize(&mut self) {
+        self.0.zeroize();
+        self.1.zeroize();
+    }
+}
+
+/// Multiplication in GF(2)[X], truncated to the low 64-bits, with "holes"
+/// (sequences of zeroes) to avoid carry spilling.
+///
+/// When carries do occur, they wind up in a "hole" and are subsequently masked
+/// out of the result.
+fn bmul64(x: u64, y: u64) -> u64 {
+    let x0 = Wrapping(x & 0x1111_1111_1111_1111);
+    let x1 = Wrapping(x & 0x2222_2222_2222_2222);
+    let x2 = Wrapping(x & 0x4444_4444_4444_4444);
+    let x3 = Wrapping(x & 0x8888_8888_8888_8888);
+    let y0 = Wrapping(y & 0x1111_1111_1111_1111);
+    let y1 = Wrapping(y & 0x2222_2222_2222_2222);
+    let y2 = Wrapping(y & 0x4444_4444_4444_4444);
+    let y3 = Wrapping(y & 0x8888_8888_8888_8888);
+
+    let mut z0 = ((x0 * y0) ^ (x1 * y3) ^ (x2 * y2) ^ (x3 * y1)).0;
+    let mut z1 = ((x0 * y1) ^ (x1 * y0) ^ (x2 * y3) ^ (x3 * y2)).0;
+    let mut z2 = ((x0 * y2) ^ (x1 * y1) ^ (x2 * y0) ^ (x3 * y3)).0;
+    let mut z3 = ((x0 * y3) ^ (x1 * y2) ^ (x2 * y1) ^ (x3 * y0)).0;
+
+    z0 &= 0x1111_1111_1111_1111;
+    z1 &= 0x2222_2222_2222_2222;
+    z2 &= 0x4444_4444_4444_4444;
+    z3 &= 0x8888_8888_8888_8888;
+
+    z0 | z1 | z2 | z3
+}
+
+/// Bit-reverse a `u64` in constant time
+fn rev64(mut x: u64) -> u64 {
+    x = ((x & 0x5555_5555_5555_5555) << 1) | ((x >> 1) & 0x5555_5555_5555_5555);
+    x = ((x & 0x3333_3333_3333_3333) << 2) | ((x >> 2) & 0x3333_3333_3333_3333);
+    x = ((x & 0x0f0f_0f0f_0f0f_0f0f) << 4) | ((x >> 4) & 0x0f0f_0f0f_0f0f_0f0f);
+    x = ((x & 0x00ff_00ff_00ff_00ff) << 8) | ((x >> 8) & 0x00ff_00ff_00ff_00ff);
+    x = ((x & 0xffff_0000_ffff) << 16) | ((x >> 16) & 0xffff_0000_ffff);
+    (x << 32) | (x >> 32)
}
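A standalone harness (not part of the crate) cross-checking the masked "holes" multiplication above against a naive, variable-time carryless reference; `bmul64` is copied from the file above for self-containment:

```rust
use core::num::Wrapping;

/// `bmul64` as defined above, copied for a self-contained check.
fn bmul64(x: u64, y: u64) -> u64 {
    let x0 = Wrapping(x & 0x1111_1111_1111_1111);
    let x1 = Wrapping(x & 0x2222_2222_2222_2222);
    let x2 = Wrapping(x & 0x4444_4444_4444_4444);
    let x3 = Wrapping(x & 0x8888_8888_8888_8888);
    let y0 = Wrapping(y & 0x1111_1111_1111_1111);
    let y1 = Wrapping(y & 0x2222_2222_2222_2222);
    let y2 = Wrapping(y & 0x4444_4444_4444_4444);
    let y3 = Wrapping(y & 0x8888_8888_8888_8888);

    let mut z0 = ((x0 * y0) ^ (x1 * y3) ^ (x2 * y2) ^ (x3 * y1)).0;
    let mut z1 = ((x0 * y1) ^ (x1 * y0) ^ (x2 * y3) ^ (x3 * y2)).0;
    let mut z2 = ((x0 * y2) ^ (x1 * y1) ^ (x2 * y0) ^ (x3 * y3)).0;
    let mut z3 = ((x0 * y3) ^ (x1 * y2) ^ (x2 * y1) ^ (x3 * y0)).0;

    z0 &= 0x1111_1111_1111_1111;
    z1 &= 0x2222_2222_2222_2222;
    z2 &= 0x4444_4444_4444_4444;
    z3 &= 0x8888_8888_8888_8888;

    z0 | z1 | z2 | z3
}

/// Naive carryless multiply, low 64 bits only (variable-time reference).
fn clmul_low(x: u64, y: u64) -> u64 {
    let mut r = 0u64;
    for i in 0..64 {
        if (x >> i) & 1 == 1 {
            r ^= y << i;
        }
    }
    r
}

fn main() {
    // A few fixed probes; the masked form must agree on any inputs.
    for &(x, y) in &[
        (1u64, u64::MAX),
        (0x9e37_79b9_7f4a_7c15, 0xdead_beef_dead_beef),
        (u64::MAX, u64::MAX),
    ] {
        assert_eq!(bmul64(x, y), clmul_low(x, y));
    }
}
```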
diff --git a/rust/vendor/polyval/src/lib.rs b/rust/vendor/polyval/src/lib.rs
new file mode 100644
index 0000000..146776e
--- /dev/null
+++ b/rust/vendor/polyval/src/lib.rs
@@ -0,0 +1,110 @@
+//! **POLYVAL** is a GHASH-like universal hash over GF(2^128) useful for
+//! implementing [AES-GCM-SIV] or [AES-GCM/GMAC].
+//!
+//! From [RFC 8452 Section 3] which defines POLYVAL for use in AES-GCM-SIV:
+//!
+//! > "POLYVAL, like GHASH (the authenticator in AES-GCM; ...), operates in a
+//! > binary field of size 2^128. The field is defined by the irreducible
+//! > polynomial x^128 + x^127 + x^126 + x^121 + 1."
+//!
+//! By multiplying (in the finite field sense) a sequence of 128-bit blocks of
+//! input data by a field element `H`, POLYVAL can be used to authenticate
+//! the message sequence as powers (in the finite field sense) of `H`.
+//!
+//! # Minimum Supported Rust Version
+//! Rust **1.49** or higher.
+//!
+//! In the future the minimum supported Rust version may be changed, but it
+//! will be accompanied by a minor version bump.
+//!
+//! # Supported backends
+//! This crate provides multiple backends including a portable pure Rust
+//! backend as well as ones based on CPU intrinsics.
+//!
+//! ## "soft" portable backend
+//! As a baseline implementation, this crate provides a constant-time pure Rust
+//! implementation based on [BearSSL]: a straightforward and compact
+//! implementation that uses a clever but simple technique to avoid
+//! carry-spilling.
+//!
+//! ## ARMv8 intrinsics (`PMULL`, nightly-only)
+//! On `aarch64` targets including `aarch64-apple-darwin` (Apple M1) and Linux
+//! targets such as `aarch64-unknown-linux-gnu` and `aarch64-unknown-linux-musl`,
+//! support for using the `PMULL` instructions in ARMv8's Cryptography Extensions
+//! is available when using the nightly compiler, and can be enabled using the
+//! `armv8` crate feature.
+//!
+//! On Linux and macOS, when the `armv8` feature is enabled, support for AES
+//! intrinsics is autodetected at runtime. On other platforms the `crypto`
+//! target feature must be enabled via RUSTFLAGS.
+//!
+//! ## `x86`/`x86_64` intrinsics (`CLMUL`)
+//! By default this crate uses runtime detection on `i686`/`x86_64` targets
+//! in order to determine if `CLMUL` is available, and if it is not, it will
+//! fall back to using a constant-time software implementation.
+//!
+//! For optimal performance, set `target-cpu` in `RUSTFLAGS` to `sandybridge`
+//! or newer:
+//!
+//! Example:
+//!
+//! ```text
+//! $ RUSTFLAGS="-Ctarget-cpu=sandybridge" cargo bench
+//! ```
+//!
+//! # Relationship to GHASH
+//! POLYVAL can be thought of as the little endian equivalent of GHASH, which
+//! affords it a small performance advantage over GHASH when used on little
+//! endian architectures.
+//!
+//! It has also been designed so it can be used to compute GHASH and, with
+//! it, GMAC, the Message Authentication Code (MAC) used by AES-GCM.
+//!
+//! From [RFC 8452 Appendix A]:
+//!
+//! > "GHASH and POLYVAL both operate in GF(2^128), although with different
+//! > irreducible polynomials: POLYVAL works modulo x^128 + x^127 + x^126 +
+//! > x^121 + 1 and GHASH works modulo x^128 + x^7 + x^2 + x + 1. Note
+//! > that these irreducible polynomials are the 'reverse' of each other."
+//!
+//! [AES-GCM-SIV]: https://en.wikipedia.org/wiki/AES-GCM-SIV
+//! [AES-GCM/GMAC]: https://en.wikipedia.org/wiki/Galois/Counter_Mode
+//! [BearSSL]: https://www.bearssl.org/constanttime.html#ghash-for-gcm
+//! [RFC 8452 Section 3]: https://tools.ietf.org/html/rfc8452#section-3
+//! [RFC 8452 Appendix A]: https://tools.ietf.org/html/rfc8452#appendix-A

+#![no_std]
+#![cfg_attr(docsrs, feature(doc_cfg))]
+#![cfg_attr(
+    all(feature = "armv8", target_arch = "aarch64"),
+    feature(stdsimd, aarch64_target_feature)
+)]
+#![doc(
+    html_logo_url = "https://raw.githubusercontent.com/RustCrypto/media/8f1a9894/logo.svg",
+    html_favicon_url = "https://raw.githubusercontent.com/RustCrypto/media/8f1a9894/logo.svg",
+    html_root_url = "https://docs.rs/polyval/0.5.3"
+)]
+#![warn(missing_docs, rust_2018_idioms)]
+
+mod backend;
+mod mulx;
+
+pub use crate::{backend::Polyval, mulx::mulx};
+pub use universal_hash;
+
+opaque_debug::implement!(Polyval);
+
+/// Size of a POLYVAL block in bytes
+pub const BLOCK_SIZE: usize = 16;
+
+/// Size of a POLYVAL key in bytes
+pub const KEY_SIZE: usize = 16;
+
+/// POLYVAL keys (16-bytes)
+pub type Key = universal_hash::Key<Polyval>;
+
+/// POLYVAL blocks (16-bytes)
+pub type Block = universal_hash::Block<Polyval>;
+
+/// POLYVAL tags (16-bytes)
+pub type Tag = universal_hash::Output<Polyval>;
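A minimal usage sketch of the API this file exports, under the `universal-hash` 0.4-series traits used throughout this vendored version (the key and block bytes are arbitrary placeholders):

```rust
use polyval::{
    universal_hash::{NewUniversalHash, UniversalHash},
    Block, Key, Polyval,
};

fn main() {
    // `H` field element; in AES-GCM-SIV it is derived from the cipher key.
    let key = Key::from([0x42; 16]);
    let mut poly = Polyval::new(&key);

    // Authenticate two 16-byte blocks.
    poly.update(&Block::from([0x01; 16]));
    poly.update(&Block::from([0x02; 16]));

    // 16-byte universal-hash output (the `S` field element).
    let tag: [u8; 16] = poly.finalize().into_bytes().into();
    println!("tag = {:02x?}", tag);
}
```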
diff --git a/rust/vendor/polyval/src/mulx.rs b/rust/vendor/polyval/src/mulx.rs
new file mode 100644
index 0000000..2abd868
--- /dev/null
+++ b/rust/vendor/polyval/src/mulx.rs
@@ -0,0 +1,188 @@
+//! The `mulX_POLYVAL()` function.
+
+use crate::Block;
+
+/// The `mulX_POLYVAL()` function as defined in [RFC 8452 Appendix A][1].
+///
+/// Performs a doubling (a.k.a. "multiply by x") over GF(2^128).
+/// This is useful for implementing GHASH in terms of POLYVAL.
+///
+/// [1]: https://tools.ietf.org/html/rfc8452#appendix-A
+pub fn mulx(block: &Block) -> Block {
+    let mut v = u128::from_le_bytes((*block).into());
+    let v_hi = v >> 127;
+
+    v <<= 1;
+    v ^= v_hi ^ (v_hi << 127) ^ (v_hi << 126) ^ (v_hi << 121);
+    v.to_le_bytes().into()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{mulx, Block};
+    use hex_literal::hex;
+
+    /// Test vector given in RFC 8452 Appendix A.
+    ///
+    /// NOTE: the vector in the RFC actually contains a typo which has been
+    /// reported (and accepted) as RFC errata, so we use the vector from the
+    /// errata instead:
+    ///
+    /// <https://www.rfc-editor.org/errata_search.php?rfc=8452>
+    #[test]
+    fn rfc8452_vector() {
+        let input = Block::from(hex!("9c98c04df9387ded828175a92ba652d8"));
+        let expected_output = Block::from(hex!("3931819bf271fada0503eb52574ca572"));
+        let actual_output = mulx(&input);
+        assert_eq!(expected_output, actual_output);
+    }
+
+    /// Test against the `MULX_TEST_VECTORS` given below, which cover the full
+    /// size of a POLYVAL field element.
+    #[test]
+    fn mulx_vectors() {
+        // One
+        let mut r = Block::from(hex!("01000000000000000000000000000000"));
+
+        for vector in MULX_TEST_VECTORS {
+            r = mulx(&r);
+            assert_eq!(&r, Block::from_slice(vector));
+        }
+    }
+
+    /// `mulX_POLYVAL()` test vectors.
+    ///
+    /// These were generated by this crate when in a known-correct state,
+    /// verified by a GHASH implementation based on a POLYVAL core successfully
+    /// passing the NIST test vectors.
+    const MULX_TEST_VECTORS: &[[u8; 16]] = &[
+        hex!("02000000000000000000000000000000"),
+        hex!("04000000000000000000000000000000"),
+        hex!("08000000000000000000000000000000"),
+        hex!("10000000000000000000000000000000"),
+        hex!("20000000000000000000000000000000"),
+        hex!("40000000000000000000000000000000"),
+        hex!("80000000000000000000000000000000"),
+        hex!("00010000000000000000000000000000"),
+        hex!("00020000000000000000000000000000"),
+        hex!("00040000000000000000000000000000"),
+        hex!("00080000000000000000000000000000"),
+        hex!("00100000000000000000000000000000"),
+        hex!("00200000000000000000000000000000"),
+        hex!("00400000000000000000000000000000"),
+        hex!("00800000000000000000000000000000"),
+        hex!("00000100000000000000000000000000"),
+        hex!("00000200000000000000000000000000"),
+        hex!("00000400000000000000000000000000"),
+        hex!("00000800000000000000000000000000"),
+        hex!("00001000000000000000000000000000"),
+        hex!("00002000000000000000000000000000"),
+        hex!("00004000000000000000000000000000"),
+        hex!("00008000000000000000000000000000"),
+        hex!("00000001000000000000000000000000"),
+        hex!("00000002000000000000000000000000"),
+        hex!("00000004000000000000000000000000"),
+        hex!("00000008000000000000000000000000"),
+        hex!("00000010000000000000000000000000"),
+        hex!("00000020000000000000000000000000"),
+        hex!("00000040000000000000000000000000"),
+        hex!("00000080000000000000000000000000"),
+        hex!("00000000010000000000000000000000"),
+        hex!("00000000020000000000000000000000"),
+        hex!("00000000040000000000000000000000"),
+        hex!("00000000080000000000000000000000"),
+        hex!("00000000100000000000000000000000"),
+        hex!("00000000200000000000000000000000"),
+        hex!("00000000400000000000000000000000"),
+        hex!("00000000800000000000000000000000"),
+        hex!("00000000000100000000000000000000"),
+        hex!("00000000000200000000000000000000"),
+        hex!("00000000000400000000000000000000"),
+        hex!("00000000000800000000000000000000"),
+        hex!("00000000001000000000000000000000"),
+        hex!("00000000002000000000000000000000"),
+        hex!("00000000004000000000000000000000"),
+        hex!("00000000008000000000000000000000"),
+        hex!("00000000000001000000000000000000"),
+        hex!("00000000000002000000000000000000"),
+        hex!("00000000000004000000000000000000"),
+        hex!("00000000000008000000000000000000"),
+        hex!("00000000000010000000000000000000"),
+        hex!("00000000000020000000000000000000"),
+        hex!("00000000000040000000000000000000"),
+        hex!("00000000000080000000000000000000"),
+        hex!("00000000000000010000000000000000"),
+        hex!("00000000000000020000000000000000"),
+        hex!("00000000000000040000000000000000"),
+        hex!("00000000000000080000000000000000"),
+        hex!("00000000000000100000000000000000"),
+        hex!("00000000000000200000000000000000"),
+        hex!("00000000000000400000000000000000"),
+        hex!("00000000000000800000000000000000"),
+        hex!("00000000000000000100000000000000"),
+        hex!("00000000000000000200000000000000"),
+        hex!("00000000000000000400000000000000"),
+        hex!("00000000000000000800000000000000"),
+        hex!("00000000000000001000000000000000"),
+        hex!("00000000000000002000000000000000"),
+        hex!("00000000000000004000000000000000"),
+        hex!("00000000000000008000000000000000"),
+        hex!("00000000000000000001000000000000"),
+        hex!("00000000000000000002000000000000"),
+        hex!("00000000000000000004000000000000"),
+        hex!("00000000000000000008000000000000"),
+        hex!("00000000000000000010000000000000"),
+        hex!("00000000000000000020000000000000"),
+        hex!("00000000000000000040000000000000"),
+        hex!("00000000000000000080000000000000"),
+        hex!("00000000000000000000010000000000"),
+        hex!("00000000000000000000020000000000"),
+        hex!("00000000000000000000040000000000"),
+        hex!("00000000000000000000080000000000"),
+        hex!("00000000000000000000100000000000"),
+        hex!("00000000000000000000200000000000"),
+        hex!("00000000000000000000400000000000"),
+        hex!("00000000000000000000800000000000"),
+        hex!("00000000000000000000000100000000"),
+        hex!("00000000000000000000000200000000"),
+        hex!("00000000000000000000000400000000"),
+        hex!("00000000000000000000000800000000"),
+        hex!("00000000000000000000001000000000"),
+        hex!("00000000000000000000002000000000"),
+        hex!("00000000000000000000004000000000"),
+        hex!("00000000000000000000008000000000"),
+        hex!("00000000000000000000000001000000"),
+        hex!("00000000000000000000000002000000"),
+        hex!("00000000000000000000000004000000"),
+        hex!("00000000000000000000000008000000"),
+        hex!("00000000000000000000000010000000"),
+        hex!("00000000000000000000000020000000"),
+        hex!("00000000000000000000000040000000"),
+        hex!("00000000000000000000000080000000"),
+        hex!("00000000000000000000000000010000"),
+        hex!("00000000000000000000000000020000"),
+        hex!("00000000000000000000000000040000"),
+        hex!("00000000000000000000000000080000"),
+        hex!("00000000000000000000000000100000"),
+        hex!("00000000000000000000000000200000"),
+        hex!("00000000000000000000000000400000"),
+        hex!("00000000000000000000000000800000"),
+        hex!("00000000000000000000000000000100"),
+        hex!("00000000000000000000000000000200"),
+        hex!("00000000000000000000000000000400"),
+        hex!("00000000000000000000000000000800"),
+        hex!("00000000000000000000000000001000"),
+        hex!("00000000000000000000000000002000"),
+        hex!("00000000000000000000000000004000"),
+        hex!("00000000000000000000000000008000"),
+        hex!("00000000000000000000000000000001"),
+        hex!("00000000000000000000000000000002"),
+        hex!("00000000000000000000000000000004"),
+        hex!("00000000000000000000000000000008"),
+        hex!("00000000000000000000000000000010"),
+        hex!("00000000000000000000000000000020"),
+        hex!("00000000000000000000000000000040"),
+        hex!("00000000000000000000000000000080"),
+        hex!("010000000000000000000000000000c2"),
+    ];
+}