author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-30 03:57:31 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-30 03:57:31 +0000
commit     dc0db358abe19481e475e10c32149b53370f1a1c
tree       ab8ce99c4b255ce46f99ef402c27916055b899ee /vendor/sha2/src
parent     Releasing progress-linux version 1.71.1+dfsg1-2~progress7.99u1.
Merging upstream version 1.72.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/sha2/src')
-rw-r--r--  vendor/sha2/src/lib.rs            |   3
-rw-r--r--  vendor/sha2/src/sha256/aarch64.rs | 146
-rw-r--r--  vendor/sha2/src/sha512.rs         |   4
-rw-r--r--  vendor/sha2/src/sha512/aarch64.rs | 235
4 files changed, 386 insertions, 2 deletions
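Note: the changes below are internal to the vendored sha2 crate. The public `Digest` API is unchanged, and the new aarch64 backends are only selected at runtime when the CPU reports the required extensions. For orientation, here is a minimal sketch of how the crate is normally consumed (standard sha2/digest API, not code from this diff):

    use sha2::{Digest, Sha256, Sha512};

    fn main() {
        // Incremental hashing; the compression backend (soft, asm, or the new
        // aarch64 intrinsics path) is chosen internally and is invisible here.
        let mut hasher = Sha256::new();
        hasher.update(b"hello world");
        let digest256 = hasher.finalize();
        assert_eq!(digest256.len(), 32);

        // One-shot convenience call, same idea for SHA-512.
        let digest512 = Sha512::digest(b"hello world");
        assert_eq!(digest512.len(), 64);
    }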
diff --git a/vendor/sha2/src/lib.rs b/vendor/sha2/src/lib.rs
index 9082fc5b8..a3482e84a 100644
--- a/vendor/sha2/src/lib.rs
+++ b/vendor/sha2/src/lib.rs
@@ -6,7 +6,8 @@
 //! Algorithmically, there are only 2 core algorithms: SHA-256 and SHA-512.
 //! All other algorithms are just applications of these with different initial
 //! hash values, and truncated to different digest bit lengths. The first two
-//! algorithms in the list are based on SHA-256, while the last three on SHA-512.
+//! algorithms in the list are based on SHA-256, while the last four are based
+//! on SHA-512.
 //!
 //! # Usage
 //!
diff --git a/vendor/sha2/src/sha256/aarch64.rs b/vendor/sha2/src/sha256/aarch64.rs
index 7eaa2de73..9d220a311 100644
--- a/vendor/sha2/src/sha256/aarch64.rs
+++ b/vendor/sha2/src/sha256/aarch64.rs
@@ -1,15 +1,159 @@
 //! SHA-256 `aarch64` backend.
 
+// Implementation adapted from mbedtls.
+
 // TODO: stdarch intrinsics: RustCrypto/hashes#257
 
+use core::arch::{aarch64::*, asm};
+
+use crate::consts::K32;
+
 cpufeatures::new!(sha2_hwcap, "sha2");
 
 pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
     // TODO: Replace with https://github.com/rust-lang/rfcs/pull/2725
     // after stabilization
     if sha2_hwcap::get() {
-        sha2_asm::compress256(state, blocks);
+        unsafe { sha256_compress(state, blocks) }
     } else {
         super::soft::compress(state, blocks);
     }
 }
+
+#[target_feature(enable = "sha2")]
+unsafe fn sha256_compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
+    // SAFETY: Requires the sha2 feature.
+
+    // Load state into vectors.
+    let mut abcd = vld1q_u32(state[0..4].as_ptr());
+    let mut efgh = vld1q_u32(state[4..8].as_ptr());
+
+    // Iterate through the message blocks.
+    for block in blocks {
+        // Keep original state values.
+        let abcd_orig = abcd;
+        let efgh_orig = efgh;
+
+        // Load the message block into vectors, assuming little endianness.
+        let mut s0 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(block[0..16].as_ptr())));
+        let mut s1 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(block[16..32].as_ptr())));
+        let mut s2 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(block[32..48].as_ptr())));
+        let mut s3 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(block[48..64].as_ptr())));
+
+        // Rounds 0 to 3
+        let mut tmp = vaddq_u32(s0, vld1q_u32(&K32[0]));
+        let mut abcd_prev = abcd;
+        abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
+        efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);
+
+        // Rounds 4 to 7
+        tmp = vaddq_u32(s1, vld1q_u32(&K32[4]));
+        abcd_prev = abcd;
+        abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
+        efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);
+
+        // Rounds 8 to 11
+        tmp = vaddq_u32(s2, vld1q_u32(&K32[8]));
+        abcd_prev = abcd;
+        abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
+        efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);
+
+        // Rounds 12 to 15
+        tmp = vaddq_u32(s3, vld1q_u32(&K32[12]));
+        abcd_prev = abcd;
+        abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
+        efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);
+
+        for t in (16..64).step_by(16) {
+            // Rounds t to t + 3
+            s0 = vsha256su1q_u32(vsha256su0q_u32(s0, s1), s2, s3);
+            tmp = vaddq_u32(s0, vld1q_u32(&K32[t]));
+            abcd_prev = abcd;
+            abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
+            efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);
+
+            // Rounds t + 4 to t + 7
+            s1 = vsha256su1q_u32(vsha256su0q_u32(s1, s2), s3, s0);
+            tmp = vaddq_u32(s1, vld1q_u32(&K32[t + 4]));
+            abcd_prev = abcd;
+            abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
+            efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);
+
+            // Rounds t + 8 to t + 11
+            s2 = vsha256su1q_u32(vsha256su0q_u32(s2, s3), s0, s1);
+            tmp = vaddq_u32(s2, vld1q_u32(&K32[t + 8]));
+            abcd_prev = abcd;
+            abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
+            efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);
+
+            // Rounds t + 12 to t + 15
+            s3 = vsha256su1q_u32(vsha256su0q_u32(s3, s0), s1, s2);
+            tmp = vaddq_u32(s3, vld1q_u32(&K32[t + 12]));
+            abcd_prev = abcd;
+            abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
+            efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);
+        }
+
+        // Add the block-specific state to the original state.
+        abcd = vaddq_u32(abcd, abcd_orig);
+        efgh = vaddq_u32(efgh, efgh_orig);
+    }
+
+    // Store vectors into state.
+    vst1q_u32(state[0..4].as_mut_ptr(), abcd);
+    vst1q_u32(state[4..8].as_mut_ptr(), efgh);
+}
+
+// TODO remove these polyfills once SHA2 intrinsics land
+
+#[inline(always)]
+unsafe fn vsha256hq_u32(
+    mut hash_efgh: uint32x4_t,
+    hash_abcd: uint32x4_t,
+    wk: uint32x4_t,
+) -> uint32x4_t {
+    asm!(
+        "SHA256H {:q}, {:q}, {:v}.4S",
+        inout(vreg) hash_efgh, in(vreg) hash_abcd, in(vreg) wk,
+        options(pure, nomem, nostack, preserves_flags)
+    );
+    hash_efgh
+}
+
+#[inline(always)]
+unsafe fn vsha256h2q_u32(
+    mut hash_efgh: uint32x4_t,
+    hash_abcd: uint32x4_t,
+    wk: uint32x4_t,
+) -> uint32x4_t {
+    asm!(
+        "SHA256H2 {:q}, {:q}, {:v}.4S",
+        inout(vreg) hash_efgh, in(vreg) hash_abcd, in(vreg) wk,
+        options(pure, nomem, nostack, preserves_flags)
+    );
+    hash_efgh
+}
+
+#[inline(always)]
+unsafe fn vsha256su0q_u32(mut w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t {
+    asm!(
+        "SHA256SU0 {:v}.4S, {:v}.4S",
+        inout(vreg) w0_3, in(vreg) w4_7,
+        options(pure, nomem, nostack, preserves_flags)
+    );
+    w0_3
+}
+
+#[inline(always)]
+unsafe fn vsha256su1q_u32(
+    mut tw0_3: uint32x4_t,
+    w8_11: uint32x4_t,
+    w12_15: uint32x4_t,
+) -> uint32x4_t {
+    asm!(
+        "SHA256SU1 {:v}.4S, {:v}.4S, {:v}.4S",
+        inout(vreg) tw0_3, in(vreg) w8_11, in(vreg) w12_15,
+        options(pure, nomem, nostack, preserves_flags)
+    );
+    tw0_3
+}
diff --git a/vendor/sha2/src/sha512.rs b/vendor/sha2/src/sha512.rs
index e71fdfa43..af4178c0b 100644
--- a/vendor/sha2/src/sha512.rs
+++ b/vendor/sha2/src/sha512.rs
@@ -15,6 +15,10 @@ cfg_if::cfg_if! {
         }
         mod x86;
         use x86::compress;
+    } else if #[cfg(all(feature = "asm", target_arch = "aarch64"))] {
+        mod soft;
+        mod aarch64;
+        use aarch64::compress;
     } else {
         mod soft;
         use soft::compress;
diff --git a/vendor/sha2/src/sha512/aarch64.rs b/vendor/sha2/src/sha512/aarch64.rs
new file mode 100644
index 000000000..fbf441c21
--- /dev/null
+++ b/vendor/sha2/src/sha512/aarch64.rs
@@ -0,0 +1,235 @@
+// Implementation adapted from mbedtls.
+
+use core::arch::{aarch64::*, asm};
+
+use crate::consts::K64;
+
+cpufeatures::new!(sha3_hwcap, "sha3");
+
+pub fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) {
+    // TODO: Replace with https://github.com/rust-lang/rfcs/pull/2725
+    // after stabilization
+    if sha3_hwcap::get() {
+        unsafe { sha512_compress(state, blocks) }
+    } else {
+        super::soft::compress(state, blocks);
+    }
+}
+
+#[target_feature(enable = "sha3")]
+unsafe fn sha512_compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) {
+    // SAFETY: Requires the sha3 feature.
+
+    // Load state into vectors.
+    let mut ab = vld1q_u64(state[0..2].as_ptr());
+    let mut cd = vld1q_u64(state[2..4].as_ptr());
+    let mut ef = vld1q_u64(state[4..6].as_ptr());
+    let mut gh = vld1q_u64(state[6..8].as_ptr());
+
+    // Iterate through the message blocks.
+    for block in blocks {
+        // Keep original state values.
+        let ab_orig = ab;
+        let cd_orig = cd;
+        let ef_orig = ef;
+        let gh_orig = gh;
+
+        // Load the message block into vectors, assuming little endianness.
+        let mut s0 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[0..16].as_ptr())));
+        let mut s1 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[16..32].as_ptr())));
+        let mut s2 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[32..48].as_ptr())));
+        let mut s3 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[48..64].as_ptr())));
+        let mut s4 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[64..80].as_ptr())));
+        let mut s5 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[80..96].as_ptr())));
+        let mut s6 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[96..112].as_ptr())));
+        let mut s7 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[112..128].as_ptr())));
+
+        // Rounds 0 and 1
+        let mut initial_sum = vaddq_u64(s0, vld1q_u64(&K64[0]));
+        let mut sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), gh);
+        let mut intermed = vsha512hq_u64(sum, vextq_u64(ef, gh, 1), vextq_u64(cd, ef, 1));
+        gh = vsha512h2q_u64(intermed, cd, ab);
+        cd = vaddq_u64(cd, intermed);
+
+        // Rounds 2 and 3
+        initial_sum = vaddq_u64(s1, vld1q_u64(&K64[2]));
+        sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ef);
+        intermed = vsha512hq_u64(sum, vextq_u64(cd, ef, 1), vextq_u64(ab, cd, 1));
+        ef = vsha512h2q_u64(intermed, ab, gh);
+        ab = vaddq_u64(ab, intermed);
+
+        // Rounds 4 and 5
+        initial_sum = vaddq_u64(s2, vld1q_u64(&K64[4]));
+        sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), cd);
+        intermed = vsha512hq_u64(sum, vextq_u64(ab, cd, 1), vextq_u64(gh, ab, 1));
+        cd = vsha512h2q_u64(intermed, gh, ef);
+        gh = vaddq_u64(gh, intermed);
+
+        // Rounds 6 and 7
+        initial_sum = vaddq_u64(s3, vld1q_u64(&K64[6]));
+        sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ab);
+        intermed = vsha512hq_u64(sum, vextq_u64(gh, ab, 1), vextq_u64(ef, gh, 1));
+        ab = vsha512h2q_u64(intermed, ef, cd);
+        ef = vaddq_u64(ef, intermed);
+
+        // Rounds 8 and 9
+        initial_sum = vaddq_u64(s4, vld1q_u64(&K64[8]));
+        sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), gh);
+        intermed = vsha512hq_u64(sum, vextq_u64(ef, gh, 1), vextq_u64(cd, ef, 1));
+        gh = vsha512h2q_u64(intermed, cd, ab);
+        cd = vaddq_u64(cd, intermed);
+
+        // Rounds 10 and 11
+        initial_sum = vaddq_u64(s5, vld1q_u64(&K64[10]));
+        sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ef);
+        intermed = vsha512hq_u64(sum, vextq_u64(cd, ef, 1), vextq_u64(ab, cd, 1));
+        ef = vsha512h2q_u64(intermed, ab, gh);
+        ab = vaddq_u64(ab, intermed);
+
+        // Rounds 12 and 13
+        initial_sum = vaddq_u64(s6, vld1q_u64(&K64[12]));
+        sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), cd);
+        intermed = vsha512hq_u64(sum, vextq_u64(ab, cd, 1), vextq_u64(gh, ab, 1));
+        cd = vsha512h2q_u64(intermed, gh, ef);
+        gh = vaddq_u64(gh, intermed);
+
+        // Rounds 14 and 15
+        initial_sum = vaddq_u64(s7, vld1q_u64(&K64[14]));
+        sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ab);
+        intermed = vsha512hq_u64(sum, vextq_u64(gh, ab, 1), vextq_u64(ef, gh, 1));
+        ab = vsha512h2q_u64(intermed, ef, cd);
+        ef = vaddq_u64(ef, intermed);
+
+        for t in (16..80).step_by(16) {
+            // Rounds t and t + 1
+            s0 = vsha512su1q_u64(vsha512su0q_u64(s0, s1), s7, vextq_u64(s4, s5, 1));
+            initial_sum = vaddq_u64(s0, vld1q_u64(&K64[t]));
+            sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), gh);
+            intermed = vsha512hq_u64(sum, vextq_u64(ef, gh, 1), vextq_u64(cd, ef, 1));
+            gh = vsha512h2q_u64(intermed, cd, ab);
+            cd = vaddq_u64(cd, intermed);
+
+            // Rounds t + 2 and t + 3
+            s1 = vsha512su1q_u64(vsha512su0q_u64(s1, s2), s0, vextq_u64(s5, s6, 1));
+            initial_sum = vaddq_u64(s1, vld1q_u64(&K64[t + 2]));
+            sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ef);
+            intermed = vsha512hq_u64(sum, vextq_u64(cd, ef, 1), vextq_u64(ab, cd, 1));
+            ef = vsha512h2q_u64(intermed, ab, gh);
+            ab = vaddq_u64(ab, intermed);
+
+            // Rounds t + 4 and t + 5
+            s2 = vsha512su1q_u64(vsha512su0q_u64(s2, s3), s1, vextq_u64(s6, s7, 1));
+            initial_sum = vaddq_u64(s2, vld1q_u64(&K64[t + 4]));
+            sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), cd);
+            intermed = vsha512hq_u64(sum, vextq_u64(ab, cd, 1), vextq_u64(gh, ab, 1));
+            cd = vsha512h2q_u64(intermed, gh, ef);
+            gh = vaddq_u64(gh, intermed);
+
+            // Rounds t + 6 and t + 7
+            s3 = vsha512su1q_u64(vsha512su0q_u64(s3, s4), s2, vextq_u64(s7, s0, 1));
+            initial_sum = vaddq_u64(s3, vld1q_u64(&K64[t + 6]));
+            sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ab);
+            intermed = vsha512hq_u64(sum, vextq_u64(gh, ab, 1), vextq_u64(ef, gh, 1));
+            ab = vsha512h2q_u64(intermed, ef, cd);
+            ef = vaddq_u64(ef, intermed);
+
+            // Rounds t + 8 and t + 9
+            s4 = vsha512su1q_u64(vsha512su0q_u64(s4, s5), s3, vextq_u64(s0, s1, 1));
+            initial_sum = vaddq_u64(s4, vld1q_u64(&K64[t + 8]));
+            sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), gh);
+            intermed = vsha512hq_u64(sum, vextq_u64(ef, gh, 1), vextq_u64(cd, ef, 1));
+            gh = vsha512h2q_u64(intermed, cd, ab);
+            cd = vaddq_u64(cd, intermed);
+
+            // Rounds t + 10 and t + 11
+            s5 = vsha512su1q_u64(vsha512su0q_u64(s5, s6), s4, vextq_u64(s1, s2, 1));
+            initial_sum = vaddq_u64(s5, vld1q_u64(&K64[t + 10]));
+            sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ef);
+            intermed = vsha512hq_u64(sum, vextq_u64(cd, ef, 1), vextq_u64(ab, cd, 1));
+            ef = vsha512h2q_u64(intermed, ab, gh);
+            ab = vaddq_u64(ab, intermed);
+
+            // Rounds t + 12 and t + 13
+            s6 = vsha512su1q_u64(vsha512su0q_u64(s6, s7), s5, vextq_u64(s2, s3, 1));
+            initial_sum = vaddq_u64(s6, vld1q_u64(&K64[t + 12]));
+            sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), cd);
+            intermed = vsha512hq_u64(sum, vextq_u64(ab, cd, 1), vextq_u64(gh, ab, 1));
+            cd = vsha512h2q_u64(intermed, gh, ef);
+            gh = vaddq_u64(gh, intermed);
+
+            // Rounds t + 14 and t + 15
+            s7 = vsha512su1q_u64(vsha512su0q_u64(s7, s0), s6, vextq_u64(s3, s4, 1));
+            initial_sum = vaddq_u64(s7, vld1q_u64(&K64[t + 14]));
+            sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ab);
+            intermed = vsha512hq_u64(sum, vextq_u64(gh, ab, 1), vextq_u64(ef, gh, 1));
+            ab = vsha512h2q_u64(intermed, ef, cd);
+            ef = vaddq_u64(ef, intermed);
+        }
+
+        // Add the block-specific state to the original state.
+        ab = vaddq_u64(ab, ab_orig);
+        cd = vaddq_u64(cd, cd_orig);
+        ef = vaddq_u64(ef, ef_orig);
+        gh = vaddq_u64(gh, gh_orig);
+    }
+
+    // Store vectors into state.
+    vst1q_u64(state[0..2].as_mut_ptr(), ab);
+    vst1q_u64(state[2..4].as_mut_ptr(), cd);
+    vst1q_u64(state[4..6].as_mut_ptr(), ef);
+    vst1q_u64(state[6..8].as_mut_ptr(), gh);
+}
+
+// TODO remove these polyfills once SHA3 intrinsics land
+
+#[inline(always)]
+unsafe fn vsha512hq_u64(
+    mut hash_ed: uint64x2_t,
+    hash_gf: uint64x2_t,
+    kwh_kwh2: uint64x2_t,
+) -> uint64x2_t {
+    asm!(
+        "SHA512H {:q}, {:q}, {:v}.2D",
+        inout(vreg) hash_ed, in(vreg) hash_gf, in(vreg) kwh_kwh2,
+        options(pure, nomem, nostack, preserves_flags)
+    );
+    hash_ed
+}
+
+#[inline(always)]
+unsafe fn vsha512h2q_u64(
+    mut sum_ab: uint64x2_t,
+    hash_c_: uint64x2_t,
+    hash_ab: uint64x2_t,
+) -> uint64x2_t {
+    asm!(
+        "SHA512H2 {:q}, {:q}, {:v}.2D",
+        inout(vreg) sum_ab, in(vreg) hash_c_, in(vreg) hash_ab,
+        options(pure, nomem, nostack, preserves_flags)
+    );
+    sum_ab
+}
+
+#[inline(always)]
+unsafe fn vsha512su0q_u64(mut w0_1: uint64x2_t, w2_: uint64x2_t) -> uint64x2_t {
+    asm!(
+        "SHA512SU0 {:v}.2D, {:v}.2D",
+        inout(vreg) w0_1, in(vreg) w2_,
+        options(pure, nomem, nostack, preserves_flags)
+    );
+    w0_1
+}
+
+#[inline(always)]
+unsafe fn vsha512su1q_u64(
+    mut s01_s02: uint64x2_t,
+    w14_15: uint64x2_t,
+    w9_10: uint64x2_t,
+) -> uint64x2_t {
+    asm!(
+        "SHA512SU1 {:v}.2D, {:v}.2D, {:v}.2D",
+        inout(vreg) s01_s02, in(vreg) w14_15, in(vreg) w9_10,
+        options(pure, nomem, nostack, preserves_flags)
+    );
+    s01_s02
+}
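Both new backends follow the same runtime-dispatch pattern: `cpufeatures::new!` defines a lazily cached CPU-feature check, and `compress` branches between the intrinsics path and the portable `soft` fallback. A stand-alone sketch of that pattern, assuming an aarch64 target with the cpufeatures crate as a dependency (the names `compress_hw` and `compress_soft` are placeholders, not code from this diff):

    // Illustrative sketch of the dispatch pattern used by the new backends.
    cpufeatures::new!(sha2_hwcap, "sha2");

    pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
        if sha2_hwcap::get() {
            // CPU advertises the SHA-2 extension: take the accelerated path.
            // SAFETY: only reached when the required target feature is present.
            unsafe { compress_hw(state, blocks) }
        } else {
            // Portable fallback, always available.
            compress_soft(state, blocks);
        }
    }

    #[target_feature(enable = "sha2")]
    unsafe fn compress_hw(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
        // A hardware-specific implementation would go here; the placeholder
        // simply defers to the portable code.
        compress_soft(state, blocks);
    }

    fn compress_soft(_state: &mut [u32; 8], _blocks: &[[u8; 64]]) {
        // Portable implementation would go here.
    }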