Diffstat (limited to 'rust/vendor/aes/src/armv8')
-rw-r--r--   rust/vendor/aes/src/armv8/decrypt.rs |  72
-rw-r--r--   rust/vendor/aes/src/armv8/encrypt.rs |  72
-rw-r--r--   rust/vendor/aes/src/armv8/expand.rs  |  77
-rw-r--r--   rust/vendor/aes/src/armv8/hazmat.rs  | 104
4 files changed, 325 insertions, 0 deletions
diff --git a/rust/vendor/aes/src/armv8/decrypt.rs b/rust/vendor/aes/src/armv8/decrypt.rs
new file mode 100644
index 0000000..cb84193
--- /dev/null
+++ b/rust/vendor/aes/src/armv8/decrypt.rs
@@ -0,0 +1,72 @@
+//! AES decryption support.
+
+use crate::{Block, ParBlocks};
+use core::arch::aarch64::*;
+
+/// Perform AES decryption using the given expanded keys.
+#[target_feature(enable = "aes")]
+#[target_feature(enable = "neon")]
+pub(super) unsafe fn decrypt<const N: usize>(expanded_keys: &[uint8x16_t; N], block: &mut Block) {
+    let rounds = N - 1;
+    assert!(rounds == 10 || rounds == 12 || rounds == 14);
+
+    let mut state = vld1q_u8(block.as_ptr());
+
+    for k in expanded_keys.iter().take(rounds - 1) {
+        // AES single round decryption
+        state = vaesdq_u8(state, *k);
+
+        // AES inverse mix columns
+        state = vaesimcq_u8(state);
+    }
+
+    // AES single round decryption
+    state = vaesdq_u8(state, expanded_keys[rounds - 1]);
+
+    // Final add (bitwise XOR)
+    state = veorq_u8(state, expanded_keys[rounds]);
+
+    vst1q_u8(block.as_mut_ptr(), state);
+}
+
+/// Perform parallel AES decryption 8-blocks-at-a-time using the given expanded keys.
+#[target_feature(enable = "aes")]
+#[target_feature(enable = "neon")]
+pub(super) unsafe fn decrypt8<const N: usize>(
+    expanded_keys: &[uint8x16_t; N],
+    blocks: &mut ParBlocks,
+) {
+    let rounds = N - 1;
+    assert!(rounds == 10 || rounds == 12 || rounds == 14);
+
+    let mut state = [
+        vld1q_u8(blocks[0].as_ptr()),
+        vld1q_u8(blocks[1].as_ptr()),
+        vld1q_u8(blocks[2].as_ptr()),
+        vld1q_u8(blocks[3].as_ptr()),
+        vld1q_u8(blocks[4].as_ptr()),
+        vld1q_u8(blocks[5].as_ptr()),
+        vld1q_u8(blocks[6].as_ptr()),
+        vld1q_u8(blocks[7].as_ptr()),
+    ];
+
+    for k in expanded_keys.iter().take(rounds - 1) {
+        for i in 0..8 {
+            // AES single round decryption
+            state[i] = vaesdq_u8(state[i], *k);
+
+            // AES inverse mix columns
+            state[i] = vaesimcq_u8(state[i]);
+        }
+    }
+
+    for i in 0..8 {
+        // AES single round decryption
+        state[i] = vaesdq_u8(state[i], expanded_keys[rounds - 1]);
+
+        // Final add (bitwise XOR)
+        state[i] = veorq_u8(state[i], expanded_keys[rounds]);
+
+        vst1q_u8(blocks[i].as_mut_ptr(), state[i]);
+    }
+}
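As a usage sketch (not part of this diff): from the parent `armv8` module, decryption is a three-step affair: expand the key, invert the schedule, then call `decrypt`. The `expand_key`/`inv_expanded_keys` helpers appear in `expand.rs` below; the runtime feature check via `std` is an illustrative assumption, since the unsafe functions require the `aes`/`neon` target features.

    // Hypothetical caller in armv8/mod.rs (AES-128: 10 rounds, 11 round keys).
    fn aes128_decrypt_block(key: &[u8; 16], block: &mut crate::Block) {
        // The intrinsics demand the `aes`/`neon` features; verify before calling.
        assert!(std::arch::is_aarch64_feature_detected!("aes"));

        let mut keys = expand::expand_key::<16, 11>(key);

        // Decryption consumes the schedule reversed, with inverse MixColumns
        // applied to the middle keys; `inv_expanded_keys` handles both.
        expand::inv_expanded_keys(&mut keys);

        // Safety: feature availability was checked above.
        unsafe { decrypt::decrypt(&keys, block) };
    }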
diff --git a/rust/vendor/aes/src/armv8/encrypt.rs b/rust/vendor/aes/src/armv8/encrypt.rs
new file mode 100644
index 0000000..8464173
--- /dev/null
+++ b/rust/vendor/aes/src/armv8/encrypt.rs
@@ -0,0 +1,72 @@
+//! AES encryption support.
+
+use crate::{Block, ParBlocks};
+use core::arch::aarch64::*;
+
+/// Perform AES encryption using the given expanded keys.
+#[target_feature(enable = "aes")]
+#[target_feature(enable = "neon")]
+pub(super) unsafe fn encrypt<const N: usize>(expanded_keys: &[uint8x16_t; N], block: &mut Block) {
+    let rounds = N - 1;
+    assert!(rounds == 10 || rounds == 12 || rounds == 14);
+
+    let mut state = vld1q_u8(block.as_ptr());
+
+    for k in expanded_keys.iter().take(rounds - 1) {
+        // AES single round encryption
+        state = vaeseq_u8(state, *k);
+
+        // AES mix columns
+        state = vaesmcq_u8(state);
+    }
+
+    // AES single round encryption
+    state = vaeseq_u8(state, expanded_keys[rounds - 1]);
+
+    // Final add (bitwise XOR)
+    state = veorq_u8(state, expanded_keys[rounds]);
+
+    vst1q_u8(block.as_mut_ptr(), state);
+}
+
+/// Perform parallel AES encryption 8-blocks-at-a-time using the given expanded keys.
+#[target_feature(enable = "aes")]
+#[target_feature(enable = "neon")]
+pub(super) unsafe fn encrypt8<const N: usize>(
+    expanded_keys: &[uint8x16_t; N],
+    blocks: &mut ParBlocks,
+) {
+    let rounds = N - 1;
+    assert!(rounds == 10 || rounds == 12 || rounds == 14);
+
+    let mut state = [
+        vld1q_u8(blocks[0].as_ptr()),
+        vld1q_u8(blocks[1].as_ptr()),
+        vld1q_u8(blocks[2].as_ptr()),
+        vld1q_u8(blocks[3].as_ptr()),
+        vld1q_u8(blocks[4].as_ptr()),
+        vld1q_u8(blocks[5].as_ptr()),
+        vld1q_u8(blocks[6].as_ptr()),
+        vld1q_u8(blocks[7].as_ptr()),
+    ];
+
+    for k in expanded_keys.iter().take(rounds - 1) {
+        for i in 0..8 {
+            // AES single round encryption
+            state[i] = vaeseq_u8(state[i], *k);
+
+            // AES mix columns
+            state[i] = vaesmcq_u8(state[i]);
+        }
+    }
+
+    for i in 0..8 {
+        // AES single round encryption
+        state[i] = vaeseq_u8(state[i], expanded_keys[rounds - 1]);
+
+        // Final add (bitwise XOR)
+        state[i] = veorq_u8(state[i], expanded_keys[rounds]);
+
+        vst1q_u8(blocks[i].as_mut_ptr(), state[i]);
+    }
+}
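The 8-way variant has the same calling shape; a sketch under the same assumptions as above (hypothetical caller in the `armv8` module, AES-128 keys, `std` feature detection). Running eight independent blocks through each round gives the core eight separate dependency chains to overlap, which helps hide the latency of the AESE/AESMC instruction pairs.

    // Hypothetical caller; ParBlocks is the crate's eight-block array type.
    fn aes128_encrypt_par(key: &[u8; 16], blocks: &mut crate::ParBlocks) {
        assert!(std::arch::is_aarch64_feature_detected!("aes"));
        let keys = expand::expand_key::<16, 11>(key);
        // Safety: feature availability was checked above.
        unsafe { encrypt::encrypt8(&keys, blocks) };
    }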
diff --git a/rust/vendor/aes/src/armv8/expand.rs b/rust/vendor/aes/src/armv8/expand.rs
new file mode 100644
index 0000000..2e26e39
--- /dev/null
+++ b/rust/vendor/aes/src/armv8/expand.rs
@@ -0,0 +1,77 @@
+//! AES key expansion support.
+
+use core::{arch::aarch64::*, convert::TryInto, mem, slice};
+
+/// There are 4 AES words in a block.
+const BLOCK_WORDS: usize = 4;
+
+/// The AES (née Rijndael) notion of a word is always 32 bits, or 4 bytes.
+const WORD_SIZE: usize = 4;
+
+/// AES round constants.
+const ROUND_CONSTS: [u32; 10] = [0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36];
+
+/// AES key expansion.
+// TODO(tarcieri): big endian support?
+#[inline]
+pub(super) fn expand_key<const L: usize, const N: usize>(key: &[u8; L]) -> [uint8x16_t; N] {
+    assert!((L == 16 && N == 11) || (L == 24 && N == 13) || (L == 32 && N == 15));
+
+    let mut expanded_keys: [uint8x16_t; N] = unsafe { mem::zeroed() };
+
+    // TODO(tarcieri): construct expanded keys using `vreinterpretq_u8_u32`
+    let ek_words = unsafe {
+        slice::from_raw_parts_mut(expanded_keys.as_mut_ptr() as *mut u32, N * BLOCK_WORDS)
+    };
+
+    for (i, chunk) in key.chunks_exact(WORD_SIZE).enumerate() {
+        ek_words[i] = u32::from_ne_bytes(chunk.try_into().unwrap());
+    }
+
+    // From "The Rijndael Block Cipher" Section 4.1:
+    // > The number of columns of the Cipher Key is denoted by `Nk` and is
+    // > equal to the key length divided by 32 [bits].
+    let nk = L / WORD_SIZE;
+
+    for i in nk..(N * BLOCK_WORDS) {
+        let mut word = ek_words[i - 1];
+
+        if i % nk == 0 {
+            word = sub_word(word).rotate_right(8) ^ ROUND_CONSTS[i / nk - 1];
+        } else if nk > 6 && i % nk == 4 {
+            word = sub_word(word);
+        }
+
+        ek_words[i] = ek_words[i - nk] ^ word;
+    }
+
+    expanded_keys
+}
+
+/// Compute inverse expanded keys (for decryption).
+///
+/// This is the reverse of the encryption keys, with the Inverse Mix Columns
+/// operation applied to all but the first and last expanded key.
+#[inline]
+pub(super) fn inv_expanded_keys<const N: usize>(expanded_keys: &mut [uint8x16_t; N]) {
+    assert!(N == 11 || N == 13 || N == 15);
+
+    for ek in expanded_keys.iter_mut().take(N - 1).skip(1) {
+        unsafe { *ek = vaesimcq_u8(*ek) }
+    }
+
+    expanded_keys.reverse();
+}
+
+/// Sub bytes for a single AES word: used for key expansion.
+#[inline(always)]
+fn sub_word(input: u32) -> u32 {
+    unsafe {
+        let input = vreinterpretq_u8_u32(vdupq_n_u32(input));
+
+        // AES single round encryption (zero round key); with the word duplicated into every column, ShiftRows is a no-op, leaving just SubBytes
+        let sub_input = vaeseq_u8(input, vdupq_n_u8(0));
+
+        vgetq_lane_u32(vreinterpretq_u32_u8(sub_input), 0)
+    }
+}
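A natural sanity check for `expand_key` is the AES-128 worked example from FIPS-197 Appendix A.1; a hypothetical test sketch follows (not part of the diff). Comparing at the byte level works because the words are packed and unpacked with the same native endianness on a little-endian machine, which is also why the code carries a big-endian TODO.

    #[test]
    fn aes128_round_key_1_matches_fips197() {
        // Cipher key from FIPS-197 Appendix A.1.
        let key: [u8; 16] = [
            0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6,
            0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c,
        ];
        let ek = expand_key::<16, 11>(&key);

        // Round key 1, i.e. w[4..8] = a0fafe17 88542cb1 23a33939 2a6c7605.
        let expected: [u8; 16] = [
            0xa0, 0xfa, 0xfe, 0x17, 0x88, 0x54, 0x2c, 0xb1,
            0x23, 0xa3, 0x39, 0x39, 0x2a, 0x6c, 0x76, 0x05,
        ];
        let mut actual = [0u8; 16];
        unsafe { vst1q_u8(actual.as_mut_ptr(), ek[1]) };
        assert_eq!(actual, expected);
    }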
diff --git a/rust/vendor/aes/src/armv8/hazmat.rs b/rust/vendor/aes/src/armv8/hazmat.rs
new file mode 100644
index 0000000..022db3a
--- /dev/null
+++ b/rust/vendor/aes/src/armv8/hazmat.rs
@@ -0,0 +1,104 @@
+//! Low-level "hazmat" AES functions: ARMv8 Cryptography Extensions support.
+//!
+//! Note: this isn't actually used in the `Aes128`/`Aes192`/`Aes256`
+//! implementations in this crate, but instead provides raw hardware-accelerated
+//! access to the AES round function gated under the `hazmat` crate feature.
+
+use crate::{Block, ParBlocks};
+use core::arch::aarch64::*;
+
+/// AES cipher (encrypt) round function.
+#[allow(clippy::cast_ptr_alignment)]
+#[target_feature(enable = "aes")]
+pub(crate) unsafe fn cipher_round(block: &mut Block, round_key: &Block) {
+    let b = vld1q_u8(block.as_ptr());
+    let k = vld1q_u8(round_key.as_ptr());
+
+    // AES single round encryption with an all-zero round key (the real round key is XORed in below)
+    let mut state = vaeseq_u8(b, vdupq_n_u8(0));
+
+    // AES mix columns (the `vaeseq_u8` instruction otherwise omits this step)
+    state = vaesmcq_u8(state);
+
+    // AES add round key (bitwise XOR)
+    state = veorq_u8(state, k);
+
+    vst1q_u8(block.as_mut_ptr(), state);
+}
+
+/// AES cipher (encrypt) round function: parallel version.
+#[allow(clippy::cast_ptr_alignment)]
+#[target_feature(enable = "aes")]
+pub(crate) unsafe fn cipher_round_par(blocks: &mut ParBlocks, round_keys: &ParBlocks) {
+    for i in 0..8 {
+        let mut state = vld1q_u8(blocks[i].as_ptr());
+
+        // AES single round encryption
+        state = vaeseq_u8(state, vdupq_n_u8(0));
+
+        // AES mix columns
+        state = vaesmcq_u8(state);
+
+        // AES add round key (bitwise XOR)
+        state = veorq_u8(state, vld1q_u8(round_keys[i].as_ptr()));
+
+        vst1q_u8(blocks[i].as_mut_ptr(), state);
+    }
+}
+
+/// AES equivalent inverse cipher (decrypt) round function.
+#[allow(clippy::cast_ptr_alignment)]
+#[target_feature(enable = "aes")]
+pub(crate) unsafe fn equiv_inv_cipher_round(block: &mut Block, round_key: &Block) {
+    let b = vld1q_u8(block.as_ptr());
+    let k = vld1q_u8(round_key.as_ptr());
+
+    // AES single round decryption with an all-zero round key (the real round key is XORed in below)
+    let mut state = vaesdq_u8(b, vdupq_n_u8(0));
+
+    // AES inverse mix columns (the `vaesdq_u8` instruction otherwise omits this step)
+    state = vaesimcq_u8(state);
+
+    // AES add round key (bitwise XOR)
+    state = veorq_u8(state, k);
+
+    vst1q_u8(block.as_mut_ptr(), state);
+}
+
+/// AES equivalent inverse cipher (decrypt) round function: parallel version.
+#[allow(clippy::cast_ptr_alignment)]
+#[target_feature(enable = "aes")]
+pub(crate) unsafe fn equiv_inv_cipher_round_par(blocks: &mut ParBlocks, round_keys: &ParBlocks) {
+    for i in 0..8 {
+        let mut state = vld1q_u8(blocks[i].as_ptr());
+
+        // AES single round decryption with an all-zero round key (the real round key is XORed in below)
+        state = vaesdq_u8(state, vdupq_n_u8(0));
+
+        // AES inverse mix columns (the `vaesdq_u8` instruction otherwise omits this step)
+        state = vaesimcq_u8(state);
+
+        // AES add round key (bitwise XOR)
+        state = veorq_u8(state, vld1q_u8(round_keys[i].as_ptr()));
+
+        vst1q_u8(blocks[i].as_mut_ptr(), state);
+    }
+}
+
+/// AES mix columns function.
+#[allow(clippy::cast_ptr_alignment)]
+#[target_feature(enable = "aes")]
+pub(crate) unsafe fn mix_columns(block: &mut Block) {
+    let b = vld1q_u8(block.as_ptr());
+    let out = vaesmcq_u8(b);
+    vst1q_u8(block.as_mut_ptr(), out);
+}
+
+/// AES inverse mix columns function.
+#[allow(clippy::cast_ptr_alignment)]
+#[target_feature(enable = "aes")]
+pub(crate) unsafe fn inv_mix_columns(block: &mut Block) {
+    let b = vld1q_u8(block.as_ptr());
+    let out = vaesimcq_u8(b);
+    vst1q_u8(block.as_mut_ptr(), out);
+}
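Finally, the two MixColumns helpers are exact inverses, which makes for a cheap property check. A sketch, assuming `Block` is the crate's 16-byte `GenericArray` and that `aes` feature support has already been verified by the caller:

    fn mix_columns_round_trips() {
        let original = crate::Block::from([
            0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
            0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff,
        ]);
        let mut block = original.clone();
        unsafe {
            // Applying MixColumns and then its inverse must be the identity.
            mix_columns(&mut block);
            inv_mix_columns(&mut block);
        }
        assert_eq!(block, original);
    }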