Diffstat (limited to 'rust/vendor/aes/src/armv8')
-rw-r--r--   rust/vendor/aes/src/armv8/decrypt.rs |  72
-rw-r--r--   rust/vendor/aes/src/armv8/encrypt.rs |  72
-rw-r--r--   rust/vendor/aes/src/armv8/expand.rs  |  77
-rw-r--r--   rust/vendor/aes/src/armv8/hazmat.rs  | 104
4 files changed, 325 insertions, 0 deletions
diff --git a/rust/vendor/aes/src/armv8/decrypt.rs b/rust/vendor/aes/src/armv8/decrypt.rs
new file mode 100644
index 0000000..cb84193
--- /dev/null
+++ b/rust/vendor/aes/src/armv8/decrypt.rs
@@ -0,0 +1,72 @@
+//! AES decryption support.
+
+use crate::{Block, ParBlocks};
+use core::arch::aarch64::*;
+
+/// Perform AES decryption using the given expanded keys.
+#[target_feature(enable = "aes")]
+#[target_feature(enable = "neon")]
+pub(super) unsafe fn decrypt<const N: usize>(expanded_keys: &[uint8x16_t; N], block: &mut Block) {
+    let rounds = N - 1;
+    assert!(rounds == 10 || rounds == 12 || rounds == 14);
+
+    let mut state = vld1q_u8(block.as_ptr());
+
+    for k in expanded_keys.iter().take(rounds - 1) {
+        // AES single round decryption
+        state = vaesdq_u8(state, *k);
+
+        // AES inverse mix columns
+        state = vaesimcq_u8(state);
+    }
+
+    // AES single round decryption
+    state = vaesdq_u8(state, expanded_keys[rounds - 1]);
+
+    // Final add (bitwise XOR)
+    state = veorq_u8(state, expanded_keys[rounds]);
+
+    vst1q_u8(block.as_mut_ptr(), state);
+}
+
+/// Perform parallel AES decryption 8-blocks-at-a-time using the given expanded keys.
+#[target_feature(enable = "aes")]
+#[target_feature(enable = "neon")]
+pub(super) unsafe fn decrypt8<const N: usize>(
+    expanded_keys: &[uint8x16_t; N],
+    blocks: &mut ParBlocks,
+) {
+    let rounds = N - 1;
+    assert!(rounds == 10 || rounds == 12 || rounds == 14);
+
+    let mut state = [
+        vld1q_u8(blocks[0].as_ptr()),
+        vld1q_u8(blocks[1].as_ptr()),
+        vld1q_u8(blocks[2].as_ptr()),
+        vld1q_u8(blocks[3].as_ptr()),
+        vld1q_u8(blocks[4].as_ptr()),
+        vld1q_u8(blocks[5].as_ptr()),
+        vld1q_u8(blocks[6].as_ptr()),
+        vld1q_u8(blocks[7].as_ptr()),
+    ];
+
+    for k in expanded_keys.iter().take(rounds - 1) {
+        for i in 0..8 {
+            // AES single round decryption
+            state[i] = vaesdq_u8(state[i], *k);
+
+            // AES inverse mix columns
+            state[i] = vaesimcq_u8(state[i]);
+        }
+    }
+
+    for i in 0..8 {
+        // AES single round decryption
+        state[i] = vaesdq_u8(state[i], expanded_keys[rounds - 1]);
+
+        // Final add (bitwise XOR)
+        state[i] = veorq_u8(state[i], expanded_keys[rounds]);
+
+        vst1q_u8(blocks[i].as_mut_ptr(), state[i]);
+    }
+}
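As a usage sketch (not part of this diff): from the parent `armv8` module, decryption is a three-step affair: expand the key, invert the schedule, then call `decrypt`. The `expand_key`/`inv_expanded_keys` helpers appear in `expand.rs` below; the runtime feature check via `std` is an illustrative assumption, since the unsafe functions require the `aes`/`neon` target features.

    // Hypothetical caller in armv8/mod.rs (AES-128: 10 rounds, 11 round keys).
    fn aes128_decrypt_block(key: &[u8; 16], block: &mut crate::Block) {
        // The intrinsics demand the `aes`/`neon` features; verify before calling.
        assert!(std::arch::is_aarch64_feature_detected!("aes"));

        let mut keys = expand::expand_key::<16, 11>(key);

        // Decryption consumes the schedule reversed, with inverse MixColumns
        // applied to the middle keys; `inv_expanded_keys` handles both.
        expand::inv_expanded_keys(&mut keys);

        // Safety: feature availability was checked above.
        unsafe { decrypt::decrypt(&keys, block) };
    }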
diff --git a/rust/vendor/aes/src/armv8/encrypt.rs b/rust/vendor/aes/src/armv8/encrypt.rs
new file mode 100644
index 0000000..8464173
--- /dev/null
+++ b/rust/vendor/aes/src/armv8/encrypt.rs
@@ -0,0 +1,72 @@
+//! AES encryption support.
+
+use crate::{Block, ParBlocks};
+use core::arch::aarch64::*;
+
+/// Perform AES encryption using the given expanded keys.
+#[target_feature(enable = "aes")]
+#[target_feature(enable = "neon")]
+pub(super) unsafe fn encrypt<const N: usize>(expanded_keys: &[uint8x16_t; N], block: &mut Block) {
+    let rounds = N - 1;
+    assert!(rounds == 10 || rounds == 12 || rounds == 14);
+
+    let mut state = vld1q_u8(block.as_ptr());
+
+    for k in expanded_keys.iter().take(rounds - 1) {
+        // AES single round encryption
+        state = vaeseq_u8(state, *k);
+
+        // AES mix columns
+        state = vaesmcq_u8(state);
+    }
+
+    // AES single round encryption
+    state = vaeseq_u8(state, expanded_keys[rounds - 1]);
+
+    // Final add (bitwise XOR)
+    state = veorq_u8(state, expanded_keys[rounds]);
+
+    vst1q_u8(block.as_mut_ptr(), state);
+}
+
+/// Perform parallel AES encryption 8-blocks-at-a-time using the given expanded keys.
+#[target_feature(enable = "aes")]
+#[target_feature(enable = "neon")]
+pub(super) unsafe fn encrypt8<const N: usize>(
+    expanded_keys: &[uint8x16_t; N],
+    blocks: &mut ParBlocks,
+) {
+    let rounds = N - 1;
+    assert!(rounds == 10 || rounds == 12 || rounds == 14);
+
+    let mut state = [
+        vld1q_u8(blocks[0].as_ptr()),
+        vld1q_u8(blocks[1].as_ptr()),
+        vld1q_u8(blocks[2].as_ptr()),
+        vld1q_u8(blocks[3].as_ptr()),
+        vld1q_u8(blocks[4].as_ptr()),
+        vld1q_u8(blocks[5].as_ptr()),
+        vld1q_u8(blocks[6].as_ptr()),
+        vld1q_u8(blocks[7].as_ptr()),
+    ];
+
+    for k in expanded_keys.iter().take(rounds - 1) {
+        for i in 0..8 {
+            // AES single round encryption
+            state[i] = vaeseq_u8(state[i], *k);
+
+            // AES mix columns
+            state[i] = vaesmcq_u8(state[i]);
+        }
+    }
+
+    for i in 0..8 {
+        // AES single round encryption
+        state[i] = vaeseq_u8(state[i], expanded_keys[rounds - 1]);
+
+        // Final add (bitwise XOR)
+        state[i] = veorq_u8(state[i], expanded_keys[rounds]);
+
+        vst1q_u8(blocks[i].as_mut_ptr(), state[i]);
+    }
+}
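The 8-way variant has the same calling shape; a sketch under the same assumptions as above (hypothetical caller in the `armv8` module, AES-128 keys, `std` feature detection). Running eight independent blocks through each round gives the core eight separate dependency chains to overlap, which helps hide the latency of the AESE/AESMC instruction pairs.

    // Hypothetical caller; ParBlocks is the crate's eight-block array type.
    fn aes128_encrypt_par(key: &[u8; 16], blocks: &mut crate::ParBlocks) {
        assert!(std::arch::is_aarch64_feature_detected!("aes"));
        let keys = expand::expand_key::<16, 11>(key);
        // Safety: feature availability was checked above.
        unsafe { encrypt::encrypt8(&keys, blocks) };
    }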
diff --git a/rust/vendor/aes/src/armv8/expand.rs b/rust/vendor/aes/src/armv8/expand.rs
new file mode 100644
index 0000000..2e26e39
--- /dev/null
+++ b/rust/vendor/aes/src/armv8/expand.rs
@@ -0,0 +1,77 @@
+//! AES key expansion support.
+
+use core::{arch::aarch64::*, convert::TryInto, mem, slice};
+
+/// There are 4 AES words in a block.
+const BLOCK_WORDS: usize = 4;
+
+/// The AES (née Rijndael) notion of a word is always 32 bits, or 4 bytes.
+const WORD_SIZE: usize = 4;
+
+/// AES round constants.
+const ROUND_CONSTS: [u32; 10] = [0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36];
+
+/// AES key expansion.
+// TODO(tarcieri): big endian support?
+#[inline]
+pub(super) fn expand_key<const L: usize, const N: usize>(key: &[u8; L]) -> [uint8x16_t; N] {
+    assert!((L == 16 && N == 11) || (L == 24 && N == 13) || (L == 32 && N == 15));
+
+    let mut expanded_keys: [uint8x16_t; N] = unsafe { mem::zeroed() };
+
+    // TODO(tarcieri): construct expanded keys using `vreinterpretq_u8_u32`
+    let ek_words = unsafe {
+        slice::from_raw_parts_mut(expanded_keys.as_mut_ptr() as *mut u32, N * BLOCK_WORDS)
+    };
+
+    for (i, chunk) in key.chunks_exact(WORD_SIZE).enumerate() {
+        ek_words[i] = u32::from_ne_bytes(chunk.try_into().unwrap());
+    }
+
+    // From "The Rijndael Block Cipher" Section 4.1:
+    // > The number of columns of the Cipher Key is denoted by `Nk` and is
+    // > equal to the key length divided by 32 [bits].
+    let nk = L / WORD_SIZE;
+
+    for i in nk..(N * BLOCK_WORDS) {
+        let mut word = ek_words[i - 1];
+
+        if i % nk == 0 {
+            word = sub_word(word).rotate_right(8) ^ ROUND_CONSTS[i / nk - 1];
+        } else if nk > 6 && i % nk == 4 {
+            word = sub_word(word);
+        }
+
+        ek_words[i] = ek_words[i - nk] ^ word;
+    }
+
+    expanded_keys
+}
+
+/// Compute inverse expanded keys (for decryption).
+///
+/// This is the reverse of the encryption keys, with the Inverse Mix Columns
+/// operation applied to all but the first and last expanded key.
+#[inline]
+pub(super) fn inv_expanded_keys<const N: usize>(expanded_keys: &mut [uint8x16_t; N]) {
+    assert!(N == 11 || N == 13 || N == 15);
+
+    for ek in expanded_keys.iter_mut().take(N - 1).skip(1) {
+        unsafe { *ek = vaesimcq_u8(*ek) }
+    }
+
+    expanded_keys.reverse();
+}
+
+/// Sub bytes for a single AES word: used for key expansion.
+#[inline(always)]
+fn sub_word(input: u32) -> u32 {
+    unsafe {
+        let input = vreinterpretq_u8_u32(vdupq_n_u32(input));
+
+        // AES single round encryption (zero round key); with the word duplicated into every column, ShiftRows is a no-op, leaving just SubBytes
+        let sub_input = vaeseq_u8(input, vdupq_n_u8(0));
+
+        vgetq_lane_u32(vreinterpretq_u32_u8(sub_input), 0)
+    }
+}
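A natural sanity check for `expand_key` is the AES-128 worked example from FIPS-197 Appendix A.1; a hypothetical test sketch follows (not part of the diff). Comparing at the byte level works because the words are packed and unpacked with the same native endianness on a little-endian machine, which is also why the code carries a big-endian TODO.

    #[test]
    fn aes128_round_key_1_matches_fips197() {
        // Cipher key from FIPS-197 Appendix A.1.
        let key: [u8; 16] = [
            0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6,
            0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c,
        ];
        let ek = expand_key::<16, 11>(&key);

        // Round key 1, i.e. w[4..8] = a0fafe17 88542cb1 23a33939 2a6c7605.
        let expected: [u8; 16] = [
            0xa0, 0xfa, 0xfe, 0x17, 0x88, 0x54, 0x2c, 0xb1,
            0x23, 0xa3, 0x39, 0x39, 0x2a, 0x6c, 0x76, 0x05,
        ];
        let mut actual = [0u8; 16];
        unsafe { vst1q_u8(actual.as_mut_ptr(), ek[1]) };
        assert_eq!(actual, expected);
    }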
diff --git a/rust/vendor/aes/src/armv8/hazmat.rs b/rust/vendor/aes/src/armv8/hazmat.rs
new file mode 100644
index 0000000..022db3a
--- /dev/null
+++ b/rust/vendor/aes/src/armv8/hazmat.rs
@@ -0,0 +1,104 @@
+//! Low-level "hazmat" AES functions: ARMv8 Cryptography Extensions support.
+//!
+//! Note: this isn't actually used in the `Aes128`/`Aes192`/`Aes256`
+//! implementations in this crate, but instead provides raw hardware-accelerated
+//! access to the AES round function gated under the `hazmat` crate feature.
+
+use crate::{Block, ParBlocks};
+use core::arch::aarch64::*;
+
+/// AES cipher (encrypt) round function.
+#[allow(clippy::cast_ptr_alignment)]
+#[target_feature(enable = "aes")]
+pub(crate) unsafe fn cipher_round(block: &mut Block, round_key: &Block) {
+    let b = vld1q_u8(block.as_ptr());
+    let k = vld1q_u8(round_key.as_ptr());
+
+    // AES single round encryption with an all-zero round key (the real round key is XORed in below)
+    let mut state = vaeseq_u8(b, vdupq_n_u8(0));
+
+    // AES mix columns (the `vaeseq_u8` instruction otherwise omits this step)
+    state = vaesmcq_u8(state);
+
+    // AES add round key (bitwise XOR)
+    state = veorq_u8(state, k);
+
+    vst1q_u8(block.as_mut_ptr(), state);
+}
+
+/// AES cipher (encrypt) round function: parallel version.
+#[allow(clippy::cast_ptr_alignment)]
+#[target_feature(enable = "aes")]
+pub(crate) unsafe fn cipher_round_par(blocks: &mut ParBlocks, round_keys: &ParBlocks) {
+    for i in 0..8 {
+        let mut state = vld1q_u8(blocks[i].as_ptr());
+
+        // AES single round encryption
+        state = vaeseq_u8(state, vdupq_n_u8(0));
+
+        // AES mix columns
+        state = vaesmcq_u8(state);
+
+        // AES add round key (bitwise XOR)
+        state = veorq_u8(state, vld1q_u8(round_keys[i].as_ptr()));
+
+        vst1q_u8(blocks[i].as_mut_ptr(), state);
+    }
+}
+
+/// AES equivalent inverse cipher (decrypt) round function.
+#[allow(clippy::cast_ptr_alignment)]
+#[target_feature(enable = "aes")]
+pub(crate) unsafe fn equiv_inv_cipher_round(block: &mut Block, round_key: &Block) {
+    let b = vld1q_u8(block.as_ptr());
+    let k = vld1q_u8(round_key.as_ptr());
+
+    // AES single round decryption with an all-zero round key (the real round key is XORed in below)
+    let mut state = vaesdq_u8(b, vdupq_n_u8(0));
+
+    // AES inverse mix columns (the `vaesdq_u8` instruction otherwise omits this step)
+    state = vaesimcq_u8(state);
+
+    // AES add round key (bitwise XOR)
+    state = veorq_u8(state, k);
+
+    vst1q_u8(block.as_mut_ptr(), state);
+}
+
+/// AES equivalent inverse cipher (decrypt) round function: parallel version.
+#[allow(clippy::cast_ptr_alignment)]
+#[target_feature(enable = "aes")]
+pub(crate) unsafe fn equiv_inv_cipher_round_par(blocks: &mut ParBlocks, round_keys: &ParBlocks) {
+    for i in 0..8 {
+        let mut state = vld1q_u8(blocks[i].as_ptr());
+
+        // AES single round decryption with an all-zero round key (the real round key is XORed in below)
+        state = vaesdq_u8(state, vdupq_n_u8(0));
+
+        // AES inverse mix columns (the `vaesdq_u8` instruction otherwise omits this step)
+        state = vaesimcq_u8(state);
+
+        // AES add round key (bitwise XOR)
+        state = veorq_u8(state, vld1q_u8(round_keys[i].as_ptr()));
+
+        vst1q_u8(blocks[i].as_mut_ptr(), state);
+    }
+}
+
+/// AES mix columns function.
+#[allow(clippy::cast_ptr_alignment)]
+#[target_feature(enable = "aes")]
+pub(crate) unsafe fn mix_columns(block: &mut Block) {
+    let b = vld1q_u8(block.as_ptr());
+    let out = vaesmcq_u8(b);
+    vst1q_u8(block.as_mut_ptr(), out);
+}
+
+/// AES inverse mix columns function.
+#[allow(clippy::cast_ptr_alignment)]
+#[target_feature(enable = "aes")]
+pub(crate) unsafe fn inv_mix_columns(block: &mut Block) {
+    let b = vld1q_u8(block.as_ptr());
+    let out = vaesimcq_u8(b);
+    vst1q_u8(block.as_mut_ptr(), out);
+}
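Finally, the two MixColumns helpers are exact inverses, which makes for a cheap property check. A sketch, assuming `Block` is the crate's 16-byte `GenericArray` and that `aes` feature support has already been verified by the caller:

    fn mix_columns_round_trips() {
        let original = crate::Block::from([
            0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
            0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff,
        ]);
        let mut block = original.clone();
        unsafe {
            // Applying MixColumns and then its inverse must be the identity.
            mix_columns(&mut block);
            inv_mix_columns(&mut block);
        }
        assert_eq!(block, original);
    }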