author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-19 17:39:49 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-19 17:39:49 +0000
commit     a0aa2307322cd47bbf416810ac0292925e03be87 (patch)
tree       37076262a026c4b48c8a0e84f44ff9187556ca35 /rust/vendor/aes/src
parent     Initial commit. (diff)
Adding upstream version 1:7.0.3. (upstream/1%7.0.3)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'rust/vendor/aes/src')
-rw-r--r--  rust/vendor/aes/src/armv8.rs                  376
-rw-r--r--  rust/vendor/aes/src/armv8/decrypt.rs           72
-rw-r--r--  rust/vendor/aes/src/armv8/encrypt.rs           72
-rw-r--r--  rust/vendor/aes/src/armv8/expand.rs            77
-rw-r--r--  rust/vendor/aes/src/armv8/hazmat.rs           104
-rw-r--r--  rust/vendor/aes/src/autodetect.rs             259
-rw-r--r--  rust/vendor/aes/src/hazmat.rs                 166
-rw-r--r--  rust/vendor/aes/src/lib.rs                    138
-rw-r--r--  rust/vendor/aes/src/ni.rs                      45
-rw-r--r--  rust/vendor/aes/src/ni/aes128.rs              163
-rw-r--r--  rust/vendor/aes/src/ni/aes128/expand.rs        53
-rw-r--r--  rust/vendor/aes/src/ni/aes128/test_expand.rs  107
-rw-r--r--  rust/vendor/aes/src/ni/aes192.rs              169
-rw-r--r--  rust/vendor/aes/src/ni/aes192/expand.rs       108
-rw-r--r--  rust/vendor/aes/src/ni/aes192/test_expand.rs   93
-rw-r--r--  rust/vendor/aes/src/ni/aes256.rs              177
-rw-r--r--  rust/vendor/aes/src/ni/aes256/expand.rs        89
-rw-r--r--  rust/vendor/aes/src/ni/aes256/test_expand.rs  103
-rw-r--r--  rust/vendor/aes/src/ni/ctr.rs                 229
-rw-r--r--  rust/vendor/aes/src/ni/hazmat.rs               86
-rw-r--r--  rust/vendor/aes/src/ni/utils.rs                90
-rw-r--r--  rust/vendor/aes/src/soft.rs                   127
-rw-r--r--  rust/vendor/aes/src/soft/ctr.rs                17
-rw-r--r--  rust/vendor/aes/src/soft/fixslice32.rs       1485
-rw-r--r--  rust/vendor/aes/src/soft/fixslice64.rs       1540
25 files changed, 5945 insertions, 0 deletions
diff --git a/rust/vendor/aes/src/armv8.rs b/rust/vendor/aes/src/armv8.rs
new file mode 100644
index 0000000..187ac1b
--- /dev/null
+++ b/rust/vendor/aes/src/armv8.rs
@@ -0,0 +1,376 @@
+//! AES block cipher implementation using the ARMv8 Cryptography Extensions.
+//!
+//! Based on this C intrinsics implementation:
+//! <https://github.com/noloader/AES-Intrinsics/blob/master/aes-arm.c>
+//!
+//! Original C written and placed in public domain by Jeffrey Walton.
+//! Based on code from ARM, and by Johannes Schneiders, Skip Hovsmith and
+//! Barry O'Rourke for the mbedTLS project.
+
+#![allow(clippy::needless_range_loop)]
+
+#[cfg(feature = "hazmat")]
+pub(crate) mod hazmat;
+
+mod decrypt;
+mod encrypt;
+mod expand;
+
+use self::{
+ decrypt::{decrypt, decrypt8},
+ encrypt::{encrypt, encrypt8},
+ expand::{expand_key, inv_expanded_keys},
+};
+use crate::{Block, ParBlocks};
+use cipher::{
+ consts::{U16, U24, U32, U8},
+ generic_array::GenericArray,
+ BlockCipher, BlockDecrypt, BlockEncrypt, NewBlockCipher,
+};
+use core::arch::aarch64::*;
+
+macro_rules! define_aes_impl {
+ (
+ $name:ident,
+ $name_enc:ident,
+ $name_dec:ident,
+ $key_size:ty,
+ $rounds:tt,
+ $doc:expr
+ ) => {
+ #[doc=$doc]
+ #[doc = "block cipher"]
+ #[derive(Clone)]
+ pub struct $name {
+ encrypt: $name_enc,
+ decrypt: $name_dec,
+ }
+
+ impl NewBlockCipher for $name {
+ type KeySize = $key_size;
+
+ #[inline]
+ fn new(key: &GenericArray<u8, $key_size>) -> Self {
+ let encrypt = $name_enc::new(key);
+ let decrypt = $name_dec::from(&encrypt);
+ Self { encrypt, decrypt }
+ }
+ }
+
+ impl BlockCipher for $name {
+ type BlockSize = U16;
+ type ParBlocks = U8;
+ }
+
+ impl BlockEncrypt for $name {
+ #[inline]
+ fn encrypt_block(&self, block: &mut Block) {
+ self.encrypt.encrypt_block(block)
+ }
+
+ #[inline]
+ fn encrypt_par_blocks(&self, blocks: &mut ParBlocks) {
+ self.encrypt.encrypt_par_blocks(blocks)
+ }
+ }
+
+ impl BlockDecrypt for $name {
+ #[inline]
+ fn decrypt_block(&self, block: &mut Block) {
+ self.decrypt.decrypt_block(block)
+ }
+
+ #[inline]
+ fn decrypt_par_blocks(&self, blocks: &mut ParBlocks) {
+ self.decrypt.decrypt_par_blocks(blocks)
+ }
+ }
+
+ #[doc=$doc]
+ #[doc = "block cipher (encrypt-only)"]
+ #[derive(Clone)]
+ pub struct $name_enc {
+ round_keys: [uint8x16_t; $rounds],
+ }
+
+ impl NewBlockCipher for $name_enc {
+ type KeySize = $key_size;
+
+ fn new(key: &GenericArray<u8, $key_size>) -> Self {
+ Self {
+ round_keys: expand_key(key.as_ref()),
+ }
+ }
+ }
+
+ impl BlockCipher for $name_enc {
+ type BlockSize = U16;
+ type ParBlocks = U8;
+ }
+
+ impl BlockEncrypt for $name_enc {
+ fn encrypt_block(&self, block: &mut Block) {
+ unsafe { encrypt(&self.round_keys, block) }
+ }
+
+ fn encrypt_par_blocks(&self, blocks: &mut ParBlocks) {
+ unsafe { encrypt8(&self.round_keys, blocks) }
+ }
+ }
+
+ #[doc=$doc]
+ #[doc = "block cipher (decrypt-only)"]
+ #[derive(Clone)]
+ pub struct $name_dec {
+ round_keys: [uint8x16_t; $rounds],
+ }
+
+ impl NewBlockCipher for $name_dec {
+ type KeySize = $key_size;
+
+ fn new(key: &GenericArray<u8, $key_size>) -> Self {
+ $name_enc::new(key).into()
+ }
+ }
+
+ impl From<$name_enc> for $name_dec {
+ fn from(enc: $name_enc) -> $name_dec {
+ Self::from(&enc)
+ }
+ }
+
+ impl From<&$name_enc> for $name_dec {
+ fn from(enc: &$name_enc) -> $name_dec {
+ let mut round_keys = enc.round_keys;
+ inv_expanded_keys(&mut round_keys);
+ Self { round_keys }
+ }
+ }
+
+ impl BlockCipher for $name_dec {
+ type BlockSize = U16;
+ type ParBlocks = U8;
+ }
+
+ impl BlockDecrypt for $name_dec {
+ fn decrypt_block(&self, block: &mut Block) {
+ unsafe { decrypt(&self.round_keys, block) }
+ }
+
+ fn decrypt_par_blocks(&self, blocks: &mut ParBlocks) {
+ unsafe { decrypt8(&self.round_keys, blocks) }
+ }
+ }
+
+ opaque_debug::implement!($name);
+ opaque_debug::implement!($name_enc);
+ opaque_debug::implement!($name_dec);
+ };
+}
+
+define_aes_impl!(Aes128, Aes128Enc, Aes128Dec, U16, 11, "AES-128");
+define_aes_impl!(Aes192, Aes192Enc, Aes192Dec, U24, 13, "AES-192");
+define_aes_impl!(Aes256, Aes256Enc, Aes256Dec, U32, 15, "AES-256");
+
+#[cfg(test)]
+mod tests {
+ use super::{decrypt, decrypt8, encrypt, encrypt8, expand_key, inv_expanded_keys, ParBlocks};
+ use core::{arch::aarch64::*, convert::TryInto};
+ use hex_literal::hex;
+
+ /// FIPS 197, Appendix A.1: AES-128 Cipher Key
+ /// user input, unaligned buffer
+ const AES128_KEY: [u8; 16] = hex!("2b7e151628aed2a6abf7158809cf4f3c");
+
+ /// FIPS 197 Appendix A.1: Expansion of a 128-bit Cipher Key
+ /// library controlled, aligned buffer
+ const AES128_EXP_KEYS: [[u8; 16]; 11] = [
+ AES128_KEY,
+ hex!("a0fafe1788542cb123a339392a6c7605"),
+ hex!("f2c295f27a96b9435935807a7359f67f"),
+ hex!("3d80477d4716fe3e1e237e446d7a883b"),
+ hex!("ef44a541a8525b7fb671253bdb0bad00"),
+ hex!("d4d1c6f87c839d87caf2b8bc11f915bc"),
+ hex!("6d88a37a110b3efddbf98641ca0093fd"),
+ hex!("4e54f70e5f5fc9f384a64fb24ea6dc4f"),
+ hex!("ead27321b58dbad2312bf5607f8d292f"),
+ hex!("ac7766f319fadc2128d12941575c006e"),
+ hex!("d014f9a8c9ee2589e13f0cc8b6630ca6"),
+ ];
+
+    /// Inverse expanded keys for [`AES128_EXP_KEYS`]
+ const AES128_EXP_INVKEYS: [[u8; 16]; 11] = [
+ hex!("d014f9a8c9ee2589e13f0cc8b6630ca6"),
+ hex!("0c7b5a631319eafeb0398890664cfbb4"),
+ hex!("df7d925a1f62b09da320626ed6757324"),
+ hex!("12c07647c01f22c7bc42d2f37555114a"),
+ hex!("6efcd876d2df54807c5df034c917c3b9"),
+ hex!("6ea30afcbc238cf6ae82a4b4b54a338d"),
+ hex!("90884413d280860a12a128421bc89739"),
+ hex!("7c1f13f74208c219c021ae480969bf7b"),
+ hex!("cc7505eb3e17d1ee82296c51c9481133"),
+ hex!("2b3708a7f262d405bc3ebdbf4b617d62"),
+ AES128_KEY,
+ ];
+
+ /// FIPS 197, Appendix A.2: AES-192 Cipher Key
+ /// user input, unaligned buffer
+ const AES192_KEY: [u8; 24] = hex!("8e73b0f7da0e6452c810f32b809079e562f8ead2522c6b7b");
+
+ /// FIPS 197 Appendix A.2: Expansion of a 192-bit Cipher Key
+ /// library controlled, aligned buffer
+ const AES192_EXP_KEYS: [[u8; 16]; 13] = [
+ hex!("8e73b0f7da0e6452c810f32b809079e5"),
+ hex!("62f8ead2522c6b7bfe0c91f72402f5a5"),
+ hex!("ec12068e6c827f6b0e7a95b95c56fec2"),
+ hex!("4db7b4bd69b5411885a74796e92538fd"),
+ hex!("e75fad44bb095386485af05721efb14f"),
+ hex!("a448f6d94d6dce24aa326360113b30e6"),
+ hex!("a25e7ed583b1cf9a27f939436a94f767"),
+ hex!("c0a69407d19da4e1ec1786eb6fa64971"),
+ hex!("485f703222cb8755e26d135233f0b7b3"),
+ hex!("40beeb282f18a2596747d26b458c553e"),
+ hex!("a7e1466c9411f1df821f750aad07d753"),
+ hex!("ca4005388fcc5006282d166abc3ce7b5"),
+ hex!("e98ba06f448c773c8ecc720401002202"),
+ ];
+
+ /// FIPS 197, Appendix A.3: AES-256 Cipher Key
+ /// user input, unaligned buffer
+ const AES256_KEY: [u8; 32] =
+ hex!("603deb1015ca71be2b73aef0857d77811f352c073b6108d72d9810a30914dff4");
+
+ /// FIPS 197 Appendix A.3: Expansion of a 256-bit Cipher Key
+ /// library controlled, aligned buffer
+ const AES256_EXP_KEYS: [[u8; 16]; 15] = [
+ hex!("603deb1015ca71be2b73aef0857d7781"),
+ hex!("1f352c073b6108d72d9810a30914dff4"),
+ hex!("9ba354118e6925afa51a8b5f2067fcde"),
+ hex!("a8b09c1a93d194cdbe49846eb75d5b9a"),
+ hex!("d59aecb85bf3c917fee94248de8ebe96"),
+ hex!("b5a9328a2678a647983122292f6c79b3"),
+ hex!("812c81addadf48ba24360af2fab8b464"),
+ hex!("98c5bfc9bebd198e268c3ba709e04214"),
+ hex!("68007bacb2df331696e939e46c518d80"),
+ hex!("c814e20476a9fb8a5025c02d59c58239"),
+ hex!("de1369676ccc5a71fa2563959674ee15"),
+ hex!("5886ca5d2e2f31d77e0af1fa27cf73c3"),
+ hex!("749c47ab18501ddae2757e4f7401905a"),
+ hex!("cafaaae3e4d59b349adf6acebd10190d"),
+ hex!("fe4890d1e6188d0b046df344706c631e"),
+ ];
+
+ /// FIPS 197, Appendix B input
+ /// user input, unaligned buffer
+ const INPUT: [u8; 16] = hex!("3243f6a8885a308d313198a2e0370734");
+
+ /// FIPS 197, Appendix B output
+ const EXPECTED: [u8; 16] = hex!("3925841d02dc09fbdc118597196a0b32");
+
+ fn load_expanded_keys<const N: usize>(input: [[u8; 16]; N]) -> [uint8x16_t; N] {
+ let mut output = [unsafe { vdupq_n_u8(0) }; N];
+
+ for (src, dst) in input.iter().zip(output.iter_mut()) {
+ *dst = unsafe { vld1q_u8(src.as_ptr()) }
+ }
+
+ output
+ }
+
+ fn store_expanded_keys<const N: usize>(input: [uint8x16_t; N]) -> [[u8; 16]; N] {
+ let mut output = [[0u8; 16]; N];
+
+ for (src, dst) in input.iter().zip(output.iter_mut()) {
+ unsafe { vst1q_u8(dst.as_mut_ptr(), *src) }
+ }
+
+ output
+ }
+
+ #[test]
+ fn aes128_key_expansion() {
+ let ek = expand_key(&AES128_KEY);
+ assert_eq!(store_expanded_keys(ek), AES128_EXP_KEYS);
+ }
+
+ #[test]
+ fn aes128_key_expansion_inv() {
+ let mut ek = load_expanded_keys(AES128_EXP_KEYS);
+ inv_expanded_keys(&mut ek);
+ assert_eq!(store_expanded_keys(ek), AES128_EXP_INVKEYS);
+ }
+
+ #[test]
+ fn aes192_key_expansion() {
+ let ek = expand_key(&AES192_KEY);
+ assert_eq!(store_expanded_keys(ek), AES192_EXP_KEYS);
+ }
+
+ #[test]
+ fn aes256_key_expansion() {
+ let ek = expand_key(&AES256_KEY);
+ assert_eq!(store_expanded_keys(ek), AES256_EXP_KEYS);
+ }
+
+ #[test]
+ fn aes128_encrypt() {
+ // Intentionally misaligned block
+ let mut block = [0u8; 19];
+ block[3..].copy_from_slice(&INPUT);
+
+ unsafe {
+ encrypt(
+ &load_expanded_keys(AES128_EXP_KEYS),
+ (&mut block[3..]).try_into().unwrap(),
+ )
+ };
+
+ assert_eq!(&block[3..], &EXPECTED);
+ }
+
+ #[test]
+ fn aes128_encrypt8() {
+ let mut blocks = ParBlocks::default();
+
+ for block in &mut blocks {
+ block.copy_from_slice(&INPUT);
+ }
+
+ unsafe { encrypt8(&load_expanded_keys(AES128_EXP_KEYS), &mut blocks) };
+
+ for block in &blocks {
+ assert_eq!(block.as_slice(), &EXPECTED);
+ }
+ }
+
+ #[test]
+ fn aes128_decrypt() {
+ // Intentionally misaligned block
+ let mut block = [0u8; 19];
+ block[3..].copy_from_slice(&EXPECTED);
+
+ unsafe {
+ decrypt(
+ &load_expanded_keys(AES128_EXP_INVKEYS),
+ (&mut block[3..]).try_into().unwrap(),
+ )
+ };
+
+ assert_eq!(&block[3..], &INPUT);
+ }
+
+ #[test]
+ fn aes128_decrypt8() {
+ let mut blocks = ParBlocks::default();
+
+ for block in &mut blocks {
+ block.copy_from_slice(&EXPECTED);
+ }
+
+ unsafe { decrypt8(&load_expanded_keys(AES128_EXP_INVKEYS), &mut blocks) };
+
+ for block in &blocks {
+ assert_eq!(block.as_slice(), &INPUT);
+ }
+ }
+}
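
The `define_aes_impl!` macro above wires `Aes128`/`Aes192`/`Aes256` into the RustCrypto `cipher` traits. A hypothetical usage sketch (not part of the vendored sources), reusing the FIPS 197 Appendix B vectors from the tests above and assuming `hex-literal` as a dev-dependency:

use aes::cipher::{generic_array::GenericArray, BlockEncrypt, NewBlockCipher};
use aes::{Aes128, Block};
use hex_literal::hex;

fn main() {
    // FIPS 197, Appendix B key, plaintext and ciphertext (same vectors as the tests above).
    let key_bytes = hex!("2b7e151628aed2a6abf7158809cf4f3c");
    let plaintext = hex!("3243f6a8885a308d313198a2e0370734");
    let ciphertext = hex!("3925841d02dc09fbdc118597196a0b32");

    let cipher = Aes128::new(GenericArray::from_slice(&key_bytes));
    let mut block = Block::clone_from_slice(&plaintext);

    // Encrypt the block in place and compare against the expected ciphertext.
    cipher.encrypt_block(&mut block);
    assert_eq!(block.as_slice(), &ciphertext[..]);
}
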
diff --git a/rust/vendor/aes/src/armv8/decrypt.rs b/rust/vendor/aes/src/armv8/decrypt.rs
new file mode 100644
index 0000000..cb84193
--- /dev/null
+++ b/rust/vendor/aes/src/armv8/decrypt.rs
@@ -0,0 +1,72 @@
+//! AES decryption support.
+
+use crate::{Block, ParBlocks};
+use core::arch::aarch64::*;
+
+/// Perform AES decryption using the given expanded keys.
+#[target_feature(enable = "aes")]
+#[target_feature(enable = "neon")]
+pub(super) unsafe fn decrypt<const N: usize>(expanded_keys: &[uint8x16_t; N], block: &mut Block) {
+ let rounds = N - 1;
+ assert!(rounds == 10 || rounds == 12 || rounds == 14);
+
+ let mut state = vld1q_u8(block.as_ptr());
+
+ for k in expanded_keys.iter().take(rounds - 1) {
+ // AES single round decryption
+ state = vaesdq_u8(state, *k);
+
+ // AES inverse mix columns
+ state = vaesimcq_u8(state);
+ }
+
+ // AES single round decryption
+ state = vaesdq_u8(state, expanded_keys[rounds - 1]);
+
+ // Final add (bitwise XOR)
+ state = veorq_u8(state, expanded_keys[rounds]);
+
+ vst1q_u8(block.as_mut_ptr(), state);
+}
+
+/// Perform parallel AES decryption 8-blocks-at-a-time using the given expanded keys.
+#[target_feature(enable = "aes")]
+#[target_feature(enable = "neon")]
+pub(super) unsafe fn decrypt8<const N: usize>(
+ expanded_keys: &[uint8x16_t; N],
+ blocks: &mut ParBlocks,
+) {
+ let rounds = N - 1;
+ assert!(rounds == 10 || rounds == 12 || rounds == 14);
+
+ let mut state = [
+ vld1q_u8(blocks[0].as_ptr()),
+ vld1q_u8(blocks[1].as_ptr()),
+ vld1q_u8(blocks[2].as_ptr()),
+ vld1q_u8(blocks[3].as_ptr()),
+ vld1q_u8(blocks[4].as_ptr()),
+ vld1q_u8(blocks[5].as_ptr()),
+ vld1q_u8(blocks[6].as_ptr()),
+ vld1q_u8(blocks[7].as_ptr()),
+ ];
+
+ for k in expanded_keys.iter().take(rounds - 1) {
+ for i in 0..8 {
+ // AES single round decryption
+ state[i] = vaesdq_u8(state[i], *k);
+
+ // AES inverse mix columns
+ state[i] = vaesimcq_u8(state[i]);
+ }
+ }
+
+ for i in 0..8 {
+ // AES single round decryption
+ state[i] = vaesdq_u8(state[i], expanded_keys[rounds - 1]);
+
+ // Final add (bitwise XOR)
+ state[i] = veorq_u8(state[i], expanded_keys[rounds]);
+
+ vst1q_u8(blocks[i].as_mut_ptr(), state[i]);
+ }
+}
diff --git a/rust/vendor/aes/src/armv8/encrypt.rs b/rust/vendor/aes/src/armv8/encrypt.rs
new file mode 100644
index 0000000..8464173
--- /dev/null
+++ b/rust/vendor/aes/src/armv8/encrypt.rs
@@ -0,0 +1,72 @@
+//! AES encryption support
+
+use crate::{Block, ParBlocks};
+use core::arch::aarch64::*;
+
+/// Perform AES encryption using the given expanded keys.
+#[target_feature(enable = "aes")]
+#[target_feature(enable = "neon")]
+pub(super) unsafe fn encrypt<const N: usize>(expanded_keys: &[uint8x16_t; N], block: &mut Block) {
+ let rounds = N - 1;
+ assert!(rounds == 10 || rounds == 12 || rounds == 14);
+
+ let mut state = vld1q_u8(block.as_ptr());
+
+ for k in expanded_keys.iter().take(rounds - 1) {
+ // AES single round encryption
+ state = vaeseq_u8(state, *k);
+
+ // AES mix columns
+ state = vaesmcq_u8(state);
+ }
+
+ // AES single round encryption
+ state = vaeseq_u8(state, expanded_keys[rounds - 1]);
+
+ // Final add (bitwise XOR)
+ state = veorq_u8(state, expanded_keys[rounds]);
+
+ vst1q_u8(block.as_mut_ptr(), state);
+}
+
+/// Perform parallel AES encryption 8-blocks-at-a-time using the given expanded keys.
+#[target_feature(enable = "aes")]
+#[target_feature(enable = "neon")]
+pub(super) unsafe fn encrypt8<const N: usize>(
+ expanded_keys: &[uint8x16_t; N],
+ blocks: &mut ParBlocks,
+) {
+ let rounds = N - 1;
+ assert!(rounds == 10 || rounds == 12 || rounds == 14);
+
+ let mut state = [
+ vld1q_u8(blocks[0].as_ptr()),
+ vld1q_u8(blocks[1].as_ptr()),
+ vld1q_u8(blocks[2].as_ptr()),
+ vld1q_u8(blocks[3].as_ptr()),
+ vld1q_u8(blocks[4].as_ptr()),
+ vld1q_u8(blocks[5].as_ptr()),
+ vld1q_u8(blocks[6].as_ptr()),
+ vld1q_u8(blocks[7].as_ptr()),
+ ];
+
+ for k in expanded_keys.iter().take(rounds - 1) {
+ for i in 0..8 {
+ // AES single round encryption
+ state[i] = vaeseq_u8(state[i], *k);
+
+ // AES mix columns
+ state[i] = vaesmcq_u8(state[i]);
+ }
+ }
+
+ for i in 0..8 {
+ // AES single round encryption
+ state[i] = vaeseq_u8(state[i], expanded_keys[rounds - 1]);
+
+ // Final add (bitwise XOR)
+ state[i] = veorq_u8(state[i], expanded_keys[rounds]);
+
+ vst1q_u8(blocks[i].as_mut_ptr(), state[i]);
+ }
+}
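
A portable sketch (illustrative placeholders only, no intrinsics) of the round-key bookkeeping the loops above rely on: the AESE instruction XORs the round key in before SubBytes/ShiftRows, so `keys[0..rounds-1]` drive the AESE+AESMC loop, `keys[rounds-1]` feeds the final AESE, and `keys[rounds]` is the closing XOR, i.e. exactly one expanded key per step:

// Placeholder strings stand in for vaeseq_u8/vaesmcq_u8/veorq_u8.
fn key_schedule_steps(n_expanded_keys: usize) -> Vec<&'static str> {
    let rounds = n_expanded_keys - 1;
    let mut steps = vec!["AESE + AESMC"; rounds - 1]; // keys[0..rounds-1]
    steps.push("AESE");                               // keys[rounds - 1]
    steps.push("XOR (final AddRoundKey)");            // keys[rounds]
    steps
}

fn main() {
    // AES-128, AES-192 and AES-256 use 11, 13 and 15 expanded keys respectively.
    for &n in &[11usize, 13, 15] {
        // Exactly one step per expanded key.
        assert_eq!(key_schedule_steps(n).len(), n);
    }
}
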
diff --git a/rust/vendor/aes/src/armv8/expand.rs b/rust/vendor/aes/src/armv8/expand.rs
new file mode 100644
index 0000000..2e26e39
--- /dev/null
+++ b/rust/vendor/aes/src/armv8/expand.rs
@@ -0,0 +1,77 @@
+//! AES key expansion support.
+
+use core::{arch::aarch64::*, convert::TryInto, mem, slice};
+
+/// There are 4 AES words in a block.
+const BLOCK_WORDS: usize = 4;
+
+/// The AES (née Rijndael) notion of a word is always 32 bits, i.e. 4 bytes.
+const WORD_SIZE: usize = 4;
+
+/// AES round constants.
+const ROUND_CONSTS: [u32; 10] = [0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36];
+
+/// AES key expansion
+// TODO(tarcieri): big endian support?
+#[inline]
+pub(super) fn expand_key<const L: usize, const N: usize>(key: &[u8; L]) -> [uint8x16_t; N] {
+ assert!((L == 16 && N == 11) || (L == 24 && N == 13) || (L == 32 && N == 15));
+
+ let mut expanded_keys: [uint8x16_t; N] = unsafe { mem::zeroed() };
+
+ // TODO(tarcieri): construct expanded keys using `vreinterpretq_u8_u32`
+ let ek_words = unsafe {
+ slice::from_raw_parts_mut(expanded_keys.as_mut_ptr() as *mut u32, N * BLOCK_WORDS)
+ };
+
+ for (i, chunk) in key.chunks_exact(WORD_SIZE).enumerate() {
+ ek_words[i] = u32::from_ne_bytes(chunk.try_into().unwrap());
+ }
+
+ // From "The Rijndael Block Cipher" Section 4.1:
+ // > The number of columns of the Cipher Key is denoted by `Nk` and is
+ // > equal to the key length divided by 32 [bits].
+ let nk = L / WORD_SIZE;
+
+ for i in nk..(N * BLOCK_WORDS) {
+ let mut word = ek_words[i - 1];
+
+ if i % nk == 0 {
+ word = sub_word(word).rotate_right(8) ^ ROUND_CONSTS[i / nk - 1];
+ } else if nk > 6 && i % nk == 4 {
+ word = sub_word(word)
+ }
+
+ ek_words[i] = ek_words[i - nk] ^ word;
+ }
+
+ expanded_keys
+}
+
+/// Compute inverse expanded keys (for decryption).
+///
+/// This is the reverse of the encryption keys, with the Inverse Mix Columns
+/// operation applied to all but the first and last expanded key.
+#[inline]
+pub(super) fn inv_expanded_keys<const N: usize>(expanded_keys: &mut [uint8x16_t; N]) {
+ assert!(N == 11 || N == 13 || N == 15);
+
+ for ek in expanded_keys.iter_mut().take(N - 1).skip(1) {
+ unsafe { *ek = vaesimcq_u8(*ek) }
+ }
+
+ expanded_keys.reverse();
+}
+
+/// Sub bytes for a single AES word: used for key expansion.
+#[inline(always)]
+fn sub_word(input: u32) -> u32 {
+ unsafe {
+ let input = vreinterpretq_u8_u32(vdupq_n_u32(input));
+
+ // AES single round encryption (with a "round" key of all zeros)
+ let sub_input = vaeseq_u8(input, vdupq_n_u8(0));
+
+ vgetq_lane_u32(vreinterpretq_u32_u8(sub_input), 0)
+ }
+}
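
A worked check (not part of the vendored sources) of the word recurrence that `expand_key` implements, written in the big-endian word notation of FIPS 197 (the code above manipulates native-endian words, hence its different rotation and constant placement): for the Appendix A.1 key, w4 = w0 ^ SubWord(RotWord(w3)) ^ Rcon[1], which matches the leading word of `AES128_EXP_KEYS[1]` in the tests earlier in this diff. The SubWord value is precomputed from the FIPS 197 S-box.

fn main() {
    let w0: u32 = 0x2b7e1516; // first word of the FIPS 197 A.1 key
    let w3: u32 = 0x09cf4f3c; // last word of the key
    let rcon1: u32 = 0x0100_0000;

    // RotWord: rotate the word left by one byte.
    let rot = w3.rotate_left(8);
    assert_eq!(rot, 0xcf4f_3c09);

    // SubWord(0xcf4f3c09), precomputed from the S-box:
    // S[0xcf]=0x8a, S[0x4f]=0x84, S[0x3c]=0xeb, S[0x09]=0x01.
    let sub: u32 = 0x8a84_eb01;

    let w4 = w0 ^ sub ^ rcon1;
    // Matches "a0fafe17...", the first word of AES128_EXP_KEYS[1].
    assert_eq!(w4, 0xa0fa_fe17);
}
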
diff --git a/rust/vendor/aes/src/armv8/hazmat.rs b/rust/vendor/aes/src/armv8/hazmat.rs
new file mode 100644
index 0000000..022db3a
--- /dev/null
+++ b/rust/vendor/aes/src/armv8/hazmat.rs
@@ -0,0 +1,104 @@
+//! Low-level "hazmat" AES functions: ARMv8 Cryptography Extensions support.
+//!
+//! Note: this isn't actually used in the `Aes128`/`Aes192`/`Aes256`
+//! implementations in this crate, but instead provides raw access to the AES
+//! round function, accelerated by the ARMv8 Cryptography Extensions and gated
+//! under the `hazmat` crate feature.
+
+use crate::{Block, ParBlocks};
+use core::arch::aarch64::*;
+
+/// AES cipher (encrypt) round function.
+#[allow(clippy::cast_ptr_alignment)]
+#[target_feature(enable = "aes")]
+pub(crate) unsafe fn cipher_round(block: &mut Block, round_key: &Block) {
+ let b = vld1q_u8(block.as_ptr());
+ let k = vld1q_u8(round_key.as_ptr());
+
+ // AES single round encryption (all-zero round key, deferred until the end)
+ let mut state = vaeseq_u8(b, vdupq_n_u8(0));
+
+ // AES mix columns (the `vaeseq_u8` instruction otherwise omits this step)
+ state = vaesmcq_u8(state);
+
+ // AES add round key (bitwise XOR)
+ state = veorq_u8(state, k);
+
+ vst1q_u8(block.as_mut_ptr(), state);
+}
+
+/// AES cipher (encrypt) round function: parallel version.
+#[allow(clippy::cast_ptr_alignment)]
+#[target_feature(enable = "aes")]
+pub(crate) unsafe fn cipher_round_par(blocks: &mut ParBlocks, round_keys: &ParBlocks) {
+ for i in 0..8 {
+ let mut state = vld1q_u8(blocks[i].as_ptr());
+
+ // AES single round encryption
+ state = vaeseq_u8(state, vdupq_n_u8(0));
+
+ // AES mix columns
+ state = vaesmcq_u8(state);
+
+ // AES add round key (bitwise XOR)
+ state = veorq_u8(state, vld1q_u8(round_keys[i].as_ptr()));
+
+ vst1q_u8(blocks[i].as_mut_ptr(), state);
+ }
+}
+
+/// AES equivalent inverse cipher (decrypt) round function.
+#[allow(clippy::cast_ptr_alignment)]
+#[target_feature(enable = "aes")]
+pub(crate) unsafe fn equiv_inv_cipher_round(block: &mut Block, round_key: &Block) {
+ let b = vld1q_u8(block.as_ptr());
+ let k = vld1q_u8(round_key.as_ptr());
+
+ // AES single round decryption (all-zero round key, deferred until the end)
+ let mut state = vaesdq_u8(b, vdupq_n_u8(0));
+
+ // AES inverse mix columns (the `vaesdq_u8` instruction otherwise omits this step)
+ state = vaesimcq_u8(state);
+
+ // AES add round key (bitwise XOR)
+ state = veorq_u8(state, k);
+
+ vst1q_u8(block.as_mut_ptr(), state);
+}
+
+/// AES equivalent inverse cipher (decrypt) round function: parallel version.
+#[allow(clippy::cast_ptr_alignment)]
+#[target_feature(enable = "aes")]
+pub(crate) unsafe fn equiv_inv_cipher_round_par(blocks: &mut ParBlocks, round_keys: &ParBlocks) {
+ for i in 0..8 {
+ let mut state = vld1q_u8(blocks[i].as_ptr());
+
+ // AES single round decryption (all-zero round key, deferred until the end)
+ state = vaesdq_u8(state, vdupq_n_u8(0));
+
+ // AES inverse mix columns (the `vaesdq_u8` instruction otherwise omits this step)
+ state = vaesimcq_u8(state);
+
+ // AES add round key (bitwise XOR)
+ state = veorq_u8(state, vld1q_u8(round_keys[i].as_ptr()));
+
+ vst1q_u8(blocks[i].as_mut_ptr(), state);
+ }
+}
+
+/// AES mix columns function.
+#[allow(clippy::cast_ptr_alignment)]
+#[target_feature(enable = "aes")]
+pub(crate) unsafe fn mix_columns(block: &mut Block) {
+ let b = vld1q_u8(block.as_ptr());
+ let out = vaesmcq_u8(b);
+ vst1q_u8(block.as_mut_ptr(), out);
+}
+
+/// AES inverse mix columns function.
+#[allow(clippy::cast_ptr_alignment)]
+#[target_feature(enable = "aes")]
+pub(crate) unsafe fn inv_mix_columns(block: &mut Block) {
+ let b = vld1q_u8(block.as_ptr());
+ let out = vaesimcq_u8(b);
+ vst1q_u8(block.as_mut_ptr(), out);
+}
diff --git a/rust/vendor/aes/src/autodetect.rs b/rust/vendor/aes/src/autodetect.rs
new file mode 100644
index 0000000..dbbdeab
--- /dev/null
+++ b/rust/vendor/aes/src/autodetect.rs
@@ -0,0 +1,259 @@
+//! Autodetection support for hardware accelerated AES backends with fallback
+//! to the fixsliced "soft" implementation.
+
+use crate::{soft, Block, ParBlocks};
+use cipher::{
+ consts::{U16, U24, U32, U8},
+ generic_array::GenericArray,
+ BlockCipher, BlockDecrypt, BlockEncrypt, NewBlockCipher,
+};
+use core::mem::ManuallyDrop;
+
+#[cfg(all(target_arch = "aarch64", feature = "armv8"))]
+use crate::armv8 as intrinsics;
+
+#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
+use crate::ni as intrinsics;
+
+cpufeatures::new!(aes_intrinsics, "aes");
+
+macro_rules! define_aes_impl {
+ (
+ $name:tt,
+ $module:tt,
+ $key_size:ty,
+ $doc:expr
+ ) => {
+ #[doc=$doc]
+ pub struct $name {
+ inner: $module::Inner,
+ token: aes_intrinsics::InitToken,
+ }
+
+ mod $module {
+ use super::{intrinsics, soft};
+ use core::mem::ManuallyDrop;
+
+ pub(super) union Inner {
+ pub(super) intrinsics: ManuallyDrop<intrinsics::$name>,
+ pub(super) soft: ManuallyDrop<soft::$name>,
+ }
+ }
+
+ impl NewBlockCipher for $name {
+ type KeySize = $key_size;
+
+ #[inline]
+ fn new(key: &GenericArray<u8, $key_size>) -> Self {
+ let (token, aesni_present) = aes_intrinsics::init_get();
+
+ let inner = if aesni_present {
+ $module::Inner {
+ intrinsics: ManuallyDrop::new(intrinsics::$name::new(key)),
+ }
+ } else {
+ $module::Inner {
+ soft: ManuallyDrop::new(soft::$name::new(key)),
+ }
+ };
+
+ Self { inner, token }
+ }
+ }
+
+ impl Clone for $name {
+ fn clone(&self) -> Self {
+ let inner = if self.token.get() {
+ $module::Inner {
+ intrinsics: unsafe { self.inner.intrinsics.clone() },
+ }
+ } else {
+ $module::Inner {
+ soft: unsafe { self.inner.soft.clone() },
+ }
+ };
+
+ Self {
+ inner,
+ token: self.token,
+ }
+ }
+ }
+
+ impl BlockCipher for $name {
+ type BlockSize = U16;
+ type ParBlocks = U8;
+ }
+
+ impl BlockEncrypt for $name {
+ #[inline]
+ fn encrypt_block(&self, block: &mut Block) {
+ if self.token.get() {
+ unsafe { self.inner.intrinsics.encrypt_block(block) }
+ } else {
+ unsafe { self.inner.soft.encrypt_block(block) }
+ }
+ }
+
+ #[inline]
+ fn encrypt_par_blocks(&self, blocks: &mut ParBlocks) {
+ if self.token.get() {
+ unsafe { self.inner.intrinsics.encrypt_par_blocks(blocks) }
+ } else {
+ unsafe { self.inner.soft.encrypt_par_blocks(blocks) }
+ }
+ }
+ }
+
+ impl BlockDecrypt for $name {
+ #[inline]
+ fn decrypt_block(&self, block: &mut Block) {
+ if self.token.get() {
+ unsafe { self.inner.intrinsics.decrypt_block(block) }
+ } else {
+ unsafe { self.inner.soft.decrypt_block(block) }
+ }
+ }
+
+ #[inline]
+ fn decrypt_par_blocks(&self, blocks: &mut ParBlocks) {
+ if self.token.get() {
+ unsafe { self.inner.intrinsics.decrypt_par_blocks(blocks) }
+ } else {
+ unsafe { self.inner.soft.decrypt_par_blocks(blocks) }
+ }
+ }
+ }
+
+ opaque_debug::implement!($name);
+ };
+}
+
+define_aes_impl!(Aes128, aes128, U16, "AES-128 block cipher instance");
+define_aes_impl!(Aes192, aes192, U24, "AES-192 block cipher instance");
+define_aes_impl!(Aes256, aes256, U32, "AES-256 block cipher instance");
+
+#[cfg(all(feature = "ctr", target_arch = "aarch64"))]
+pub(crate) mod ctr {
+ use super::{Aes128, Aes192, Aes256};
+
+ /// AES-128 in CTR mode
+ pub type Aes128Ctr = ::ctr::Ctr64BE<Aes128>;
+
+ /// AES-192 in CTR mode
+ pub type Aes192Ctr = ::ctr::Ctr64BE<Aes192>;
+
+ /// AES-256 in CTR mode
+ pub type Aes256Ctr = ::ctr::Ctr64BE<Aes256>;
+}
+
+#[cfg(all(feature = "ctr", any(target_arch = "x86_64", target_arch = "x86")))]
+pub(crate) mod ctr {
+ use super::{Aes128, Aes192, Aes256};
+ use crate::{ni, soft};
+ use cipher::{
+ errors::{LoopError, OverflowError},
+ generic_array::GenericArray,
+ BlockCipher, FromBlockCipher, SeekNum, StreamCipher, StreamCipherSeek,
+ };
+ use core::mem::ManuallyDrop;
+
+ cpufeatures::new!(aes_ssse3_cpuid, "aes", "ssse3");
+
+ macro_rules! define_aes_ctr_impl {
+ (
+ $name:tt,
+ $cipher:ident,
+ $module:tt,
+ $doc:expr
+ ) => {
+ #[doc=$doc]
+ #[cfg_attr(docsrs, doc(cfg(feature = "ctr")))]
+ pub struct $name {
+ inner: $module::Inner,
+ token: aes_ssse3_cpuid::InitToken,
+ }
+
+ mod $module {
+ use crate::{ni, soft};
+ use core::mem::ManuallyDrop;
+
+ pub(super) union Inner {
+ pub(super) ni: ManuallyDrop<ni::$name>,
+ pub(super) soft: ManuallyDrop<soft::$name>,
+ }
+ }
+
+ impl FromBlockCipher for $name {
+ type BlockCipher = $cipher;
+ type NonceSize = <$cipher as BlockCipher>::BlockSize;
+
+ fn from_block_cipher(
+ cipher: $cipher,
+ nonce: &GenericArray<u8, Self::NonceSize>,
+ ) -> Self {
+ let (token, aesni_present) = aes_ssse3_cpuid::init_get();
+
+ let inner = if aesni_present {
+ let ni = ni::$name::from_block_cipher(
+ unsafe { (*cipher.inner.intrinsics).clone() },
+ nonce,
+ );
+
+ $module::Inner {
+ ni: ManuallyDrop::new(ni),
+ }
+ } else {
+ let soft = soft::$name::from_block_cipher(
+ unsafe { (*cipher.inner.soft).clone() },
+ nonce,
+ );
+
+ $module::Inner {
+ soft: ManuallyDrop::new(soft),
+ }
+ };
+
+ Self { inner, token }
+ }
+ }
+
+ impl StreamCipher for $name {
+ #[inline]
+ fn try_apply_keystream(&mut self, data: &mut [u8]) -> Result<(), LoopError> {
+ if self.token.get() {
+ unsafe { (*self.inner.ni).try_apply_keystream(data) }
+ } else {
+ unsafe { (*self.inner.soft).try_apply_keystream(data) }
+ }
+ }
+ }
+
+ impl StreamCipherSeek for $name {
+ #[inline]
+ fn try_current_pos<T: SeekNum>(&self) -> Result<T, OverflowError> {
+ if self.token.get() {
+ unsafe { (*self.inner.ni).try_current_pos() }
+ } else {
+ unsafe { (*self.inner.soft).try_current_pos() }
+ }
+ }
+
+ #[inline]
+ fn try_seek<T: SeekNum>(&mut self, pos: T) -> Result<(), LoopError> {
+ if self.token.get() {
+ unsafe { (*self.inner.ni).try_seek(pos) }
+ } else {
+ unsafe { (*self.inner.soft).try_seek(pos) }
+ }
+ }
+ }
+
+ opaque_debug::implement!($name);
+ };
+ }
+
+ define_aes_ctr_impl!(Aes128Ctr, Aes128, aes128ctr, "AES-128 in CTR mode");
+ define_aes_ctr_impl!(Aes192Ctr, Aes192, aes192ctr, "AES-192 in CTR mode");
+ define_aes_ctr_impl!(Aes256Ctr, Aes256, aes256ctr, "AES-256 in CTR mode");
+}
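
The pattern above — a `cpufeatures` token captured once at construction plus a union of `ManuallyDrop` backends selected by that token — is the core of the autodetection. A stripped-down, hypothetical sketch of the same idea with two stand-in backends (no real intrinsics involved):

use core::mem::ManuallyDrop;

cpufeatures::new!(aes_check, "aes");

#[derive(Clone)]
struct FastBackend; // stand-in for the intrinsics-based cipher
#[derive(Clone)]
struct SoftBackend; // stand-in for the fixsliced fallback

impl FastBackend {
    fn process(&self, _block: &mut [u8; 16]) { /* would use AES intrinsics */ }
}
impl SoftBackend {
    fn process(&self, _block: &mut [u8; 16]) { /* constant-time software path */ }
}

union Inner {
    fast: ManuallyDrop<FastBackend>,
    soft: ManuallyDrop<SoftBackend>,
}

struct Dispatching {
    inner: Inner,
    token: aes_check::InitToken,
}

impl Dispatching {
    fn new() -> Self {
        // Detect the CPU feature once; the token remembers the result.
        let (token, has_aes) = aes_check::init_get();
        let inner = if has_aes {
            Inner { fast: ManuallyDrop::new(FastBackend) }
        } else {
            Inner { soft: ManuallyDrop::new(SoftBackend) }
        };
        Self { inner, token }
    }

    fn process(&self, block: &mut [u8; 16]) {
        // Safety: `new` initialized exactly the union field the token reports.
        if self.token.get() {
            unsafe { (*self.inner.fast).process(block) }
        } else {
            unsafe { (*self.inner.soft).process(block) }
        }
    }
}

fn main() {
    let dispatching = Dispatching::new();
    let mut block = [0u8; 16];
    dispatching.process(&mut block);
}
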
diff --git a/rust/vendor/aes/src/hazmat.rs b/rust/vendor/aes/src/hazmat.rs
new file mode 100644
index 0000000..ff628e5
--- /dev/null
+++ b/rust/vendor/aes/src/hazmat.rs
@@ -0,0 +1,166 @@
+//! ⚠️ Low-level "hazmat" AES functions.
+//!
+//! # ☢️️ WARNING: HAZARDOUS API ☢️
+//!
+//! This module contains an extremely low-level cryptographic primitive
+//! which is likewise extremely difficult to use correctly.
+//!
+//! There are very few valid use cases for this API. It's intended to be used
+//! for implementing well-reviewed higher-level constructions.
+//!
+//! We do NOT recommend using it to implement any algorithm which has not
+//! received extensive peer review by cryptographers.
+
+use crate::{soft::fixslice::hazmat as soft, Block, ParBlocks};
+
+#[cfg(all(
+ target_arch = "aarch64",
+ feature = "armv8",
+ not(feature = "force-soft")
+))]
+use crate::armv8::hazmat as intrinsics;
+
+#[cfg(all(
+ any(target_arch = "x86_64", target_arch = "x86"),
+ not(feature = "force-soft")
+))]
+use crate::ni::hazmat as intrinsics;
+
+#[cfg(all(
+ any(
+ target_arch = "x86",
+ target_arch = "x86_64",
+ all(target_arch = "aarch64", feature = "armv8")
+ ),
+ not(feature = "force-soft")
+))]
+cpufeatures::new!(aes_intrinsics, "aes");
+
+/// Execute the provided body if CPU intrinsics are available.
+// TODO(tarcieri): more `cfg-if`-like macro with an else branch?
+macro_rules! if_intrinsics_available {
+ ($body:expr) => {{
+ #[cfg(all(
+ any(
+ target_arch = "x86",
+ target_arch = "x86_64",
+ all(target_arch = "aarch64", feature = "armv8")
+ ),
+ not(feature = "force-soft")
+ ))]
+ if aes_intrinsics::get() {
+ unsafe { $body }
+ return;
+ }
+ }};
+}
+
+/// ⚠️ AES cipher (encrypt) round function.
+///
+/// This API performs the following steps as described in FIPS 197 Appendix C:
+///
+/// - `s_box`: state after `SubBytes()`
+/// - `s_row`: state after `ShiftRows()`
+/// - `m_col`: state after `MixColumns()`
+/// - `k_sch`: key schedule value for `round[r]`
+///
+/// This series of operations is equivalent to the Intel AES-NI `AESENC` instruction.
+///
+/// # ☢️️ WARNING: HAZARDOUS API ☢️
+///
+/// Use this function with great care! See the [module-level documentation][crate::hazmat]
+/// for more information.
+pub fn cipher_round(block: &mut Block, round_key: &Block) {
+ if_intrinsics_available! {
+ intrinsics::cipher_round(block, round_key)
+ }
+
+ soft::cipher_round(block, round_key);
+}
+
+/// ⚠️ AES cipher (encrypt) round function: parallel version.
+///
+/// Equivalent to [`cipher_round`], but acts on 8 blocks at a time, applying
+/// a corresponding round key to each block.
+///
+/// # ☢️️ WARNING: HAZARDOUS API ☢️
+///
+/// Use this function with great care! See the [module-level documentation][crate::hazmat]
+/// for more information.
+pub fn cipher_round_par(blocks: &mut ParBlocks, round_keys: &ParBlocks) {
+ if_intrinsics_available! {
+ intrinsics::cipher_round_par(blocks, round_keys)
+ }
+
+ soft::cipher_round_par(blocks, round_keys);
+}
+
+/// ⚠️ AES equivalent inverse cipher (decrypt) round function.
+///
+/// This API performs the following steps as described in FIPS 197 Appendix C:
+///
+/// - `is_box`: state after `InvSubBytes()`
+/// - `is_row`: state after `InvShiftRows()`
+/// - `im_col`: state after `InvMixColumns()`
+/// - `ik_sch`: key schedule value for `round[r]`
+///
+/// This series of operations is equivalent to the Intel AES-NI `AESDEC` instruction.
+///
+/// # ☢️️ WARNING: HAZARDOUS API ☢️
+///
+/// Use this function with great care! See the [module-level documentation][crate::hazmat]
+/// for more information.
+pub fn equiv_inv_cipher_round(block: &mut Block, round_key: &Block) {
+ if_intrinsics_available! {
+ intrinsics::equiv_inv_cipher_round(block, round_key)
+ }
+
+ soft::equiv_inv_cipher_round(block, round_key);
+}
+
+/// ⚠️ AES equivalent inverse cipher (decrypt) round function: parallel version.
+///
+/// Equivalent to [`equiv_inv_cipher_round`], but acts on 8 blocks at a time,
+/// applying a corresponding round key to each block.
+///
+/// # ☢️️ WARNING: HAZARDOUS API ☢️
+///
+/// Use this function with great care! See the [module-level documentation][crate::hazmat]
+/// for more information.
+pub fn equiv_inv_cipher_round_par(blocks: &mut ParBlocks, round_keys: &ParBlocks) {
+ if_intrinsics_available! {
+ intrinsics::equiv_inv_cipher_round_par(blocks, round_keys)
+ }
+
+ soft::equiv_inv_cipher_round_par(blocks, round_keys);
+}
+
+/// ⚠️ AES mix columns function.
+///
+/// # ☢️️ WARNING: HAZARDOUS API ☢️
+///
+/// Use this function with great care! See the [module-level documentation][crate::hazmat]
+/// for more information.
+pub fn mix_columns(block: &mut Block) {
+ if_intrinsics_available! {
+ intrinsics::mix_columns(block)
+ }
+
+ soft::mix_columns(block);
+}
+
+/// ⚠️ AES inverse mix columns function.
+///
+/// This function is equivalent to the Intel AES-NI `AESIMC` instruction.
+///
+/// # ☢️️ WARNING: HAZARDOUS API ☢️
+///
+/// Use this function with great care! See the [module-level documentation][crate::hazmat]
+/// for more information.
+pub fn inv_mix_columns(block: &mut Block) {
+ if_intrinsics_available! {
+ intrinsics::inv_mix_columns(block)
+ }
+
+ soft::inv_mix_columns(block);
+}
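
A hypothetical usage sketch of the public hazmat API above (requires the `hazmat` crate feature): with an all-zero state and an all-zero round key, SubBytes maps every byte to 0x63, ShiftRows and AddRoundKey leave that uniform state unchanged, and MixColumns maps a column of equal bytes to itself, so one `cipher_round` should leave every byte equal to 0x63.

use aes::{hazmat::cipher_round, Block};

fn main() {
    let mut block = Block::default();  // all-zero state
    let round_key = Block::default();  // all-zero round key

    cipher_round(&mut block, &round_key);

    // SubBytes(0x00) = 0x63; the remaining steps preserve the uniform state.
    assert!(block.iter().all(|&b| b == 0x63));
}
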
diff --git a/rust/vendor/aes/src/lib.rs b/rust/vendor/aes/src/lib.rs
new file mode 100644
index 0000000..f33183c
--- /dev/null
+++ b/rust/vendor/aes/src/lib.rs
@@ -0,0 +1,138 @@
+//! Pure Rust implementation of the Advanced Encryption Standard
+//! (a.k.a. Rijndael)
+//!
+//! # Supported backends
+//! This crate provides multiple backends including a portable pure Rust
+//! backend as well as ones based on CPU intrinsics.
+//!
+//! By default, it performs runtime detection of CPU intrinsics and uses them
+//! if they are available.
+//!
+//! ## "soft" portable backend
+//! As a baseline implementation, this crate provides a constant-time pure Rust
+//! implementation based on [fixslicing], a more advanced form of bitslicing
+//! implemented entirely in terms of bitwise arithmetic with no use of any
+//! lookup tables or data-dependent branches.
+//!
+//! Enabling the `compact` Cargo feature will reduce the code size of this
+//! backend at the cost of decreased performance (using a modified form of
+//! the fixslicing technique called "semi-fixslicing").
+//!
+//! ## ARMv8 intrinsics (nightly-only)
+//! On `aarch64` targets including `aarch64-apple-darwin` (Apple M1) and Linux
+//! targets such as `aarch64-unknown-linux-gnu` and `aarch64-unknown-linux-musl`,
+//! support for using AES intrinsics provided by the ARMv8 Cryptography Extensions
+//! is available when using the nightly compiler, and can be enabled using the
+//! `armv8` crate feature.
+//!
+//! On Linux and macOS, when the `armv8` feature is enabled, support for AES
+//! intrinsics is autodetected at runtime. On other platforms the `aes`
+//! target feature must be enabled via RUSTFLAGS.
+//!
+//! ## `x86`/`x86_64` intrinsics (AES-NI)
+//! By default this crate uses runtime detection on `i686`/`x86_64` targets
+//! in order to determine if AES-NI is available, and if it is not, it will
+//! fall back to using a constant-time software implementation.
+//!
+//! Passing `RUSTFLAGS=-Ctarget-feature=+aes,+ssse3` explicitly at compile-time
+//! will override runtime detection and ensure that AES-NI is always used.
+//! Programs built in this manner will crash with an illegal instruction on
+//! CPUs which do not have AES-NI enabled.
+//!
+//! Note: runtime detection is not possible on SGX targets. Please use the
+//! aforementioned `RUSTFLAGS` to leverage AES-NI on these targets.
+//!
+//! # Usage example
+//! ```
+//! use aes::{Aes128, Block, ParBlocks};
+//! use aes::cipher::{
+//! BlockCipher, BlockEncrypt, BlockDecrypt, NewBlockCipher,
+//! generic_array::GenericArray,
+//! };
+//!
+//! let key = GenericArray::from_slice(&[0u8; 16]);
+//! let mut block = Block::default();
+//! let mut block8 = ParBlocks::default();
+//!
+//! // Initialize cipher
+//! let cipher = Aes128::new(&key);
+//!
+//! let block_copy = block.clone();
+//!
+//! // Encrypt block in-place
+//! cipher.encrypt_block(&mut block);
+//!
+//! // And decrypt it back
+//! cipher.decrypt_block(&mut block);
+//! assert_eq!(block, block_copy);
+//!
+//! // We can encrypt 8 blocks simultaneously using
+//! // instruction-level parallelism
+//! let block8_copy = block8.clone();
+//! cipher.encrypt_par_blocks(&mut block8);
+//! cipher.decrypt_par_blocks(&mut block8);
+//! assert_eq!(block8, block8_copy);
+//! ```
+//!
+//! For implementations of block cipher modes of operation see the
+//! [`block-modes`] crate.
+//!
+//! [fixslicing]: https://eprint.iacr.org/2020/1123.pdf
+//! [AES-NI]: https://en.wikipedia.org/wiki/AES_instruction_set
+//! [`block-modes`]: https://docs.rs/block-modes
+
+#![no_std]
+#![cfg_attr(
+ all(feature = "armv8", target_arch = "aarch64"),
+ feature(stdsimd, aarch64_target_feature)
+)]
+#![cfg_attr(docsrs, feature(doc_cfg))]
+#![doc(
+ html_logo_url = "https://raw.githubusercontent.com/RustCrypto/meta/master/logo.svg",
+ html_favicon_url = "https://raw.githubusercontent.com/RustCrypto/meta/master/logo.svg"
+)]
+#![warn(missing_docs, rust_2018_idioms)]
+
+#[cfg(feature = "hazmat")]
+pub mod hazmat;
+
+mod soft;
+
+use cfg_if::cfg_if;
+
+cfg_if! {
+ if #[cfg(all(target_arch = "aarch64", feature = "armv8", not(feature = "force-soft")))] {
+ mod armv8;
+ mod autodetect;
+ pub use autodetect::{Aes128, Aes192, Aes256};
+
+ #[cfg(feature = "ctr")]
+ pub use autodetect::ctr::{Aes128Ctr, Aes192Ctr, Aes256Ctr};
+ } else if #[cfg(all(
+ any(target_arch = "x86", target_arch = "x86_64"),
+ not(feature = "force-soft")
+ ))] {
+ mod autodetect;
+ mod ni;
+ pub use autodetect::{Aes128, Aes192, Aes256};
+
+ #[cfg(feature = "ctr")]
+ pub use autodetect::ctr::{Aes128Ctr, Aes192Ctr, Aes256Ctr};
+ } else {
+ pub use soft::{Aes128, Aes192, Aes256};
+
+ #[cfg(feature = "ctr")]
+ pub use soft::{Aes128Ctr, Aes192Ctr, Aes256Ctr};
+ }
+}
+
+pub use cipher::{self, BlockCipher, BlockDecrypt, BlockEncrypt, NewBlockCipher};
+
+/// 128-bit AES block
+pub type Block = cipher::generic_array::GenericArray<u8, cipher::consts::U16>;
+
+/// 8 x 128-bit AES blocks to be processed in parallel
+pub type ParBlocks = cipher::generic_array::GenericArray<Block, cipher::consts::U8>;
+
+/// Size of an AES block (128-bits; 16-bytes)
+pub const BLOCK_SIZE: usize = 16;
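
Beyond the block-level traits, the re-exported CTR types (behind the `ctr` feature) are constructed from a block cipher plus a nonce via `FromBlockCipher`, as the autodetect module earlier in this diff shows. A hypothetical sketch assuming the `ctr` feature is enabled:

use aes::cipher::{
    generic_array::GenericArray, FromBlockCipher, NewBlockCipher, StreamCipher,
};
use aes::{Aes128, Aes128Ctr};

fn main() {
    let key = GenericArray::from_slice(&[0u8; 16]);
    let nonce = GenericArray::from_slice(&[0u8; 16]);

    // Build the stream cipher from the block cipher plus a 16-byte nonce.
    let block_cipher = Aes128::new(key);
    let mut ctr = Aes128Ctr::from_block_cipher(block_cipher, nonce);

    // Encrypt in place; applying the same keystream again would decrypt.
    let mut data = *b"hello AES-128-CTR";
    ctr.apply_keystream(&mut data);
}
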
diff --git a/rust/vendor/aes/src/ni.rs b/rust/vendor/aes/src/ni.rs
new file mode 100644
index 0000000..56e8e1f
--- /dev/null
+++ b/rust/vendor/aes/src/ni.rs
@@ -0,0 +1,45 @@
+//! AES block cipher implementations using the AES-NI instruction set.
+//!
+//! Cipher functionality is accessed using the `BlockCipher` trait from the
+//! [`cipher`](https://docs.rs/cipher) crate.
+//!
+//! # CTR mode
+//! In addition to the core block cipher functionality, this crate provides an
+//! optimized CTR mode implementation. This functionality additionally requires
+//! the `ssse3` target feature and is gated behind the `ctr` feature flag, which
+//! is enabled by default.
+//!
+//! # Vulnerability
+//! The Lazy FP state restore vulnerability can allow a local process to leak the
+//! contents of the FPU registers, in which round keys are stored. This vulnerability
+//! can be mitigated at the operating system level by installing relevant
+//! patches. (i.e. keep your OS updated!) More info:
+//! - [Intel advisory](https://www.intel.com/content/www/us/en/security-center/advisory/intel-sa-00145.html)
+//! - [Wikipedia](https://en.wikipedia.org/wiki/Lazy_FP_state_restore)
+//!
+//! # Related documents
+//! - [Intel AES-NI whitepaper](https://software.intel.com/sites/default/files/article/165683/aes-wp-2012-09-22-v01.pdf)
+//! - [Use of the AES Instruction Set](https://www.cosic.esat.kuleuven.be/ecrypt/AESday/slides/Use_of_the_AES_Instruction_Set.pdf)
+
+#[macro_use]
+mod utils;
+
+mod aes128;
+mod aes192;
+mod aes256;
+
+#[cfg(feature = "ctr")]
+mod ctr;
+
+#[cfg(feature = "hazmat")]
+pub(crate) mod hazmat;
+
+#[cfg(target_arch = "x86")]
+use core::arch::x86 as arch;
+#[cfg(target_arch = "x86_64")]
+use core::arch::x86_64 as arch;
+
+pub use self::{aes128::Aes128, aes192::Aes192, aes256::Aes256};
+
+#[cfg(feature = "ctr")]
+pub use self::ctr::{Aes128Ctr, Aes192Ctr, Aes256Ctr};
diff --git a/rust/vendor/aes/src/ni/aes128.rs b/rust/vendor/aes/src/ni/aes128.rs
new file mode 100644
index 0000000..f079fdd
--- /dev/null
+++ b/rust/vendor/aes/src/ni/aes128.rs
@@ -0,0 +1,163 @@
+use super::{
+ arch::*,
+ utils::{aesdec8, aesdeclast8, aesenc8, aesenclast8, load8, store8, xor8, U128x8},
+};
+use crate::{Block, ParBlocks};
+use cipher::{
+ consts::{U16, U8},
+ generic_array::GenericArray,
+ BlockCipher, BlockDecrypt, BlockEncrypt, NewBlockCipher,
+};
+
+mod expand;
+#[cfg(test)]
+mod test_expand;
+
+/// AES-128 round keys
+type RoundKeys = [__m128i; 11];
+
+/// AES-128 block cipher
+#[derive(Clone)]
+pub struct Aes128 {
+ encrypt_keys: RoundKeys,
+ decrypt_keys: RoundKeys,
+}
+
+impl Aes128 {
+ #[inline(always)]
+ pub(crate) fn encrypt8(&self, mut blocks: U128x8) -> U128x8 {
+ #[inline]
+ #[target_feature(enable = "aes")]
+ unsafe fn aesni128_encrypt8(keys: &RoundKeys, blocks: &mut U128x8) {
+ xor8(blocks, keys[0]);
+ aesenc8(blocks, keys[1]);
+ aesenc8(blocks, keys[2]);
+ aesenc8(blocks, keys[3]);
+ aesenc8(blocks, keys[4]);
+ aesenc8(blocks, keys[5]);
+ aesenc8(blocks, keys[6]);
+ aesenc8(blocks, keys[7]);
+ aesenc8(blocks, keys[8]);
+ aesenc8(blocks, keys[9]);
+ aesenclast8(blocks, keys[10]);
+ }
+ unsafe { aesni128_encrypt8(&self.encrypt_keys, &mut blocks) };
+ blocks
+ }
+
+ #[inline(always)]
+ pub(crate) fn encrypt(&self, block: __m128i) -> __m128i {
+ #[inline]
+ #[target_feature(enable = "aes")]
+ unsafe fn aesni128_encrypt1(keys: &RoundKeys, mut block: __m128i) -> __m128i {
+ block = _mm_xor_si128(block, keys[0]);
+ block = _mm_aesenc_si128(block, keys[1]);
+ block = _mm_aesenc_si128(block, keys[2]);
+ block = _mm_aesenc_si128(block, keys[3]);
+ block = _mm_aesenc_si128(block, keys[4]);
+ block = _mm_aesenc_si128(block, keys[5]);
+ block = _mm_aesenc_si128(block, keys[6]);
+ block = _mm_aesenc_si128(block, keys[7]);
+ block = _mm_aesenc_si128(block, keys[8]);
+ block = _mm_aesenc_si128(block, keys[9]);
+ _mm_aesenclast_si128(block, keys[10])
+ }
+ unsafe { aesni128_encrypt1(&self.encrypt_keys, block) }
+ }
+}
+
+impl NewBlockCipher for Aes128 {
+ type KeySize = U16;
+
+ #[inline]
+ fn new(key: &GenericArray<u8, U16>) -> Self {
+ let key = unsafe { &*(key as *const _ as *const [u8; 16]) };
+
+ let (encrypt_keys, decrypt_keys) = expand::expand(key);
+
+ Self {
+ encrypt_keys,
+ decrypt_keys,
+ }
+ }
+}
+
+impl BlockCipher for Aes128 {
+ type BlockSize = U16;
+ type ParBlocks = U8;
+}
+
+impl BlockEncrypt for Aes128 {
+ #[inline]
+ fn encrypt_block(&self, block: &mut Block) {
+ // Safety: `loadu` and `storeu` support unaligned access
+ #[allow(clippy::cast_ptr_alignment)]
+ unsafe {
+ let b = _mm_loadu_si128(block.as_ptr() as *const __m128i);
+ let b = self.encrypt(b);
+ _mm_storeu_si128(block.as_mut_ptr() as *mut __m128i, b);
+ }
+ }
+
+ #[inline]
+ fn encrypt_par_blocks(&self, blocks: &mut ParBlocks) {
+ let b = self.encrypt8(load8(blocks));
+ store8(blocks, b);
+ }
+}
+
+impl BlockDecrypt for Aes128 {
+ #[inline]
+ fn decrypt_block(&self, block: &mut Block) {
+ #[inline]
+ #[target_feature(enable = "aes")]
+ unsafe fn aes128_decrypt1(block: &mut Block, keys: &RoundKeys) {
+ // Safety: `loadu` and `storeu` support unaligned access
+ #[allow(clippy::cast_ptr_alignment)]
+ let mut b = _mm_loadu_si128(block.as_ptr() as *const __m128i);
+
+ b = _mm_xor_si128(b, keys[10]);
+ b = _mm_aesdec_si128(b, keys[9]);
+ b = _mm_aesdec_si128(b, keys[8]);
+ b = _mm_aesdec_si128(b, keys[7]);
+ b = _mm_aesdec_si128(b, keys[6]);
+ b = _mm_aesdec_si128(b, keys[5]);
+ b = _mm_aesdec_si128(b, keys[4]);
+ b = _mm_aesdec_si128(b, keys[3]);
+ b = _mm_aesdec_si128(b, keys[2]);
+ b = _mm_aesdec_si128(b, keys[1]);
+ b = _mm_aesdeclast_si128(b, keys[0]);
+
+ // Safety: `loadu` and `storeu` support unaligned access
+ #[allow(clippy::cast_ptr_alignment)]
+ _mm_storeu_si128(block.as_mut_ptr() as *mut __m128i, b);
+ }
+
+ unsafe { aes128_decrypt1(block, &self.decrypt_keys) }
+ }
+
+ #[inline]
+ fn decrypt_par_blocks(&self, blocks: &mut ParBlocks) {
+ #[inline]
+ #[target_feature(enable = "aes")]
+ unsafe fn aes128_decrypt8(blocks: &mut ParBlocks, keys: &RoundKeys) {
+ let mut b = load8(blocks);
+ xor8(&mut b, keys[10]);
+ aesdec8(&mut b, keys[9]);
+ aesdec8(&mut b, keys[8]);
+ aesdec8(&mut b, keys[7]);
+ aesdec8(&mut b, keys[6]);
+ aesdec8(&mut b, keys[5]);
+ aesdec8(&mut b, keys[4]);
+ aesdec8(&mut b, keys[3]);
+ aesdec8(&mut b, keys[2]);
+ aesdec8(&mut b, keys[1]);
+ aesdeclast8(&mut b, keys[0]);
+ store8(blocks, b);
+ }
+
+ unsafe { aes128_decrypt8(blocks, &self.decrypt_keys) }
+ }
+}
+
+opaque_debug::implement!(Aes128);
diff --git a/rust/vendor/aes/src/ni/aes128/expand.rs b/rust/vendor/aes/src/ni/aes128/expand.rs
new file mode 100644
index 0000000..f7b65b6
--- /dev/null
+++ b/rust/vendor/aes/src/ni/aes128/expand.rs
@@ -0,0 +1,53 @@
+use super::RoundKeys;
+use crate::ni::arch::*;
+
+use core::mem;
+
+macro_rules! expand_round {
+ ($enc_keys:expr, $dec_keys:expr, $pos:expr, $round:expr) => {
+ let mut t1 = $enc_keys[$pos - 1];
+ let mut t2;
+ let mut t3;
+
+ t2 = _mm_aeskeygenassist_si128(t1, $round);
+ t2 = _mm_shuffle_epi32(t2, 0xff);
+ t3 = _mm_slli_si128(t1, 0x4);
+ t1 = _mm_xor_si128(t1, t3);
+ t3 = _mm_slli_si128(t3, 0x4);
+ t1 = _mm_xor_si128(t1, t3);
+ t3 = _mm_slli_si128(t3, 0x4);
+ t1 = _mm_xor_si128(t1, t3);
+ t1 = _mm_xor_si128(t1, t2);
+
+ $enc_keys[$pos] = t1;
+ let t1 = if $pos != 10 { _mm_aesimc_si128(t1) } else { t1 };
+ $dec_keys[$pos] = t1;
+ };
+}
+
+#[inline(always)]
+pub(super) fn expand(key: &[u8; 16]) -> (RoundKeys, RoundKeys) {
+ unsafe {
+ let mut enc_keys: RoundKeys = mem::zeroed();
+ let mut dec_keys: RoundKeys = mem::zeroed();
+
+ // Safety: `loadu` supports unaligned loads
+ #[allow(clippy::cast_ptr_alignment)]
+ let k = _mm_loadu_si128(key.as_ptr() as *const __m128i);
+ enc_keys[0] = k;
+ dec_keys[0] = k;
+
+ expand_round!(enc_keys, dec_keys, 1, 0x01);
+ expand_round!(enc_keys, dec_keys, 2, 0x02);
+ expand_round!(enc_keys, dec_keys, 3, 0x04);
+ expand_round!(enc_keys, dec_keys, 4, 0x08);
+ expand_round!(enc_keys, dec_keys, 5, 0x10);
+ expand_round!(enc_keys, dec_keys, 6, 0x20);
+ expand_round!(enc_keys, dec_keys, 7, 0x40);
+ expand_round!(enc_keys, dec_keys, 8, 0x80);
+ expand_round!(enc_keys, dec_keys, 9, 0x1B);
+ expand_round!(enc_keys, dec_keys, 10, 0x36);
+
+ (enc_keys, dec_keys)
+ }
+}
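
The `_mm_slli_si128`/`_mm_xor_si128` chain in the macro above computes a running prefix XOR across the four words of the previous round key, while `_mm_aeskeygenassist_si128` plus the `0xff` shuffle broadcasts SubWord(RotWord(w3)) ^ Rcon. A hypothetical scalar check of that structure against the FIPS 197 Appendix A.1 vectors (big-endian word notation; the assist value `t` is precomputed from the S-box):

fn main() {
    // Words of the FIPS 197 A.1 cipher key (round key 0).
    let prev = [0x2b7e1516u32, 0x28aed2a6, 0xabf71588, 0x09cf4f3c];
    // t = SubWord(RotWord(prev[3])) ^ Rcon[1], precomputed from the S-box.
    let t = 0x8b84_eb01u32;

    // Running prefix XOR seeded with t: next[i] = prev[i] ^ next[i - 1].
    let mut next = [0u32; 4];
    next[0] = prev[0] ^ t;
    for i in 1..4 {
        next[i] = prev[i] ^ next[i - 1];
    }

    // Round key 1 from FIPS 197 Appendix A.1 ("a0fafe1788542cb123a339392a6c7605").
    assert_eq!(next, [0xa0fafe17, 0x88542cb1, 0x23a33939, 0x2a6c7605]);
}
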
diff --git a/rust/vendor/aes/src/ni/aes128/test_expand.rs b/rust/vendor/aes/src/ni/aes128/test_expand.rs
new file mode 100644
index 0000000..38744e6
--- /dev/null
+++ b/rust/vendor/aes/src/ni/aes128/test_expand.rs
@@ -0,0 +1,107 @@
+use super::expand::expand;
+use crate::ni::utils::check;
+
+#[test]
+fn test() {
+ let enc_keys = expand(&[0x00; 16]).0;
+ check(
+ &enc_keys,
+ &[
+ [0x0000000000000000, 0x0000000000000000],
+ [0x6263636362636363, 0x6263636362636363],
+ [0x9b9898c9f9fbfbaa, 0x9b9898c9f9fbfbaa],
+ [0x90973450696ccffa, 0xf2f457330b0fac99],
+ [0xee06da7b876a1581, 0x759e42b27e91ee2b],
+ [0x7f2e2b88f8443e09, 0x8dda7cbbf34b9290],
+ [0xec614b851425758c, 0x99ff09376ab49ba7],
+ [0x217517873550620b, 0xacaf6b3cc61bf09b],
+ [0x0ef903333ba96138, 0x97060a04511dfa9f],
+ [0xb1d4d8e28a7db9da, 0x1d7bb3de4c664941],
+ [0xb4ef5bcb3e92e211, 0x23e951cf6f8f188e],
+ ],
+ );
+
+ let enc_keys = expand(&[0xff; 16]).0;
+ check(
+ &enc_keys,
+ &[
+ [0xffffffffffffffff, 0xffffffffffffffff],
+ [0xe8e9e9e917161616, 0xe8e9e9e917161616],
+ [0xadaeae19bab8b80f, 0x525151e6454747f0],
+ [0x090e2277b3b69a78, 0xe1e7cb9ea4a08c6e],
+ [0xe16abd3e52dc2746, 0xb33becd8179b60b6],
+ [0xe5baf3ceb766d488, 0x045d385013c658e6],
+ [0x71d07db3c6b6a93b, 0xc2eb916bd12dc98d],
+ [0xe90d208d2fbb89b6, 0xed5018dd3c7dd150],
+ [0x96337366b988fad0, 0x54d8e20d68a5335d],
+ [0x8bf03f233278c5f3, 0x66a027fe0e0514a3],
+ [0xd60a3588e472f07b, 0x82d2d7858cd7c326],
+ ],
+ );
+
+ let enc_keys = expand(&[
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
+ 0x0f,
+ ])
+ .0;
+ check(
+ &enc_keys,
+ &[
+ [0x0001020304050607, 0x08090a0b0c0d0e0f],
+ [0xd6aa74fdd2af72fa, 0xdaa678f1d6ab76fe],
+ [0xb692cf0b643dbdf1, 0xbe9bc5006830b3fe],
+ [0xb6ff744ed2c2c9bf, 0x6c590cbf0469bf41],
+ [0x47f7f7bc95353e03, 0xf96c32bcfd058dfd],
+ [0x3caaa3e8a99f9deb, 0x50f3af57adf622aa],
+ [0x5e390f7df7a69296, 0xa7553dc10aa31f6b],
+ [0x14f9701ae35fe28c, 0x440adf4d4ea9c026],
+ [0x47438735a41c65b9, 0xe016baf4aebf7ad2],
+ [0x549932d1f0855768, 0x1093ed9cbe2c974e],
+ [0x13111d7fe3944a17, 0xf307a78b4d2b30c5],
+ ],
+ );
+
+ let enc_keys = expand(&[
+ 0x69, 0x20, 0xe2, 0x99, 0xa5, 0x20, 0x2a, 0x6d, 0x65, 0x6e, 0x63, 0x68, 0x69, 0x74, 0x6f,
+ 0x2a,
+ ])
+ .0;
+ check(
+ &enc_keys,
+ &[
+ [0x6920e299a5202a6d, 0x656e636869746f2a],
+ [0xfa8807605fa82d0d, 0x3ac64e6553b2214f],
+ [0xcf75838d90ddae80, 0xaa1be0e5f9a9c1aa],
+ [0x180d2f1488d08194, 0x22cb6171db62a0db],
+ [0xbaed96ad323d1739, 0x10f67648cb94d693],
+ [0x881b4ab2ba265d8b, 0xaad02bc36144fd50],
+ [0xb34f195d096944d6, 0xa3b96f15c2fd9245],
+ [0xa7007778ae6933ae, 0x0dd05cbbcf2dcefe],
+ [0xff8bccf251e2ff5c, 0x5c32a3e7931f6d19],
+ [0x24b7182e7555e772, 0x29674495ba78298c],
+ [0xae127cdadb479ba8, 0xf220df3d4858f6b1],
+ ],
+ );
+
+ let enc_keys = expand(&[
+ 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f,
+ 0x3c,
+ ])
+ .0;
+ check(
+ &enc_keys,
+ &[
+ [0x2b7e151628aed2a6, 0xabf7158809cf4f3c],
+ [0xa0fafe1788542cb1, 0x23a339392a6c7605],
+ [0xf2c295f27a96b943, 0x5935807a7359f67f],
+ [0x3d80477d4716fe3e, 0x1e237e446d7a883b],
+ [0xef44a541a8525b7f, 0xb671253bdb0bad00],
+ [0xd4d1c6f87c839d87, 0xcaf2b8bc11f915bc],
+ [0x6d88a37a110b3efd, 0xdbf98641ca0093fd],
+ [0x4e54f70e5f5fc9f3, 0x84a64fb24ea6dc4f],
+ [0xead27321b58dbad2, 0x312bf5607f8d292f],
+ [0xac7766f319fadc21, 0x28d12941575c006e],
+ [0xd014f9a8c9ee2589, 0xe13f0cc8b6630ca6],
+ ],
+ );
+}
diff --git a/rust/vendor/aes/src/ni/aes192.rs b/rust/vendor/aes/src/ni/aes192.rs
new file mode 100644
index 0000000..fb64289
--- /dev/null
+++ b/rust/vendor/aes/src/ni/aes192.rs
@@ -0,0 +1,169 @@
+use super::{
+ arch::*,
+ utils::{aesdec8, aesdeclast8, aesenc8, aesenclast8, load8, store8, xor8, U128x8},
+};
+use crate::{Block, ParBlocks};
+use cipher::{
+ consts::{U16, U24, U8},
+ generic_array::GenericArray,
+ BlockCipher, BlockDecrypt, BlockEncrypt, NewBlockCipher,
+};
+
+mod expand;
+#[cfg(test)]
+mod test_expand;
+
+/// AES-192 round keys
+type RoundKeys = [__m128i; 13];
+
+/// AES-192 block cipher
+#[derive(Clone)]
+pub struct Aes192 {
+ encrypt_keys: RoundKeys,
+ decrypt_keys: RoundKeys,
+}
+
+impl Aes192 {
+ #[inline(always)]
+ pub(crate) fn encrypt8(&self, mut blocks: U128x8) -> U128x8 {
+ #[inline]
+ #[target_feature(enable = "aes")]
+ unsafe fn aesni192_encrypt8(keys: &RoundKeys, blocks: &mut U128x8) {
+ xor8(blocks, keys[0]);
+ aesenc8(blocks, keys[1]);
+ aesenc8(blocks, keys[2]);
+ aesenc8(blocks, keys[3]);
+ aesenc8(blocks, keys[4]);
+ aesenc8(blocks, keys[5]);
+ aesenc8(blocks, keys[6]);
+ aesenc8(blocks, keys[7]);
+ aesenc8(blocks, keys[8]);
+ aesenc8(blocks, keys[9]);
+ aesenc8(blocks, keys[10]);
+ aesenc8(blocks, keys[11]);
+ aesenclast8(blocks, keys[12]);
+ }
+ unsafe { aesni192_encrypt8(&self.encrypt_keys, &mut blocks) };
+ blocks
+ }
+
+ #[inline(always)]
+ pub(crate) fn encrypt(&self, block: __m128i) -> __m128i {
+ #[inline]
+ #[target_feature(enable = "aes")]
+ unsafe fn aesni192_encrypt1(keys: &RoundKeys, mut block: __m128i) -> __m128i {
+ block = _mm_xor_si128(block, keys[0]);
+ block = _mm_aesenc_si128(block, keys[1]);
+ block = _mm_aesenc_si128(block, keys[2]);
+ block = _mm_aesenc_si128(block, keys[3]);
+ block = _mm_aesenc_si128(block, keys[4]);
+ block = _mm_aesenc_si128(block, keys[5]);
+ block = _mm_aesenc_si128(block, keys[6]);
+ block = _mm_aesenc_si128(block, keys[7]);
+ block = _mm_aesenc_si128(block, keys[8]);
+ block = _mm_aesenc_si128(block, keys[9]);
+ block = _mm_aesenc_si128(block, keys[10]);
+ block = _mm_aesenc_si128(block, keys[11]);
+ _mm_aesenclast_si128(block, keys[12])
+ }
+ unsafe { aesni192_encrypt1(&self.encrypt_keys, block) }
+ }
+}
+
+impl NewBlockCipher for Aes192 {
+ type KeySize = U24;
+
+ #[inline]
+ fn new(key: &GenericArray<u8, U24>) -> Self {
+ let key = unsafe { &*(key as *const _ as *const [u8; 24]) };
+ let (encrypt_keys, decrypt_keys) = expand::expand(key);
+ Self {
+ encrypt_keys,
+ decrypt_keys,
+ }
+ }
+}
+
+impl BlockCipher for Aes192 {
+ type BlockSize = U16;
+ type ParBlocks = U8;
+}
+
+impl BlockEncrypt for Aes192 {
+ #[inline]
+ fn encrypt_block(&self, block: &mut Block) {
+ // Safety: `loadu` and `storeu` support unaligned access
+ #[allow(clippy::cast_ptr_alignment)]
+ unsafe {
+ let b = _mm_loadu_si128(block.as_ptr() as *const __m128i);
+ let b = self.encrypt(b);
+ _mm_storeu_si128(block.as_mut_ptr() as *mut __m128i, b);
+ }
+ }
+
+ #[inline]
+ fn encrypt_par_blocks(&self, blocks: &mut ParBlocks) {
+ let b = self.encrypt8(load8(blocks));
+ store8(blocks, b);
+ }
+}
+
+impl BlockDecrypt for Aes192 {
+ #[inline]
+ fn decrypt_block(&self, block: &mut Block) {
+ #[inline]
+ #[target_feature(enable = "aes")]
+ unsafe fn aes192_decrypt1(block: &mut Block, keys: &RoundKeys) {
+ // Safety: `loadu` and `storeu` support unaligned access
+ #[allow(clippy::cast_ptr_alignment)]
+ let mut b = _mm_loadu_si128(block.as_ptr() as *const __m128i);
+
+ b = _mm_xor_si128(b, keys[12]);
+ b = _mm_aesdec_si128(b, keys[11]);
+ b = _mm_aesdec_si128(b, keys[10]);
+ b = _mm_aesdec_si128(b, keys[9]);
+ b = _mm_aesdec_si128(b, keys[8]);
+ b = _mm_aesdec_si128(b, keys[7]);
+ b = _mm_aesdec_si128(b, keys[6]);
+ b = _mm_aesdec_si128(b, keys[5]);
+ b = _mm_aesdec_si128(b, keys[4]);
+ b = _mm_aesdec_si128(b, keys[3]);
+ b = _mm_aesdec_si128(b, keys[2]);
+ b = _mm_aesdec_si128(b, keys[1]);
+ b = _mm_aesdeclast_si128(b, keys[0]);
+
+ // Safety: `loadu` and `storeu` support unaligned access
+ #[allow(clippy::cast_ptr_alignment)]
+ _mm_storeu_si128(block.as_mut_ptr() as *mut __m128i, b);
+ }
+
+ unsafe { aes192_decrypt1(block, &self.decrypt_keys) }
+ }
+
+ #[inline]
+ fn decrypt_par_blocks(&self, blocks: &mut ParBlocks) {
+ #[inline]
+ #[target_feature(enable = "aes")]
+ unsafe fn aes192_decrypt8(blocks: &mut ParBlocks, keys: &RoundKeys) {
+ let mut b = load8(blocks);
+ xor8(&mut b, keys[12]);
+ aesdec8(&mut b, keys[11]);
+ aesdec8(&mut b, keys[10]);
+ aesdec8(&mut b, keys[9]);
+ aesdec8(&mut b, keys[8]);
+ aesdec8(&mut b, keys[7]);
+ aesdec8(&mut b, keys[6]);
+ aesdec8(&mut b, keys[5]);
+ aesdec8(&mut b, keys[4]);
+ aesdec8(&mut b, keys[3]);
+ aesdec8(&mut b, keys[2]);
+ aesdec8(&mut b, keys[1]);
+ aesdeclast8(&mut b, keys[0]);
+ store8(blocks, b);
+ }
+
+ unsafe { aes192_decrypt8(blocks, &self.decrypt_keys) }
+ }
+}
+
+opaque_debug::implement!(Aes192);
diff --git a/rust/vendor/aes/src/ni/aes192/expand.rs b/rust/vendor/aes/src/ni/aes192/expand.rs
new file mode 100644
index 0000000..797b986
--- /dev/null
+++ b/rust/vendor/aes/src/ni/aes192/expand.rs
@@ -0,0 +1,108 @@
+use super::RoundKeys;
+use crate::ni::arch::*;
+
+use core::{mem, ptr};
+
+macro_rules! expand_round {
+ ($t1:expr, $t3:expr, $round:expr) => {{
+ let mut t1 = $t1;
+ let mut t2;
+ let mut t3 = $t3;
+ let mut t4;
+
+ t2 = _mm_aeskeygenassist_si128(t3, $round);
+ t2 = _mm_shuffle_epi32(t2, 0x55);
+ t4 = _mm_slli_si128(t1, 0x4);
+ t1 = _mm_xor_si128(t1, t4);
+ t4 = _mm_slli_si128(t4, 0x4);
+ t1 = _mm_xor_si128(t1, t4);
+ t4 = _mm_slli_si128(t4, 0x4);
+ t1 = _mm_xor_si128(t1, t4);
+ t1 = _mm_xor_si128(t1, t2);
+ t2 = _mm_shuffle_epi32(t1, 0xff);
+ t4 = _mm_slli_si128(t3, 0x4);
+ t3 = _mm_xor_si128(t3, t4);
+ t3 = _mm_xor_si128(t3, t2);
+
+ (t1, t3)
+ }};
+}
+
+macro_rules! shuffle {
+ ($a:expr, $b:expr, $imm:expr) => {
+ mem::transmute::<_, __m128i>(_mm_shuffle_pd(mem::transmute($a), mem::transmute($b), $imm))
+ };
+}
+
+#[inline(always)]
+pub(super) fn expand(key: &[u8; 24]) -> (RoundKeys, RoundKeys) {
+ unsafe {
+ let mut enc_keys: RoundKeys = mem::zeroed();
+ let mut dec_keys: RoundKeys = mem::zeroed();
+
+ macro_rules! store {
+ ($i:expr, $k:expr) => {
+ enc_keys[$i] = $k;
+ dec_keys[$i] = _mm_aesimc_si128($k);
+ };
+ }
+
+ // we are being extra pedantic here to avoid out-of-bounds access.
+ // this should be optimized out into a movups, movsd sequence.
+ // note that an unaligned load MUST be used here, even though we read
+ // from the array (the compiler misoptimizes an aligned load)
+ let (k0, k1l) = {
+ let mut t = [0u8; 32];
+ ptr::write(t.as_mut_ptr() as *mut [u8; 24], *key);
+
+ // Safety: `loadu` supports unaligned loads
+ #[allow(clippy::cast_ptr_alignment)]
+ (
+ _mm_loadu_si128(t.as_ptr() as *const __m128i),
+ _mm_loadu_si128(t.as_ptr().offset(16) as *const __m128i),
+ )
+ };
+
+ enc_keys[0] = k0;
+ dec_keys[0] = k0;
+
+ let (k1_2, k2r) = expand_round!(k0, k1l, 0x01);
+ let k1 = shuffle!(k1l, k1_2, 0);
+ let k2 = shuffle!(k1_2, k2r, 1);
+ store!(1, k1);
+ store!(2, k2);
+
+ let (k3, k4l) = expand_round!(k1_2, k2r, 0x02);
+ store!(3, k3);
+
+ let (k4_5, k5r) = expand_round!(k3, k4l, 0x04);
+ let k4 = shuffle!(k4l, k4_5, 0);
+ let k5 = shuffle!(k4_5, k5r, 1);
+ store!(4, k4);
+ store!(5, k5);
+
+ let (k6, k7l) = expand_round!(k4_5, k5r, 0x08);
+ store!(6, k6);
+
+ let (k7_8, k8r) = expand_round!(k6, k7l, 0x10);
+ let k7 = shuffle!(k7l, k7_8, 0);
+ let k8 = shuffle!(k7_8, k8r, 1);
+ store!(7, k7);
+ store!(8, k8);
+
+ let (k9, k10l) = expand_round!(k7_8, k8r, 0x20);
+ store!(9, k9);
+
+ let (k10_11, k11r) = expand_round!(k9, k10l, 0x40);
+ let k10 = shuffle!(k10l, k10_11, 0);
+ let k11 = shuffle!(k10_11, k11r, 1);
+ store!(10, k10);
+ store!(11, k11);
+
+ let (k12, _) = expand_round!(k10_11, k11r, 0x80);
+ enc_keys[12] = k12;
+ dec_keys[12] = k12;
+
+ (enc_keys, dec_keys)
+ }
+}
diff --git a/rust/vendor/aes/src/ni/aes192/test_expand.rs b/rust/vendor/aes/src/ni/aes192/test_expand.rs
new file mode 100644
index 0000000..7811d4c
--- /dev/null
+++ b/rust/vendor/aes/src/ni/aes192/test_expand.rs
@@ -0,0 +1,93 @@
+use super::expand::expand;
+use crate::ni::utils::check;
+
+#[test]
+fn test() {
+ let enc_keys = expand(&[0x00; 24]).0;
+ check(
+ &enc_keys,
+ &[
+ [0x0000000000000000, 0x0000000000000000],
+ [0x0000000000000000, 0x6263636362636363],
+ [0x6263636362636363, 0x6263636362636363],
+ [0x9b9898c9f9fbfbaa, 0x9b9898c9f9fbfbaa],
+ [0x9b9898c9f9fbfbaa, 0x90973450696ccffa],
+ [0xf2f457330b0fac99, 0x90973450696ccffa],
+ [0xc81d19a9a171d653, 0x53858160588a2df9],
+ [0xc81d19a9a171d653, 0x7bebf49bda9a22c8],
+ [0x891fa3a8d1958e51, 0x198897f8b8f941ab],
+ [0xc26896f718f2b43f, 0x91ed1797407899c6],
+ [0x59f00e3ee1094f95, 0x83ecbc0f9b1e0830],
+ [0x0af31fa74a8b8661, 0x137b885ff272c7ca],
+ [0x432ac886d834c0b6, 0xd2c7df11984c5970],
+ ],
+ );
+
+ let enc_keys = expand(&[0xff; 24]).0;
+ check(
+ &enc_keys,
+ &[
+ [0xffffffffffffffff, 0xffffffffffffffff],
+ [0xffffffffffffffff, 0xe8e9e9e917161616],
+ [0xe8e9e9e917161616, 0xe8e9e9e917161616],
+ [0xadaeae19bab8b80f, 0x525151e6454747f0],
+ [0xadaeae19bab8b80f, 0xc5c2d8ed7f7a60e2],
+ [0x2d2b3104686c76f4, 0xc5c2d8ed7f7a60e2],
+ [0x1712403f686820dd, 0x454311d92d2f672d],
+ [0xe8edbfc09797df22, 0x8f8cd3b7e7e4f36a],
+ [0xa2a7e2b38f88859e, 0x67653a5ef0f2e57c],
+ [0x2655c33bc1b13051, 0x6316d2e2ec9e577c],
+ [0x8bfb6d227b09885e, 0x67919b1aa620ab4b],
+ [0xc53679a929a82ed5, 0xa25343f7d95acba9],
+ [0x598e482fffaee364, 0x3a989acd1330b418],
+ ],
+ );
+
+ let enc_keys = expand(&[
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
+ 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ ])
+ .0;
+ check(
+ &enc_keys,
+ &[
+ [0x0001020304050607, 0x08090a0b0c0d0e0f],
+ [0x1011121314151617, 0x5846f2f95c43f4fe],
+ [0x544afef55847f0fa, 0x4856e2e95c43f4fe],
+ [0x40f949b31cbabd4d, 0x48f043b810b7b342],
+ [0x58e151ab04a2a555, 0x7effb5416245080c],
+ [0x2ab54bb43a02f8f6, 0x62e3a95d66410c08],
+ [0xf501857297448d7e, 0xbdf1c6ca87f33e3c],
+ [0xe510976183519b69, 0x34157c9ea351f1e0],
+ [0x1ea0372a99530916, 0x7c439e77ff12051e],
+ [0xdd7e0e887e2fff68, 0x608fc842f9dcc154],
+ [0x859f5f237a8d5a3d, 0xc0c02952beefd63a],
+ [0xde601e7827bcdf2c, 0xa223800fd8aeda32],
+ [0xa4970a331a78dc09, 0xc418c271e3a41d5d],
+ ],
+ );
+
+ let enc_keys = expand(&[
+ 0x8e, 0x73, 0xb0, 0xf7, 0xda, 0x0e, 0x64, 0x52, 0xc8, 0x10, 0xf3, 0x2b, 0x80, 0x90, 0x79,
+ 0xe5, 0x62, 0xf8, 0xea, 0xd2, 0x52, 0x2c, 0x6b, 0x7b,
+ ])
+ .0;
+ check(
+ &enc_keys,
+ &[
+ [0x8e73b0f7da0e6452, 0xc810f32b809079e5],
+ [0x62f8ead2522c6b7b, 0xfe0c91f72402f5a5],
+ [0xec12068e6c827f6b, 0x0e7a95b95c56fec2],
+ [0x4db7b4bd69b54118, 0x85a74796e92538fd],
+ [0xe75fad44bb095386, 0x485af05721efb14f],
+ [0xa448f6d94d6dce24, 0xaa326360113b30e6],
+ [0xa25e7ed583b1cf9a, 0x27f939436a94f767],
+ [0xc0a69407d19da4e1, 0xec1786eb6fa64971],
+ [0x485f703222cb8755, 0xe26d135233f0b7b3],
+ [0x40beeb282f18a259, 0x6747d26b458c553e],
+ [0xa7e1466c9411f1df, 0x821f750aad07d753],
+ [0xca4005388fcc5006, 0x282d166abc3ce7b5],
+ [0xe98ba06f448c773c, 0x8ecc720401002202],
+ ],
+ );
+}
diff --git a/rust/vendor/aes/src/ni/aes256.rs b/rust/vendor/aes/src/ni/aes256.rs
new file mode 100644
index 0000000..9a752c1
--- /dev/null
+++ b/rust/vendor/aes/src/ni/aes256.rs
@@ -0,0 +1,177 @@
+use super::{
+ arch::*,
+ utils::{aesdec8, aesdeclast8, aesenc8, aesenclast8, load8, store8, xor8, U128x8},
+};
+use crate::{Block, ParBlocks};
+use cipher::{
+ consts::{U16, U32, U8},
+ generic_array::GenericArray,
+ BlockCipher, BlockDecrypt, BlockEncrypt, NewBlockCipher,
+};
+
+mod expand;
+#[cfg(test)]
+mod test_expand;
+
+/// AES-256 round keys
+type RoundKeys = [__m128i; 15];
+
+/// AES-256 block cipher
+#[derive(Clone)]
+pub struct Aes256 {
+ encrypt_keys: RoundKeys,
+ decrypt_keys: RoundKeys,
+}
+
+impl Aes256 {
+ #[inline(always)]
+ pub(crate) fn encrypt8(&self, mut blocks: U128x8) -> U128x8 {
+ #[inline]
+ #[target_feature(enable = "aes")]
+ unsafe fn aesni256_encrypt8(keys: &RoundKeys, blocks: &mut U128x8) {
+ xor8(blocks, keys[0]);
+ aesenc8(blocks, keys[1]);
+ aesenc8(blocks, keys[2]);
+ aesenc8(blocks, keys[3]);
+ aesenc8(blocks, keys[4]);
+ aesenc8(blocks, keys[5]);
+ aesenc8(blocks, keys[6]);
+ aesenc8(blocks, keys[7]);
+ aesenc8(blocks, keys[8]);
+ aesenc8(blocks, keys[9]);
+ aesenc8(blocks, keys[10]);
+ aesenc8(blocks, keys[11]);
+ aesenc8(blocks, keys[12]);
+ aesenc8(blocks, keys[13]);
+ aesenclast8(blocks, keys[14]);
+ }
+ unsafe { aesni256_encrypt8(&self.encrypt_keys, &mut blocks) };
+ blocks
+ }
+
+ #[inline(always)]
+ pub(crate) fn encrypt(&self, block: __m128i) -> __m128i {
+ #[inline]
+ #[target_feature(enable = "aes")]
+ unsafe fn aesni256_encrypt1(keys: &RoundKeys, mut block: __m128i) -> __m128i {
+ block = _mm_xor_si128(block, keys[0]);
+ block = _mm_aesenc_si128(block, keys[1]);
+ block = _mm_aesenc_si128(block, keys[2]);
+ block = _mm_aesenc_si128(block, keys[3]);
+ block = _mm_aesenc_si128(block, keys[4]);
+ block = _mm_aesenc_si128(block, keys[5]);
+ block = _mm_aesenc_si128(block, keys[6]);
+ block = _mm_aesenc_si128(block, keys[7]);
+ block = _mm_aesenc_si128(block, keys[8]);
+ block = _mm_aesenc_si128(block, keys[9]);
+ block = _mm_aesenc_si128(block, keys[10]);
+ block = _mm_aesenc_si128(block, keys[11]);
+ block = _mm_aesenc_si128(block, keys[12]);
+ block = _mm_aesenc_si128(block, keys[13]);
+ _mm_aesenclast_si128(block, keys[14])
+ }
+ unsafe { aesni256_encrypt1(&self.encrypt_keys, block) }
+ }
+}
+
+impl NewBlockCipher for Aes256 {
+ type KeySize = U32;
+
+ #[inline]
+ fn new(key: &GenericArray<u8, U32>) -> Self {
+ let key = unsafe { &*(key as *const _ as *const [u8; 32]) };
+ let (encrypt_keys, decrypt_keys) = expand::expand(key);
+ Self {
+ encrypt_keys,
+ decrypt_keys,
+ }
+ }
+}
+
+impl BlockCipher for Aes256 {
+ type BlockSize = U16;
+ type ParBlocks = U8;
+}
+
+impl BlockEncrypt for Aes256 {
+ #[inline]
+ fn encrypt_block(&self, block: &mut Block) {
+ // Safety: `loadu` and `storeu` support unaligned access
+ #[allow(clippy::cast_ptr_alignment)]
+ unsafe {
+ let b = _mm_loadu_si128(block.as_ptr() as *const __m128i);
+ let b = self.encrypt(b);
+ _mm_storeu_si128(block.as_mut_ptr() as *mut __m128i, b);
+ }
+ }
+
+ #[inline]
+ fn encrypt_par_blocks(&self, blocks: &mut ParBlocks) {
+ let b = self.encrypt8(load8(blocks));
+ store8(blocks, b);
+ }
+}
+
+impl BlockDecrypt for Aes256 {
+ #[inline]
+ fn decrypt_block(&self, block: &mut Block) {
+ #[inline]
+ #[target_feature(enable = "aes")]
+ unsafe fn aes256_decrypt1(block: &mut Block, keys: &RoundKeys) {
+ // Safety: `loadu` and `storeu` support unaligned access
+ #[allow(clippy::cast_ptr_alignment)]
+ let mut b = _mm_loadu_si128(block.as_ptr() as *const __m128i);
+
+ b = _mm_xor_si128(b, keys[14]);
+ b = _mm_aesdec_si128(b, keys[13]);
+ b = _mm_aesdec_si128(b, keys[12]);
+ b = _mm_aesdec_si128(b, keys[11]);
+ b = _mm_aesdec_si128(b, keys[10]);
+ b = _mm_aesdec_si128(b, keys[9]);
+ b = _mm_aesdec_si128(b, keys[8]);
+ b = _mm_aesdec_si128(b, keys[7]);
+ b = _mm_aesdec_si128(b, keys[6]);
+ b = _mm_aesdec_si128(b, keys[5]);
+ b = _mm_aesdec_si128(b, keys[4]);
+ b = _mm_aesdec_si128(b, keys[3]);
+ b = _mm_aesdec_si128(b, keys[2]);
+ b = _mm_aesdec_si128(b, keys[1]);
+ b = _mm_aesdeclast_si128(b, keys[0]);
+
+ // Safety: `loadu` and `storeu` support unaligned access
+ #[allow(clippy::cast_ptr_alignment)]
+ _mm_storeu_si128(block.as_mut_ptr() as *mut __m128i, b);
+ }
+
+ unsafe { aes256_decrypt1(block, &self.decrypt_keys) }
+ }
+
+ #[inline]
+ fn decrypt_par_blocks(&self, blocks: &mut ParBlocks) {
+ #[inline]
+ #[target_feature(enable = "aes")]
+ unsafe fn aes256_decrypt8(blocks: &mut ParBlocks, keys: &RoundKeys) {
+ let mut b = load8(blocks);
+ xor8(&mut b, keys[14]);
+ aesdec8(&mut b, keys[13]);
+ aesdec8(&mut b, keys[12]);
+ aesdec8(&mut b, keys[11]);
+ aesdec8(&mut b, keys[10]);
+ aesdec8(&mut b, keys[9]);
+ aesdec8(&mut b, keys[8]);
+ aesdec8(&mut b, keys[7]);
+ aesdec8(&mut b, keys[6]);
+ aesdec8(&mut b, keys[5]);
+ aesdec8(&mut b, keys[4]);
+ aesdec8(&mut b, keys[3]);
+ aesdec8(&mut b, keys[2]);
+ aesdec8(&mut b, keys[1]);
+ aesdeclast8(&mut b, keys[0]);
+ store8(blocks, b);
+ }
+
+ unsafe { aes256_decrypt8(blocks, &self.decrypt_keys) }
+ }
+}
+
+opaque_debug::implement!(Aes256);
diff --git a/rust/vendor/aes/src/ni/aes256/expand.rs b/rust/vendor/aes/src/ni/aes256/expand.rs
new file mode 100644
index 0000000..88b8558
--- /dev/null
+++ b/rust/vendor/aes/src/ni/aes256/expand.rs
@@ -0,0 +1,89 @@
+use super::RoundKeys;
+use crate::ni::arch::*;
+
+use core::mem;
+
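+// Each `expand_round!` derives two round keys: the first from an
+// `aeskeygenassist` with the round constant (taking the 0xff-shuffled word),
+// the second from a zero-constant assist (taking the 0xaa-shuffled word).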
+macro_rules! expand_round {
+ ($enc_keys:expr, $dec_keys:expr, $pos:expr, $round:expr) => {
+ let mut t1 = $enc_keys[$pos - 2];
+ let mut t2;
+ let mut t3 = $enc_keys[$pos - 1];
+ let mut t4;
+
+ t2 = _mm_aeskeygenassist_si128(t3, $round);
+ t2 = _mm_shuffle_epi32(t2, 0xff);
+ t4 = _mm_slli_si128(t1, 0x4);
+ t1 = _mm_xor_si128(t1, t4);
+ t4 = _mm_slli_si128(t4, 0x4);
+ t1 = _mm_xor_si128(t1, t4);
+ t4 = _mm_slli_si128(t4, 0x4);
+ t1 = _mm_xor_si128(t1, t4);
+ t1 = _mm_xor_si128(t1, t2);
+
+ $enc_keys[$pos] = t1;
+ $dec_keys[$pos] = _mm_aesimc_si128(t1);
+
+ t4 = _mm_aeskeygenassist_si128(t1, 0x00);
+ t2 = _mm_shuffle_epi32(t4, 0xaa);
+ t4 = _mm_slli_si128(t3, 0x4);
+ t3 = _mm_xor_si128(t3, t4);
+ t4 = _mm_slli_si128(t4, 0x4);
+ t3 = _mm_xor_si128(t3, t4);
+ t4 = _mm_slli_si128(t4, 0x4);
+ t3 = _mm_xor_si128(t3, t4);
+ t3 = _mm_xor_si128(t3, t2);
+
+ $enc_keys[$pos + 1] = t3;
+ $dec_keys[$pos + 1] = _mm_aesimc_si128(t3);
+ };
+}
+
+macro_rules! expand_round_last {
+ ($enc_keys:expr, $dec_keys:expr, $pos:expr, $round:expr) => {
+ let mut t1 = $enc_keys[$pos - 2];
+ let mut t2;
+ let t3 = $enc_keys[$pos - 1];
+ let mut t4;
+
+ t2 = _mm_aeskeygenassist_si128(t3, $round);
+ t2 = _mm_shuffle_epi32(t2, 0xff);
+ t4 = _mm_slli_si128(t1, 0x4);
+ t1 = _mm_xor_si128(t1, t4);
+ t4 = _mm_slli_si128(t4, 0x4);
+ t1 = _mm_xor_si128(t1, t4);
+ t4 = _mm_slli_si128(t4, 0x4);
+ t1 = _mm_xor_si128(t1, t4);
+ t1 = _mm_xor_si128(t1, t2);
+
+ $enc_keys[$pos] = t1;
+ $dec_keys[$pos] = t1;
+ };
+}
+
+#[inline(always)]
+pub(super) fn expand(key: &[u8; 32]) -> (RoundKeys, RoundKeys) {
+ // Safety: `loadu` and `storeu` support unaligned access
+ #[allow(clippy::cast_ptr_alignment)]
+ unsafe {
+ let mut enc_keys: RoundKeys = mem::zeroed();
+ let mut dec_keys: RoundKeys = mem::zeroed();
+
+ let kp = key.as_ptr() as *const __m128i;
+ let k1 = _mm_loadu_si128(kp);
+ let k2 = _mm_loadu_si128(kp.offset(1));
+ enc_keys[0] = k1;
+ dec_keys[0] = k1;
+ enc_keys[1] = k2;
+ dec_keys[1] = _mm_aesimc_si128(k2);
+
+ expand_round!(enc_keys, dec_keys, 2, 0x01);
+ expand_round!(enc_keys, dec_keys, 4, 0x02);
+ expand_round!(enc_keys, dec_keys, 6, 0x04);
+ expand_round!(enc_keys, dec_keys, 8, 0x08);
+ expand_round!(enc_keys, dec_keys, 10, 0x10);
+ expand_round!(enc_keys, dec_keys, 12, 0x20);
+ expand_round_last!(enc_keys, dec_keys, 14, 0x40);
+
+ (enc_keys, dec_keys)
+ }
+}
diff --git a/rust/vendor/aes/src/ni/aes256/test_expand.rs b/rust/vendor/aes/src/ni/aes256/test_expand.rs
new file mode 100644
index 0000000..52e728f
--- /dev/null
+++ b/rust/vendor/aes/src/ni/aes256/test_expand.rs
@@ -0,0 +1,103 @@
+use super::expand::expand;
+use crate::ni::utils::check;
+
+#[test]
+fn test() {
+ let enc_keys = expand(&[0x00; 32]).0;
+ check(
+ &enc_keys,
+ &[
+ [0x0000000000000000, 0x0000000000000000],
+ [0x0000000000000000, 0x0000000000000000],
+ [0x6263636362636363, 0x6263636362636363],
+ [0xaafbfbfbaafbfbfb, 0xaafbfbfbaafbfbfb],
+ [0x6f6c6ccf0d0f0fac, 0x6f6c6ccf0d0f0fac],
+ [0x7d8d8d6ad7767691, 0x7d8d8d6ad7767691],
+ [0x5354edc15e5be26d, 0x31378ea23c38810e],
+ [0x968a81c141fcf750, 0x3c717a3aeb070cab],
+ [0x9eaa8f28c0f16d45, 0xf1c6e3e7cdfe62e9],
+ [0x2b312bdf6acddc8f, 0x56bca6b5bdbbaa1e],
+ [0x6406fd52a4f79017, 0x553173f098cf1119],
+ [0x6dbba90b07767584, 0x51cad331ec71792f],
+ [0xe7b0e89c4347788b, 0x16760b7b8eb91a62],
+ [0x74ed0ba1739b7e25, 0x2251ad14ce20d43b],
+ [0x10f80a1753bf729c, 0x45c979e7cb706385],
+ ],
+ );
+
+ let enc_keys = expand(&[0xff; 32]).0;
+ check(
+ &enc_keys,
+ &[
+ [0xffffffffffffffff, 0xffffffffffffffff],
+ [0xffffffffffffffff, 0xffffffffffffffff],
+ [0xe8e9e9e917161616, 0xe8e9e9e917161616],
+ [0x0fb8b8b8f0474747, 0x0fb8b8b8f0474747],
+ [0x4a4949655d5f5f73, 0xb5b6b69aa2a0a08c],
+ [0x355858dcc51f1f9b, 0xcaa7a7233ae0e064],
+ [0xafa80ae5f2f75596, 0x4741e30ce5e14380],
+ [0xeca0421129bf5d8a, 0xe318faa9d9f81acd],
+ [0xe60ab7d014fde246, 0x53bc014ab65d42ca],
+ [0xa2ec6e658b5333ef, 0x684bc946b1b3d38b],
+ [0x9b6c8a188f91685e, 0xdc2d69146a702bde],
+ [0xa0bd9f782beeac97, 0x43a565d1f216b65a],
+ [0xfc22349173b35ccf, 0xaf9e35dbc5ee1e05],
+ [0x0695ed132d7b4184, 0x6ede24559cc8920f],
+ [0x546d424f27de1e80, 0x88402b5b4dae355e],
+ ],
+ );
+
+ let enc_keys = expand(&[
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
+ 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d,
+ 0x1e, 0x1f,
+ ])
+ .0;
+ check(
+ &enc_keys,
+ &[
+ [0x0001020304050607, 0x08090a0b0c0d0e0f],
+ [0x1011121314151617, 0x18191a1b1c1d1e1f],
+ [0xa573c29fa176c498, 0xa97fce93a572c09c],
+ [0x1651a8cd0244beda, 0x1a5da4c10640bade],
+ [0xae87dff00ff11b68, 0xa68ed5fb03fc1567],
+ [0x6de1f1486fa54f92, 0x75f8eb5373b8518d],
+ [0xc656827fc9a79917, 0x6f294cec6cd5598b],
+ [0x3de23a75524775e7, 0x27bf9eb45407cf39],
+ [0x0bdc905fc27b0948, 0xad5245a4c1871c2f],
+ [0x45f5a66017b2d387, 0x300d4d33640a820a],
+ [0x7ccff71cbeb4fe54, 0x13e6bbf0d261a7df],
+ [0xf01afafee7a82979, 0xd7a5644ab3afe640],
+ [0x2541fe719bf50025, 0x8813bbd55a721c0a],
+ [0x4e5a6699a9f24fe0, 0x7e572baacdf8cdea],
+ [0x24fc79ccbf0979e9, 0x371ac23c6d68de36],
+ ],
+ );
+
+ let enc_keys = expand(&[
+ 0x60, 0x3d, 0xeb, 0x10, 0x15, 0xca, 0x71, 0xbe, 0x2b, 0x73, 0xae, 0xf0, 0x85, 0x7d, 0x77,
+ 0x81, 0x1f, 0x35, 0x2c, 0x07, 0x3b, 0x61, 0x08, 0xd7, 0x2d, 0x98, 0x10, 0xa3, 0x09, 0x14,
+ 0xdf, 0xf4,
+ ])
+ .0;
+ check(
+ &enc_keys,
+ &[
+ [0x603deb1015ca71be, 0x2b73aef0857d7781],
+ [0x1f352c073b6108d7, 0x2d9810a30914dff4],
+ [0x9ba354118e6925af, 0xa51a8b5f2067fcde],
+ [0xa8b09c1a93d194cd, 0xbe49846eb75d5b9a],
+ [0xd59aecb85bf3c917, 0xfee94248de8ebe96],
+ [0xb5a9328a2678a647, 0x983122292f6c79b3],
+ [0x812c81addadf48ba, 0x24360af2fab8b464],
+ [0x98c5bfc9bebd198e, 0x268c3ba709e04214],
+ [0x68007bacb2df3316, 0x96e939e46c518d80],
+ [0xc814e20476a9fb8a, 0x5025c02d59c58239],
+ [0xde1369676ccc5a71, 0xfa2563959674ee15],
+ [0x5886ca5d2e2f31d7, 0x7e0af1fa27cf73c3],
+ [0x749c47ab18501dda, 0xe2757e4f7401905a],
+ [0xcafaaae3e4d59b34, 0x9adf6acebd10190d],
+ [0xfe4890d1e6188d0b, 0x046df344706c631e],
+ ],
+ );
+}
diff --git a/rust/vendor/aes/src/ni/ctr.rs b/rust/vendor/aes/src/ni/ctr.rs
new file mode 100644
index 0000000..cb78910
--- /dev/null
+++ b/rust/vendor/aes/src/ni/ctr.rs
@@ -0,0 +1,229 @@
+//! AES in counter mode (a.k.a. AES-CTR)
+
+// TODO(tarcieri): support generic CTR API
+
+#![allow(clippy::unreadable_literal)]
+
+use super::arch::*;
+use core::mem;
+
+use super::{Aes128, Aes192, Aes256};
+use crate::BLOCK_SIZE;
+use cipher::{
+ consts::U16,
+ errors::{LoopError, OverflowError},
+ generic_array::GenericArray,
+ BlockCipher, FromBlockCipher, SeekNum, StreamCipher, StreamCipherSeek,
+};
+
+const PAR_BLOCKS: usize = 8;
+const PAR_BLOCKS_SIZE: usize = PAR_BLOCKS * BLOCK_SIZE;
+
+#[inline(always)]
+pub fn xor(buf: &mut [u8], key: &[u8]) {
+ debug_assert_eq!(buf.len(), key.len());
+ for (a, b) in buf.iter_mut().zip(key) {
+ *a ^= *b;
+ }
+}
+
+#[inline(always)]
+fn xor_block8(buf: &mut [u8], ctr: [__m128i; 8]) {
+ debug_assert_eq!(buf.len(), PAR_BLOCKS_SIZE);
+
+ // Safety: `loadu` and `storeu` support unaligned access
+ #[allow(clippy::cast_ptr_alignment)]
+ unsafe {
+ // compiler should unroll this loop
+ for i in 0..8 {
+ let ptr = buf.as_mut_ptr().offset(16 * i) as *mut __m128i;
+ let data = _mm_loadu_si128(ptr);
+ let data = _mm_xor_si128(data, ctr[i as usize]);
+ _mm_storeu_si128(ptr, data);
+ }
+ }
+}
+
+#[inline(always)]
+fn swap_bytes(v: __m128i) -> __m128i {
+ unsafe {
+ let mask = _mm_set_epi64x(0x08090a0b0c0d0e0f, 0x0001020304050607);
+ _mm_shuffle_epi8(v, mask)
+ }
+}
+
+#[inline(always)]
+fn inc_be(v: __m128i) -> __m128i {
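+ // The counter is stored byte-swapped (see `swap_bytes`), so the low-order
+ // 64 bits of the big-endian counter sit in the upper lane; adding
+ // `_mm_set_epi64x(1, 0)` increments them by one.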
+ unsafe { _mm_add_epi64(v, _mm_set_epi64x(1, 0)) }
+}
+
+#[inline(always)]
+fn load(val: &GenericArray<u8, U16>) -> __m128i {
+ // Safety: `loadu` supports unaligned loads
+ #[allow(clippy::cast_ptr_alignment)]
+ unsafe {
+ _mm_loadu_si128(val.as_ptr() as *const __m128i)
+ }
+}
+
+macro_rules! impl_ctr {
+ ($name:ident, $cipher:ty, $doc:expr) => {
+ #[doc=$doc]
+ #[derive(Clone)]
+ #[cfg_attr(docsrs, doc(cfg(feature = "ctr")))]
+ pub struct $name {
+ nonce: __m128i,
+ ctr: __m128i,
+ cipher: $cipher,
+ block: [u8; BLOCK_SIZE],
+ pos: u8,
+ }
+
+ impl $name {
+ #[inline(always)]
+ fn gen_block(&mut self) {
+ let block = self.cipher.encrypt(swap_bytes(self.ctr));
+ self.block = unsafe { mem::transmute(block) }
+ }
+
+ #[inline(always)]
+ fn next_block(&mut self) -> __m128i {
+ let block = swap_bytes(self.ctr);
+ self.ctr = inc_be(self.ctr);
+ self.cipher.encrypt(block)
+ }
+
+ #[inline(always)]
+ fn next_block8(&mut self) -> [__m128i; 8] {
+ let mut ctr = self.ctr;
+ let mut block8: [__m128i; 8] = unsafe { mem::zeroed() };
+ for i in 0..8 {
+ block8[i] = swap_bytes(ctr);
+ ctr = inc_be(ctr);
+ }
+ self.ctr = ctr;
+
+ self.cipher.encrypt8(block8)
+ }
+
+ #[inline(always)]
+ fn get_u64_ctr(&self) -> u64 {
+ let (ctr, nonce) = unsafe {
+ (
+ mem::transmute::<__m128i, [u64; 2]>(self.ctr)[1],
+ mem::transmute::<__m128i, [u64; 2]>(self.nonce)[1],
+ )
+ };
+ ctr.wrapping_sub(nonce)
+ }
+
+ /// Check that processing the provided data will not overflow the counter
+ #[inline(always)]
+ fn check_data_len(&self, data: &[u8]) -> Result<(), LoopError> {
+ let bs = BLOCK_SIZE;
+ let leftover_bytes = bs - self.pos as usize;
+ if data.len() < leftover_bytes {
+ return Ok(());
+ }
+ let blocks = 1 + (data.len() - leftover_bytes) / bs;
+ self.get_u64_ctr()
+ .checked_add(blocks as u64)
+ .ok_or(LoopError)
+ .map(|_| ())
+ }
+ }
+
+ impl FromBlockCipher for $name {
+ type BlockCipher = $cipher;
+ type NonceSize = <$cipher as BlockCipher>::BlockSize;
+
+ fn from_block_cipher(
+ cipher: $cipher,
+ nonce: &GenericArray<u8, Self::NonceSize>,
+ ) -> Self {
+ let nonce = swap_bytes(load(nonce));
+ Self {
+ nonce,
+ ctr: nonce,
+ cipher,
+ block: [0u8; BLOCK_SIZE],
+ pos: 0,
+ }
+ }
+ }
+
+ impl StreamCipher for $name {
+ #[inline]
+ fn try_apply_keystream(&mut self, mut data: &mut [u8]) -> Result<(), LoopError> {
+ self.check_data_len(data)?;
+ let bs = BLOCK_SIZE;
+ let pos = self.pos as usize;
+ debug_assert!(bs > pos);
+
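+ // First consume any keystream bytes left over from a previous call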
+ if pos != 0 {
+ if data.len() < bs - pos {
+ let n = pos + data.len();
+ xor(data, &self.block[pos..n]);
+ self.pos = n as u8;
+ return Ok(());
+ } else {
+ let (l, r) = data.split_at_mut(bs - pos);
+ data = r;
+ xor(l, &self.block[pos..]);
+ self.ctr = inc_be(self.ctr);
+ }
+ }
+
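+ // Encrypt eight counter blocks at a time and XOR them into the data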
+ let mut chunks = data.chunks_exact_mut(PAR_BLOCKS_SIZE);
+ for chunk in &mut chunks {
+ xor_block8(chunk, self.next_block8());
+ }
+ data = chunks.into_remainder();
+
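+ // Process the remaining whole blocks one at a time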
+ let mut chunks = data.chunks_exact_mut(bs);
+ for chunk in &mut chunks {
+ let block = self.next_block();
+
+ unsafe {
+ let t = _mm_loadu_si128(chunk.as_ptr() as *const __m128i);
+ let res = _mm_xor_si128(block, t);
+ _mm_storeu_si128(chunk.as_mut_ptr() as *mut __m128i, res);
+ }
+ }
+
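+ // For a trailing partial block, generate one keystream block and keep the
+ // unused bytes for the next call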
+ let rem = chunks.into_remainder();
+ self.pos = rem.len() as u8;
+ if !rem.is_empty() {
+ self.gen_block();
+ for (a, b) in rem.iter_mut().zip(&self.block) {
+ *a ^= *b;
+ }
+ }
+
+ Ok(())
+ }
+ }
+
+ impl StreamCipherSeek for $name {
+ fn try_current_pos<T: SeekNum>(&self) -> Result<T, OverflowError> {
+ T::from_block_byte(self.get_u64_ctr(), self.pos, BLOCK_SIZE as u8)
+ }
+
+ fn try_seek<T: SeekNum>(&mut self, pos: T) -> Result<(), LoopError> {
+ let res: (u64, u8) = pos.to_block_byte(BLOCK_SIZE as u8)?;
+ self.ctr = unsafe { _mm_add_epi64(self.nonce, _mm_set_epi64x(res.0 as i64, 0)) };
+ self.pos = res.1;
+ if self.pos != 0 {
+ self.gen_block()
+ }
+ Ok(())
+ }
+ }
+
+ opaque_debug::implement!($name);
+ };
+}
+
+impl_ctr!(Aes128Ctr, Aes128, "AES-128 in CTR mode");
+impl_ctr!(Aes192Ctr, Aes192, "AES-192 in CTR mode");
+impl_ctr!(Aes256Ctr, Aes256, "AES-256 in CTR mode");
diff --git a/rust/vendor/aes/src/ni/hazmat.rs b/rust/vendor/aes/src/ni/hazmat.rs
new file mode 100644
index 0000000..5188ad7
--- /dev/null
+++ b/rust/vendor/aes/src/ni/hazmat.rs
@@ -0,0 +1,86 @@
+//! Low-level "hazmat" AES functions: AES-NI support.
+//!
+//! Note: this isn't actually used in the `Aes128`/`Aes192`/`Aes256`
+//! implementations in this crate, but instead provides raw AES-NI accelerated
+//! access to the AES round function gated under the `hazmat` crate feature.
+
+use super::{
+ arch::*,
+ utils::{load8, store8},
+};
+use crate::{Block, ParBlocks};
+
+/// AES cipher (encrypt) round function.
+#[allow(clippy::cast_ptr_alignment)]
+#[target_feature(enable = "aes")]
+pub(crate) unsafe fn cipher_round(block: &mut Block, round_key: &Block) {
+ // Safety: `loadu` and `storeu` support unaligned access
+ let b = _mm_loadu_si128(block.as_ptr() as *const __m128i);
+ let k = _mm_loadu_si128(round_key.as_ptr() as *const __m128i);
+ let out = _mm_aesenc_si128(b, k);
+ _mm_storeu_si128(block.as_mut_ptr() as *mut __m128i, out);
+}
+
+/// AES cipher (encrypt) round function: parallel version.
+#[allow(clippy::cast_ptr_alignment)]
+#[target_feature(enable = "aes")]
+pub(crate) unsafe fn cipher_round_par(blocks: &mut ParBlocks, round_keys: &ParBlocks) {
+ let xmm_keys = load8(round_keys);
+ let mut xmm_blocks = load8(blocks);
+
+ for i in 0..8 {
+ xmm_blocks[i] = _mm_aesenc_si128(xmm_blocks[i], xmm_keys[i]);
+ }
+
+ store8(blocks, xmm_blocks);
+}
+
+/// AES equivalent inverse cipher (decrypt) round function.
+#[allow(clippy::cast_ptr_alignment)]
+#[target_feature(enable = "aes")]
+pub(crate) unsafe fn equiv_inv_cipher_round(block: &mut Block, round_key: &Block) {
+ // Safety: `loadu` and `storeu` support unaligned access
+ let b = _mm_loadu_si128(block.as_ptr() as *const __m128i);
+ let k = _mm_loadu_si128(round_key.as_ptr() as *const __m128i);
+ let out = _mm_aesdec_si128(b, k);
+ _mm_storeu_si128(block.as_mut_ptr() as *mut __m128i, out);
+}
+
+/// AES equivalent inverse cipher (decrypt) round function: parallel version.
+#[allow(clippy::cast_ptr_alignment)]
+#[target_feature(enable = "aes")]
+pub(crate) unsafe fn equiv_inv_cipher_round_par(blocks: &mut ParBlocks, round_keys: &ParBlocks) {
+ let xmm_keys = load8(round_keys);
+ let mut xmm_blocks = load8(blocks);
+
+ for i in 0..8 {
+ xmm_blocks[i] = _mm_aesdec_si128(xmm_blocks[i], xmm_keys[i]);
+ }
+
+ store8(blocks, xmm_blocks);
+}
+
+/// AES mix columns function.
+#[allow(clippy::cast_ptr_alignment)]
+#[target_feature(enable = "aes")]
+pub(crate) unsafe fn mix_columns(block: &mut Block) {
+ // Safety: `loadu` and `storeu` support unaligned access
+ let mut state = _mm_loadu_si128(block.as_ptr() as *const __m128i);
+
+ // Emulate mix columns by performing three inverse mix columns operations
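+ // (the MixColumns matrix has multiplicative order 4, so applying
+ // InvMixColumns three times yields MixColumns)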
+ state = _mm_aesimc_si128(state);
+ state = _mm_aesimc_si128(state);
+ state = _mm_aesimc_si128(state);
+
+ _mm_storeu_si128(block.as_mut_ptr() as *mut __m128i, state);
+}
+
+/// AES inverse mix columns function.
+#[allow(clippy::cast_ptr_alignment)]
+#[target_feature(enable = "aes")]
+pub(crate) unsafe fn inv_mix_columns(block: &mut Block) {
+ // Safety: `loadu` and `storeu` support unaligned access
+ let b = _mm_loadu_si128(block.as_ptr() as *const __m128i);
+ let out = _mm_aesimc_si128(b);
+ _mm_storeu_si128(block.as_mut_ptr() as *mut __m128i, out);
+}
diff --git a/rust/vendor/aes/src/ni/utils.rs b/rust/vendor/aes/src/ni/utils.rs
new file mode 100644
index 0000000..1fc3403
--- /dev/null
+++ b/rust/vendor/aes/src/ni/utils.rs
@@ -0,0 +1,90 @@
+//! Utility functions
+
+// TODO(tarcieri): check performance impact / generated assembly changes
+#![allow(clippy::needless_range_loop)]
+
+use super::arch::*;
+use crate::ParBlocks;
+
+pub type U128x8 = [__m128i; 8];
+
+#[cfg(test)]
+pub(crate) fn check(a: &[__m128i], b: &[[u64; 2]]) {
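+ // Each expected value is given as a pair of big-endian u64 halves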
+ for (v1, v2) in a.iter().zip(b) {
+ let t1: [u64; 2] = unsafe { core::mem::transmute(*v1) };
+ let t2 = [v2[0].to_be(), v2[1].to_be()];
+ assert_eq!(t1, t2);
+ }
+}
+
+#[inline(always)]
+pub(crate) fn load8(blocks: &ParBlocks) -> U128x8 {
+ unsafe {
+ [
+ _mm_loadu_si128(blocks[0].as_ptr() as *const __m128i),
+ _mm_loadu_si128(blocks[1].as_ptr() as *const __m128i),
+ _mm_loadu_si128(blocks[2].as_ptr() as *const __m128i),
+ _mm_loadu_si128(blocks[3].as_ptr() as *const __m128i),
+ _mm_loadu_si128(blocks[4].as_ptr() as *const __m128i),
+ _mm_loadu_si128(blocks[5].as_ptr() as *const __m128i),
+ _mm_loadu_si128(blocks[6].as_ptr() as *const __m128i),
+ _mm_loadu_si128(blocks[7].as_ptr() as *const __m128i),
+ ]
+ }
+}
+
+#[inline(always)]
+pub(crate) fn store8(blocks: &mut ParBlocks, b: U128x8) {
+ unsafe {
+ _mm_storeu_si128(blocks[0].as_mut_ptr() as *mut __m128i, b[0]);
+ _mm_storeu_si128(blocks[1].as_mut_ptr() as *mut __m128i, b[1]);
+ _mm_storeu_si128(blocks[2].as_mut_ptr() as *mut __m128i, b[2]);
+ _mm_storeu_si128(blocks[3].as_mut_ptr() as *mut __m128i, b[3]);
+ _mm_storeu_si128(blocks[4].as_mut_ptr() as *mut __m128i, b[4]);
+ _mm_storeu_si128(blocks[5].as_mut_ptr() as *mut __m128i, b[5]);
+ _mm_storeu_si128(blocks[6].as_mut_ptr() as *mut __m128i, b[6]);
+ _mm_storeu_si128(blocks[7].as_mut_ptr() as *mut __m128i, b[7]);
+ }
+}
+
+#[inline(always)]
+pub(crate) fn xor8(b: &mut U128x8, key: __m128i) {
+ unsafe {
+ b[0] = _mm_xor_si128(b[0], key);
+ b[1] = _mm_xor_si128(b[1], key);
+ b[2] = _mm_xor_si128(b[2], key);
+ b[3] = _mm_xor_si128(b[3], key);
+ b[4] = _mm_xor_si128(b[4], key);
+ b[5] = _mm_xor_si128(b[5], key);
+ b[6] = _mm_xor_si128(b[6], key);
+ b[7] = _mm_xor_si128(b[7], key);
+ }
+}
+
+#[inline(always)]
+pub(crate) fn aesenc8(buffer: &mut U128x8, key: __m128i) {
+ for i in 0..8 {
+ buffer[i] = unsafe { _mm_aesenc_si128(buffer[i], key) };
+ }
+}
+
+#[inline(always)]
+pub(crate) fn aesenclast8(buffer: &mut U128x8, key: __m128i) {
+ for i in 0..8 {
+ buffer[i] = unsafe { _mm_aesenclast_si128(buffer[i], key) };
+ }
+}
+
+#[inline(always)]
+pub(crate) fn aesdec8(buffer: &mut U128x8, key: __m128i) {
+ for i in 0..8 {
+ buffer[i] = unsafe { _mm_aesdec_si128(buffer[i], key) };
+ }
+}
+
+#[inline(always)]
+pub(crate) fn aesdeclast8(buffer: &mut U128x8, key: __m128i) {
+ for i in 0..8 {
+ buffer[i] = unsafe { _mm_aesdeclast_si128(buffer[i], key) };
+ }
+}
diff --git a/rust/vendor/aes/src/soft.rs b/rust/vendor/aes/src/soft.rs
new file mode 100644
index 0000000..1b51d22
--- /dev/null
+++ b/rust/vendor/aes/src/soft.rs
@@ -0,0 +1,127 @@
+//! AES block cipher constant-time implementation.
+//!
+//! The implementation uses a technique called [fixslicing][1], an improved
+//! form of bitslicing which represents ciphers in a way which enables
+//! very efficient constant-time implementations in software.
+//!
+//! [1]: https://eprint.iacr.org/2020/1123.pdf
+
+#![deny(unsafe_code)]
+
+#[cfg_attr(not(target_pointer_width = "64"), path = "soft/fixslice32.rs")]
+#[cfg_attr(target_pointer_width = "64", path = "soft/fixslice64.rs")]
+pub(crate) mod fixslice;
+
+#[cfg(feature = "ctr")]
+mod ctr;
+
+#[cfg(feature = "ctr")]
+pub use self::ctr::{Aes128Ctr, Aes192Ctr, Aes256Ctr};
+
+use crate::{Block, ParBlocks};
+use cipher::{
+ consts::{U16, U24, U32, U8},
+ generic_array::GenericArray,
+ BlockCipher, BlockDecrypt, BlockEncrypt, NewBlockCipher,
+};
+use fixslice::{FixsliceKeys128, FixsliceKeys192, FixsliceKeys256, FIXSLICE_BLOCKS};
+
+macro_rules! define_aes_impl {
+ (
+ $name:ident,
+ $key_size:ty,
+ $fixslice_keys:ty,
+ $fixslice_key_schedule:path,
+ $fixslice_decrypt:path,
+ $fixslice_encrypt:path,
+ $doc:expr
+ ) => {
+ #[doc=$doc]
+ #[derive(Clone)]
+ pub struct $name {
+ keys: $fixslice_keys,
+ }
+
+ impl NewBlockCipher for $name {
+ type KeySize = $key_size;
+
+ #[inline]
+ fn new(key: &GenericArray<u8, $key_size>) -> Self {
+ Self {
+ keys: $fixslice_key_schedule(key),
+ }
+ }
+ }
+
+ impl BlockCipher for $name {
+ type BlockSize = U16;
+ type ParBlocks = U8;
+ }
+
+ impl BlockEncrypt for $name {
+ #[inline]
+ fn encrypt_block(&self, block: &mut Block) {
+ let mut blocks = [Block::default(); FIXSLICE_BLOCKS];
+ blocks[0].copy_from_slice(block);
+ $fixslice_encrypt(&self.keys, &mut blocks);
+ block.copy_from_slice(&blocks[0]);
+ }
+
+ #[inline]
+ fn encrypt_par_blocks(&self, blocks: &mut ParBlocks) {
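+ // `ParBlocks` holds eight blocks; process them in batches of
+ // `FIXSLICE_BLOCKS` (the fixsliced batch width)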
+ for chunk in blocks.chunks_mut(FIXSLICE_BLOCKS) {
+ $fixslice_encrypt(&self.keys, chunk);
+ }
+ }
+ }
+
+ impl BlockDecrypt for $name {
+ #[inline]
+ fn decrypt_block(&self, block: &mut Block) {
+ let mut blocks = [Block::default(); FIXSLICE_BLOCKS];
+ blocks[0].copy_from_slice(block);
+ $fixslice_decrypt(&self.keys, &mut blocks);
+ block.copy_from_slice(&blocks[0]);
+ }
+
+ #[inline]
+ fn decrypt_par_blocks(&self, blocks: &mut ParBlocks) {
+ for chunk in blocks.chunks_mut(FIXSLICE_BLOCKS) {
+ $fixslice_decrypt(&self.keys, chunk);
+ }
+ }
+ }
+
+ opaque_debug::implement!($name);
+ };
+}
+
+define_aes_impl!(
+ Aes128,
+ U16,
+ FixsliceKeys128,
+ fixslice::aes128_key_schedule,
+ fixslice::aes128_decrypt,
+ fixslice::aes128_encrypt,
+ "AES-128 block cipher instance"
+);
+
+define_aes_impl!(
+ Aes192,
+ U24,
+ FixsliceKeys192,
+ fixslice::aes192_key_schedule,
+ fixslice::aes192_decrypt,
+ fixslice::aes192_encrypt,
+ "AES-192 block cipher instance"
+);
+
+define_aes_impl!(
+ Aes256,
+ U32,
+ FixsliceKeys256,
+ fixslice::aes256_key_schedule,
+ fixslice::aes256_decrypt,
+ fixslice::aes256_encrypt,
+ "AES-256 block cipher instance"
+);
diff --git a/rust/vendor/aes/src/soft/ctr.rs b/rust/vendor/aes/src/soft/ctr.rs
new file mode 100644
index 0000000..a288ab1
--- /dev/null
+++ b/rust/vendor/aes/src/soft/ctr.rs
@@ -0,0 +1,17 @@
+//! AES in counter mode (a.k.a. AES-CTR)
+
+// TODO(tarcieri): support generic CTR API
+
+use super::{Aes128, Aes192, Aes256};
+
+/// AES-128 in CTR mode
+#[cfg_attr(docsrs, doc(cfg(feature = "ctr")))]
+pub type Aes128Ctr = ::ctr::Ctr64BE<Aes128>;
+
+/// AES-192 in CTR mode
+#[cfg_attr(docsrs, doc(cfg(feature = "ctr")))]
+pub type Aes192Ctr = ::ctr::Ctr64BE<Aes192>;
+
+/// AES-256 in CTR mode
+#[cfg_attr(docsrs, doc(cfg(feature = "ctr")))]
+pub type Aes256Ctr = ::ctr::Ctr64BE<Aes256>;
diff --git a/rust/vendor/aes/src/soft/fixslice32.rs b/rust/vendor/aes/src/soft/fixslice32.rs
new file mode 100644
index 0000000..5dc4834
--- /dev/null
+++ b/rust/vendor/aes/src/soft/fixslice32.rs
@@ -0,0 +1,1485 @@
+//! Fixsliced implementations of AES-128, AES-192 and AES-256 (32-bit)
+//! adapted from the original C implementation.
+//!
+//! All implementations are fully bitsliced and do not rely on any
+//! Look-Up Table (LUT).
+//!
+//! See the paper at <https://eprint.iacr.org/2020/1123.pdf> for more details.
+//!
+//! # Author (original C code)
+//!
+//! Alexandre Adomnicai, Nanyang Technological University, Singapore
+//! <alexandre.adomnicai@ntu.edu.sg>
+//!
+//! Originally licensed MIT. Relicensed as Apache 2.0+MIT with permission.
+
+#![allow(clippy::unreadable_literal)]
+
+use crate::Block;
+use cipher::{
+ consts::{U16, U24, U32},
+ generic_array::GenericArray,
+};
+use core::convert::TryInto;
+
+/// AES block batch size for this implementation
+pub(crate) const FIXSLICE_BLOCKS: usize = 2;
+
+/// AES-128 round keys
+pub(crate) type FixsliceKeys128 = [u32; 88];
+
+/// AES-192 round keys
+pub(crate) type FixsliceKeys192 = [u32; 104];
+
+/// AES-256 round keys
+pub(crate) type FixsliceKeys256 = [u32; 120];
+
+/// 256-bit internal state
+pub(crate) type State = [u32; 8];
+
+/// Fully bitsliced AES-128 key schedule to match the fully-fixsliced representation.
+pub(crate) fn aes128_key_schedule(key: &GenericArray<u8, U16>) -> FixsliceKeys128 {
+ let mut rkeys = [0u32; 88];
+
+ bitslice(&mut rkeys[..8], key, key);
+
+ let mut rk_off = 0;
+ for rcon in 0..10 {
+ memshift32(&mut rkeys, rk_off);
+ rk_off += 8;
+
+ sub_bytes(&mut rkeys[rk_off..(rk_off + 8)]);
+ sub_bytes_nots(&mut rkeys[rk_off..(rk_off + 8)]);
+
+ if rcon < 8 {
+ add_round_constant_bit(&mut rkeys[rk_off..(rk_off + 8)], rcon);
+ } else {
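+ // the last two round constants (0x1b and 0x36) have several bits set,
+ // so more than one constant bit is injected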
+ add_round_constant_bit(&mut rkeys[rk_off..(rk_off + 8)], rcon - 8);
+ add_round_constant_bit(&mut rkeys[rk_off..(rk_off + 8)], rcon - 7);
+ add_round_constant_bit(&mut rkeys[rk_off..(rk_off + 8)], rcon - 5);
+ add_round_constant_bit(&mut rkeys[rk_off..(rk_off + 8)], rcon - 4);
+ }
+
+ xor_columns(&mut rkeys, rk_off, 8, ror_distance(1, 3));
+ }
+
+ // Adjust to match fixslicing format
+ #[cfg(feature = "compact")]
+ {
+ for i in (8..88).step_by(16) {
+ inv_shift_rows_1(&mut rkeys[i..(i + 8)]);
+ }
+ }
+ #[cfg(not(feature = "compact"))]
+ {
+ for i in (8..72).step_by(32) {
+ inv_shift_rows_1(&mut rkeys[i..(i + 8)]);
+ inv_shift_rows_2(&mut rkeys[(i + 8)..(i + 16)]);
+ inv_shift_rows_3(&mut rkeys[(i + 16)..(i + 24)]);
+ }
+ inv_shift_rows_1(&mut rkeys[72..80]);
+ }
+
+ // Account for NOTs removed from sub_bytes
+ for i in 1..11 {
+ sub_bytes_nots(&mut rkeys[(i * 8)..(i * 8 + 8)]);
+ }
+
+ rkeys
+}
+
+/// Fully bitsliced AES-192 key schedule to match the fully-fixsliced representation.
+pub(crate) fn aes192_key_schedule(key: &GenericArray<u8, U24>) -> FixsliceKeys192 {
+ let mut rkeys = [0u32; 104];
+ let mut tmp = [0u32; 8];
+
+ bitslice(&mut rkeys[..8], &key[..16], &key[..16]);
+ bitslice(&mut tmp, &key[8..], &key[8..]);
+
+ let mut rcon = 0;
+ let mut rk_off = 8;
+
+ loop {
+ for i in 0..8 {
+ rkeys[rk_off + i] =
+ (0x0f0f0f0f & (tmp[i] >> 4)) | (0xf0f0f0f0 & (rkeys[(rk_off - 8) + i] << 4));
+ }
+
+ sub_bytes(&mut tmp);
+ sub_bytes_nots(&mut tmp);
+
+ add_round_constant_bit(&mut tmp, rcon);
+ rcon += 1;
+
+ for i in 0..8 {
+ let mut ti = rkeys[rk_off + i];
+ ti ^= 0x30303030 & ror(tmp[i], ror_distance(1, 1));
+ ti ^= 0xc0c0c0c0 & (ti << 2);
+ tmp[i] = ti;
+ }
+ rkeys[rk_off..(rk_off + 8)].copy_from_slice(&tmp);
+ rk_off += 8;
+
+ for i in 0..8 {
+ let ui = tmp[i];
+ let mut ti = (0x0f0f0f0f & (rkeys[(rk_off - 16) + i] >> 4)) | (0xf0f0f0f0 & (ui << 4));
+ ti ^= 0x03030303 & (ui >> 6);
+ tmp[i] =
+ ti ^ (0xfcfcfcfc & (ti << 2)) ^ (0xf0f0f0f0 & (ti << 4)) ^ (0xc0c0c0c0 & (ti << 6));
+ }
+ rkeys[rk_off..(rk_off + 8)].copy_from_slice(&tmp);
+ rk_off += 8;
+
+ sub_bytes(&mut tmp);
+ sub_bytes_nots(&mut tmp);
+
+ add_round_constant_bit(&mut tmp, rcon);
+ rcon += 1;
+
+ for i in 0..8 {
+ let mut ti = (0x0f0f0f0f & (rkeys[(rk_off - 16) + i] >> 4))
+ | (0xf0f0f0f0 & (rkeys[(rk_off - 8) + i] << 4));
+ ti ^= 0x03030303 & ror(tmp[i], ror_distance(1, 3));
+ rkeys[rk_off + i] =
+ ti ^ (0xfcfcfcfc & (ti << 2)) ^ (0xf0f0f0f0 & (ti << 4)) ^ (0xc0c0c0c0 & (ti << 6));
+ }
+ rk_off += 8;
+
+ if rcon >= 8 {
+ break;
+ }
+
+ for i in 0..8 {
+ let ui = rkeys[(rk_off - 8) + i];
+ let mut ti = rkeys[(rk_off - 16) + i];
+ ti ^= 0x30303030 & (ui >> 2);
+ ti ^= 0xc0c0c0c0 & (ti << 2);
+ tmp[i] = ti;
+ }
+ }
+
+ // Adjust to match fixslicing format
+ #[cfg(feature = "compact")]
+ {
+ for i in (8..104).step_by(16) {
+ inv_shift_rows_1(&mut rkeys[i..(i + 8)]);
+ }
+ }
+ #[cfg(not(feature = "compact"))]
+ {
+ for i in (0..96).step_by(32) {
+ inv_shift_rows_1(&mut rkeys[(i + 8)..(i + 16)]);
+ inv_shift_rows_2(&mut rkeys[(i + 16)..(i + 24)]);
+ inv_shift_rows_3(&mut rkeys[(i + 24)..(i + 32)]);
+ }
+ }
+
+ // Account for NOTs removed from sub_bytes
+ for i in 1..13 {
+ sub_bytes_nots(&mut rkeys[(i * 8)..(i * 8 + 8)]);
+ }
+
+ rkeys
+}
+
+/// Fully bitsliced AES-256 key schedule to match the fully-fixsliced representation.
+pub(crate) fn aes256_key_schedule(key: &GenericArray<u8, U32>) -> FixsliceKeys256 {
+ let mut rkeys = [0u32; 120];
+
+ bitslice(&mut rkeys[..8], &key[..16], &key[..16]);
+ bitslice(&mut rkeys[8..16], &key[16..], &key[16..]);
+
+ let mut rk_off = 8;
+
+ let mut rcon = 0;
+ loop {
+ memshift32(&mut rkeys, rk_off);
+ rk_off += 8;
+
+ sub_bytes(&mut rkeys[rk_off..(rk_off + 8)]);
+ sub_bytes_nots(&mut rkeys[rk_off..(rk_off + 8)]);
+
+ add_round_constant_bit(&mut rkeys[rk_off..(rk_off + 8)], rcon);
+ xor_columns(&mut rkeys, rk_off, 16, ror_distance(1, 3));
+ rcon += 1;
+
+ if rcon == 7 {
+ break;
+ }
+
+ memshift32(&mut rkeys, rk_off);
+ rk_off += 8;
+
+ sub_bytes(&mut rkeys[rk_off..(rk_off + 8)]);
+ sub_bytes_nots(&mut rkeys[rk_off..(rk_off + 8)]);
+
+ xor_columns(&mut rkeys, rk_off, 16, ror_distance(0, 3));
+ }
+
+ // Adjust to match fixslicing format
+ #[cfg(feature = "compact")]
+ {
+ for i in (8..120).step_by(16) {
+ inv_shift_rows_1(&mut rkeys[i..(i + 8)]);
+ }
+ }
+ #[cfg(not(feature = "compact"))]
+ {
+ for i in (8..104).step_by(32) {
+ inv_shift_rows_1(&mut rkeys[i..(i + 8)]);
+ inv_shift_rows_2(&mut rkeys[(i + 8)..(i + 16)]);
+ inv_shift_rows_3(&mut rkeys[(i + 16)..(i + 24)]);
+ }
+ inv_shift_rows_1(&mut rkeys[104..112]);
+ }
+
+ // Account for NOTs removed from sub_bytes
+ for i in 1..15 {
+ sub_bytes_nots(&mut rkeys[(i * 8)..(i * 8 + 8)]);
+ }
+
+ rkeys
+}
+
+/// Fully-fixsliced AES-128 decryption (the InvShiftRows is completely omitted).
+///
+/// Decrypts two blocks in-place and in parallel.
+pub(crate) fn aes128_decrypt(rkeys: &FixsliceKeys128, blocks: &mut [Block]) {
+ debug_assert_eq!(blocks.len(), FIXSLICE_BLOCKS);
+ let mut state = State::default();
+
+ bitslice(&mut state, &blocks[0], &blocks[1]);
+
+ add_round_key(&mut state, &rkeys[80..]);
+ inv_sub_bytes(&mut state);
+
+ #[cfg(not(feature = "compact"))]
+ {
+ inv_shift_rows_2(&mut state);
+ }
+
+ let mut rk_off = 72;
+ loop {
+ #[cfg(feature = "compact")]
+ {
+ inv_shift_rows_2(&mut state);
+ }
+
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ inv_mix_columns_1(&mut state);
+ inv_sub_bytes(&mut state);
+ rk_off -= 8;
+
+ if rk_off == 0 {
+ break;
+ }
+
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ inv_mix_columns_0(&mut state);
+ inv_sub_bytes(&mut state);
+ rk_off -= 8;
+
+ #[cfg(not(feature = "compact"))]
+ {
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ inv_mix_columns_3(&mut state);
+ inv_sub_bytes(&mut state);
+ rk_off -= 8;
+
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ inv_mix_columns_2(&mut state);
+ inv_sub_bytes(&mut state);
+ rk_off -= 8;
+ }
+ }
+
+ add_round_key(&mut state, &rkeys[..8]);
+
+ inv_bitslice(&state, blocks);
+}
+
+/// Fully-fixsliced AES-128 encryption (the ShiftRows is completely omitted).
+///
+/// Encrypts two blocks in-place and in parallel.
+pub(crate) fn aes128_encrypt(rkeys: &FixsliceKeys128, blocks: &mut [Block]) {
+ debug_assert_eq!(blocks.len(), FIXSLICE_BLOCKS);
+ let mut state = State::default();
+
+ bitslice(&mut state, &blocks[0], &blocks[1]);
+
+ add_round_key(&mut state, &rkeys[..8]);
+
+ let mut rk_off = 8;
+ loop {
+ sub_bytes(&mut state);
+ mix_columns_1(&mut state);
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ rk_off += 8;
+
+ #[cfg(feature = "compact")]
+ {
+ shift_rows_2(&mut state);
+ }
+
+ if rk_off == 80 {
+ break;
+ }
+
+ #[cfg(not(feature = "compact"))]
+ {
+ sub_bytes(&mut state);
+ mix_columns_2(&mut state);
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ rk_off += 8;
+
+ sub_bytes(&mut state);
+ mix_columns_3(&mut state);
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ rk_off += 8;
+ }
+
+ sub_bytes(&mut state);
+ mix_columns_0(&mut state);
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ rk_off += 8;
+ }
+
+ #[cfg(not(feature = "compact"))]
+ {
+ shift_rows_2(&mut state);
+ }
+
+ sub_bytes(&mut state);
+ add_round_key(&mut state, &rkeys[80..]);
+
+ inv_bitslice(&state, blocks);
+}
+
+/// Fully-fixsliced AES-192 decryption (the InvShiftRows is completely omitted).
+///
+/// Decrypts two blocks in-place and in parallel.
+pub(crate) fn aes192_decrypt(rkeys: &FixsliceKeys192, blocks: &mut [Block]) {
+ debug_assert_eq!(blocks.len(), FIXSLICE_BLOCKS);
+ let mut state = State::default();
+
+ bitslice(&mut state, &blocks[0], &blocks[1]);
+
+ add_round_key(&mut state, &rkeys[96..]);
+ inv_sub_bytes(&mut state);
+
+ let mut rk_off = 88;
+ loop {
+ #[cfg(feature = "compact")]
+ {
+ inv_shift_rows_2(&mut state);
+ }
+ #[cfg(not(feature = "compact"))]
+ {
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ inv_mix_columns_3(&mut state);
+ inv_sub_bytes(&mut state);
+ rk_off -= 8;
+
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ inv_mix_columns_2(&mut state);
+ inv_sub_bytes(&mut state);
+ rk_off -= 8;
+ }
+
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ inv_mix_columns_1(&mut state);
+ inv_sub_bytes(&mut state);
+ rk_off -= 8;
+
+ if rk_off == 0 {
+ break;
+ }
+
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ inv_mix_columns_0(&mut state);
+ inv_sub_bytes(&mut state);
+ rk_off -= 8;
+ }
+
+ add_round_key(&mut state, &rkeys[..8]);
+
+ inv_bitslice(&state, blocks);
+}
+
+/// Fully-fixsliced AES-192 encryption (the ShiftRows is completely omitted).
+///
+/// Encrypts two blocks in-place and in parallel.
+pub(crate) fn aes192_encrypt(rkeys: &FixsliceKeys192, blocks: &mut [Block]) {
+ debug_assert_eq!(blocks.len(), FIXSLICE_BLOCKS);
+ let mut state = State::default();
+
+ bitslice(&mut state, &blocks[0], &blocks[1]);
+
+ add_round_key(&mut state, &rkeys[..8]);
+
+ let mut rk_off = 8;
+ loop {
+ sub_bytes(&mut state);
+ mix_columns_1(&mut state);
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ rk_off += 8;
+
+ #[cfg(feature = "compact")]
+ {
+ shift_rows_2(&mut state);
+ }
+ #[cfg(not(feature = "compact"))]
+ {
+ sub_bytes(&mut state);
+ mix_columns_2(&mut state);
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ rk_off += 8;
+
+ sub_bytes(&mut state);
+ mix_columns_3(&mut state);
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ rk_off += 8;
+ }
+
+ if rk_off == 96 {
+ break;
+ }
+
+ sub_bytes(&mut state);
+ mix_columns_0(&mut state);
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ rk_off += 8;
+ }
+
+ sub_bytes(&mut state);
+ add_round_key(&mut state, &rkeys[96..]);
+
+ inv_bitslice(&state, blocks);
+}
+
+/// Fully-fixsliced AES-256 decryption (the InvShiftRows is completely omitted).
+///
+/// Decrypts two blocks in-place and in parallel.
+pub(crate) fn aes256_decrypt(rkeys: &FixsliceKeys256, blocks: &mut [Block]) {
+ debug_assert_eq!(blocks.len(), FIXSLICE_BLOCKS);
+ let mut state = State::default();
+
+ bitslice(&mut state, &blocks[0], &blocks[1]);
+
+ add_round_key(&mut state, &rkeys[112..]);
+ inv_sub_bytes(&mut state);
+
+ #[cfg(not(feature = "compact"))]
+ {
+ inv_shift_rows_2(&mut state);
+ }
+
+ let mut rk_off = 104;
+ loop {
+ #[cfg(feature = "compact")]
+ {
+ inv_shift_rows_2(&mut state);
+ }
+
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ inv_mix_columns_1(&mut state);
+ inv_sub_bytes(&mut state);
+ rk_off -= 8;
+
+ if rk_off == 0 {
+ break;
+ }
+
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ inv_mix_columns_0(&mut state);
+ inv_sub_bytes(&mut state);
+ rk_off -= 8;
+
+ #[cfg(not(feature = "compact"))]
+ {
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ inv_mix_columns_3(&mut state);
+ inv_sub_bytes(&mut state);
+ rk_off -= 8;
+
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ inv_mix_columns_2(&mut state);
+ inv_sub_bytes(&mut state);
+ rk_off -= 8;
+ }
+ }
+
+ add_round_key(&mut state, &rkeys[..8]);
+
+ inv_bitslice(&state, blocks);
+}
+
+/// Fully-fixsliced AES-256 encryption (the ShiftRows is completely omitted).
+///
+/// Encrypts two blocks in-place and in parallel.
+pub(crate) fn aes256_encrypt(rkeys: &FixsliceKeys256, blocks: &mut [Block]) {
+ debug_assert_eq!(blocks.len(), FIXSLICE_BLOCKS);
+ let mut state = State::default();
+
+ bitslice(&mut state, &blocks[0], &blocks[1]);
+
+ add_round_key(&mut state, &rkeys[..8]);
+
+ let mut rk_off = 8;
+ loop {
+ sub_bytes(&mut state);
+ mix_columns_1(&mut state);
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ rk_off += 8;
+
+ #[cfg(feature = "compact")]
+ {
+ shift_rows_2(&mut state);
+ }
+
+ if rk_off == 112 {
+ break;
+ }
+
+ #[cfg(not(feature = "compact"))]
+ {
+ sub_bytes(&mut state);
+ mix_columns_2(&mut state);
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ rk_off += 8;
+
+ sub_bytes(&mut state);
+ mix_columns_3(&mut state);
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ rk_off += 8;
+ }
+
+ sub_bytes(&mut state);
+ mix_columns_0(&mut state);
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ rk_off += 8;
+ }
+
+ #[cfg(not(feature = "compact"))]
+ {
+ shift_rows_2(&mut state);
+ }
+
+ sub_bytes(&mut state);
+ add_round_key(&mut state, &rkeys[112..]);
+
+ inv_bitslice(&state, blocks);
+}
+
+/// Note that the 4 bitwise NOT (^= 0xffffffff) are accounted for here so that it is a true
+/// inverse of 'sub_bytes'.
+fn inv_sub_bytes(state: &mut [u32]) {
+ debug_assert_eq!(state.len(), 8);
+
+ // Scheduled using https://github.com/Ko-/aes-armcortexm/tree/public/scheduler
+ // Inline "stack" comments reflect suggested stores and loads (ARM Cortex-M3 and M4)
+
+ let u7 = state[0];
+ let u6 = state[1];
+ let u5 = state[2];
+ let u4 = state[3];
+ let u3 = state[4];
+ let u2 = state[5];
+ let u1 = state[6];
+ let u0 = state[7];
+
+ let t23 = u0 ^ u3;
+ let t8 = u1 ^ t23;
+ let m2 = t23 & t8;
+ let t4 = u4 ^ t8;
+ let t22 = u1 ^ u3;
+ let t2 = u0 ^ u1;
+ let t1 = u3 ^ u4;
+ // t23 -> stack
+ let t9 = u7 ^ t1;
+ // t8 -> stack
+ let m7 = t22 & t9;
+ // t9 -> stack
+ let t24 = u4 ^ u7;
+ // m7 -> stack
+ let t10 = t2 ^ t24;
+ // u4 -> stack
+ let m14 = t2 & t10;
+ let r5 = u6 ^ u7;
+ // m2 -> stack
+ let t3 = t1 ^ r5;
+ // t2 -> stack
+ let t13 = t2 ^ r5;
+ let t19 = t22 ^ r5;
+ // t3 -> stack
+ let t17 = u2 ^ t19;
+ // t4 -> stack
+ let t25 = u2 ^ t1;
+ let r13 = u1 ^ u6;
+ // t25 -> stack
+ let t20 = t24 ^ r13;
+ // t17 -> stack
+ let m9 = t20 & t17;
+ // t20 -> stack
+ let r17 = u2 ^ u5;
+ // t22 -> stack
+ let t6 = t22 ^ r17;
+ // t13 -> stack
+ let m1 = t13 & t6;
+ let y5 = u0 ^ r17;
+ let m4 = t19 & y5;
+ let m5 = m4 ^ m1;
+ let m17 = m5 ^ t24;
+ let r18 = u5 ^ u6;
+ let t27 = t1 ^ r18;
+ let t15 = t10 ^ t27;
+ // t6 -> stack
+ let m11 = t1 & t15;
+ let m15 = m14 ^ m11;
+ let m21 = m17 ^ m15;
+ // t1 -> stack
+ // t4 <- stack
+ let m12 = t4 & t27;
+ let m13 = m12 ^ m11;
+ let t14 = t10 ^ r18;
+ let m3 = t14 ^ m1;
+ // m2 <- stack
+ let m16 = m3 ^ m2;
+ let m20 = m16 ^ m13;
+ // u4 <- stack
+ let r19 = u2 ^ u4;
+ let t16 = r13 ^ r19;
+ // t3 <- stack
+ let t26 = t3 ^ t16;
+ let m6 = t3 & t16;
+ let m8 = t26 ^ m6;
+ // t10 -> stack
+ // m7 <- stack
+ let m18 = m8 ^ m7;
+ let m22 = m18 ^ m13;
+ let m25 = m22 & m20;
+ let m26 = m21 ^ m25;
+ let m10 = m9 ^ m6;
+ let m19 = m10 ^ m15;
+ // t25 <- stack
+ let m23 = m19 ^ t25;
+ let m28 = m23 ^ m25;
+ let m24 = m22 ^ m23;
+ let m30 = m26 & m24;
+ let m39 = m23 ^ m30;
+ let m48 = m39 & y5;
+ let m57 = m39 & t19;
+ // m48 -> stack
+ let m36 = m24 ^ m25;
+ let m31 = m20 & m23;
+ let m27 = m20 ^ m21;
+ let m32 = m27 & m31;
+ let m29 = m28 & m27;
+ let m37 = m21 ^ m29;
+ // m39 -> stack
+ let m42 = m37 ^ m39;
+ let m52 = m42 & t15;
+ // t27 -> stack
+ // t1 <- stack
+ let m61 = m42 & t1;
+ let p0 = m52 ^ m61;
+ let p16 = m57 ^ m61;
+ // m57 -> stack
+ // t20 <- stack
+ let m60 = m37 & t20;
+ // p16 -> stack
+ // t17 <- stack
+ let m51 = m37 & t17;
+ let m33 = m27 ^ m25;
+ let m38 = m32 ^ m33;
+ let m43 = m37 ^ m38;
+ let m49 = m43 & t16;
+ let p6 = m49 ^ m60;
+ let p13 = m49 ^ m51;
+ let m58 = m43 & t3;
+ // t9 <- stack
+ let m50 = m38 & t9;
+ // t22 <- stack
+ let m59 = m38 & t22;
+ // p6 -> stack
+ let p1 = m58 ^ m59;
+ let p7 = p0 ^ p1;
+ let m34 = m21 & m22;
+ let m35 = m24 & m34;
+ let m40 = m35 ^ m36;
+ let m41 = m38 ^ m40;
+ let m45 = m42 ^ m41;
+ // t27 <- stack
+ let m53 = m45 & t27;
+ let p8 = m50 ^ m53;
+ let p23 = p7 ^ p8;
+ // t4 <- stack
+ let m62 = m45 & t4;
+ let p14 = m49 ^ m62;
+ let s6 = p14 ^ p23;
+ // t10 <- stack
+ let m54 = m41 & t10;
+ let p2 = m54 ^ m62;
+ let p22 = p2 ^ p7;
+ let s0 = p13 ^ p22;
+ let p17 = m58 ^ p2;
+ let p15 = m54 ^ m59;
+ // t2 <- stack
+ let m63 = m41 & t2;
+ // m39 <- stack
+ let m44 = m39 ^ m40;
+ // p17 -> stack
+ // t6 <- stack
+ let m46 = m44 & t6;
+ let p5 = m46 ^ m51;
+ // p23 -> stack
+ let p18 = m63 ^ p5;
+ let p24 = p5 ^ p7;
+ // m48 <- stack
+ let p12 = m46 ^ m48;
+ let s3 = p12 ^ p22;
+ // t13 <- stack
+ let m55 = m44 & t13;
+ let p9 = m55 ^ m63;
+ // p16 <- stack
+ let s7 = p9 ^ p16;
+ // t8 <- stack
+ let m47 = m40 & t8;
+ let p3 = m47 ^ m50;
+ let p19 = p2 ^ p3;
+ let s5 = p19 ^ p24;
+ let p11 = p0 ^ p3;
+ let p26 = p9 ^ p11;
+ // t23 <- stack
+ let m56 = m40 & t23;
+ let p4 = m48 ^ m56;
+ // p6 <- stack
+ let p20 = p4 ^ p6;
+ let p29 = p15 ^ p20;
+ let s1 = p26 ^ p29;
+ // m57 <- stack
+ let p10 = m57 ^ p4;
+ let p27 = p10 ^ p18;
+ // p23 <- stack
+ let s4 = p23 ^ p27;
+ let p25 = p6 ^ p10;
+ let p28 = p11 ^ p25;
+ // p17 <- stack
+ let s2 = p17 ^ p28;
+
+ state[0] = s7;
+ state[1] = s6;
+ state[2] = s5;
+ state[3] = s4;
+ state[4] = s3;
+ state[5] = s2;
+ state[6] = s1;
+ state[7] = s0;
+}
+
+/// Bitsliced implementation of the AES Sbox based on Boyar, Peralta and Calik.
+///
+/// See: <http://www.cs.yale.edu/homes/peralta/CircuitStuff/SLP_AES_113.txt>
+///
+/// Note that the 4 bitwise NOT (^= 0xffffffff) are moved to the key schedule.
+fn sub_bytes(state: &mut [u32]) {
+ debug_assert_eq!(state.len(), 8);
+
+ // Scheduled using https://github.com/Ko-/aes-armcortexm/tree/public/scheduler
+ // Inline "stack" comments reflect suggested stores and loads (ARM Cortex-M3 and M4)
+
+ let u7 = state[0];
+ let u6 = state[1];
+ let u5 = state[2];
+ let u4 = state[3];
+ let u3 = state[4];
+ let u2 = state[5];
+ let u1 = state[6];
+ let u0 = state[7];
+
+ let y14 = u3 ^ u5;
+ let y13 = u0 ^ u6;
+ let y12 = y13 ^ y14;
+ let t1 = u4 ^ y12;
+ let y15 = t1 ^ u5;
+ let t2 = y12 & y15;
+ let y6 = y15 ^ u7;
+ let y20 = t1 ^ u1;
+ // y12 -> stack
+ let y9 = u0 ^ u3;
+ // y20 -> stack
+ let y11 = y20 ^ y9;
+ // y9 -> stack
+ let t12 = y9 & y11;
+ // y6 -> stack
+ let y7 = u7 ^ y11;
+ let y8 = u0 ^ u5;
+ let t0 = u1 ^ u2;
+ let y10 = y15 ^ t0;
+ // y15 -> stack
+ let y17 = y10 ^ y11;
+ // y14 -> stack
+ let t13 = y14 & y17;
+ let t14 = t13 ^ t12;
+ // y17 -> stack
+ let y19 = y10 ^ y8;
+ // y10 -> stack
+ let t15 = y8 & y10;
+ let t16 = t15 ^ t12;
+ let y16 = t0 ^ y11;
+ // y11 -> stack
+ let y21 = y13 ^ y16;
+ // y13 -> stack
+ let t7 = y13 & y16;
+ // y16 -> stack
+ let y18 = u0 ^ y16;
+ let y1 = t0 ^ u7;
+ let y4 = y1 ^ u3;
+ // u7 -> stack
+ let t5 = y4 & u7;
+ let t6 = t5 ^ t2;
+ let t18 = t6 ^ t16;
+ let t22 = t18 ^ y19;
+ let y2 = y1 ^ u0;
+ let t10 = y2 & y7;
+ let t11 = t10 ^ t7;
+ let t20 = t11 ^ t16;
+ let t24 = t20 ^ y18;
+ let y5 = y1 ^ u6;
+ let t8 = y5 & y1;
+ let t9 = t8 ^ t7;
+ let t19 = t9 ^ t14;
+ let t23 = t19 ^ y21;
+ let y3 = y5 ^ y8;
+ // y6 <- stack
+ let t3 = y3 & y6;
+ let t4 = t3 ^ t2;
+ // y20 <- stack
+ let t17 = t4 ^ y20;
+ let t21 = t17 ^ t14;
+ let t26 = t21 & t23;
+ let t27 = t24 ^ t26;
+ let t31 = t22 ^ t26;
+ let t25 = t21 ^ t22;
+ // y4 -> stack
+ let t28 = t25 & t27;
+ let t29 = t28 ^ t22;
+ let z14 = t29 & y2;
+ let z5 = t29 & y7;
+ let t30 = t23 ^ t24;
+ let t32 = t31 & t30;
+ let t33 = t32 ^ t24;
+ let t35 = t27 ^ t33;
+ let t36 = t24 & t35;
+ let t38 = t27 ^ t36;
+ let t39 = t29 & t38;
+ let t40 = t25 ^ t39;
+ let t43 = t29 ^ t40;
+ // y16 <- stack
+ let z3 = t43 & y16;
+ let tc12 = z3 ^ z5;
+ // tc12 -> stack
+ // y13 <- stack
+ let z12 = t43 & y13;
+ let z13 = t40 & y5;
+ let z4 = t40 & y1;
+ let tc6 = z3 ^ z4;
+ let t34 = t23 ^ t33;
+ let t37 = t36 ^ t34;
+ let t41 = t40 ^ t37;
+ // y10 <- stack
+ let z8 = t41 & y10;
+ let z17 = t41 & y8;
+ let t44 = t33 ^ t37;
+ // y15 <- stack
+ let z0 = t44 & y15;
+ // z17 -> stack
+ // y12 <- stack
+ let z9 = t44 & y12;
+ let z10 = t37 & y3;
+ let z1 = t37 & y6;
+ let tc5 = z1 ^ z0;
+ let tc11 = tc6 ^ tc5;
+ // y4 <- stack
+ let z11 = t33 & y4;
+ let t42 = t29 ^ t33;
+ let t45 = t42 ^ t41;
+ // y17 <- stack
+ let z7 = t45 & y17;
+ let tc8 = z7 ^ tc6;
+ // y14 <- stack
+ let z16 = t45 & y14;
+ // y11 <- stack
+ let z6 = t42 & y11;
+ let tc16 = z6 ^ tc8;
+ // z14 -> stack
+ // y9 <- stack
+ let z15 = t42 & y9;
+ let tc20 = z15 ^ tc16;
+ let tc1 = z15 ^ z16;
+ let tc2 = z10 ^ tc1;
+ let tc21 = tc2 ^ z11;
+ let tc3 = z9 ^ tc2;
+ let s0 = tc3 ^ tc16;
+ let s3 = tc3 ^ tc11;
+ let s1 = s3 ^ tc16;
+ let tc13 = z13 ^ tc1;
+ // u7 <- stack
+ let z2 = t33 & u7;
+ let tc4 = z0 ^ z2;
+ let tc7 = z12 ^ tc4;
+ let tc9 = z8 ^ tc7;
+ let tc10 = tc8 ^ tc9;
+ // z14 <- stack
+ let tc17 = z14 ^ tc10;
+ let s5 = tc21 ^ tc17;
+ let tc26 = tc17 ^ tc20;
+ // z17 <- stack
+ let s2 = tc26 ^ z17;
+ // tc12 <- stack
+ let tc14 = tc4 ^ tc12;
+ let tc18 = tc13 ^ tc14;
+ let s6 = tc10 ^ tc18;
+ let s7 = z12 ^ tc18;
+ let s4 = tc14 ^ s3;
+
+ state[0] = s7;
+ state[1] = s6;
+ state[2] = s5;
+ state[3] = s4;
+ state[4] = s3;
+ state[5] = s2;
+ state[6] = s1;
+ state[7] = s0;
+}
+
+/// Applies the bitwise NOT operations that were omitted from `sub_bytes`
+#[inline]
+fn sub_bytes_nots(state: &mut [u32]) {
+ debug_assert_eq!(state.len(), 8);
+ state[0] ^= 0xffffffff;
+ state[1] ^= 0xffffffff;
+ state[5] ^= 0xffffffff;
+ state[6] ^= 0xffffffff;
+}
+
+/// Computation of the MixColumns transformation in the fixsliced representation, with different
+/// rotations used according to the round number mod 4.
+///
+/// Based on Käsper-Schwabe, similar to <https://github.com/Ko-/aes-armcortexm>.
+macro_rules! define_mix_columns {
+ (
+ $name:ident,
+ $name_inv:ident,
+ $first_rotate:path,
+ $second_rotate:path
+ ) => {
+ #[rustfmt::skip]
+ fn $name(state: &mut State) {
+ let (a0, a1, a2, a3, a4, a5, a6, a7) = (
+ state[0], state[1], state[2], state[3], state[4], state[5], state[6], state[7]
+ );
+ let (b0, b1, b2, b3, b4, b5, b6, b7) = (
+ $first_rotate(a0),
+ $first_rotate(a1),
+ $first_rotate(a2),
+ $first_rotate(a3),
+ $first_rotate(a4),
+ $first_rotate(a5),
+ $first_rotate(a6),
+ $first_rotate(a7),
+ );
+ let (c0, c1, c2, c3, c4, c5, c6, c7) = (
+ a0 ^ b0,
+ a1 ^ b1,
+ a2 ^ b2,
+ a3 ^ b3,
+ a4 ^ b4,
+ a5 ^ b5,
+ a6 ^ b6,
+ a7 ^ b7,
+ );
+ state[0] = b0 ^ c7 ^ $second_rotate(c0);
+ state[1] = b1 ^ c0 ^ c7 ^ $second_rotate(c1);
+ state[2] = b2 ^ c1 ^ $second_rotate(c2);
+ state[3] = b3 ^ c2 ^ c7 ^ $second_rotate(c3);
+ state[4] = b4 ^ c3 ^ c7 ^ $second_rotate(c4);
+ state[5] = b5 ^ c4 ^ $second_rotate(c5);
+ state[6] = b6 ^ c5 ^ $second_rotate(c6);
+ state[7] = b7 ^ c6 ^ $second_rotate(c7);
+ }
+
+ #[rustfmt::skip]
+ fn $name_inv(state: &mut State) {
+ let (a0, a1, a2, a3, a4, a5, a6, a7) = (
+ state[0], state[1], state[2], state[3], state[4], state[5], state[6], state[7]
+ );
+ let (b0, b1, b2, b3, b4, b5, b6, b7) = (
+ $first_rotate(a0),
+ $first_rotate(a1),
+ $first_rotate(a2),
+ $first_rotate(a3),
+ $first_rotate(a4),
+ $first_rotate(a5),
+ $first_rotate(a6),
+ $first_rotate(a7),
+ );
+ let (c0, c1, c2, c3, c4, c5, c6, c7) = (
+ a0 ^ b0,
+ a1 ^ b1,
+ a2 ^ b2,
+ a3 ^ b3,
+ a4 ^ b4,
+ a5 ^ b5,
+ a6 ^ b6,
+ a7 ^ b7,
+ );
+ let (d0, d1, d2, d3, d4, d5, d6, d7) = (
+ a0 ^ c7,
+ a1 ^ c0 ^ c7,
+ a2 ^ c1,
+ a3 ^ c2 ^ c7,
+ a4 ^ c3 ^ c7,
+ a5 ^ c4,
+ a6 ^ c5,
+ a7 ^ c6,
+ );
+ let (e0, e1, e2, e3, e4, e5, e6, e7) = (
+ c0 ^ d6,
+ c1 ^ d6 ^ d7,
+ c2 ^ d0 ^ d7,
+ c3 ^ d1 ^ d6,
+ c4 ^ d2 ^ d6 ^ d7,
+ c5 ^ d3 ^ d7,
+ c6 ^ d4,
+ c7 ^ d5,
+ );
+ state[0] = d0 ^ e0 ^ $second_rotate(e0);
+ state[1] = d1 ^ e1 ^ $second_rotate(e1);
+ state[2] = d2 ^ e2 ^ $second_rotate(e2);
+ state[3] = d3 ^ e3 ^ $second_rotate(e3);
+ state[4] = d4 ^ e4 ^ $second_rotate(e4);
+ state[5] = d5 ^ e5 ^ $second_rotate(e5);
+ state[6] = d6 ^ e6 ^ $second_rotate(e6);
+ state[7] = d7 ^ e7 ^ $second_rotate(e7);
+ }
+ }
+}
+
+define_mix_columns!(
+ mix_columns_0,
+ inv_mix_columns_0,
+ rotate_rows_1,
+ rotate_rows_2
+);
+
+define_mix_columns!(
+ mix_columns_1,
+ inv_mix_columns_1,
+ rotate_rows_and_columns_1_1,
+ rotate_rows_and_columns_2_2
+);
+
+#[cfg(not(feature = "compact"))]
+define_mix_columns!(
+ mix_columns_2,
+ inv_mix_columns_2,
+ rotate_rows_and_columns_1_2,
+ rotate_rows_2
+);
+
+#[cfg(not(feature = "compact"))]
+define_mix_columns!(
+ mix_columns_3,
+ inv_mix_columns_3,
+ rotate_rows_and_columns_1_3,
+ rotate_rows_and_columns_2_2
+);
+
+#[inline]
+fn delta_swap_1(a: &mut u32, shift: u32, mask: u32) {
+ let t = (*a ^ ((*a) >> shift)) & mask;
+ *a ^= t ^ (t << shift);
+}
+
+#[inline]
+fn delta_swap_2(a: &mut u32, b: &mut u32, shift: u32, mask: u32) {
+ let t = (*a ^ ((*b) >> shift)) & mask;
+ *a ^= t;
+ *b ^= t << shift;
+}
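+
+// For illustration (toy values, not from the reference code): a delta swap
+// exchanges bit `i + shift` with bit `i` for every bit `i` set in `mask`.
+//
+//     let mut a = 0b10u32;
+//     delta_swap_1(&mut a, 1, 0b01); // swaps bits 1 and 0
+//     assert_eq!(a, 0b01);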
+
+/// Applies ShiftRows once on an AES state (or key).
+#[cfg(any(not(feature = "compact"), feature = "hazmat"))]
+#[inline]
+fn shift_rows_1(state: &mut [u32]) {
+ debug_assert_eq!(state.len(), 8);
+ for x in state.iter_mut() {
+ delta_swap_1(x, 4, 0x0c0f0300);
+ delta_swap_1(x, 2, 0x33003300);
+ }
+}
+
+/// Applies ShiftRows twice on an AES state (or key).
+#[inline]
+fn shift_rows_2(state: &mut [u32]) {
+ debug_assert_eq!(state.len(), 8);
+ for x in state.iter_mut() {
+ delta_swap_1(x, 4, 0x0f000f00);
+ }
+}
+
+/// Applies ShiftRows three times on an AES state (or key).
+#[inline]
+fn shift_rows_3(state: &mut [u32]) {
+ debug_assert_eq!(state.len(), 8);
+ for x in state.iter_mut() {
+ delta_swap_1(x, 4, 0x030f0c00);
+ delta_swap_1(x, 2, 0x33003300);
+ }
+}
+
+#[inline(always)]
+fn inv_shift_rows_1(state: &mut [u32]) {
+ shift_rows_3(state);
+}
+
+#[inline(always)]
+fn inv_shift_rows_2(state: &mut [u32]) {
+ shift_rows_2(state);
+}
+
+#[cfg(not(feature = "compact"))]
+#[inline(always)]
+fn inv_shift_rows_3(state: &mut [u32]) {
+ shift_rows_1(state);
+}
+
+/// XOR the columns after the S-box during the key schedule round function.
+///
+/// The `idx_xor` parameter refers to the index of the previous round key that is
+/// involved in the XOR computation (should be 8 and 16 for AES-128 and AES-256,
+/// respectively).
+///
+/// The `idx_ror` parameter refers to the rotation value, which varies between the
+/// different key schedules.
+fn xor_columns(rkeys: &mut [u32], offset: usize, idx_xor: usize, idx_ror: u32) {
+ for i in 0..8 {
+ let off_i = offset + i;
+ let rk = rkeys[off_i - idx_xor] ^ (0x03030303 & ror(rkeys[off_i], idx_ror));
+ rkeys[off_i] =
+ rk ^ (0xfcfcfcfc & (rk << 2)) ^ (0xf0f0f0f0 & (rk << 4)) ^ (0xc0c0c0c0 & (rk << 6));
+ }
+}
+
+/// Bitslice two 128-bit input blocks input0, input1 into a 256-bit internal state.
+fn bitslice(output: &mut [u32], input0: &[u8], input1: &[u8]) {
+ debug_assert_eq!(output.len(), 8);
+ debug_assert_eq!(input0.len(), 16);
+ debug_assert_eq!(input1.len(), 16);
+
+ // Bitslicing is a bit index manipulation. 256 bits of data means each bit is positioned at an
+ // 8-bit index. AES data is 2 blocks, each one a 4x4 column-major matrix of bytes, so the
+ // index is initially ([b]lock, [c]olumn, [r]ow, [p]osition):
+ // b0 c1 c0 r1 r0 p2 p1 p0
+ //
+ // The desired bitsliced data groups first by bit position, then row, column, block:
+ // p2 p1 p0 r1 r0 c1 c0 b0
+
+ // Interleave the columns on input (note the order of input)
+ // b0 c1 c0 __ __ __ __ __ => c1 c0 b0 __ __ __ __ __
+ let mut t0 = u32::from_le_bytes(input0[0x00..0x04].try_into().unwrap());
+ let mut t2 = u32::from_le_bytes(input0[0x04..0x08].try_into().unwrap());
+ let mut t4 = u32::from_le_bytes(input0[0x08..0x0c].try_into().unwrap());
+ let mut t6 = u32::from_le_bytes(input0[0x0c..0x10].try_into().unwrap());
+ let mut t1 = u32::from_le_bytes(input1[0x00..0x04].try_into().unwrap());
+ let mut t3 = u32::from_le_bytes(input1[0x04..0x08].try_into().unwrap());
+ let mut t5 = u32::from_le_bytes(input1[0x08..0x0c].try_into().unwrap());
+ let mut t7 = u32::from_le_bytes(input1[0x0c..0x10].try_into().unwrap());
+
+ // Bit Index Swap 5 <-> 0:
+ // __ __ b0 __ __ __ __ p0 => __ __ p0 __ __ __ __ b0
+ let m0 = 0x55555555;
+ delta_swap_2(&mut t1, &mut t0, 1, m0);
+ delta_swap_2(&mut t3, &mut t2, 1, m0);
+ delta_swap_2(&mut t5, &mut t4, 1, m0);
+ delta_swap_2(&mut t7, &mut t6, 1, m0);
+
+ // Bit Index Swap 6 <-> 1:
+ // __ c0 __ __ __ __ p1 __ => __ p1 __ __ __ __ c0 __
+ let m1 = 0x33333333;
+ delta_swap_2(&mut t2, &mut t0, 2, m1);
+ delta_swap_2(&mut t3, &mut t1, 2, m1);
+ delta_swap_2(&mut t6, &mut t4, 2, m1);
+ delta_swap_2(&mut t7, &mut t5, 2, m1);
+
+ // Bit Index Swap 7 <-> 2:
+ // c1 __ __ __ __ p2 __ __ => p2 __ __ __ __ c1 __ __
+ let m2 = 0x0f0f0f0f;
+ delta_swap_2(&mut t4, &mut t0, 4, m2);
+ delta_swap_2(&mut t5, &mut t1, 4, m2);
+ delta_swap_2(&mut t6, &mut t2, 4, m2);
+ delta_swap_2(&mut t7, &mut t3, 4, m2);
+
+ // Final bitsliced bit index, as desired:
+ // p2 p1 p0 r1 r0 c1 c0 b0
+ output[0] = t0;
+ output[1] = t1;
+ output[2] = t2;
+ output[3] = t3;
+ output[4] = t4;
+ output[5] = t5;
+ output[6] = t6;
+ output[7] = t7;
+}
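+
+// For illustration: with this packing, bit `p` of the byte at (block `b`,
+// column `c`, row `r`) lands in `output[p]` at bit position
+// `r * 8 + c * 2 + b`, e.g. bit 5 of the byte in block 1, column 3, row 2
+// ends up in `output[5]` at bit 2*8 + 3*2 + 1 = 23.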
+
+/// Un-bitslice a 256-bit internal state into two 128-bit blocks of output.
+fn inv_bitslice(input: &[u32], output: &mut [Block]) {
+ debug_assert_eq!(input.len(), 8);
+ debug_assert_eq!(output.len(), 2);
+
+ // Unbitslicing is a bit index manipulation. 256 bits of data means each bit is positioned at
+ // an 8-bit index. AES data is 2 blocks, each one a 4x4 column-major matrix of bytes, so the
+ // desired index for the output is ([b]lock, [c]olumn, [r]ow, [p]osition):
+ // b0 c1 c0 r1 r0 p2 p1 p0
+ //
+ // The initially bitsliced data groups first by bit position, then row, column, block:
+ // p2 p1 p0 r1 r0 c1 c0 b0
+
+ let mut t0 = input[0];
+ let mut t1 = input[1];
+ let mut t2 = input[2];
+ let mut t3 = input[3];
+ let mut t4 = input[4];
+ let mut t5 = input[5];
+ let mut t6 = input[6];
+ let mut t7 = input[7];
+
+ // TODO: these bit index swaps are identical to those in 'packing'
+
+ // Bit Index Swap 5 <-> 0:
+ // __ __ p0 __ __ __ __ b0 => __ __ b0 __ __ __ __ p0
+ let m0 = 0x55555555;
+ delta_swap_2(&mut t1, &mut t0, 1, m0);
+ delta_swap_2(&mut t3, &mut t2, 1, m0);
+ delta_swap_2(&mut t5, &mut t4, 1, m0);
+ delta_swap_2(&mut t7, &mut t6, 1, m0);
+
+ // Bit Index Swap 6 <-> 1:
+ // __ p1 __ __ __ __ c0 __ => __ c0 __ __ __ __ p1 __
+ let m1 = 0x33333333;
+ delta_swap_2(&mut t2, &mut t0, 2, m1);
+ delta_swap_2(&mut t3, &mut t1, 2, m1);
+ delta_swap_2(&mut t6, &mut t4, 2, m1);
+ delta_swap_2(&mut t7, &mut t5, 2, m1);
+
+ // Bit Index Swap 7 <-> 2:
+ // p2 __ __ __ __ c1 __ __ => c1 __ __ __ __ p2 __ __
+ let m2 = 0x0f0f0f0f;
+ delta_swap_2(&mut t4, &mut t0, 4, m2);
+ delta_swap_2(&mut t5, &mut t1, 4, m2);
+ delta_swap_2(&mut t6, &mut t2, 4, m2);
+ delta_swap_2(&mut t7, &mut t3, 4, m2);
+
+ // De-interleave the columns on output (note the order of output)
+ // c1 c0 b0 __ __ __ __ __ => b0 c1 c0 __ __ __ __ __
+ output[0][0x00..0x04].copy_from_slice(&t0.to_le_bytes());
+ output[0][0x04..0x08].copy_from_slice(&t2.to_le_bytes());
+ output[0][0x08..0x0c].copy_from_slice(&t4.to_le_bytes());
+ output[0][0x0c..0x10].copy_from_slice(&t6.to_le_bytes());
+ output[1][0x00..0x04].copy_from_slice(&t1.to_le_bytes());
+ output[1][0x04..0x08].copy_from_slice(&t3.to_le_bytes());
+ output[1][0x08..0x0c].copy_from_slice(&t5.to_le_bytes());
+ output[1][0x0c..0x10].copy_from_slice(&t7.to_le_bytes());
+
+ // Final AES bit index, as desired:
+ // b0 c1 c0 r1 r0 p2 p1 p0
+}
+
+/// Copy 32 bytes (8 `u32` words) within the provided slice to an offset 8 words ahead
+fn memshift32(buffer: &mut [u32], src_offset: usize) {
+ debug_assert_eq!(src_offset % 8, 0);
+
+ let dst_offset = src_offset + 8;
+ debug_assert!(dst_offset + 8 <= buffer.len());
+
+ for i in (0..8).rev() {
+ buffer[dst_offset + i] = buffer[src_offset + i];
+ }
+}
+
+/// XOR the round key into the internal state. The round keys are expected to
+/// be pre-computed and packed in the fixsliced representation.
+#[inline]
+fn add_round_key(state: &mut State, rkey: &[u32]) {
+ debug_assert_eq!(rkey.len(), 8);
+ for (a, b) in state.iter_mut().zip(rkey) {
+ *a ^= b;
+ }
+}
+
+#[inline(always)]
+fn add_round_constant_bit(state: &mut [u32], bit: usize) {
+ state[bit] ^= 0x0000c000;
+}
+
+#[inline(always)]
+fn ror(x: u32, y: u32) -> u32 {
+ x.rotate_right(y)
+}
+
+#[inline(always)]
+fn ror_distance(rows: u32, cols: u32) -> u32 {
+ (rows << 3) + (cols << 1)
+}
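+
+// With the packing produced by `bitslice` (bit position r * 8 + c * 2 + b),
+// this is the rotation distance that moves the state by `rows` rows and
+// `cols` columns, as used by the ShiftRows/MixColumns helpers below.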
+
+#[inline(always)]
+fn rotate_rows_1(x: u32) -> u32 {
+ ror(x, ror_distance(1, 0))
+}
+
+#[inline(always)]
+fn rotate_rows_2(x: u32) -> u32 {
+ ror(x, ror_distance(2, 0))
+}
+
+#[inline(always)]
+#[rustfmt::skip]
+fn rotate_rows_and_columns_1_1(x: u32) -> u32 {
+ (ror(x, ror_distance(1, 1)) & 0x3f3f3f3f) |
+ (ror(x, ror_distance(0, 1)) & 0xc0c0c0c0)
+}
+
+#[cfg(not(feature = "compact"))]
+#[inline(always)]
+#[rustfmt::skip]
+fn rotate_rows_and_columns_1_2(x: u32) -> u32 {
+ (ror(x, ror_distance(1, 2)) & 0x0f0f0f0f) |
+ (ror(x, ror_distance(0, 2)) & 0xf0f0f0f0)
+}
+
+#[cfg(not(feature = "compact"))]
+#[inline(always)]
+#[rustfmt::skip]
+fn rotate_rows_and_columns_1_3(x: u32) -> u32 {
+ (ror(x, ror_distance(1, 3)) & 0x03030303) |
+ (ror(x, ror_distance(0, 3)) & 0xfcfcfcfc)
+}
+
+#[inline(always)]
+#[rustfmt::skip]
+fn rotate_rows_and_columns_2_2(x: u32) -> u32 {
+ (ror(x, ror_distance(2, 2)) & 0x0f0f0f0f) |
+ (ror(x, ror_distance(1, 2)) & 0xf0f0f0f0)
+}
+
+/// Low-level "hazmat" AES functions.
+///
+/// Note: this isn't actually used in the `Aes128`/`Aes192`/`Aes256`
+/// implementations in this crate, but instead provides raw access to
+/// the AES round function gated under the `hazmat` crate feature.
+#[cfg(feature = "hazmat")]
+pub(crate) mod hazmat {
+ use super::{
+ bitslice, inv_bitslice, inv_mix_columns_0, inv_shift_rows_1, inv_sub_bytes, mix_columns_0,
+ shift_rows_1, sub_bytes, sub_bytes_nots, State,
+ };
+ use crate::{Block, ParBlocks};
+
+ /// XOR the `src` block into the `dst` block in-place.
+ fn xor_in_place(dst: &mut Block, src: &Block) {
+ for (a, b) in dst.iter_mut().zip(src.as_slice()) {
+ *a ^= *b;
+ }
+ }
+
+ /// Perform a bitslice operation, loading a single block.
+ fn bitslice_block(block: &Block) -> State {
+ let mut state = State::default();
+ bitslice(&mut state, block, block);
+ state
+ }
+
+ /// Perform an inverse bitslice operation, extracting a single block.
+ fn inv_bitslice_block(block: &mut Block, state: &State) {
+ let mut out = [Block::default(); 2];
+ inv_bitslice(state, &mut out);
+ block.copy_from_slice(&out[0]);
+ }
+
+ /// AES cipher (encrypt) round function.
+ #[inline]
+ pub(crate) fn cipher_round(block: &mut Block, round_key: &Block) {
+ let mut state = bitslice_block(block);
+ sub_bytes(&mut state);
+ sub_bytes_nots(&mut state);
+ shift_rows_1(&mut state);
+ mix_columns_0(&mut state);
+ inv_bitslice_block(block, &state);
+ xor_in_place(block, round_key);
+ }
+
+ /// AES cipher (encrypt) round function: parallel version.
+ #[inline]
+ pub(crate) fn cipher_round_par(blocks: &mut ParBlocks, round_keys: &ParBlocks) {
+ for (chunk, keys) in blocks.chunks_exact_mut(2).zip(round_keys.chunks_exact(2)) {
+ let mut state = State::default();
+ bitslice(&mut state, &chunk[0], &chunk[1]);
+ sub_bytes(&mut state);
+ sub_bytes_nots(&mut state);
+ shift_rows_1(&mut state);
+ mix_columns_0(&mut state);
+ inv_bitslice(&state, chunk);
+
+ for i in 0..2 {
+ xor_in_place(&mut chunk[i], &keys[i]);
+ }
+ }
+ }
+
+    /// AES equivalent inverse cipher (decrypt) round function.
+ #[inline]
+ pub(crate) fn equiv_inv_cipher_round(block: &mut Block, round_key: &Block) {
+ let mut state = bitslice_block(block);
+ sub_bytes_nots(&mut state);
+ inv_sub_bytes(&mut state);
+ inv_shift_rows_1(&mut state);
+ inv_mix_columns_0(&mut state);
+ inv_bitslice_block(block, &state);
+ xor_in_place(block, round_key);
+ }
+
+    /// AES equivalent inverse cipher (decrypt) round function: parallel version.
+ #[inline]
+ pub(crate) fn equiv_inv_cipher_round_par(blocks: &mut ParBlocks, round_keys: &ParBlocks) {
+ for (chunk, keys) in blocks.chunks_exact_mut(2).zip(round_keys.chunks_exact(2)) {
+ let mut state = State::default();
+ bitslice(&mut state, &chunk[0], &chunk[1]);
+ sub_bytes_nots(&mut state);
+ inv_sub_bytes(&mut state);
+ inv_shift_rows_1(&mut state);
+ inv_mix_columns_0(&mut state);
+ inv_bitslice(&state, chunk);
+
+ for i in 0..2 {
+ xor_in_place(&mut chunk[i], &keys[i]);
+ }
+ }
+ }
+
+ /// AES mix columns function.
+ #[inline]
+ pub(crate) fn mix_columns(block: &mut Block) {
+ let mut state = bitslice_block(block);
+ mix_columns_0(&mut state);
+ inv_bitslice_block(block, &state);
+ }
+
+ /// AES inverse mix columns function.
+ #[inline]
+ pub(crate) fn inv_mix_columns(block: &mut Block) {
+ let mut state = bitslice_block(block);
+ inv_mix_columns_0(&mut state);
+ inv_bitslice_block(block, &state);
+ }
+}
diff --git a/rust/vendor/aes/src/soft/fixslice64.rs b/rust/vendor/aes/src/soft/fixslice64.rs
new file mode 100644
index 0000000..18315b7
--- /dev/null
+++ b/rust/vendor/aes/src/soft/fixslice64.rs
@@ -0,0 +1,1540 @@
+//! Fixsliced implementations of AES-128, AES-192 and AES-256 (64-bit)
+//! adapted from the C implementation.
+//!
+//! All implementations are fully bitsliced and do not rely on any
+//! Look-Up Table (LUT).
+//!
+//! See the paper at <https://eprint.iacr.org/2020/1123.pdf> for more details.
+//!
+//! # Author (original C code)
+//!
+//! Alexandre Adomnicai, Nanyang Technological University, Singapore
+//! <alexandre.adomnicai@ntu.edu.sg>
+//!
+//! Originally licensed MIT. Relicensed as Apache 2.0+MIT with permission.
+
+#![allow(clippy::unreadable_literal)]
+
+use crate::Block;
+use cipher::{
+ consts::{U16, U24, U32},
+ generic_array::GenericArray,
+};
+
+/// AES block batch size for this implementation
+pub(crate) const FIXSLICE_BLOCKS: usize = 4;
+
+/// AES-128 round keys
+pub(crate) type FixsliceKeys128 = [u64; 88];
+
+/// AES-192 round keys
+pub(crate) type FixsliceKeys192 = [u64; 104];
+
+/// AES-256 round keys
+pub(crate) type FixsliceKeys256 = [u64; 120];
+
+/// 512-bit internal state
+pub(crate) type State = [u64; 8];
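+
+// The sizes above follow from eight bitsliced words per round key:
+// 8 * 11 = 88 (AES-128), 8 * 13 = 104 (AES-192) and 8 * 15 = 120 (AES-256).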
+
+/// Fully bitsliced AES-128 key schedule to match the fully-fixsliced representation.
+pub(crate) fn aes128_key_schedule(key: &GenericArray<u8, U16>) -> FixsliceKeys128 {
+ let mut rkeys = [0u64; 88];
+
+ bitslice(&mut rkeys[..8], key, key, key, key);
+
+ let mut rk_off = 0;
+ for rcon in 0..10 {
+ memshift32(&mut rkeys, rk_off);
+ rk_off += 8;
+
+ sub_bytes(&mut rkeys[rk_off..(rk_off + 8)]);
+ sub_bytes_nots(&mut rkeys[rk_off..(rk_off + 8)]);
+
+ if rcon < 8 {
+ add_round_constant_bit(&mut rkeys[rk_off..(rk_off + 8)], rcon);
+ } else {
+ add_round_constant_bit(&mut rkeys[rk_off..(rk_off + 8)], rcon - 8);
+ add_round_constant_bit(&mut rkeys[rk_off..(rk_off + 8)], rcon - 7);
+ add_round_constant_bit(&mut rkeys[rk_off..(rk_off + 8)], rcon - 5);
+ add_round_constant_bit(&mut rkeys[rk_off..(rk_off + 8)], rcon - 4);
+ }
+
+ xor_columns(&mut rkeys, rk_off, 8, ror_distance(1, 3));
+ }
+
+ // Adjust to match fixslicing format
+ #[cfg(feature = "compact")]
+ {
+ for i in (8..88).step_by(16) {
+ inv_shift_rows_1(&mut rkeys[i..(i + 8)]);
+ }
+ }
+ #[cfg(not(feature = "compact"))]
+ {
+ for i in (8..72).step_by(32) {
+ inv_shift_rows_1(&mut rkeys[i..(i + 8)]);
+ inv_shift_rows_2(&mut rkeys[(i + 8)..(i + 16)]);
+ inv_shift_rows_3(&mut rkeys[(i + 16)..(i + 24)]);
+ }
+ inv_shift_rows_1(&mut rkeys[72..80]);
+ }
+
+ // Account for NOTs removed from sub_bytes
+ for i in 1..11 {
+ sub_bytes_nots(&mut rkeys[(i * 8)..(i * 8 + 8)]);
+ }
+
+ rkeys
+}
+
+/// Fully bitsliced AES-192 key schedule to match the fully-fixsliced representation.
+pub(crate) fn aes192_key_schedule(key: &GenericArray<u8, U24>) -> FixsliceKeys192 {
+ let mut rkeys = [0u64; 104];
+ let mut tmp = [0u64; 8];
+
+ bitslice(
+ &mut rkeys[..8],
+ &key[..16],
+ &key[..16],
+ &key[..16],
+ &key[..16],
+ );
+ bitslice(&mut tmp, &key[8..], &key[8..], &key[8..], &key[8..]);
+
+ let mut rcon = 0;
+ let mut rk_off = 8;
+
+ loop {
+ for i in 0..8 {
+ rkeys[rk_off + i] = (0x00ff00ff00ff00ff & (tmp[i] >> 8))
+ | (0xff00ff00ff00ff00 & (rkeys[(rk_off - 8) + i] << 8));
+ }
+
+ sub_bytes(&mut tmp);
+ sub_bytes_nots(&mut tmp);
+
+ add_round_constant_bit(&mut tmp, rcon);
+ rcon += 1;
+
+ for i in 0..8 {
+ let mut ti = rkeys[rk_off + i];
+ ti ^= 0x0f000f000f000f00 & ror(tmp[i], ror_distance(1, 1));
+ ti ^= 0xf000f000f000f000 & (ti << 4);
+ tmp[i] = ti;
+ }
+ rkeys[rk_off..(rk_off + 8)].copy_from_slice(&tmp);
+ rk_off += 8;
+
+ for i in 0..8 {
+ let ui = tmp[i];
+ let mut ti = (0x00ff00ff00ff00ff & (rkeys[(rk_off - 16) + i] >> 8))
+ | (0xff00ff00ff00ff00 & (ui << 8));
+ ti ^= 0x000f000f000f000f & (ui >> 12);
+ tmp[i] = ti
+ ^ (0xfff0fff0fff0fff0 & (ti << 4))
+ ^ (0xff00ff00ff00ff00 & (ti << 8))
+ ^ (0xf000f000f000f000 & (ti << 12));
+ }
+ rkeys[rk_off..(rk_off + 8)].copy_from_slice(&tmp);
+ rk_off += 8;
+
+ sub_bytes(&mut tmp);
+ sub_bytes_nots(&mut tmp);
+
+ add_round_constant_bit(&mut tmp, rcon);
+ rcon += 1;
+
+ for i in 0..8 {
+ let mut ti = (0x00ff00ff00ff00ff & (rkeys[(rk_off - 16) + i] >> 8))
+ | (0xff00ff00ff00ff00 & (rkeys[(rk_off - 8) + i] << 8));
+ ti ^= 0x000f000f000f000f & ror(tmp[i], ror_distance(1, 3));
+ rkeys[rk_off + i] = ti
+ ^ (0xfff0fff0fff0fff0 & (ti << 4))
+ ^ (0xff00ff00ff00ff00 & (ti << 8))
+ ^ (0xf000f000f000f000 & (ti << 12));
+ }
+ rk_off += 8;
+
+ if rcon >= 8 {
+ break;
+ }
+
+ for i in 0..8 {
+ let ui = rkeys[(rk_off - 8) + i];
+ let mut ti = rkeys[(rk_off - 16) + i];
+ ti ^= 0x0f000f000f000f00 & (ui >> 4);
+ ti ^= 0xf000f000f000f000 & (ti << 4);
+ tmp[i] = ti;
+ }
+ }
+
+ // Adjust to match fixslicing format
+ #[cfg(feature = "compact")]
+ {
+ for i in (8..104).step_by(16) {
+ inv_shift_rows_1(&mut rkeys[i..(i + 8)]);
+ }
+ }
+ #[cfg(not(feature = "compact"))]
+ {
+ for i in (0..96).step_by(32) {
+ inv_shift_rows_1(&mut rkeys[(i + 8)..(i + 16)]);
+ inv_shift_rows_2(&mut rkeys[(i + 16)..(i + 24)]);
+ inv_shift_rows_3(&mut rkeys[(i + 24)..(i + 32)]);
+ }
+ }
+
+ // Account for NOTs removed from sub_bytes
+ for i in 1..13 {
+ sub_bytes_nots(&mut rkeys[(i * 8)..(i * 8 + 8)]);
+ }
+
+ rkeys
+}
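+
+// A note on the loop above: AES-192 expands a six-word key, so each pass
+// derives three 8-word round keys while running the S-box/round-constant
+// step twice (round constants 0..=7 over four passes), mirroring the
+// standard schedule in which one round constant covers six 32-bit words.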
+
+/// Fully bitsliced AES-256 key schedule to match the fully-fixsliced representation.
+pub(crate) fn aes256_key_schedule(key: &GenericArray<u8, U32>) -> FixsliceKeys256 {
+ let mut rkeys = [0u64; 120];
+
+ bitslice(
+ &mut rkeys[..8],
+ &key[..16],
+ &key[..16],
+ &key[..16],
+ &key[..16],
+ );
+ bitslice(
+ &mut rkeys[8..16],
+ &key[16..],
+ &key[16..],
+ &key[16..],
+ &key[16..],
+ );
+
+ let mut rk_off = 8;
+
+ let mut rcon = 0;
+ loop {
+ memshift32(&mut rkeys, rk_off);
+ rk_off += 8;
+
+ sub_bytes(&mut rkeys[rk_off..(rk_off + 8)]);
+ sub_bytes_nots(&mut rkeys[rk_off..(rk_off + 8)]);
+
+ add_round_constant_bit(&mut rkeys[rk_off..(rk_off + 8)], rcon);
+ xor_columns(&mut rkeys, rk_off, 16, ror_distance(1, 3));
+ rcon += 1;
+
+ if rcon == 7 {
+ break;
+ }
+
+ memshift32(&mut rkeys, rk_off);
+ rk_off += 8;
+
+ sub_bytes(&mut rkeys[rk_off..(rk_off + 8)]);
+ sub_bytes_nots(&mut rkeys[rk_off..(rk_off + 8)]);
+
+ xor_columns(&mut rkeys, rk_off, 16, ror_distance(0, 3));
+ }
+
+ // Adjust to match fixslicing format
+ #[cfg(feature = "compact")]
+ {
+ for i in (8..120).step_by(16) {
+ inv_shift_rows_1(&mut rkeys[i..(i + 8)]);
+ }
+ }
+ #[cfg(not(feature = "compact"))]
+ {
+ for i in (8..104).step_by(32) {
+ inv_shift_rows_1(&mut rkeys[i..(i + 8)]);
+ inv_shift_rows_2(&mut rkeys[(i + 8)..(i + 16)]);
+ inv_shift_rows_3(&mut rkeys[(i + 16)..(i + 24)]);
+ }
+ inv_shift_rows_1(&mut rkeys[104..112]);
+ }
+
+ // Account for NOTs removed from sub_bytes
+ for i in 1..15 {
+ sub_bytes_nots(&mut rkeys[(i * 8)..(i * 8 + 8)]);
+ }
+
+ rkeys
+}
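+
+// A note on the loop above: as in the standard AES-256 schedule, it
+// alternates a RotWord+SubWord+Rcon step (`ror_distance(1, 3)` plus
+// `add_round_constant_bit`) with a SubWord-only step (`ror_distance(0, 3)`,
+// no round constant), stopping after the seventh round constant.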
+
+/// Fully-fixsliced AES-128 decryption (the InvShiftRows is completely omitted).
+///
+/// Decrypts four blocks in-place and in parallel.
+pub(crate) fn aes128_decrypt(rkeys: &FixsliceKeys128, blocks: &mut [Block]) {
+ debug_assert_eq!(blocks.len(), FIXSLICE_BLOCKS);
+ let mut state = State::default();
+
+ bitslice(&mut state, &blocks[0], &blocks[1], &blocks[2], &blocks[3]);
+
+ add_round_key(&mut state, &rkeys[80..]);
+ inv_sub_bytes(&mut state);
+
+ #[cfg(not(feature = "compact"))]
+ {
+ inv_shift_rows_2(&mut state);
+ }
+
+ let mut rk_off = 72;
+ loop {
+ #[cfg(feature = "compact")]
+ {
+ inv_shift_rows_2(&mut state);
+ }
+
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ inv_mix_columns_1(&mut state);
+ inv_sub_bytes(&mut state);
+ rk_off -= 8;
+
+ if rk_off == 0 {
+ break;
+ }
+
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ inv_mix_columns_0(&mut state);
+ inv_sub_bytes(&mut state);
+ rk_off -= 8;
+
+ #[cfg(not(feature = "compact"))]
+ {
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ inv_mix_columns_3(&mut state);
+ inv_sub_bytes(&mut state);
+ rk_off -= 8;
+
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ inv_mix_columns_2(&mut state);
+ inv_sub_bytes(&mut state);
+ rk_off -= 8;
+ }
+ }
+
+ add_round_key(&mut state, &rkeys[..8]);
+
+ inv_bitslice(&state, blocks);
+}
+
+/// Fully-fixsliced AES-128 encryption (the ShiftRows is completely omitted).
+///
+/// Encrypts four blocks in-place and in parallel.
+pub(crate) fn aes128_encrypt(rkeys: &FixsliceKeys128, blocks: &mut [Block]) {
+ debug_assert_eq!(blocks.len(), FIXSLICE_BLOCKS);
+ let mut state = State::default();
+
+ bitslice(&mut state, &blocks[0], &blocks[1], &blocks[2], &blocks[3]);
+
+ add_round_key(&mut state, &rkeys[..8]);
+
+ let mut rk_off = 8;
+ loop {
+ sub_bytes(&mut state);
+ mix_columns_1(&mut state);
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ rk_off += 8;
+
+ #[cfg(feature = "compact")]
+ {
+ shift_rows_2(&mut state);
+ }
+
+ if rk_off == 80 {
+ break;
+ }
+
+ #[cfg(not(feature = "compact"))]
+ {
+ sub_bytes(&mut state);
+ mix_columns_2(&mut state);
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ rk_off += 8;
+
+ sub_bytes(&mut state);
+ mix_columns_3(&mut state);
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ rk_off += 8;
+ }
+
+ sub_bytes(&mut state);
+ mix_columns_0(&mut state);
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ rk_off += 8;
+ }
+
+ #[cfg(not(feature = "compact"))]
+ {
+ shift_rows_2(&mut state);
+ }
+
+ sub_bytes(&mut state);
+ add_round_key(&mut state, &rkeys[80..]);
+
+ inv_bitslice(&state, blocks);
+}
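+
+// A minimal usage sketch (crate-internal; the default key and blocks below
+// are placeholders):
+//
+//     let key = GenericArray::default();
+//     let rkeys = aes128_key_schedule(&key);
+//     let mut blocks = [Block::default(); FIXSLICE_BLOCKS];
+//     aes128_encrypt(&rkeys, &mut blocks);
+//     // `blocks` now holds the four ciphertext blocks.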
+
+/// Fully-fixsliced AES-192 decryption (the InvShiftRows is completely omitted).
+///
+/// Decrypts four blocks in-place and in parallel.
+pub(crate) fn aes192_decrypt(rkeys: &FixsliceKeys192, blocks: &mut [Block]) {
+ debug_assert_eq!(blocks.len(), FIXSLICE_BLOCKS);
+ let mut state = State::default();
+
+ bitslice(&mut state, &blocks[0], &blocks[1], &blocks[2], &blocks[3]);
+
+ add_round_key(&mut state, &rkeys[96..]);
+ inv_sub_bytes(&mut state);
+
+ let mut rk_off = 88;
+ loop {
+ #[cfg(feature = "compact")]
+ {
+ inv_shift_rows_2(&mut state);
+ }
+ #[cfg(not(feature = "compact"))]
+ {
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ inv_mix_columns_3(&mut state);
+ inv_sub_bytes(&mut state);
+ rk_off -= 8;
+
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ inv_mix_columns_2(&mut state);
+ inv_sub_bytes(&mut state);
+ rk_off -= 8;
+ }
+
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ inv_mix_columns_1(&mut state);
+ inv_sub_bytes(&mut state);
+ rk_off -= 8;
+
+ if rk_off == 0 {
+ break;
+ }
+
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ inv_mix_columns_0(&mut state);
+ inv_sub_bytes(&mut state);
+ rk_off -= 8;
+ }
+
+ add_round_key(&mut state, &rkeys[..8]);
+
+ inv_bitslice(&state, blocks);
+}
+
+/// Fully-fixsliced AES-192 encryption (the ShiftRows is completely omitted).
+///
+/// Encrypts four blocks in-place and in parallel.
+pub(crate) fn aes192_encrypt(rkeys: &FixsliceKeys192, blocks: &mut [Block]) {
+ debug_assert_eq!(blocks.len(), FIXSLICE_BLOCKS);
+ let mut state = State::default();
+
+ bitslice(&mut state, &blocks[0], &blocks[1], &blocks[2], &blocks[3]);
+
+ add_round_key(&mut state, &rkeys[..8]);
+
+ let mut rk_off = 8;
+ loop {
+ sub_bytes(&mut state);
+ mix_columns_1(&mut state);
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ rk_off += 8;
+
+ #[cfg(feature = "compact")]
+ {
+ shift_rows_2(&mut state);
+ }
+ #[cfg(not(feature = "compact"))]
+ {
+ sub_bytes(&mut state);
+ mix_columns_2(&mut state);
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ rk_off += 8;
+
+ sub_bytes(&mut state);
+ mix_columns_3(&mut state);
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ rk_off += 8;
+ }
+
+ if rk_off == 96 {
+ break;
+ }
+
+ sub_bytes(&mut state);
+ mix_columns_0(&mut state);
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ rk_off += 8;
+ }
+
+ sub_bytes(&mut state);
+ add_round_key(&mut state, &rkeys[96..]);
+
+ inv_bitslice(&state, blocks);
+}
+
+/// Fully-fixsliced AES-256 decryption (the InvShiftRows is completely omitted).
+///
+/// Decrypts four blocks in-place and in parallel.
+pub(crate) fn aes256_decrypt(rkeys: &FixsliceKeys256, blocks: &mut [Block]) {
+ debug_assert_eq!(blocks.len(), FIXSLICE_BLOCKS);
+ let mut state = State::default();
+
+ bitslice(&mut state, &blocks[0], &blocks[1], &blocks[2], &blocks[3]);
+
+ add_round_key(&mut state, &rkeys[112..]);
+ inv_sub_bytes(&mut state);
+
+ #[cfg(not(feature = "compact"))]
+ {
+ inv_shift_rows_2(&mut state);
+ }
+
+ let mut rk_off = 104;
+ loop {
+ #[cfg(feature = "compact")]
+ {
+ inv_shift_rows_2(&mut state);
+ }
+
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ inv_mix_columns_1(&mut state);
+ inv_sub_bytes(&mut state);
+ rk_off -= 8;
+
+ if rk_off == 0 {
+ break;
+ }
+
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ inv_mix_columns_0(&mut state);
+ inv_sub_bytes(&mut state);
+ rk_off -= 8;
+
+ #[cfg(not(feature = "compact"))]
+ {
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ inv_mix_columns_3(&mut state);
+ inv_sub_bytes(&mut state);
+ rk_off -= 8;
+
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ inv_mix_columns_2(&mut state);
+ inv_sub_bytes(&mut state);
+ rk_off -= 8;
+ }
+ }
+
+ add_round_key(&mut state, &rkeys[..8]);
+
+ inv_bitslice(&state, blocks);
+}
+
+/// Fully-fixsliced AES-256 encryption (the ShiftRows is completely omitted).
+///
+/// Encrypts four blocks in-place and in parallel.
+pub(crate) fn aes256_encrypt(rkeys: &FixsliceKeys256, blocks: &mut [Block]) {
+ debug_assert_eq!(blocks.len(), FIXSLICE_BLOCKS);
+ let mut state = State::default();
+
+ bitslice(&mut state, &blocks[0], &blocks[1], &blocks[2], &blocks[3]);
+
+ add_round_key(&mut state, &rkeys[..8]);
+
+ let mut rk_off = 8;
+ loop {
+ sub_bytes(&mut state);
+ mix_columns_1(&mut state);
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ rk_off += 8;
+
+ #[cfg(feature = "compact")]
+ {
+ shift_rows_2(&mut state);
+ }
+
+ if rk_off == 112 {
+ break;
+ }
+
+ #[cfg(not(feature = "compact"))]
+ {
+ sub_bytes(&mut state);
+ mix_columns_2(&mut state);
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ rk_off += 8;
+
+ sub_bytes(&mut state);
+ mix_columns_3(&mut state);
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ rk_off += 8;
+ }
+
+ sub_bytes(&mut state);
+ mix_columns_0(&mut state);
+ add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]);
+ rk_off += 8;
+ }
+
+ #[cfg(not(feature = "compact"))]
+ {
+ shift_rows_2(&mut state);
+ }
+
+ sub_bytes(&mut state);
+ add_round_key(&mut state, &rkeys[112..]);
+
+ inv_bitslice(&state, blocks);
+}
+
+/// Note that the four bitwise NOTs (`^= 0xffffffffffffffff`) are accounted for here so that this
+/// is a true inverse of `sub_bytes`.
+fn inv_sub_bytes(state: &mut [u64]) {
+ debug_assert_eq!(state.len(), 8);
+
+ // Scheduled using https://github.com/Ko-/aes-armcortexm/tree/public/scheduler
+ // Inline "stack" comments reflect suggested stores and loads (ARM Cortex-M3 and M4)
+
+ let u7 = state[0];
+ let u6 = state[1];
+ let u5 = state[2];
+ let u4 = state[3];
+ let u3 = state[4];
+ let u2 = state[5];
+ let u1 = state[6];
+ let u0 = state[7];
+
+ let t23 = u0 ^ u3;
+ let t8 = u1 ^ t23;
+ let m2 = t23 & t8;
+ let t4 = u4 ^ t8;
+ let t22 = u1 ^ u3;
+ let t2 = u0 ^ u1;
+ let t1 = u3 ^ u4;
+ // t23 -> stack
+ let t9 = u7 ^ t1;
+ // t8 -> stack
+ let m7 = t22 & t9;
+ // t9 -> stack
+ let t24 = u4 ^ u7;
+ // m7 -> stack
+ let t10 = t2 ^ t24;
+ // u4 -> stack
+ let m14 = t2 & t10;
+ let r5 = u6 ^ u7;
+ // m2 -> stack
+ let t3 = t1 ^ r5;
+ // t2 -> stack
+ let t13 = t2 ^ r5;
+ let t19 = t22 ^ r5;
+ // t3 -> stack
+ let t17 = u2 ^ t19;
+ // t4 -> stack
+ let t25 = u2 ^ t1;
+ let r13 = u1 ^ u6;
+ // t25 -> stack
+ let t20 = t24 ^ r13;
+ // t17 -> stack
+ let m9 = t20 & t17;
+ // t20 -> stack
+ let r17 = u2 ^ u5;
+ // t22 -> stack
+ let t6 = t22 ^ r17;
+ // t13 -> stack
+ let m1 = t13 & t6;
+ let y5 = u0 ^ r17;
+ let m4 = t19 & y5;
+ let m5 = m4 ^ m1;
+ let m17 = m5 ^ t24;
+ let r18 = u5 ^ u6;
+ let t27 = t1 ^ r18;
+ let t15 = t10 ^ t27;
+ // t6 -> stack
+ let m11 = t1 & t15;
+ let m15 = m14 ^ m11;
+ let m21 = m17 ^ m15;
+ // t1 -> stack
+ // t4 <- stack
+ let m12 = t4 & t27;
+ let m13 = m12 ^ m11;
+ let t14 = t10 ^ r18;
+ let m3 = t14 ^ m1;
+ // m2 <- stack
+ let m16 = m3 ^ m2;
+ let m20 = m16 ^ m13;
+ // u4 <- stack
+ let r19 = u2 ^ u4;
+ let t16 = r13 ^ r19;
+ // t3 <- stack
+ let t26 = t3 ^ t16;
+ let m6 = t3 & t16;
+ let m8 = t26 ^ m6;
+ // t10 -> stack
+ // m7 <- stack
+ let m18 = m8 ^ m7;
+ let m22 = m18 ^ m13;
+ let m25 = m22 & m20;
+ let m26 = m21 ^ m25;
+ let m10 = m9 ^ m6;
+ let m19 = m10 ^ m15;
+ // t25 <- stack
+ let m23 = m19 ^ t25;
+ let m28 = m23 ^ m25;
+ let m24 = m22 ^ m23;
+ let m30 = m26 & m24;
+ let m39 = m23 ^ m30;
+ let m48 = m39 & y5;
+ let m57 = m39 & t19;
+ // m48 -> stack
+ let m36 = m24 ^ m25;
+ let m31 = m20 & m23;
+ let m27 = m20 ^ m21;
+ let m32 = m27 & m31;
+ let m29 = m28 & m27;
+ let m37 = m21 ^ m29;
+ // m39 -> stack
+ let m42 = m37 ^ m39;
+ let m52 = m42 & t15;
+ // t27 -> stack
+ // t1 <- stack
+ let m61 = m42 & t1;
+ let p0 = m52 ^ m61;
+ let p16 = m57 ^ m61;
+ // m57 -> stack
+ // t20 <- stack
+ let m60 = m37 & t20;
+ // p16 -> stack
+ // t17 <- stack
+ let m51 = m37 & t17;
+ let m33 = m27 ^ m25;
+ let m38 = m32 ^ m33;
+ let m43 = m37 ^ m38;
+ let m49 = m43 & t16;
+ let p6 = m49 ^ m60;
+ let p13 = m49 ^ m51;
+ let m58 = m43 & t3;
+ // t9 <- stack
+ let m50 = m38 & t9;
+ // t22 <- stack
+ let m59 = m38 & t22;
+ // p6 -> stack
+ let p1 = m58 ^ m59;
+ let p7 = p0 ^ p1;
+ let m34 = m21 & m22;
+ let m35 = m24 & m34;
+ let m40 = m35 ^ m36;
+ let m41 = m38 ^ m40;
+ let m45 = m42 ^ m41;
+ // t27 <- stack
+ let m53 = m45 & t27;
+ let p8 = m50 ^ m53;
+ let p23 = p7 ^ p8;
+ // t4 <- stack
+ let m62 = m45 & t4;
+ let p14 = m49 ^ m62;
+ let s6 = p14 ^ p23;
+ // t10 <- stack
+ let m54 = m41 & t10;
+ let p2 = m54 ^ m62;
+ let p22 = p2 ^ p7;
+ let s0 = p13 ^ p22;
+ let p17 = m58 ^ p2;
+ let p15 = m54 ^ m59;
+ // t2 <- stack
+ let m63 = m41 & t2;
+ // m39 <- stack
+ let m44 = m39 ^ m40;
+ // p17 -> stack
+ // t6 <- stack
+ let m46 = m44 & t6;
+ let p5 = m46 ^ m51;
+ // p23 -> stack
+ let p18 = m63 ^ p5;
+ let p24 = p5 ^ p7;
+ // m48 <- stack
+ let p12 = m46 ^ m48;
+ let s3 = p12 ^ p22;
+ // t13 <- stack
+ let m55 = m44 & t13;
+ let p9 = m55 ^ m63;
+ // p16 <- stack
+ let s7 = p9 ^ p16;
+ // t8 <- stack
+ let m47 = m40 & t8;
+ let p3 = m47 ^ m50;
+ let p19 = p2 ^ p3;
+ let s5 = p19 ^ p24;
+ let p11 = p0 ^ p3;
+ let p26 = p9 ^ p11;
+ // t23 <- stack
+ let m56 = m40 & t23;
+ let p4 = m48 ^ m56;
+ // p6 <- stack
+ let p20 = p4 ^ p6;
+ let p29 = p15 ^ p20;
+ let s1 = p26 ^ p29;
+ // m57 <- stack
+ let p10 = m57 ^ p4;
+ let p27 = p10 ^ p18;
+ // p23 <- stack
+ let s4 = p23 ^ p27;
+ let p25 = p6 ^ p10;
+ let p28 = p11 ^ p25;
+ // p17 <- stack
+ let s2 = p17 ^ p28;
+
+ state[0] = s7;
+ state[1] = s6;
+ state[2] = s5;
+ state[3] = s4;
+ state[4] = s3;
+ state[5] = s2;
+ state[6] = s1;
+ state[7] = s0;
+}
+
+/// Bitsliced implementation of the AES Sbox based on Boyar, Peralta and Calik.
+///
+/// See: <http://www.cs.yale.edu/homes/peralta/CircuitStuff/SLP_AES_113.txt>
+///
+/// Note that the four bitwise NOTs (`^= 0xffffffffffffffff`) are moved to the key schedule.
+fn sub_bytes(state: &mut [u64]) {
+ debug_assert_eq!(state.len(), 8);
+
+ // Scheduled using https://github.com/Ko-/aes-armcortexm/tree/public/scheduler
+ // Inline "stack" comments reflect suggested stores and loads (ARM Cortex-M3 and M4)
+
+ let u7 = state[0];
+ let u6 = state[1];
+ let u5 = state[2];
+ let u4 = state[3];
+ let u3 = state[4];
+ let u2 = state[5];
+ let u1 = state[6];
+ let u0 = state[7];
+
+ let y14 = u3 ^ u5;
+ let y13 = u0 ^ u6;
+ let y12 = y13 ^ y14;
+ let t1 = u4 ^ y12;
+ let y15 = t1 ^ u5;
+ let t2 = y12 & y15;
+ let y6 = y15 ^ u7;
+ let y20 = t1 ^ u1;
+ // y12 -> stack
+ let y9 = u0 ^ u3;
+ // y20 -> stack
+ let y11 = y20 ^ y9;
+ // y9 -> stack
+ let t12 = y9 & y11;
+ // y6 -> stack
+ let y7 = u7 ^ y11;
+ let y8 = u0 ^ u5;
+ let t0 = u1 ^ u2;
+ let y10 = y15 ^ t0;
+ // y15 -> stack
+ let y17 = y10 ^ y11;
+ // y14 -> stack
+ let t13 = y14 & y17;
+ let t14 = t13 ^ t12;
+ // y17 -> stack
+ let y19 = y10 ^ y8;
+ // y10 -> stack
+ let t15 = y8 & y10;
+ let t16 = t15 ^ t12;
+ let y16 = t0 ^ y11;
+ // y11 -> stack
+ let y21 = y13 ^ y16;
+ // y13 -> stack
+ let t7 = y13 & y16;
+ // y16 -> stack
+ let y18 = u0 ^ y16;
+ let y1 = t0 ^ u7;
+ let y4 = y1 ^ u3;
+ // u7 -> stack
+ let t5 = y4 & u7;
+ let t6 = t5 ^ t2;
+ let t18 = t6 ^ t16;
+ let t22 = t18 ^ y19;
+ let y2 = y1 ^ u0;
+ let t10 = y2 & y7;
+ let t11 = t10 ^ t7;
+ let t20 = t11 ^ t16;
+ let t24 = t20 ^ y18;
+ let y5 = y1 ^ u6;
+ let t8 = y5 & y1;
+ let t9 = t8 ^ t7;
+ let t19 = t9 ^ t14;
+ let t23 = t19 ^ y21;
+ let y3 = y5 ^ y8;
+ // y6 <- stack
+ let t3 = y3 & y6;
+ let t4 = t3 ^ t2;
+ // y20 <- stack
+ let t17 = t4 ^ y20;
+ let t21 = t17 ^ t14;
+ let t26 = t21 & t23;
+ let t27 = t24 ^ t26;
+ let t31 = t22 ^ t26;
+ let t25 = t21 ^ t22;
+ // y4 -> stack
+ let t28 = t25 & t27;
+ let t29 = t28 ^ t22;
+ let z14 = t29 & y2;
+ let z5 = t29 & y7;
+ let t30 = t23 ^ t24;
+ let t32 = t31 & t30;
+ let t33 = t32 ^ t24;
+ let t35 = t27 ^ t33;
+ let t36 = t24 & t35;
+ let t38 = t27 ^ t36;
+ let t39 = t29 & t38;
+ let t40 = t25 ^ t39;
+ let t43 = t29 ^ t40;
+ // y16 <- stack
+ let z3 = t43 & y16;
+ let tc12 = z3 ^ z5;
+ // tc12 -> stack
+ // y13 <- stack
+ let z12 = t43 & y13;
+ let z13 = t40 & y5;
+ let z4 = t40 & y1;
+ let tc6 = z3 ^ z4;
+ let t34 = t23 ^ t33;
+ let t37 = t36 ^ t34;
+ let t41 = t40 ^ t37;
+ // y10 <- stack
+ let z8 = t41 & y10;
+ let z17 = t41 & y8;
+ let t44 = t33 ^ t37;
+ // y15 <- stack
+ let z0 = t44 & y15;
+ // z17 -> stack
+ // y12 <- stack
+ let z9 = t44 & y12;
+ let z10 = t37 & y3;
+ let z1 = t37 & y6;
+ let tc5 = z1 ^ z0;
+ let tc11 = tc6 ^ tc5;
+ // y4 <- stack
+ let z11 = t33 & y4;
+ let t42 = t29 ^ t33;
+ let t45 = t42 ^ t41;
+ // y17 <- stack
+ let z7 = t45 & y17;
+ let tc8 = z7 ^ tc6;
+ // y14 <- stack
+ let z16 = t45 & y14;
+ // y11 <- stack
+ let z6 = t42 & y11;
+ let tc16 = z6 ^ tc8;
+ // z14 -> stack
+ // y9 <- stack
+ let z15 = t42 & y9;
+ let tc20 = z15 ^ tc16;
+ let tc1 = z15 ^ z16;
+ let tc2 = z10 ^ tc1;
+ let tc21 = tc2 ^ z11;
+ let tc3 = z9 ^ tc2;
+ let s0 = tc3 ^ tc16;
+ let s3 = tc3 ^ tc11;
+ let s1 = s3 ^ tc16;
+ let tc13 = z13 ^ tc1;
+ // u7 <- stack
+ let z2 = t33 & u7;
+ let tc4 = z0 ^ z2;
+ let tc7 = z12 ^ tc4;
+ let tc9 = z8 ^ tc7;
+ let tc10 = tc8 ^ tc9;
+ // z14 <- stack
+ let tc17 = z14 ^ tc10;
+ let s5 = tc21 ^ tc17;
+ let tc26 = tc17 ^ tc20;
+ // z17 <- stack
+ let s2 = tc26 ^ z17;
+ // tc12 <- stack
+ let tc14 = tc4 ^ tc12;
+ let tc18 = tc13 ^ tc14;
+ let s6 = tc10 ^ tc18;
+ let s7 = z12 ^ tc18;
+ let s4 = tc14 ^ s3;
+
+ state[0] = s7;
+ state[1] = s6;
+ state[2] = s5;
+ state[3] = s4;
+ state[4] = s3;
+ state[5] = s2;
+ state[6] = s1;
+ state[7] = s0;
+}
+
+/// Applies the NOT operations that were omitted from the S-box (`sub_bytes`).
+#[inline]
+fn sub_bytes_nots(state: &mut [u64]) {
+ debug_assert_eq!(state.len(), 8);
+ state[0] ^= 0xffffffffffffffff;
+ state[1] ^= 0xffffffffffffffff;
+ state[5] ^= 0xffffffffffffffff;
+ state[6] ^= 0xffffffffffffffff;
+}
+
+/// Computation of the MixColumns transformation in the fixsliced representation, with different
+/// rotations used according to the round number mod 4.
+///
+/// Based on Käsper-Schwabe, similar to <https://github.com/Ko-/aes-armcortexm>.
+macro_rules! define_mix_columns {
+ (
+ $name:ident,
+ $name_inv:ident,
+ $first_rotate:path,
+ $second_rotate:path
+ ) => {
+ #[rustfmt::skip]
+ fn $name(state: &mut State) {
+ let (a0, a1, a2, a3, a4, a5, a6, a7) = (
+ state[0], state[1], state[2], state[3], state[4], state[5], state[6], state[7]
+ );
+ let (b0, b1, b2, b3, b4, b5, b6, b7) = (
+ $first_rotate(a0),
+ $first_rotate(a1),
+ $first_rotate(a2),
+ $first_rotate(a3),
+ $first_rotate(a4),
+ $first_rotate(a5),
+ $first_rotate(a6),
+ $first_rotate(a7),
+ );
+ let (c0, c1, c2, c3, c4, c5, c6, c7) = (
+ a0 ^ b0,
+ a1 ^ b1,
+ a2 ^ b2,
+ a3 ^ b3,
+ a4 ^ b4,
+ a5 ^ b5,
+ a6 ^ b6,
+ a7 ^ b7,
+ );
+ state[0] = b0 ^ c7 ^ $second_rotate(c0);
+ state[1] = b1 ^ c0 ^ c7 ^ $second_rotate(c1);
+ state[2] = b2 ^ c1 ^ $second_rotate(c2);
+ state[3] = b3 ^ c2 ^ c7 ^ $second_rotate(c3);
+ state[4] = b4 ^ c3 ^ c7 ^ $second_rotate(c4);
+ state[5] = b5 ^ c4 ^ $second_rotate(c5);
+ state[6] = b6 ^ c5 ^ $second_rotate(c6);
+ state[7] = b7 ^ c6 ^ $second_rotate(c7);
+ }
+
+ #[rustfmt::skip]
+ fn $name_inv(state: &mut State) {
+ let (a0, a1, a2, a3, a4, a5, a6, a7) = (
+ state[0], state[1], state[2], state[3], state[4], state[5], state[6], state[7]
+ );
+ let (b0, b1, b2, b3, b4, b5, b6, b7) = (
+ $first_rotate(a0),
+ $first_rotate(a1),
+ $first_rotate(a2),
+ $first_rotate(a3),
+ $first_rotate(a4),
+ $first_rotate(a5),
+ $first_rotate(a6),
+ $first_rotate(a7),
+ );
+ let (c0, c1, c2, c3, c4, c5, c6, c7) = (
+ a0 ^ b0,
+ a1 ^ b1,
+ a2 ^ b2,
+ a3 ^ b3,
+ a4 ^ b4,
+ a5 ^ b5,
+ a6 ^ b6,
+ a7 ^ b7,
+ );
+ let (d0, d1, d2, d3, d4, d5, d6, d7) = (
+ a0 ^ c7,
+ a1 ^ c0 ^ c7,
+ a2 ^ c1,
+ a3 ^ c2 ^ c7,
+ a4 ^ c3 ^ c7,
+ a5 ^ c4,
+ a6 ^ c5,
+ a7 ^ c6,
+ );
+ let (e0, e1, e2, e3, e4, e5, e6, e7) = (
+ c0 ^ d6,
+ c1 ^ d6 ^ d7,
+ c2 ^ d0 ^ d7,
+ c3 ^ d1 ^ d6,
+ c4 ^ d2 ^ d6 ^ d7,
+ c5 ^ d3 ^ d7,
+ c6 ^ d4,
+ c7 ^ d5,
+ );
+ state[0] = d0 ^ e0 ^ $second_rotate(e0);
+ state[1] = d1 ^ e1 ^ $second_rotate(e1);
+ state[2] = d2 ^ e2 ^ $second_rotate(e2);
+ state[3] = d3 ^ e3 ^ $second_rotate(e3);
+ state[4] = d4 ^ e4 ^ $second_rotate(e4);
+ state[5] = d5 ^ e5 ^ $second_rotate(e5);
+ state[6] = d6 ^ e6 ^ $second_rotate(e6);
+ state[7] = d7 ^ e7 ^ $second_rotate(e7);
+ }
+ }
+}
+
+define_mix_columns!(
+ mix_columns_0,
+ inv_mix_columns_0,
+ rotate_rows_1,
+ rotate_rows_2
+);
+
+define_mix_columns!(
+ mix_columns_1,
+ inv_mix_columns_1,
+ rotate_rows_and_columns_1_1,
+ rotate_rows_and_columns_2_2
+);
+
+#[cfg(not(feature = "compact"))]
+define_mix_columns!(
+ mix_columns_2,
+ inv_mix_columns_2,
+ rotate_rows_and_columns_1_2,
+ rotate_rows_2
+);
+
+#[cfg(not(feature = "compact"))]
+define_mix_columns!(
+ mix_columns_3,
+ inv_mix_columns_3,
+ rotate_rows_and_columns_1_3,
+ rotate_rows_and_columns_2_2
+);
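+
+// `mix_columns_i`/`inv_mix_columns_i` serve the rounds whose number is
+// congruent to `i` modulo 4. With the `compact` feature only variants 0 and 1
+// are compiled, and the round loops above insert extra `shift_rows_2` calls
+// to stay aligned.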
+
+#[inline]
+fn delta_swap_1(a: &mut u64, shift: u32, mask: u64) {
+ let t = (*a ^ ((*a) >> shift)) & mask;
+ *a ^= t ^ (t << shift);
+}
+
+#[inline]
+fn delta_swap_2(a: &mut u64, b: &mut u64, shift: u32, mask: u64) {
+ let t = (*a ^ ((*b) >> shift)) & mask;
+ *a ^= t;
+ *b ^= t << shift;
+}
+
+/// Applies ShiftRows once on an AES state (or key).
+#[cfg(any(not(feature = "compact"), feature = "hazmat"))]
+#[inline]
+fn shift_rows_1(state: &mut [u64]) {
+ debug_assert_eq!(state.len(), 8);
+ for x in state.iter_mut() {
+ delta_swap_1(x, 8, 0x00f000ff000f0000);
+ delta_swap_1(x, 4, 0x0f0f00000f0f0000);
+ }
+}
+
+/// Applies ShiftRows twice on an AES state (or key).
+#[inline]
+fn shift_rows_2(state: &mut [u64]) {
+ debug_assert_eq!(state.len(), 8);
+ for x in state.iter_mut() {
+ delta_swap_1(x, 8, 0x00ff000000ff0000);
+ }
+}
+
+/// Applies ShiftRows three times on an AES state (or key).
+#[inline]
+fn shift_rows_3(state: &mut [u64]) {
+ debug_assert_eq!(state.len(), 8);
+ for x in state.iter_mut() {
+ delta_swap_1(x, 8, 0x000f00ff00f00000);
+ delta_swap_1(x, 4, 0x0f0f00000f0f0000);
+ }
+}
+
+#[inline(always)]
+fn inv_shift_rows_1(state: &mut [u64]) {
+ shift_rows_3(state);
+}
+
+#[inline(always)]
+fn inv_shift_rows_2(state: &mut [u64]) {
+ shift_rows_2(state);
+}
+
+#[cfg(not(feature = "compact"))]
+#[inline(always)]
+fn inv_shift_rows_3(state: &mut [u64]) {
+ shift_rows_1(state);
+}
+
+/// XOR the columns after the S-box during the key schedule round function.
+///
+/// The `idx_xor` parameter refers to the index of the previous round key that is
+/// involved in the XOR computation (should be 8 and 16 for AES-128 and AES-256,
+/// respectively).
+///
+/// The `idx_ror` parameter refers to the rotation value, which varies between the
+/// different key schedules.
+fn xor_columns(rkeys: &mut [u64], offset: usize, idx_xor: usize, idx_ror: u32) {
+ for i in 0..8 {
+ let off_i = offset + i;
+ let rk = rkeys[off_i - idx_xor] ^ (0x000f000f000f000f & ror(rkeys[off_i], idx_ror));
+ rkeys[off_i] = rk
+ ^ (0xfff0fff0fff0fff0 & (rk << 4))
+ ^ (0xff00ff00ff00ff00 & (rk << 8))
+ ^ (0xf000f000f000f000 & (rk << 12));
+ }
+}
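+
+// As used above: `aes128_key_schedule` calls this with `idx_xor = 8` and
+// `idx_ror = ror_distance(1, 3)`, while `aes256_key_schedule` uses
+// `idx_xor = 16` and alternates `ror_distance(1, 3)` with `ror_distance(0, 3)`.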
+
+/// Bitslice four 128-bit input blocks input0, input1, input2, input3 into a 512-bit internal state.
+fn bitslice(output: &mut [u64], input0: &[u8], input1: &[u8], input2: &[u8], input3: &[u8]) {
+ debug_assert_eq!(output.len(), 8);
+ debug_assert_eq!(input0.len(), 16);
+ debug_assert_eq!(input1.len(), 16);
+ debug_assert_eq!(input2.len(), 16);
+ debug_assert_eq!(input3.len(), 16);
+
+ // Bitslicing is a bit index manipulation. 512 bits of data means each bit is positioned at a
+ // 9-bit index. AES data is 4 blocks, each one a 4x4 column-major matrix of bytes, so the
+ // index is initially ([b]lock, [c]olumn, [r]ow, [p]osition):
+ // b1 b0 c1 c0 r1 r0 p2 p1 p0
+ //
+ // The desired bitsliced data groups first by bit position, then row, column, block:
+ // p2 p1 p0 r1 r0 c1 c0 b1 b0
+
+ #[rustfmt::skip]
+ fn read_reordered(input: &[u8]) -> u64 {
+ (u64::from(input[0x0]) ) |
+ (u64::from(input[0x1]) << 0x10) |
+ (u64::from(input[0x2]) << 0x20) |
+ (u64::from(input[0x3]) << 0x30) |
+ (u64::from(input[0x8]) << 0x08) |
+ (u64::from(input[0x9]) << 0x18) |
+ (u64::from(input[0xa]) << 0x28) |
+ (u64::from(input[0xb]) << 0x38)
+ }
+
+ // Reorder each block's bytes on input
+ // __ __ c1 c0 r1 r0 __ __ __ => __ __ c0 r1 r0 c1 __ __ __
+ // Reorder by relabeling (note the order of input)
+ // b1 b0 c0 __ __ __ __ __ __ => c0 b1 b0 __ __ __ __ __ __
+ let mut t0 = read_reordered(&input0[0x00..0x0c]);
+ let mut t4 = read_reordered(&input0[0x04..0x10]);
+ let mut t1 = read_reordered(&input1[0x00..0x0c]);
+ let mut t5 = read_reordered(&input1[0x04..0x10]);
+ let mut t2 = read_reordered(&input2[0x00..0x0c]);
+ let mut t6 = read_reordered(&input2[0x04..0x10]);
+ let mut t3 = read_reordered(&input3[0x00..0x0c]);
+ let mut t7 = read_reordered(&input3[0x04..0x10]);
+
+ // Bit Index Swap 6 <-> 0:
+ // __ __ b0 __ __ __ __ __ p0 => __ __ p0 __ __ __ __ __ b0
+ let m0 = 0x5555555555555555;
+ delta_swap_2(&mut t1, &mut t0, 1, m0);
+ delta_swap_2(&mut t3, &mut t2, 1, m0);
+ delta_swap_2(&mut t5, &mut t4, 1, m0);
+ delta_swap_2(&mut t7, &mut t6, 1, m0);
+
+ // Bit Index Swap 7 <-> 1:
+ // __ b1 __ __ __ __ __ p1 __ => __ p1 __ __ __ __ __ b1 __
+ let m1 = 0x3333333333333333;
+ delta_swap_2(&mut t2, &mut t0, 2, m1);
+ delta_swap_2(&mut t3, &mut t1, 2, m1);
+ delta_swap_2(&mut t6, &mut t4, 2, m1);
+ delta_swap_2(&mut t7, &mut t5, 2, m1);
+
+ // Bit Index Swap 8 <-> 2:
+ // c0 __ __ __ __ __ p2 __ __ => p2 __ __ __ __ __ c0 __ __
+ let m2 = 0x0f0f0f0f0f0f0f0f;
+ delta_swap_2(&mut t4, &mut t0, 4, m2);
+ delta_swap_2(&mut t5, &mut t1, 4, m2);
+ delta_swap_2(&mut t6, &mut t2, 4, m2);
+ delta_swap_2(&mut t7, &mut t3, 4, m2);
+
+ // Final bitsliced bit index, as desired:
+ // p2 p1 p0 r1 r0 c1 c0 b1 b0
+ output[0] = t0;
+ output[1] = t1;
+ output[2] = t2;
+ output[3] = t3;
+ output[4] = t4;
+ output[5] = t5;
+ output[6] = t6;
+ output[7] = t7;
+}
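+
+// For illustration: with this packing, bit `p` of the byte at (block `b`,
+// column `c`, row `r`) lands in `output[p]` at bit position `r * 16 + c * 4 + b`.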
+
+/// Un-bitslice a 512-bit internal state into four 128-bit blocks of output.
+fn inv_bitslice(input: &[u64], output: &mut [Block]) {
+ debug_assert_eq!(input.len(), 8);
+ debug_assert_eq!(output.len(), 4);
+
+ // Unbitslicing is a bit index manipulation. 512 bits of data means each bit is positioned at
+ // a 9-bit index. AES data is 4 blocks, each one a 4x4 column-major matrix of bytes, so the
+ // desired index for the output is ([b]lock, [c]olumn, [r]ow, [p]osition):
+ // b1 b0 c1 c0 r1 r0 p2 p1 p0
+ //
+ // The initially bitsliced data groups first by bit position, then row, column, block:
+ // p2 p1 p0 r1 r0 c1 c0 b1 b0
+
+ let mut t0 = input[0];
+ let mut t1 = input[1];
+ let mut t2 = input[2];
+ let mut t3 = input[3];
+ let mut t4 = input[4];
+ let mut t5 = input[5];
+ let mut t6 = input[6];
+ let mut t7 = input[7];
+
+ // TODO: these bit index swaps are identical to those in 'packing'
+
+ // Bit Index Swap 6 <-> 0:
+ // __ __ p0 __ __ __ __ __ b0 => __ __ b0 __ __ __ __ __ p0
+ let m0 = 0x5555555555555555;
+ delta_swap_2(&mut t1, &mut t0, 1, m0);
+ delta_swap_2(&mut t3, &mut t2, 1, m0);
+ delta_swap_2(&mut t5, &mut t4, 1, m0);
+ delta_swap_2(&mut t7, &mut t6, 1, m0);
+
+ // Bit Index Swap 7 <-> 1:
+ // __ p1 __ __ __ __ __ b1 __ => __ b1 __ __ __ __ __ p1 __
+ let m1 = 0x3333333333333333;
+ delta_swap_2(&mut t2, &mut t0, 2, m1);
+ delta_swap_2(&mut t3, &mut t1, 2, m1);
+ delta_swap_2(&mut t6, &mut t4, 2, m1);
+ delta_swap_2(&mut t7, &mut t5, 2, m1);
+
+ // Bit Index Swap 8 <-> 2:
+ // p2 __ __ __ __ __ c0 __ __ => c0 __ __ __ __ __ p2 __ __
+ let m2 = 0x0f0f0f0f0f0f0f0f;
+ delta_swap_2(&mut t4, &mut t0, 4, m2);
+ delta_swap_2(&mut t5, &mut t1, 4, m2);
+ delta_swap_2(&mut t6, &mut t2, 4, m2);
+ delta_swap_2(&mut t7, &mut t3, 4, m2);
+
+ #[rustfmt::skip]
+ fn write_reordered(columns: u64, output: &mut [u8]) {
+ output[0x0] = (columns ) as u8;
+ output[0x1] = (columns >> 0x10) as u8;
+ output[0x2] = (columns >> 0x20) as u8;
+ output[0x3] = (columns >> 0x30) as u8;
+ output[0x8] = (columns >> 0x08) as u8;
+ output[0x9] = (columns >> 0x18) as u8;
+ output[0xa] = (columns >> 0x28) as u8;
+ output[0xb] = (columns >> 0x38) as u8;
+ }
+
+ // Reorder by relabeling (note the order of output)
+ // c0 b1 b0 __ __ __ __ __ __ => b1 b0 c0 __ __ __ __ __ __
+ // Reorder each block's bytes on output
+ // __ __ c0 r1 r0 c1 __ __ __ => __ __ c1 c0 r1 r0 __ __ __
+ write_reordered(t0, &mut output[0][0x00..0x0c]);
+ write_reordered(t4, &mut output[0][0x04..0x10]);
+ write_reordered(t1, &mut output[1][0x00..0x0c]);
+ write_reordered(t5, &mut output[1][0x04..0x10]);
+ write_reordered(t2, &mut output[2][0x00..0x0c]);
+ write_reordered(t6, &mut output[2][0x04..0x10]);
+ write_reordered(t3, &mut output[3][0x00..0x0c]);
+ write_reordered(t7, &mut output[3][0x04..0x10]);
+
+ // Final AES bit index, as desired:
+ // b1 b0 c1 c0 r1 r0 p2 p1 p0
+}
+
+/// Copy 64 bytes (8 `u64` words) within the provided slice to an offset 8 words ahead
+fn memshift32(buffer: &mut [u64], src_offset: usize) {
+ debug_assert_eq!(src_offset % 8, 0);
+
+ let dst_offset = src_offset + 8;
+ debug_assert!(dst_offset + 8 <= buffer.len());
+
+ for i in (0..8).rev() {
+ buffer[dst_offset + i] = buffer[src_offset + i];
+ }
+}
+
+/// XOR the round key into the internal state. The round keys are expected to
+/// be pre-computed and packed in the fixsliced representation.
+#[inline]
+fn add_round_key(state: &mut State, rkey: &[u64]) {
+ debug_assert_eq!(rkey.len(), 8);
+ for (a, b) in state.iter_mut().zip(rkey) {
+ *a ^= b;
+ }
+}
+
+#[inline(always)]
+fn add_round_constant_bit(state: &mut [u64], bit: usize) {
+ state[bit] ^= 0x00000000f0000000;
+}
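+
+// In the packing used here (bit position r * 16 + c * 4 + b), the constant
+// 0x00000000f0000000 sets bits 28..=31, i.e. row 1, column 3 in all four
+// block slots of the selected bit plane.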
+
+#[inline(always)]
+fn ror(x: u64, y: u32) -> u64 {
+ x.rotate_right(y)
+}
+
+#[inline(always)]
+fn ror_distance(rows: u32, cols: u32) -> u32 {
+ (rows << 4) + (cols << 2)
+}
+
+#[inline(always)]
+fn rotate_rows_1(x: u64) -> u64 {
+ ror(x, ror_distance(1, 0))
+}
+
+#[inline(always)]
+fn rotate_rows_2(x: u64) -> u64 {
+ ror(x, ror_distance(2, 0))
+}
+
+#[inline(always)]
+#[rustfmt::skip]
+fn rotate_rows_and_columns_1_1(x: u64) -> u64 {
+ (ror(x, ror_distance(1, 1)) & 0x0fff0fff0fff0fff) |
+ (ror(x, ror_distance(0, 1)) & 0xf000f000f000f000)
+}
+
+#[cfg(not(feature = "compact"))]
+#[inline(always)]
+#[rustfmt::skip]
+fn rotate_rows_and_columns_1_2(x: u64) -> u64 {
+ (ror(x, ror_distance(1, 2)) & 0x00ff00ff00ff00ff) |
+ (ror(x, ror_distance(0, 2)) & 0xff00ff00ff00ff00)
+}
+
+#[cfg(not(feature = "compact"))]
+#[inline(always)]
+#[rustfmt::skip]
+fn rotate_rows_and_columns_1_3(x: u64) -> u64 {
+ (ror(x, ror_distance(1, 3)) & 0x000f000f000f000f) |
+ (ror(x, ror_distance(0, 3)) & 0xfff0fff0fff0fff0)
+}
+
+#[inline(always)]
+#[rustfmt::skip]
+fn rotate_rows_and_columns_2_2(x: u64) -> u64 {
+ (ror(x, ror_distance(2, 2)) & 0x00ff00ff00ff00ff) |
+ (ror(x, ror_distance(1, 2)) & 0xff00ff00ff00ff00)
+}
+
+/// Low-level "hazmat" AES functions.
+///
+/// Note: this isn't actually used in the `Aes128`/`Aes192`/`Aes256`
+/// implementations in this crate, but instead provides raw access to
+/// the AES round function gated under the `hazmat` crate feature.
+#[cfg(feature = "hazmat")]
+pub(crate) mod hazmat {
+ use super::{
+ bitslice, inv_bitslice, inv_mix_columns_0, inv_shift_rows_1, inv_sub_bytes, mix_columns_0,
+ shift_rows_1, sub_bytes, sub_bytes_nots, State,
+ };
+ use crate::{Block, ParBlocks};
+
+ /// XOR the `src` block into the `dst` block in-place.
+ fn xor_in_place(dst: &mut Block, src: &Block) {
+ for (a, b) in dst.iter_mut().zip(src.as_slice()) {
+ *a ^= *b;
+ }
+ }
+
+ /// Perform a bitslice operation, loading a single block.
+ fn bitslice_block(block: &Block) -> State {
+ let mut state = State::default();
+ bitslice(&mut state, block, block, block, block);
+ state
+ }
+
+ /// Perform an inverse bitslice operation, extracting a single block.
+ fn inv_bitslice_block(block: &mut Block, state: &State) {
+ let mut out = [Block::default(); 4];
+ inv_bitslice(state, &mut out);
+ block.copy_from_slice(&out[0]);
+ }
+
+ /// AES cipher (encrypt) round function.
+ #[inline]
+ pub(crate) fn cipher_round(block: &mut Block, round_key: &Block) {
+ let mut state = bitslice_block(block);
+ sub_bytes(&mut state);
+ sub_bytes_nots(&mut state);
+ shift_rows_1(&mut state);
+ mix_columns_0(&mut state);
+ inv_bitslice_block(block, &state);
+ xor_in_place(block, round_key);
+ }
+
+ /// AES cipher (encrypt) round function: parallel version.
+ #[inline]
+ pub(crate) fn cipher_round_par(blocks: &mut ParBlocks, round_keys: &ParBlocks) {
+ for (chunk, keys) in blocks.chunks_exact_mut(4).zip(round_keys.chunks_exact(4)) {
+ let mut state = State::default();
+ bitslice(&mut state, &chunk[0], &chunk[1], &chunk[2], &chunk[3]);
+ sub_bytes(&mut state);
+ sub_bytes_nots(&mut state);
+ shift_rows_1(&mut state);
+ mix_columns_0(&mut state);
+ inv_bitslice(&state, chunk);
+
+ for i in 0..4 {
+ xor_in_place(&mut chunk[i], &keys[i]);
+ }
+ }
+ }
+
+    /// AES equivalent inverse cipher (decrypt) round function.
+ #[inline]
+ pub(crate) fn equiv_inv_cipher_round(block: &mut Block, round_key: &Block) {
+        let mut state = bitslice_block(block);
+ sub_bytes_nots(&mut state);
+ inv_sub_bytes(&mut state);
+ inv_shift_rows_1(&mut state);
+ inv_mix_columns_0(&mut state);
+ inv_bitslice_block(block, &state);
+ xor_in_place(block, round_key);
+ }
+
+    /// AES equivalent inverse cipher (decrypt) round function: parallel version.
+ #[inline]
+ pub(crate) fn equiv_inv_cipher_round_par(blocks: &mut ParBlocks, round_keys: &ParBlocks) {
+ for (chunk, keys) in blocks.chunks_exact_mut(4).zip(round_keys.chunks_exact(4)) {
+ let mut state = State::default();
+ bitslice(&mut state, &chunk[0], &chunk[1], &chunk[2], &chunk[3]);
+ sub_bytes_nots(&mut state);
+ inv_sub_bytes(&mut state);
+ inv_shift_rows_1(&mut state);
+ inv_mix_columns_0(&mut state);
+ inv_bitslice(&state, chunk);
+
+ for i in 0..4 {
+ xor_in_place(&mut chunk[i], &keys[i]);
+ }
+ }
+ }
+
+ /// AES mix columns function.
+ #[inline]
+ pub(crate) fn mix_columns(block: &mut Block) {
+ let mut state = bitslice_block(block);
+ mix_columns_0(&mut state);
+ inv_bitslice_block(block, &state);
+ }
+
+ /// AES inverse mix columns function.
+ #[inline]
+ pub(crate) fn inv_mix_columns(block: &mut Block) {
+ let mut state = bitslice_block(block);
+ inv_mix_columns_0(&mut state);
+ inv_bitslice_block(block, &state);
+ }
+}
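+
+// A minimal sketch of how these helpers are reached from elsewhere in the
+// crate (requires the `hazmat` feature; values are placeholders):
+//
+//     let mut block = Block::default();
+//     let round_key = Block::default();
+//     hazmat::cipher_round(&mut block, &round_key);
+//     hazmat::equiv_inv_cipher_round(&mut block, &round_key);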