diff options
Diffstat (limited to 'src/spdk/intel-ipsec-mb/avx512/aes_cbc_dec_vaes_avx512.asm')
-rw-r--r-- | src/spdk/intel-ipsec-mb/avx512/aes_cbc_dec_vaes_avx512.asm | 477 |
1 files changed, 477 insertions, 0 deletions
diff --git a/src/spdk/intel-ipsec-mb/avx512/aes_cbc_dec_vaes_avx512.asm b/src/spdk/intel-ipsec-mb/avx512/aes_cbc_dec_vaes_avx512.asm new file mode 100644 index 000000000..ce33caa92 --- /dev/null +++ b/src/spdk/intel-ipsec-mb/avx512/aes_cbc_dec_vaes_avx512.asm @@ -0,0 +1,477 @@ +;; +;; Copyright (c) 2019, Intel Corporation +;; +;; Redistribution and use in source and binary forms, with or without +;; modification, are permitted provided that the following conditions are met: +;; +;; * Redistributions of source code must retain the above copyright notice, +;; this list of conditions and the following disclaimer. +;; * Redistributions in binary form must reproduce the above copyright +;; notice, this list of conditions and the following disclaimer in the +;; documentation and/or other materials provided with the distribution. +;; * Neither the name of Intel Corporation nor the names of its contributors +;; may be used to endorse or promote products derived from this software +;; without specific prior written permission. +;; +;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;; + +%include "include/os.asm" +%include "include/reg_sizes.asm" +%include "include/aes_common.asm" + +%define zIV zmm0 +%define zBLK_0_3 zmm1 +%define zBLK_4_7 zmm2 +%define zBLK_8_11 zmm3 +%define zBLK_12_15 zmm4 +%define zTMP0 zmm5 +%define zTMP1 zmm6 +%define zTMP2 zmm7 +%define zTMP3 zmm8 + +%define ZKEY0 zmm17 +%define ZKEY1 zmm18 +%define ZKEY2 zmm19 +%define ZKEY3 zmm20 +%define ZKEY4 zmm21 +%define ZKEY5 zmm22 +%define ZKEY6 zmm23 +%define ZKEY7 zmm24 +%define ZKEY8 zmm25 +%define ZKEY9 zmm26 +%define ZKEY10 zmm27 +%define ZKEY11 zmm28 +%define ZKEY12 zmm29 +%define ZKEY13 zmm30 +%define ZKEY14 zmm31 + +%ifdef LINUX +%define p_in rdi +%define p_IV rsi +%define p_keys rdx +%define p_out rcx +%define num_bytes r8 +%else +%define p_in rcx +%define p_IV rdx +%define p_keys r8 +%define p_out r9 +%define num_bytes rax +%endif + +%define tmp r10 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; macro to preload keys +;;; - uses ZKEY[0-14] registers (ZMM) +%macro LOAD_KEYS 2 +%define %%KEYS %1 ; [in] key pointer +%define %%NROUNDS %2 ; [in] numerical value, number of AES rounds + ; excluding 1st and last rounds. + ; Example: AES-128 -> value 9 + +%assign i 0 +%rep (%%NROUNDS + 2) + vbroadcastf64x2 ZKEY %+ i, [%%KEYS + 16*i] +%assign i (i + 1) +%endrep + +%endmacro + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; This macro is used to "cool down" pipeline after DECRYPT_16_PARALLEL macro +;;; code as the number of final blocks is variable. +;;; Processes the last %%num_final_blocks blocks (1 to 15, can't be 0) + +%macro FINAL_BLOCKS 14 +%define %%PLAIN_OUT %1 ; [in] output buffer +%define %%CIPH_IN %2 ; [in] input buffer +%define %%LAST_CIPH_BLK %3 ; [in/out] ZMM with IV/last cipher blk (in idx 3) +%define %%num_final_blocks %4 ; [in] numerical value (1 - 15) +%define %%CIPHER_PLAIN_0_3 %5 ; [out] ZMM next 0-3 cipher blocks +%define %%CIPHER_PLAIN_4_7 %6 ; [out] ZMM next 4-7 cipher blocks +%define %%CIPHER_PLAIN_8_11 %7 ; [out] ZMM next 8-11 cipher blocks +%define %%CIPHER_PLAIN_12_15 %8 ; [out] ZMM next 12-15 cipher blocks +%define %%ZT1 %9 ; [clobbered] ZMM temporary +%define %%ZT2 %10 ; [clobbered] ZMM temporary +%define %%ZT3 %11 ; [clobbered] ZMM temporary +%define %%ZT4 %12 ; [clobbered] ZMM temporary +%define %%IA0 %13 ; [clobbered] GP temporary +%define %%NROUNDS %14 ; [in] number of rounds; numerical value + + ;; load plain/cipher text + ZMM_LOAD_BLOCKS_0_16 %%num_final_blocks, %%CIPH_IN, 0, \ + %%CIPHER_PLAIN_0_3, %%CIPHER_PLAIN_4_7, \ + %%CIPHER_PLAIN_8_11, %%CIPHER_PLAIN_12_15 + + ;; Prepare final cipher text blocks to + ;; be XOR'd later after AESDEC + valignq %%ZT1, %%CIPHER_PLAIN_0_3, %%LAST_CIPH_BLK, 6 +%if %%num_final_blocks > 4 + valignq %%ZT2, %%CIPHER_PLAIN_4_7, %%CIPHER_PLAIN_0_3, 6 +%endif +%if %%num_final_blocks > 8 + valignq %%ZT3, %%CIPHER_PLAIN_8_11, %%CIPHER_PLAIN_4_7, 6 +%endif +%if %%num_final_blocks > 12 + valignq %%ZT4, %%CIPHER_PLAIN_12_15, %%CIPHER_PLAIN_8_11, 6 +%endif + + ;; Update IV with last cipher block + ;; to be used later in DECRYPT_16_PARALLEL +%if %%num_final_blocks == 1 + valignq %%LAST_CIPH_BLK, %%CIPHER_PLAIN_0_3, %%CIPHER_PLAIN_0_3, 2 +%elif %%num_final_blocks == 2 + valignq %%LAST_CIPH_BLK, %%CIPHER_PLAIN_0_3, %%CIPHER_PLAIN_0_3, 4 +%elif %%num_final_blocks == 3 + valignq %%LAST_CIPH_BLK, %%CIPHER_PLAIN_0_3, %%CIPHER_PLAIN_0_3, 6 +%elif %%num_final_blocks == 4 + vmovdqa64 %%LAST_CIPH_BLK, %%CIPHER_PLAIN_0_3 +%elif %%num_final_blocks == 5 + valignq %%LAST_CIPH_BLK, %%CIPHER_PLAIN_4_7, %%CIPHER_PLAIN_4_7, 2 +%elif %%num_final_blocks == 6 + valignq %%LAST_CIPH_BLK, %%CIPHER_PLAIN_4_7, %%CIPHER_PLAIN_4_7, 4 +%elif %%num_final_blocks == 7 + valignq %%LAST_CIPH_BLK, %%CIPHER_PLAIN_4_7, %%CIPHER_PLAIN_4_7, 6 +%elif %%num_final_blocks == 8 + vmovdqa64 %%LAST_CIPH_BLK, %%CIPHER_PLAIN_4_7 +%elif %%num_final_blocks == 9 + valignq %%LAST_CIPH_BLK, %%CIPHER_PLAIN_8_11, %%CIPHER_PLAIN_8_11, 2 +%elif %%num_final_blocks == 10 + valignq %%LAST_CIPH_BLK, %%CIPHER_PLAIN_8_11, %%CIPHER_PLAIN_8_11, 4 +%elif %%num_final_blocks == 11 + valignq %%LAST_CIPH_BLK, %%CIPHER_PLAIN_8_11, %%CIPHER_PLAIN_8_11, 6 +%elif %%num_final_blocks == 12 + vmovdqa64 %%LAST_CIPH_BLK, %%CIPHER_PLAIN_8_11 +%elif %%num_final_blocks == 13 + valignq %%LAST_CIPH_BLK, %%CIPHER_PLAIN_12_15, %%CIPHER_PLAIN_12_15, 2 +%elif %%num_final_blocks == 14 + valignq %%LAST_CIPH_BLK, %%CIPHER_PLAIN_12_15, %%CIPHER_PLAIN_12_15, 4 +%elif %%num_final_blocks == 15 + valignq %%LAST_CIPH_BLK, %%CIPHER_PLAIN_12_15, %%CIPHER_PLAIN_12_15, 6 +%endif + + ;; AES rounds +%assign j 0 +%rep (%%NROUNDS + 2) + ZMM_AESDEC_ROUND_BLOCKS_0_16 %%CIPHER_PLAIN_0_3, %%CIPHER_PLAIN_4_7, \ + %%CIPHER_PLAIN_8_11, %%CIPHER_PLAIN_12_15, \ + ZKEY %+ j, j, no_data, no_data, no_data, no_data, \ + %%num_final_blocks, %%NROUNDS +%assign j (j + 1) +%endrep + + ;; XOR with decrypted blocks to get plain text + vpxorq %%CIPHER_PLAIN_0_3, %%CIPHER_PLAIN_0_3, %%ZT1 +%if %%num_final_blocks > 4 + vpxorq %%CIPHER_PLAIN_4_7, %%CIPHER_PLAIN_4_7, %%ZT2 +%endif +%if %%num_final_blocks > 8 + vpxorq %%CIPHER_PLAIN_8_11, %%CIPHER_PLAIN_8_11, %%ZT3 +%endif +%if %%num_final_blocks > 12 + vpxorq %%CIPHER_PLAIN_12_15, %%CIPHER_PLAIN_12_15, %%ZT4 +%endif + + ;; write plain text back to output + ZMM_STORE_BLOCKS_0_16 %%num_final_blocks, %%PLAIN_OUT, 0, \ + %%CIPHER_PLAIN_0_3, %%CIPHER_PLAIN_4_7, \ + %%CIPHER_PLAIN_8_11, %%CIPHER_PLAIN_12_15 + +%endmacro ; FINAL_BLOCKS + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Main AES-CBC decrypt macro +;;; - operates on single stream +;;; - decrypts 16 blocks at a time +%macro DECRYPT_16_PARALLEL 14 +%define %%PLAIN_OUT %1 ; [in] output buffer +%define %%CIPH_IN %2 ; [in] input buffer +%define %%LENGTH %3 ; [in/out] number of bytes to process +%define %%LAST_CIPH_BLK %4 ; [in/out] ZMM with IV (first block) or last cipher block (idx 3) +%define %%CIPHER_PLAIN_0_3 %5 ; [out] ZMM next 0-3 cipher blocks +%define %%CIPHER_PLAIN_4_7 %6 ; [out] ZMM next 4-7 cipher blocks +%define %%CIPHER_PLAIN_8_11 %7 ; [out] ZMM next 8-11 cipher blocks +%define %%CIPHER_PLAIN_12_15 %8 ; [out] ZMM next 12-15 cipher blocks +%define %%ZT1 %9 ; [clobbered] ZMM temporary +%define %%ZT2 %10 ; [clobbered] ZMM temporary +%define %%ZT3 %11 ; [clobbered] ZMM temporary +%define %%ZT4 %12 ; [clobbered] ZMM temporary +%define %%NROUNDS %13 ; [in] number of rounds; numerical value +%define %%IA0 %14 ; [clobbered] GP temporary + + vmovdqu8 %%CIPHER_PLAIN_0_3, [%%CIPH_IN] + vmovdqu8 %%CIPHER_PLAIN_4_7, [%%CIPH_IN + 64] + vmovdqu8 %%CIPHER_PLAIN_8_11, [%%CIPH_IN + 128] + vmovdqu8 %%CIPHER_PLAIN_12_15, [%%CIPH_IN + 192] + + ;; prepare first set of cipher blocks for later XOR'ing + valignq %%ZT1, %%CIPHER_PLAIN_0_3, %%LAST_CIPH_BLK, 6 + valignq %%ZT2, %%CIPHER_PLAIN_4_7, %%CIPHER_PLAIN_0_3, 6 + valignq %%ZT3, %%CIPHER_PLAIN_8_11, %%CIPHER_PLAIN_4_7, 6 + valignq %%ZT4, %%CIPHER_PLAIN_12_15, %%CIPHER_PLAIN_8_11, 6 + + ;; store last cipher text block to be used for next 16 blocks + vmovdqa64 %%LAST_CIPH_BLK, %%CIPHER_PLAIN_12_15 + + ;; AES rounds +%assign j 0 +%rep (%%NROUNDS + 2) + ZMM_AESDEC_ROUND_BLOCKS_0_16 %%CIPHER_PLAIN_0_3, %%CIPHER_PLAIN_4_7, \ + %%CIPHER_PLAIN_8_11, %%CIPHER_PLAIN_12_15, \ + ZKEY %+ j, j, no_data, no_data, no_data, no_data, \ + 16, %%NROUNDS +%assign j (j + 1) +%endrep + + ;; XOR with decrypted blocks to get plain text + vpxorq %%CIPHER_PLAIN_0_3, %%CIPHER_PLAIN_0_3, %%ZT1 + vpxorq %%CIPHER_PLAIN_4_7, %%CIPHER_PLAIN_4_7, %%ZT2 + vpxorq %%CIPHER_PLAIN_8_11, %%CIPHER_PLAIN_8_11, %%ZT3 + vpxorq %%CIPHER_PLAIN_12_15, %%CIPHER_PLAIN_12_15, %%ZT4 + + ;; write plain text back to output + vmovdqu8 [%%PLAIN_OUT], %%CIPHER_PLAIN_0_3 + vmovdqu8 [%%PLAIN_OUT + 64], %%CIPHER_PLAIN_4_7 + vmovdqu8 [%%PLAIN_OUT + 128], %%CIPHER_PLAIN_8_11 + vmovdqu8 [%%PLAIN_OUT + 192], %%CIPHER_PLAIN_12_15 + + ;; adjust input pointer and length + sub %%LENGTH, (16 * 16) + add %%CIPH_IN, (16 * 16) + add %%PLAIN_OUT, (16 * 16) + +%endmacro ; DECRYPT_16_PARALLEL + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; AES_CBC_DEC macro decrypts given data. +;;; Flow: +;;; - Decrypt all blocks (multiple of 16) up to final 1-15 blocks +;;; - Decrypt final blocks (1-15 blocks) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%macro AES_CBC_DEC 7 +%define %%CIPH_IN %1 ;; [in] pointer to input buffer +%define %%PLAIN_OUT %2 ;; [in] pointer to output buffer +%define %%KEYS %3 ;; [in] pointer to expanded keys +%define %%IV %4 ;; [in] pointer to IV +%define %%LENGTH %5 ;; [in/out] GP register with length in bytes +%define %%NROUNDS %6 ;; [in] Number of AES rounds; numerical value +%define %%TMP %7 ;; [clobbered] GP register + + cmp %%LENGTH, 0 + je %%cbc_dec_done + + vinserti64x2 zIV, zIV, [%%IV], 3 + + ;; preload keys + LOAD_KEYS %%KEYS, %%NROUNDS + +%%decrypt_16_parallel: + cmp %%LENGTH, 256 + jb %%final_blocks + + DECRYPT_16_PARALLEL %%PLAIN_OUT, %%CIPH_IN, %%LENGTH, zIV, \ + zBLK_0_3, zBLK_4_7, zBLK_8_11, zBLK_12_15, \ + zTMP0, zTMP1, zTMP2, zTMP3, %%NROUNDS, %%TMP + jmp %%decrypt_16_parallel + +%%final_blocks: + ;; get num final blocks + shr %%LENGTH, 4 + and %%LENGTH, 0xf + je %%cbc_dec_done + + cmp %%LENGTH, 8 + je %%final_num_blocks_is_8 + jl %%final_blocks_is_1_7 + + ; Final blocks 9-15 + cmp %%LENGTH, 12 + je %%final_num_blocks_is_12 + jl %%final_blocks_is_9_11 + + ; Final blocks 13-15 + cmp %%LENGTH, 15 + je %%final_num_blocks_is_15 + cmp %%LENGTH, 14 + je %%final_num_blocks_is_14 + cmp %%LENGTH, 13 + je %%final_num_blocks_is_13 + +%%final_blocks_is_9_11: + cmp %%LENGTH, 11 + je %%final_num_blocks_is_11 + cmp %%LENGTH, 10 + je %%final_num_blocks_is_10 + cmp %%LENGTH, 9 + je %%final_num_blocks_is_9 + +%%final_blocks_is_1_7: + cmp %%LENGTH, 4 + je %%final_num_blocks_is_4 + jl %%final_blocks_is_1_3 + + ; Final blocks 5-7 + cmp %%LENGTH, 7 + je %%final_num_blocks_is_7 + cmp %%LENGTH, 6 + je %%final_num_blocks_is_6 + cmp %%LENGTH, 5 + je %%final_num_blocks_is_5 + +%%final_blocks_is_1_3: + cmp %%LENGTH, 3 + je %%final_num_blocks_is_3 + cmp %%LENGTH, 2 + je %%final_num_blocks_is_2 + jmp %%final_num_blocks_is_1 + + +%%final_num_blocks_is_15: + FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 15, zBLK_0_3, zBLK_4_7, \ + zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \ + %%TMP, %%NROUNDS + jmp %%cbc_dec_done + +%%final_num_blocks_is_14: + FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 14, zBLK_0_3, zBLK_4_7, \ + zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \ + %%TMP, %%NROUNDS + jmp %%cbc_dec_done + +%%final_num_blocks_is_13: + FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 13, zBLK_0_3, zBLK_4_7, \ + zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \ + %%TMP, %%NROUNDS + jmp %%cbc_dec_done + +%%final_num_blocks_is_12: + FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 12, zBLK_0_3, zBLK_4_7, \ + zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \ + %%TMP, %%NROUNDS + jmp %%cbc_dec_done + +%%final_num_blocks_is_11: + FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 11, zBLK_0_3, zBLK_4_7, \ + zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \ + %%TMP, %%NROUNDS + jmp %%cbc_dec_done + +%%final_num_blocks_is_10: + FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 10, zBLK_0_3, zBLK_4_7, \ + zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \ + %%TMP, %%NROUNDS + jmp %%cbc_dec_done + +%%final_num_blocks_is_9: + FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 9, zBLK_0_3, zBLK_4_7, \ + zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \ + %%TMP, %%NROUNDS + jmp %%cbc_dec_done + +%%final_num_blocks_is_8: + FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 8, zBLK_0_3, zBLK_4_7, \ + zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \ + %%TMP, %%NROUNDS + jmp %%cbc_dec_done + +%%final_num_blocks_is_7: + FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 7, zBLK_0_3, zBLK_4_7, \ + zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \ + %%TMP, %%NROUNDS + jmp %%cbc_dec_done + +%%final_num_blocks_is_6: + FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 6, zBLK_0_3, zBLK_4_7, \ + zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \ + %%TMP, %%NROUNDS + jmp %%cbc_dec_done + +%%final_num_blocks_is_5: + FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 5, zBLK_0_3, zBLK_4_7, \ + zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \ + %%TMP, %%NROUNDS + jmp %%cbc_dec_done + +%%final_num_blocks_is_4: + FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 4, zBLK_0_3, zBLK_4_7, \ + zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \ + %%TMP, %%NROUNDS + jmp %%cbc_dec_done + +%%final_num_blocks_is_3: + FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 3, zBLK_0_3, zBLK_4_7, \ + zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \ + %%TMP, %%NROUNDS + jmp %%cbc_dec_done + +%%final_num_blocks_is_2: + FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 2, zBLK_0_3, zBLK_4_7, \ + zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \ + %%TMP, %%NROUNDS + jmp %%cbc_dec_done + +%%final_num_blocks_is_1: + FINAL_BLOCKS %%PLAIN_OUT, %%CIPH_IN, zIV, 1, zBLK_0_3, zBLK_4_7, \ + zBLK_8_11, zBLK_12_15, zTMP0, zTMP1, zTMP2, zTMP3, \ + %%TMP, %%NROUNDS + +%%cbc_dec_done: +%endmacro + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +section .text + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; aes_cbc_dec_128_vaes_avx512(void *in, void *IV, void *keys, void *out, UINT64 num_bytes) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +MKGLOBAL(aes_cbc_dec_128_vaes_avx512,function,internal) +aes_cbc_dec_128_vaes_avx512: +%ifndef LINUX + mov num_bytes, [rsp + 8*5] +%endif + AES_CBC_DEC p_in, p_out, p_keys, p_IV, num_bytes, 9, tmp + + ret + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; aes_cbc_dec_192_vaes_avx512(void *in, void *IV, void *keys, void *out, UINT64 num_bytes) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +MKGLOBAL(aes_cbc_dec_192_vaes_avx512,function,internal) +aes_cbc_dec_192_vaes_avx512: +%ifndef LINUX + mov num_bytes, [rsp + 8*5] +%endif + AES_CBC_DEC p_in, p_out, p_keys, p_IV, num_bytes, 11, tmp + + ret + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; aes_cbc_dec_256_vaes_avx512(void *in, void *IV, void *keys, void *out, UINT64 num_bytes) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +MKGLOBAL(aes_cbc_dec_256_vaes_avx512,function,internal) +aes_cbc_dec_256_vaes_avx512: +%ifndef LINUX + mov num_bytes, [rsp + 8*5] +%endif + AES_CBC_DEC p_in, p_out, p_keys, p_IV, num_bytes, 13, tmp + + ret + +%ifdef LINUX +section .note.GNU-stack noalloc noexec nowrite progbits +%endif + |