diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000 |
commit | 19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch) | |
tree | 42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/crypto/isa-l/isa-l_crypto/aes/cbc_dec_256_x4_sse.asm | |
parent | Initial commit. (diff) | |
download | ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.tar.xz ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.zip |
Adding upstream version 16.2.11+ds.upstream/16.2.11+dsupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | src/crypto/isa-l/isa-l_crypto/aes/cbc_dec_256_x4_sse.asm | 160 |
1 files changed, 160 insertions, 0 deletions
diff --git a/src/crypto/isa-l/isa-l_crypto/aes/cbc_dec_256_x4_sse.asm b/src/crypto/isa-l/isa-l_crypto/aes/cbc_dec_256_x4_sse.asm new file mode 100644 index 000000000..b6c081ffb --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/aes/cbc_dec_256_x4_sse.asm @@ -0,0 +1,160 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; routine to do AES cbc decrypt on 16n bytes doing AES +; XMM registers are clobbered. Saving/restoring must be done at a higher level + +; void aes_cbc_dec_256_sse(void *in, +; uint8_t *IV, +; uint8_t keys, +; void *out, +; uint64_t len_bytes); +; +; arg 1: rcx: pointer to input (cipher text) +; arg 2: rdx: pointer to IV +; arg 3: r8: pointer to keys +; arg 4: r9: pointer to output (plain text) +; arg 5: sp: length in bytes (multiple of 16) +; + +%include "reg_sizes.asm" + +%define MOVDQ movdqu + +%ifidn __OUTPUT_FORMAT__, elf64 +%define IN rdi +%define IV rsi +%define KEYS rdx +%define OUT rcx +%define LEN r8 +%define func(x) x: +%define FUNC_SAVE +%define FUNC_RESTORE +%endif + +%ifidn __OUTPUT_FORMAT__, win64 +%define IN rcx +%define IV rdx +%define KEYS r8 +%define OUT r9 +%define LEN r10 +%define PS 8 +%define stack_size 10*16 + 1*8 ; must be an odd multiple of 8 +%define arg(x) [rsp + stack_size + PS + PS*x] + +%define func(x) proc_frame x +%macro FUNC_SAVE 0 + alloc_stack stack_size + save_xmm128 xmm6, 0*16 + save_xmm128 xmm7, 1*16 + save_xmm128 xmm8, 2*16 + save_xmm128 xmm9, 3*16 + save_xmm128 xmm10, 4*16 + save_xmm128 xmm11, 5*16 + save_xmm128 xmm12, 6*16 + save_xmm128 xmm13, 7*16 + save_xmm128 xmm14, 8*16 + save_xmm128 xmm15, 9*16 + end_prolog + mov LEN, arg(4) +%endmacro + +%macro FUNC_RESTORE 0 + movdqa xmm6, [rsp + 0*16] + movdqa xmm7, [rsp + 1*16] + movdqa xmm8, [rsp + 2*16] + movdqa xmm9, [rsp + 3*16] + movdqa xmm10, [rsp + 4*16] + movdqa xmm11, [rsp + 5*16] + movdqa xmm12, [rsp + 6*16] + movdqa xmm13, [rsp + 7*16] + movdqa xmm14, [rsp + 8*16] + movdqa xmm15, [rsp + 9*16] + add rsp, stack_size +%endmacro +%endif + +; configuration paramaters for AES-CBC +%define KEY_ROUNDS 15 +%define XMM_USAGE (16) +%define EARLY_BLOCKS (4) +%define PARALLEL_BLOCKS (11) +%define IV_CNT (1) + +; instruction set specific operation definitions +%define MOVDQ movdqu +%define PXOR pxor +%define AES_DEC aesdec +%define AES_DEC_LAST aesdeclast + +%include "cbc_common.asm" + +global aes_cbc_dec_256_sse:function +func(aes_cbc_dec_256_sse) + FUNC_SAVE + + FILL_KEY_CACHE CKEY_CNT, FIRST_CKEY, KEYS, MOVDQ + + MOVDQ reg(IV_IDX), [IV] ; Load IV for next round of block decrypt + mov IDX, 0 + cmp LEN, PARALLEL_BLOCKS*16 + jge main_loop ; if enough data blocks remain enter main_loop + jmp partials + +main_loop: + CBC_DECRYPT_BLOCKS KEY_ROUNDS, PARALLEL_BLOCKS, EARLY_BLOCKS, MOVDQ, PXOR, AES_DEC, AES_DEC_LAST, CKEY_CNT, TMP, TMP_CNT, FIRST_CKEY, KEYS, FIRST_XDATA, IN, OUT, IDX, LEN + cmp LEN, PARALLEL_BLOCKS*16 + jge main_loop ; enough blocks to do another full parallel set + jz done + +partials: ; fewer than 'PARALLEL_BLOCKS' left do in groups of 4, 2 or 1 + cmp LEN, 0 + je done + cmp LEN, 4*16 + jge initial_4 + cmp LEN, 2*16 + jge initial_2 + +initial_1: + CBC_DECRYPT_BLOCKS KEY_ROUNDS, 1, EARLY_BLOCKS, MOVDQ, PXOR, AES_DEC, AES_DEC_LAST, CKEY_CNT, TMP, TMP_CNT, FIRST_CKEY, KEYS, FIRST_XDATA, IN, OUT, IDX, LEN + jmp done + +initial_2: + CBC_DECRYPT_BLOCKS KEY_ROUNDS, 2, EARLY_BLOCKS, MOVDQ, PXOR, AES_DEC, AES_DEC_LAST, CKEY_CNT, TMP, TMP_CNT, FIRST_CKEY, KEYS, FIRST_XDATA, IN, OUT, IDX, LEN + jz done + jmp partials + +initial_4: + CBC_DECRYPT_BLOCKS KEY_ROUNDS, 4, EARLY_BLOCKS, MOVDQ, PXOR, AES_DEC, AES_DEC_LAST, CKEY_CNT, TMP, TMP_CNT, FIRST_CKEY, KEYS, FIRST_XDATA, IN, OUT, IDX, LEN + jnz partials +done: + FUNC_RESTORE + ret + +endproc_frame |