diff options
Diffstat (limited to '')
-rw-r--r-- | src/crypto/isa-l/isa-l_crypto/aes/aarch64/cbc_dec_aes.S | 482 |
1 files changed, 482 insertions, 0 deletions
diff --git a/src/crypto/isa-l/isa-l_crypto/aes/aarch64/cbc_dec_aes.S b/src/crypto/isa-l/isa-l_crypto/aes/aarch64/cbc_dec_aes.S new file mode 100644 index 000000000..11bd90a71 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/aes/aarch64/cbc_dec_aes.S @@ -0,0 +1,482 @@ +/********************************************************************** + Copyright(c) 2021 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + .arch armv8-a+crypto + .text +#include "cbc_common.S" + .altmacro +.macro _aes_decrypt_round block:req,key:req + aesd v\block\().16b,vKey\key\().16b + .if \key < 13 + aesimc v\block\().16b,v\block\().16b + .endif + .if \key > 13 + .error "erro her" + .endif +.endm + +.macro aes_decrypt_round block,reg,key + _aes_decrypt_round In\reg\()_\block,\key +.endm + +.macro load_keys first_key + .if \first_key == 4 + ld1 {vKey4.4s -vKey6.4s},[keys],3*16 + .endif + .ifc 2 , \first_key + ldr qKey2,[keys],1*16 + ld1 {vKey3.16b -vKey6.16b},[keys],4*16 + .endif + .ifc 0 , \first_key + ld1 {vKey0.16b -vKey2.16b},[keys],3*16 + ld1 {vKey3.16b -vKey6.16b},[keys],4*16 + .endif + ld1 {vKey7.16b -vKey10.16b},[keys],4*16 + ld1 {vKey11.16b-vKey14.16b},[keys],4*16 +.endm + +.macro aes_decrypt_blocks_round blocks,key_idx,key_reg,next_keyreg,first_idx + .if \key_idx == 12 + ldr q\next_keyreg,[keys],(\first_idx-13)*16 + .else + ldr q\next_keyreg,[keys],16 + .endif + n=0 + .rept \blocks + _aes_decrypt_round %n,\key_reg + n=n+1 + .endr +.endm + +.macro aes_decrypt_rounds blocks,key_st,key_end,first_idx + j=key_st + .rept \key_end - \key_st + 1 + aes_decrypt_blocks_round \blocks,%j,%(j%2),%((j+1)%2),\first_idx + j=j+1 + .endr +.endm + +.macro aes_cbc_decrypt_rounds blocks,first_idx,reg,next_reg + aes_decrypt_rounds \blocks,\first_idx,12,\first_idx +.endm + +.macro declare_prefix idx,reg,prefix + declare_var_vector_reg \prefix\()\idx,\reg +.endm + +.macro mldr reg,block,addr + ldr qIn\reg\()_\block,[\addr],16 +.endm + +.macro mldrin reg,blocks,addr + .if \blocks == 1 + ldr qIn\reg\()_0,[\addr],16 + .exitm + .endif + .if \blocks == 2 + ldp qIn\reg\()_0,qIn\reg\()_1,[\addr],2*16 + .exitm + .endif + .if \blocks == 3 + ldr qIn\reg\()_0,[\addr],16 + ldp qIn\reg\()_1,qIn\reg\()_2,[\addr],2*16 + .exitm + .endif + .if \blocks == 4 + ld1 {vIn\reg\()_0.16b-vIn\reg\()_3.16b},[\addr],4*16 + .exitm + .endif + .if \blocks == 5 + ldr qIn\reg\()_0,[\addr],16 + ld1 {vIn\reg\()_1.16b-vIn\reg\()_4.16b},[\addr],4*16 + .exitm + .endif + .if \blocks == 6 + ldp qIn\reg\()_0,qIn\reg\()_1,[\addr],2*16 + ld1 {vIn\reg\()_2.16b-vIn\reg\()_5.16b},[\addr],4*16 + .exitm + .endif + .if \blocks == 7 + ld1 {vIn\reg\()_0.16b-vIn\reg\()_2.16b},[\addr],3*16 + ld1 {vIn\reg\()_3.16b-vIn\reg\()_6.16b},[\addr],4*16 + .exitm + .endif + + .if \blocks == 8 + ld1 {vIn\reg\()_0.16b-vIn\reg\()_3.16b},[\addr],4*16 + ld1 {vIn\reg\()_4.16b-vIn\reg\()_7.16b},[\addr],4*16 + .exitm + .endif + .if \blocks == 9 + ld1 {vIn\reg\()_0.16b-vIn\reg\()_3.16b},[\addr],4*16 + ld1 {vIn\reg\()_4.16b-vIn\reg\()_7.16b},[\addr],4*16 + ldr qIn\reg\()_8,[\addr],16 + .exitm + .endif +.endm + +.macro mstrout reg,blocks,addr + .if \blocks == 1 + str qIn\reg\()_0,[\addr],16 + .exitm + .endif + .if \blocks == 2 + stp qIn\reg\()_0,qIn\reg\()_1,[\addr],2*16 + .exitm + .endif + .if \blocks == 3 + str qIn\reg\()_0,[\addr],16 + stp qIn\reg\()_1,qIn\reg\()_2,[\addr],2*16 + .exitm + .endif + .if \blocks == 4 + st1 {vIn\reg\()_0.16b-vIn\reg\()_3.16b},[\addr],4*16 + .exitm + .endif + .if \blocks == 5 + str qIn\reg\()_0,[\addr],16 + st1 {vIn\reg\()_1.16b-vIn\reg\()_4.16b},[\addr],4*16 + .exitm + .endif + .if \blocks == 6 + stp qIn\reg\()_0,qIn\reg\()_1,[\addr],2*16 + st1 {vIn\reg\()_2.16b-vIn\reg\()_5.16b},[\addr],4*16 + .exitm + .endif + .if \blocks == 7 + st1 {vIn\reg\()_0.16b-vIn\reg\()_2.16b},[\addr],3*16 + st1 {vIn\reg\()_3.16b-vIn\reg\()_6.16b},[\addr],4*16 + .exitm + .endif + + .if \blocks == 8 + st1 {vIn\reg\()_0.16b-vIn\reg\()_3.16b},[\addr],4*16 + st1 {vIn\reg\()_4.16b-vIn\reg\()_7.16b},[\addr],4*16 + .exitm + .endif + .if \blocks == 9 + st1 {vIn\reg\()_0.16b-vIn\reg\()_3.16b},[\addr],4*16 + st1 {vIn\reg\()_4.16b-vIn\reg\()_7.16b},[\addr],4*16 + str qIn\reg\()_8,[\addr],16 + .exitm + .endif +.endm + +.macro eorkey14 block,reg + eor vBlock\block\().16b,vKey14.16b,vState\reg\()_\block\().16b +.endm + +.macro eorblock block,reg + eor vIn\reg\()_\block\().16b,vBlock\block\().16b,vIn\reg\()_\block\().16b +.endm + +.macro movstate0 block,reg + mov vState\reg\()_0.16b,vIn\reg\()_\block\().16b +.endm + +.macro cbc_decrypt_rounds blocks,reg,first_key,cur_blocks + .ifb \cur_blocks + _blocks=\blocks + .else + _blocks=\cur_blocks + .endif + key=\first_key + 1 + .if 3*\blocks+1 >= 32-15+\first_key + ldr_key %key,\first_key + .endif + n=0 + .rept _blocks - 1 + eorkey14 %((n+1)%_blocks),\reg + aes_decrypt_round %n,\reg,\first_key + n=n+1 + .endr + eorkey14 0,\reg + movstate0 %(_blocks-1),\reg + aes_decrypt_round %n,\reg,\first_key + + k=0 + .rept 15-\first_key-3 + n=0 + .if 3*\blocks+1 >= 32-15+\first_key + ldr_key %(key+k+1),\first_key + .endif + + .rept _blocks + aes_decrypt_round %n,\reg,%(key+k) + n=n+1 + .endr + k=k+1 + .endr + n=0 + .if 3*\blocks+1 >= 32-15+\first_key + ldr_key \first_key,\first_key + .endif + .rept _blocks + aes_decrypt_round %n,\reg,13 + eorblock %n,\reg + n=n+1 + .endr +.endm + +.macro print_macro a,b,c,d,e + .print "print_macro,\a \b \c \d \e" +.endm + +.macro remainder_process blocks,first_key,curblk +.if \blocks > (1<<\curblk) + tbz xlen_remainder,\curblk,1f + mldrin 0,%(1<<\curblk),in + cbc_decrypt_rounds \blocks,0,\first_key,%(1<<\curblk) + mstrout 0,%(1<<\curblk),out +1: +.endif +.endm + +.macro aes_cbc_decrypt_blocks first_key,blocks + division \blocks, len_bytes,len_remainder,tmp0,tmp1 + mov xlen_quotient_in,xlen_quotient + /* + input regs(2*\block) + tmp regs(\blocks) + State reg(1) + + key regs(15-\first_key) < 32 + */ + .if 3*\blocks+1 < 32-15+\first_key + n=\first_key + .rept 15-\first_key + declare_prefix %n,%(n+17),Key + n=n+1 + .endr + load_keys \first_key + .else + n=\first_key + .rept 14-\first_key + declare_prefix %n,%((n%2)+29),Key + n=n+1 + .endr + declare_prefix 14,31,Key + /* load first key */ + ldr_key \first_key,\first_key + /* load last key */ + ldr_key 14,\first_key + .endif + m=\blocks + l=\blocks-1 + declare_prefix 0,0,State0_ + declare_prefix 0,0,State1_ + n=0 + .rept \blocks + declare_prefix %n,%(n+1),In0_ + declare_prefix %n,%(n+m+1),In1_ + declare_prefix %n,%(n+2*m+1),Block + n=n+1 + .endr + n=1 + .rept \blocks -1 + declare_prefix %n,%(n),State0_ + declare_prefix %n,%(n+m),State1_ + n=n+1 + .endr + ldr qState0_0,[IV] + cbz xlen_quotient,9f + mldrin 0,\blocks,in + sub xlen_quotient_in,xlen_quotient_in,1 + b 5f + +3: + sub xlen_quotient,xlen_quotient,1 + mstrout 1,\blocks,out + cbz xlen_quotient,9f +5: + cbz xlen_quotient_in,1f + mldrin 1,\blocks,in + sub xlen_quotient_in,xlen_quotient_in,1 +1: + cbc_decrypt_rounds \blocks,0,\first_key + sub xlen_quotient,xlen_quotient,1 + mstrout 0,\blocks,out + cbz xlen_quotient,9f + + cbz xlen_quotient_in,1f + mldrin 0,\blocks,in + sub xlen_quotient_in,xlen_quotient_in,1 +1: + cbc_decrypt_rounds \blocks,1,\first_key + b 3b +9: + remainder_process \blocks,\first_key,3 + remainder_process \blocks,\first_key,2 + remainder_process \blocks,\first_key,1 + remainder_process \blocks,\first_key,0 +.endm + + +.macro division blocks,quotient,remainder,tmp0,tmp1 + .if \blocks == 1 + mov x\remainder, 0 + .exitm + .endif + .if \blocks == 2 + and x\remainder, x\quotient, 1 + lsr x\quotient, x\quotient, 1 + .exitm + .endif + .if \blocks == 3 + mov x\tmp0, -6148914691236517206 + mov x\remainder, x\quotient + movk x\tmp0, 0xaaab, lsl 0 + umulh x\tmp0, x\quotient, x\tmp0 + and x\tmp1, x\tmp0, -2 + lsr x\quotient, x\tmp0, 1 + add x\tmp1, x\tmp1, x\quotient + sub x\remainder, x\remainder, x\tmp1 + .exitm + .endif + .if \blocks == 4 + and x\remainder, x\quotient, 3 + lsr x\quotient, x\quotient, 2 + .exitm + .endif + .if \blocks == 5 + mov x\tmp0, -3689348814741910324 + mov x\remainder, x\quotient + movk x\tmp0, 0xcccd, lsl 0 + umulh x\tmp0, x\quotient, x\tmp0 + and x\tmp1, x\tmp0, -4 + lsr x\quotient, x\tmp0, 2 + add x\tmp1, x\tmp1, x\quotient + sub x\remainder, x\remainder, x\tmp1 + .exitm + .endif + .if \blocks == 6 + mov x\tmp0, -6148914691236517206 + mov x\tmp1, x\quotient + movk x\tmp0, 0xaaab, lsl 0 + umulh x\tmp0, x\quotient, x\tmp0 + lsr x\quotient, x\tmp0, 2 + add x\remainder, x\quotient, x\quotient, lsl 1 + sub x\remainder, x\tmp1, x\remainder, lsl 1 + .exitm + .endif + .if \blocks == 7 + mov x\tmp0, 9363 + mov x\tmp1, x\quotient + movk x\tmp0, 0x9249, lsl 16 + movk x\tmp0, 0x4924, lsl 32 + movk x\tmp0, 0x2492, lsl 48 + umulh x\quotient, x\quotient, x\tmp0 + sub x\tmp0, x\tmp1, x\quotient + add x\tmp0, x\quotient, x\tmp0, lsr 1 + lsr x\quotient, x\tmp0, 2 + lsl x\remainder, x\quotient, 3 + sub x\remainder, x\remainder, x\quotient + sub x\remainder, x\tmp1, x\remainder + .exitm + .endif + .if \blocks == 8 + and x\remainder, x\quotient, 7 + lsr x\quotient, x\quotient, 3 + .exitm + .endif + .if \blocks == 9 + mov x\tmp0, 58255 + mov x\remainder, x\quotient + movk x\tmp0, 0x8e38, lsl 16 + movk x\tmp0, 0x38e3, lsl 32 + movk x\tmp0, 0xe38e, lsl 48 + umulh x\tmp0, x\quotient, x\tmp0 + and x\tmp1, x\tmp0, -8 + lsr x\quotient, x\tmp0, 3 + add x\tmp1, x\tmp1, x\quotient + sub x\remainder, x\remainder, x\tmp1 + .exitm + .endif +.endm + +.macro ldr_key num,first_key + ldr qKey\num,[keys,16*(\num - \first_key)] +.endm +#ifndef CBC_DECRYPT_BLOCKS_NUM +#define CBC_DECRYPT_BLOCKS_NUM 8 +#endif + +.macro cbc_decrypt first_key:req,blocks + lsr xlen_bytes,xlen_bytes,4 + cbz xlen_bytes,10f + push_stack + aes_cbc_decrypt_blocks \first_key,\blocks + pop_stack +10: +.endm + +.set stack_size,64 +.macro push_stack + stp d8, d9,[sp,-stack_size]! + stp d10,d11,[sp,16] + stp d12,d13,[sp,32] + stp d14,d15,[sp,48] +.endm + +.macro pop_stack + ldp d10,d11,[sp,16] + ldp d12,d13,[sp,32] + ldp d14,d15,[sp,48] + ldp d8, d9, [sp], stack_size +.endm + +/* +void aes_cbc_dec_128( + void *in, //!< Input cipher text + uint8_t *IV, //!< Must be 16 bytes aligned to a 16 byte boundary + uint8_t *keys, //!< Must be on a 16 byte boundary and length of key size * key rounds or dec_keys of cbc_key_data + void *out, //!< Output plain text + uint64_t len_bytes //!< Must be a multiple of 16 bytes + ); +*/ + declare_var_generic_reg in ,0 + declare_var_generic_reg IV ,1 + declare_var_generic_reg keys ,2 + declare_var_generic_reg out ,3 + declare_var_generic_reg len_bytes ,4 + declare_var_generic_reg len_quotient,4 + declare_var_generic_reg len_remainder,5 + declare_var_generic_reg tmp0 ,6 + declare_var_generic_reg tmp1 ,7 + declare_var_generic_reg len_quotient_in,6 + +.macro define_aes_cbc_dec_func mode:req,blocks:req + .global aes_cbc_dec_\mode\()_aes_\blocks +aes_cbc_dec_\mode\()_aes_\blocks: + cbc_decrypt %((256-mode)/32),\blocks + ret + .size aes_cbc_dec_\mode\()_aes_\blocks, . - aes_cbc_dec_\mode\()_aes_\blocks +.endm + +.irp blocks,1,2,3,4,5,6,7,8,9 + define_aes_cbc_dec_func 128,\blocks + define_aes_cbc_dec_func 192,\blocks + define_aes_cbc_dec_func 256,\blocks +.endr |