diff options
Diffstat (limited to '')
-rw-r--r-- | src/crypto/isa-l/isa-l_crypto/sm3_mb/aarch64/sm3_mb_asimd_x1.S | 387 |
1 file changed, 387 insertions, 0 deletions
diff --git a/src/crypto/isa-l/isa-l_crypto/sm3_mb/aarch64/sm3_mb_asimd_x1.S b/src/crypto/isa-l/isa-l_crypto/sm3_mb/aarch64/sm3_mb_asimd_x1.S new file mode 100644 index 000000000..c7362de90 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/sm3_mb/aarch64/sm3_mb_asimd_x1.S @@ -0,0 +1,387 @@ +/********************************************************************** + Copyright(c) 2020 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + .arch armv8.2-a + .text + .align 2 + .p2align 3,,7 + +.macro declare_var_vector_reg name:req,reg:req + q\name\() .req q\reg + v\name\() .req v\reg + s\name\() .req s\reg +.endm + + job .req x0 + len .req x1 + data .req x2 + digest .req x0 + + msg0 .req w3 + msg1 .req w4 + msg2 .req w5 + msg3 .req w6 + msg4 .req w7 + + msg .req w9 + msgP .req w10 + SS1 .req w11 + SS2 .req w12 + TT1 .req w13 + TT2 .req w14 + Tj .req w15 + tmp0 .req w19 + tmp1 .req w20 + dig_A .req w21 + dig_B .req w22 + dig_C .req w23 + dig_D .req w24 + dig_E .req w25 + dig_F .req w26 + dig_G .req w27 + dig_H .req w28 + + declare_var_vector_reg dig0,0 + declare_var_vector_reg dig1,1 + declare_var_vector_reg dig0_bak,2 + declare_var_vector_reg dig1_bak,3 + declare_var_vector_reg vect_msg0,4 + declare_var_vector_reg vect_msg1,5 + declare_var_vector_reg vect_msg2,6 + declare_var_vector_reg vect_msg3,7 + + declare_var_vector_reg vect_msgP0,16 + declare_var_vector_reg vect_msgP1,17 + declare_var_vector_reg vect_msgP2,18 + + + + + + +// round 0-11 +.macro sm3_round_0 round:req + ldr msg, [sp,msg_off+4*\round\()] + ldr msgP,[sp,wp_off +4*\round\()] + add SS1,dig_E,Tj + ror TT1,dig_A,32-12 + add SS1,SS1,TT1 + ror SS1,SS1,32-7 //SS1 done + eor SS2,SS1,TT1 //SS2 done + eor TT1,dig_A,dig_B + eor TT2,dig_E,dig_F + add SS2,SS2,msgP + eor TT2,TT2,dig_G + add SS1,SS1,msg + eor TT1,TT1,dig_C + add SS2,SS2,dig_D + add SS1,SS1,dig_H + add TT1,TT1,SS2 + add TT2,TT2,SS1 + mov dig_D,dig_C + ror dig_C,dig_B,32-9 + mov dig_B,dig_A + mov dig_A,TT1 + eor TT1,TT2,TT2,ror (32-17) + mov dig_H,dig_G + ror dig_G,dig_F,32-19 + mov dig_F,dig_E + eor dig_E,TT1,TT2,ror(32-9) + ror Tj,Tj,(32-1) +.endm + +//round 12-15 +.macro sm3_round_12 round:req + ldr msg, [sp,msg_off+4*((\round\())%17)] + ldr msg0,[sp,msg_off+4*((\round\()+4 - 16)%17)] + ldr msg1,[sp,msg_off+4*((\round\()+4 - 9)%17)] + add SS1,dig_E,Tj + ror TT1,dig_A,32-12 + add SS1,SS1,TT1 + ror 
SS1,SS1,32-7 //SS1 done + eor SS2,SS1,TT1 //SS2 done + + eor msg0,msg0,msg1 + ldr msg2,[sp,msg_off+4*((\round\()+4 - 3)%17)] + eor TT1,dig_A,dig_B + eor TT2,dig_E,dig_F + add SS2,SS2,dig_D + eor TT2,TT2,dig_G + add SS1,SS1,msg + eor msg0,msg0,msg2,ror (32-15) + ldr msg3,[sp,msg_off+4*((\round\()+4 - 13)%17)] + ldr msg4,[sp,msg_off+4*((\round\()+4 - 6)%17)] + eor msg1,msg0,msg0,ror (32 -15) + eor TT1,TT1,dig_C + add TT1,TT1,SS2 + eor msg4,msg4,msg3, ror (32-7) + eor msg0,msg1,msg0, ror (32-23) + add SS1,SS1,dig_H + eor msg0,msg0,msg4 + add TT2,TT2,SS1 + mov dig_D,dig_C + str msg0,[sp,msg_off+4*((\round\()+4)%17)] + eor msgP,msg,msg0 + add TT1,TT1,msgP + ror dig_C,dig_B,32-9 + mov dig_B,dig_A + mov dig_A,TT1 + eor TT1,TT2,TT2,ror (32-17) + mov dig_H,dig_G + ror dig_G,dig_F,32-19 + mov dig_F,dig_E + eor dig_E,TT1,TT2,ror(32-9) + ror Tj,Tj,32-1 +.endm + +// round 16-62 +.macro sm3_round_16 round:req + ldr msg, [sp,msg_off+4*((\round\())%17)] + ldr msg0,[sp,msg_off+4*((\round\()+4 - 16)%17)] + ldr msg1,[sp,msg_off+4*((\round\()+4 - 9)%17)] + add SS1,dig_E,Tj + ror TT1,dig_A,32-12 + add SS1,SS1,TT1 + ror SS1,SS1,32-7 //SS1 done + eor SS2,SS1,TT1 //SS2 done + + eor msg0,msg0,msg1 + ldr msg2,[sp,msg_off+4*((\round\()+4 - 3)%17)] + orr TT1,dig_B,dig_C + and tmp0,dig_B,dig_C + + eor TT2,dig_F,dig_G + and TT1,TT1,dig_A + add SS2,SS2,dig_D + orr TT1,TT1,tmp0 + and TT2,TT2,dig_E + add SS1,SS1,msg + eor TT2,TT2,dig_G + + eor msg0,msg0,msg2,ror (32-15) + ldr msg3,[sp,msg_off+4*((\round\()+4 - 13)%17)] + ldr msg4,[sp,msg_off+4*((\round\()+4 - 6)%17)] + eor msg1,msg0,msg0,ror (32 -15) + add TT1,TT1,SS2 + eor msg4,msg4,msg3, ror (32-7) + eor msg0,msg1,msg0, ror (32-23) + add SS1,SS1,dig_H + eor msg0,msg0,msg4 + add TT2,TT2,SS1 + mov dig_D,dig_C + str msg0,[sp,msg_off+4*((\round\()+4)%17)] + eor msgP,msg,msg0 + add TT1,TT1,msgP + ror dig_C,dig_B,32-9 + mov dig_B,dig_A + mov dig_A,TT1 + eor TT1,TT2,TT2,ror (32-17) + mov dig_H,dig_G + ror dig_G,dig_F,32-19 + mov dig_F,dig_E + eor 
dig_E,TT1,TT2,ror(32-9) + ror Tj,Tj,32-1 +.endm + +//round 63 +.macro sm3_round_63 round:req + ldr msg, [sp,msg_off+4*((\round\())%17)] + ldr msg0,[sp,msg_off+4*((\round\()+4 - 16)%17)] + ldr msg1,[sp,msg_off+4*((\round\()+4 - 9)%17)] + add SS1,dig_E,Tj + ror TT1,dig_A,32-12 + add SS1,SS1,TT1 + ror SS1,SS1,32-7 //SS1 done + eor SS2,SS1,TT1 //SS2 done + eor msg0,msg0,msg1 + ldr msg2,[sp,msg_off+4*((\round\()+4 - 3)%17)] + orr TT1,dig_B,dig_C + and tmp0,dig_B,dig_C + eor TT2,dig_F,dig_G + and TT1,TT1,dig_A + add SS2,SS2,dig_D + orr TT1,TT1,tmp0 + and TT2,TT2,dig_E + add SS1,SS1,msg + eor TT2,TT2,dig_G + eor msg0,msg0,msg2,ror (32-15) + ldr msg3,[sp,msg_off+4*((\round\()+4 - 13)%17)] + ldr msg4,[sp,msg_off+4*((\round\()+4 - 6)%17)] + eor msg1,msg0,msg0,ror (32 -15) + add TT1,TT1,SS2 + eor msg4,msg4,msg3, ror (32-7) + eor msg0,msg1,msg0, ror (32-23) + add SS1,SS1,dig_H + eor msg0,msg0,msg4 + add TT2,TT2,SS1 + str msg0,[sp,msg_off+4*((\round\()+4)%17)] + eor msgP,msg,msg0 + add TT1,TT1,msgP + ins vdig0_bak.s[3],dig_C + ror dig_C,dig_B,32-9 + ins vdig0_bak.s[1],dig_A + ins vdig0_bak.s[0],TT1 + ins vdig0_bak.s[2],dig_C + eor TT1,TT2,TT2,ror (32-17) + ins vdig1_bak.s[3],dig_G + ror dig_G,dig_F,32-19 + ins vdig1_bak.s[1],dig_E + ins vdig1_bak.s[2],dig_G + eor dig_E,TT1,TT2,ror(32-9) + ins vdig1_bak.s[0],dig_E +.endm + + .set wp_off , 96 + .set msg_off, 96 + 12*4 +#define STACK_SIZE 224 + .global sm3_mb_asimd_x1 + .type sm3_mb_asimd_x1, %function +sm3_mb_asimd_x1: + stp x29,x30, [sp,-STACK_SIZE]! 
+ cmp len,0 + ldr data,[job],64 + ldp qdig0,qdig1,[digest] + stp x19, x20, [sp, 16] + stp x21, x22, [sp, 32] + rev32 vdig0.16b,vdig0.16b + stp x23, x24, [sp, 48] + rev32 vdig1.16b,vdig1.16b + stp x25, x26, [sp, 64] + stp x27, x28, [sp, 80] + ble .exit_func + +.start_loop: + + /** prepare first 12 round data **/ + ld1 {vvect_msg0.16b-vvect_msg3.16b},[data],64 + mov Tj, 17689 + umov dig_A,vdig0.s[0] + movk Tj, 0x79cc, lsl 16 + rev32 vvect_msg0.16b,vvect_msg0.16b + umov dig_B,vdig0.s[1] + rev32 vvect_msg1.16b,vvect_msg1.16b + umov dig_C,vdig0.s[2] + rev32 vvect_msg2.16b,vvect_msg2.16b + umov dig_D,vdig0.s[3] + rev32 vvect_msg3.16b,vvect_msg3.16b + umov dig_E,vdig1.s[0] + stp qvect_msg0,qvect_msg1,[sp,msg_off] + umov dig_F,vdig1.s[1] + stp qvect_msg2,qvect_msg3,[sp,msg_off+32] + umov dig_G,vdig1.s[2] + eor vvect_msgP0.16b,vvect_msg0.16b,vvect_msg1.16b + eor vvect_msgP1.16b,vvect_msg1.16b,vvect_msg2.16b + umov dig_H,vdig1.s[3] + stp qvect_msgP0,qvect_msgP1,[sp,wp_off] + eor vvect_msgP2.16b,vvect_msg2.16b,vvect_msg3.16b + str qvect_msgP2,[sp,wp_off+32] + + sm3_round_0 0 + sm3_round_0 1 + sm3_round_0 2 + sm3_round_0 3 + sm3_round_0 4 + sm3_round_0 5 + sm3_round_0 6 + sm3_round_0 7 + sm3_round_0 8 + sm3_round_0 9 + sm3_round_0 10 + sm3_round_0 11 + + sm3_round_12 12 + sm3_round_12 13 + sm3_round_12 14 + sm3_round_12 15 + mov Tj, 0x7a87 + movk Tj, 0x9d8a, lsl 16 + sm3_round_16 16 + sm3_round_16 17 + sm3_round_16 18 + sm3_round_16 19 + sm3_round_16 20 + sm3_round_16 21 + sm3_round_16 22 + sm3_round_16 23 + sm3_round_16 24 + sm3_round_16 25 + sm3_round_16 26 + sm3_round_16 27 + sm3_round_16 28 + sm3_round_16 29 + sm3_round_16 30 + sm3_round_16 31 + sm3_round_16 32 + sm3_round_16 33 + sm3_round_16 34 + sm3_round_16 35 + sm3_round_16 36 + sm3_round_16 37 + sm3_round_16 38 + sm3_round_16 39 + sm3_round_16 40 + sm3_round_16 41 + sm3_round_16 42 + sm3_round_16 43 + sm3_round_16 44 + sm3_round_16 45 + sm3_round_16 46 + sm3_round_16 47 + sm3_round_16 48 + sm3_round_16 49 + 
sm3_round_16 50 + sm3_round_16 51 + sm3_round_16 52 + sm3_round_16 53 + sm3_round_16 54 + sm3_round_16 55 + sm3_round_16 56 + sm3_round_16 57 + sm3_round_16 58 + sm3_round_16 59 + sm3_round_16 60 + sm3_round_16 61 + sm3_round_16 62 + sm3_round_63 63 + subs len,len,1 + eor vdig0.16b,vdig0.16b,vdig0_bak.16b + eor vdig1.16b,vdig1.16b,vdig1_bak.16b + bne .start_loop +.exit_func: + ldp x19, x20, [sp, 16] + rev32 vdig0.16b,vdig0.16b + ldp x21, x22, [sp, 32] + rev32 vdig1.16b,vdig1.16b + ldp x23, x24, [sp, 48] + stp qdig0,qdig1,[digest] + ldp x25, x26, [sp, 64] + ldp x27, x28, [sp, 80] + ldp x29, x30, [sp], STACK_SIZE + ret + .size sm3_mb_asimd_x1, .-sm3_mb_asimd_x1 + |