diff options
Diffstat (limited to 'src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_asimd_x4.S')
-rw-r--r-- | src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_asimd_x4.S | 192 |
1 files changed, 192 insertions, 0 deletions
diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_asimd_x4.S b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_asimd_x4.S new file mode 100644 index 000000000..012b15c14 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_asimd_x4.S @@ -0,0 +1,192 @@ +/********************************************************************** + Copyright(c) 2021 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + + .arch armv8-a + +#include "sha1_asimd_common.S" + +.macro internal_load windex + // load 64-bytes from each address to maximize usage of cache line + .if \windex == 0 + mov tmp,dataptr + ld1 {WORD0.4s},[data0],16 + ld1 {WORD4.4s},[data0],16 + ld1 {WORD8.4s},[data0],16 + ld1 {WORD12.4s},[data0],16 + + ld1 {WORD1.4s},[data1],16 + ld1 {WORD5.4s},[data1],16 + ld1 {WORD9.4s},[data1],16 + ld1 {WORD13.4s},[data1],16 + + ld1 {WORD2.4s},[data2],16 + ld1 {WORD6.4s},[data2],16 + ld1 {WORD10.4s},[data2],16 + ld1 {WORD14.4s},[data2],16 + + ld1 {WORD3.4s},[data3],16 + ld1 {WORD7.4s},[data3],16 + ld1 {WORD11.4s},[data3],16 + ld1 {WORD15.4s},[data3],16 + + st4 {WORD0.s,WORD1.s,WORD2.s,WORD3.s}[0],[tmp],16 + st4 {WORD0.s,WORD1.s,WORD2.s,WORD3.s}[1],[tmp],16 + st4 {WORD0.s,WORD1.s,WORD2.s,WORD3.s}[2],[tmp],16 + st4 {WORD0.s,WORD1.s,WORD2.s,WORD3.s}[3],[tmp],16 + .endif + + .if \windex == 4 + mov tmp,dataptr + st4 {WORD4.s,WORD5.s,WORD6.s,WORD7.s}[0],[tmp],16 + st4 {WORD4.s,WORD5.s,WORD6.s,WORD7.s}[1],[tmp],16 + st4 {WORD4.s,WORD5.s,WORD6.s,WORD7.s}[2],[tmp],16 + st4 {WORD4.s,WORD5.s,WORD6.s,WORD7.s}[3],[tmp],16 + .endif + + .if \windex == 8 + mov tmp,dataptr + st4 {WORD8.s,WORD9.s,WORD10.s,WORD11.s}[0],[tmp],16 + st4 {WORD8.s,WORD9.s,WORD10.s,WORD11.s}[1],[tmp],16 + st4 {WORD8.s,WORD9.s,WORD10.s,WORD11.s}[2],[tmp],16 + st4 {WORD8.s,WORD9.s,WORD10.s,WORD11.s}[3],[tmp],16 + .endif + + .if \windex == 12 + mov tmp,dataptr + st4 {WORD12.s,WORD13.s,WORD14.s,WORD15.s}[0],[tmp],16 + st4 {WORD12.s,WORD13.s,WORD14.s,WORD15.s}[1],[tmp],16 + st4 {WORD12.s,WORD13.s,WORD14.s,WORD15.s}[2],[tmp],16 + st4 {WORD12.s,WORD13.s,WORD14.s,WORD15.s}[3],[tmp],16 + .endif +.endm + +.macro load_x4_word idx:req + internal_load \idx + ld1 {WORD\idx\().16b},[dataptr],16 +.endm + +/* + * void sha1_mb_asimd_x4(SHA1_JOB *j0, SHA1_JOB*j1, SHA1_JOB*j2, SHA1_JOB *j3, int blocks) + */ + job0 .req x0 + job1 .req x1 + job2 .req x2 + job3 .req x3 + num_blocks .req w4 + tmp .req x5 + data0 .req x6 + data1 .req x7 + data2 .req x8 + data3 .req x9 + databuf .req x10 + dataptr .req x11 + savedsp .req x12 + + .global sha1_mb_asimd_x4 + .type sha1_mb_asimd_x4, %function +sha1_mb_asimd_x4: + cmp num_blocks, #0 + beq .return + sha1_asimd_save_stack + mov savedsp,sp + sub databuf,sp,256 + mov tmp,63 + bic databuf,databuf,tmp + mov sp,databuf + + add tmp,job0,64 + ld4 {VA.s,VB.s,VC.s,VD.s}[0],[tmp],#16 + ld1 {VE.s}[0],[tmp] + ldr data0,[job0] + + add tmp,job1,64 + ld4 {VA.s,VB.s,VC.s,VD.s}[1],[tmp],#16 + ld1 {VE.s}[1],[tmp] + ldr data1,[job1] + + add tmp,job2,64 + ld4 {VA.s,VB.s,VC.s,VD.s}[2],[tmp],#16 + ld1 {VE.s}[2],[tmp] + ldr data2,[job2] + + add tmp,job3,64 + ld4 {VA.s,VB.s,VC.s,VD.s}[3],[tmp],#16 + ld1 {VE.s}[3],[tmp] + ldr data3,[job3] + +.block_loop: + mov dataptr,databuf + sha1_single + subs num_blocks, num_blocks, 1 + bne .block_loop + + add tmp,job0,64 + st4 {VA.s,VB.s,VC.s,VD.s}[0],[tmp],#16 + st1 {VE.s}[0],[tmp] + + add tmp,job1,64 + st4 {VA.s,VB.s,VC.s,VD.s}[1],[tmp],#16 + st1 {VE.s}[1],[tmp] + + add tmp,job2,64 + st4 {VA.s,VB.s,VC.s,VD.s}[2],[tmp],#16 + st1 {VE.s}[2],[tmp] + + add tmp,job3,64 + st4 {VA.s,VB.s,VC.s,VD.s}[3],[tmp],#16 + st1 {VE.s}[3],[tmp] + + mov sp,savedsp + sha1_asimd_restore_stack +.return: + ret + + .size sha1_mb_asimd_x4, .-sha1_mb_asimd_x4 + .section .rodata.cst16,"aM",@progbits,16 + .align 16 +KEY_0: + .word 0x5a827999 + .word 0x5a827999 + .word 0x5a827999 + .word 0x5a827999 +KEY_1: + .word 0x6ed9eba1 + .word 0x6ed9eba1 + .word 0x6ed9eba1 + .word 0x6ed9eba1 +KEY_2: + .word 0x8f1bbcdc + .word 0x8f1bbcdc + .word 0x8f1bbcdc + .word 0x8f1bbcdc +KEY_3: + .word 0xca62c1d6 + .word 0xca62c1d6 + .word 0xca62c1d6 + .word 0xca62c1d6 |