diff options
Diffstat (limited to 'src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64')
11 files changed, 2256 insertions, 0 deletions
diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_aarch64_x1.S b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_aarch64_x1.S new file mode 100644 index 000000000..55d6f932f --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_aarch64_x1.S @@ -0,0 +1,294 @@ +/********************************************************************** + Copyright(c) 2021 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + + .arch armv8-a + + input_data .req x0 + num_blocks .req w1 + digest .req x2 + + // x2 is reused intentionally between digest/tmp + // due to running out of registers + TMP .req x2 + TMPW .req w2 + sha1key_adr .req x3 + WK .req w3 + WF .req w4 + WA .req w5 + WB .req w6 + WC .req w7 + WD .req w8 + WE .req w9 + WORD0 .req w10 + WORD1 .req w11 + WORD2 .req w12 + WORD3 .req w13 + WORD4 .req w14 + WORD5 .req w15 + WORD6 .req w16 + WORD7 .req w17 + WORD8 .req w18 + WORD9 .req w19 + WORD10 .req w20 + WORD11 .req w21 + WORD12 .req w22 + WORD13 .req w23 + WORD14 .req w24 + WORD15 .req w25 + AA .req w26 + BB .req w27 + CC .req w28 + DD .req w29 + EE .req w30 + + TT .req w0 + +.macro save_stack + stp x16,x17,[sp, -128]! + stp x18,x19,[sp, 16] + stp x20,x21,[sp, 32] + stp x22,x23,[sp, 48] + stp x24,x25,[sp, 64] + stp x26,x27,[sp, 80] + stp x28,x29,[sp, 96] + str x30,[sp, 112] + // have to reuse x2, which is digest address + str x2,[sp, 120] +.endm + +.macro restore_stack + ldp x18,x19,[sp, 16] + ldp x20,x21,[sp, 32] + ldp x22,x23,[sp, 48] + ldp x24,x25,[sp, 64] + ldp x26,x27,[sp, 80] + ldp x28,x29,[sp, 96] + ldr x30,[sp, 112] + ldr x2,[sp, 120] + ldp x16,x17,[sp],128 +.endm +// macro F = (D ^ (B & (C ^ D))) +.macro FUNC_F0 + eor WF, WC, WD + and WF, WB, WF + eor WF, WD, WF +.endm + +// F = (B ^ C ^ D) +.macro FUNC_F1 + eor WF, WB, WC + eor WF, WF, WD +.endm + +// F = ((B & C) | (B & D) | (C & D)) +.macro FUNC_F2 + and TMPW, WB, WC + and WF, WB, WD + orr WF, WF, TMPW + and TMPW, WC, WD + orr WF, WF, TMPW +.endm + +// F = (B ^ C ^ D) +.macro FUNC_F3 + FUNC_F1 +.endm + +.altmacro +.macro load_next_word windex + .if \windex < 16 + load_word_at \windex + .endif +.endm + +.macro SHA1_STEP_00_15 windex:req + rev WORD\windex\(),WORD\windex\() + next_word=\windex+1 + load_next_word %next_word + + ror TMPW,WA,#32-5 + add WE,WE,TMPW + add WE,WE,WK + FUNC_F0 + ror WB,WB,#32-30 + add WE,WE,WORD\windex\() + add 
WE,WE,WF +.endm + +.macro SHA1_STEP_16_79 windex:req,func_f:req,reg_3:req,reg_8:req,reg_14:req,reg_16:req + eor TMPW,\reg_14,\reg_8 + eor \reg_16,\reg_16,\reg_3 + eor \reg_16,\reg_16,TMPW + + ror TMPW,WA,#32-5 + ror \reg_16,\reg_16, #32 - 1 + + add WE,WE,TMPW + add WE,WE,WK + \func_f + ror WB,WB,#32-30 + add WE,WE,\reg_16 + add WE,WE,WF +.endm + +.macro SWAP_STATES + .unreq TT + TT .req WE + .unreq WE + WE .req WD + .unreq WD + WD .req WC + .unreq WC + WC .req WB + .unreq WB + WB .req WA + .unreq WA + WA .req TT +.endm + +.altmacro +.macro SHA1_STEP_16_79_WRAPPER windex:req,func_f:req,idx3:req,idx8:req,idx14:req,idx16:req + SHA1_STEP_16_79 \windex,\func_f,WORD\idx3\(),WORD\idx8\(),WORD\idx14\(),WORD\idx16\() +.endm + +.macro exec_step windex:req + .if \windex <= 15 + SHA1_STEP_00_15 windex + .else + idx14=((\windex - 14) & 15) + idx8=((\windex - 8) & 15) + idx3=((\windex - 3) & 15) + idx16=(\windex & 15) + .if \windex <= 19 + SHA1_STEP_16_79_WRAPPER \windex,FUNC_F0,%idx3,%idx8,%idx14,%idx16 + .endif + .if \windex >= 20 && \windex <= 39 + SHA1_STEP_16_79_WRAPPER \windex,FUNC_F1,%idx3,%idx8,%idx14,%idx16 + .endif + .if \windex >= 40 && \windex <= 59 + SHA1_STEP_16_79_WRAPPER \windex,FUNC_F2,%idx3,%idx8,%idx14,%idx16 + .endif + .if \windex >= 60 && \windex <= 79 + SHA1_STEP_16_79_WRAPPER \windex,FUNC_F3,%idx3,%idx8,%idx14,%idx16 + .endif + .endif + + SWAP_STATES +.endm + +.macro exec_steps idx:req,more:vararg + exec_step \idx + .ifnb \more + exec_steps \more + .endif +.endm + +.altmacro + +.macro load_two_words_at idx0:req,idx1:req + ldp WORD\idx0\(),WORD\idx1\(),[input_data],8 +.endm + +.macro load_word_at idx:req + .if \idx % 2 == 0 + idx1=\idx+1 + load_two_words_at \idx,%idx1 + .endif +.endm + +/* + * void sha1_aarch64_x1(uint32_t *input_data, int num_blocks, uint32_t digest[5]) + */ + .global sha1_aarch64_x1 + .type sha1_aarch64_x1, %function +sha1_aarch64_x1: + cmp num_blocks, #0 + beq .return + + ldp WA,WB,[digest] + ldp WC,WD,[digest,8] + ldr WE,[digest,16] + 
save_stack + +.block_loop: + mov AA, WA + mov BB, WB + mov CC, WC + mov DD, WD + mov EE, WE + + load_word_at 0 + + adr sha1key_adr, KEY_0 + ldr WK, [sha1key_adr] + exec_steps 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19 + + // 20 ~ 39 + adr sha1key_adr, KEY_1 + ldr WK, [sha1key_adr] + exec_steps 20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39 + + // 40 ~ 59 + adr sha1key_adr, KEY_2 + ldr WK, [sha1key_adr] + exec_steps 40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59 + + // 60 ~ 79 + adr sha1key_adr, KEY_3 + ldr WK, [sha1key_adr] + exec_steps 60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79 + + add WA, AA, WA + add WB, BB, WB + add WC, CC, WC + add WD, DD, WD + add WE, EE, WE + + subs num_blocks, num_blocks, 1 + bne .block_loop + + restore_stack + stp WA,WB,[digest] + stp WC,WD,[digest,8] + str WE,[digest,16] + +.return: + ret + + .size sha1_aarch64_x1, .-sha1_aarch64_x1 + .section .rodata.cst16,"aM",@progbits,16 + .align 16 +KEY_0: + .word 0x5a827999 +KEY_1: + .word 0x6ed9eba1 +KEY_2: + .word 0x8f1bbcdc +KEY_3: + .word 0xca62c1d6 diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_asimd_common.S b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_asimd_common.S new file mode 100644 index 000000000..c8b8dd982 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_asimd_common.S @@ -0,0 +1,269 @@ +/********************************************************************** + Copyright(c) 2021 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + + .arch armv8-a + +// macro F = (D ^ (B & (C ^ D))) +.macro FUNC_F0 + eor VF.16b, VC.16b, VD.16b + and VF.16b, VB.16b, VF.16b + eor VF.16b, VD.16b, VF.16b +.endm + +// F = (B ^ C ^ D) +.macro FUNC_F1 + eor VF.16b, VB.16b, VC.16b + eor VF.16b, VF.16b, VD.16b +.endm + +// F = ((B & C) | (B & D) | (C & D)) +.macro FUNC_F2 + and vT0.16b, VB.16b, VC.16b + and vT1.16b, VB.16b, VD.16b + and vT2.16b, VC.16b, VD.16b + orr VF.16b, vT0.16b, vT1.16b + orr VF.16b, VF.16b, vT2.16b +.endm + +// F = (B ^ C ^ D) +.macro FUNC_F3 + FUNC_F1 +.endm + +.altmacro +.macro load_next_word windex + .if \windex < 16 + load_x4_word \windex + .endif +.endm + +// FUNC_F0 is merged into STEP_00_15 for efficiency +.macro SHA1_STEP_00_15_F0 windex:req + rev32 WORD\windex\().16b,WORD\windex\().16b + next_word=\windex+1 + load_next_word %next_word + // e = (a leftrotate 5) + f + e + k + w[i] + ushr VT.4s, VA.4s, 32 - 5 + add VE.4s, VE.4s, VK.4s + sli VT.4s, VA.4s, 5 + eor VF.16b, VC.16b, VD.16b + add VE.4s, VE.4s, WORD\windex\().4s + and VF.16b, VB.16b, VF.16b + add VE.4s, VE.4s, VT.4s + eor VF.16b, VD.16b, VF.16b + ushr VT.4s, VB.4s, 32 - 30 + add VE.4s, VE.4s, VF.4s + sli VT.4s, VB.4s, 30 +.endm + +.macro SHA1_STEP_16_79 windex:req,func_f:req,reg_3:req,reg_8:req,reg_14:req,reg_16:req + eor vT0.16b,\reg_3\().16b,\reg_8\().16b + eor VT.16b,\reg_14\().16b,\reg_16\().16b + eor vT0.16b,vT0.16b,VT.16b + // e = (a leftrotate 5) + f + e + k + w[i] + ushr VT.4s, vT0.4s, 32 - 1 + add VE.4s, VE.4s, VK.4s + ushr vT1.4s, VA.4s, 32 - 5 + sli VT.4s, vT0.4s, 1 + add VE.4s, VE.4s, VT.4s + sli vT1.4s, VA.4s, 5 + mov \reg_16\().16b,VT.16b + add VE.4s, VE.4s, vT1.4s + ushr VT.4s, VB.4s, 32 - 30 + \func_f + add VE.4s, VE.4s, VF.4s + sli VT.4s, VB.4s, 30 +.endm + + VA .req v0 + VB .req v1 + VC .req v2 + VD .req v3 + VE .req v4 + VT .req v5 + VF .req v6 + VK .req v7 + WORD0 .req v8 + WORD1 .req v9 + WORD2 .req v10 + WORD3 .req v11 + WORD4 .req v12 
+ WORD5 .req v13 + WORD6 .req v14 + WORD7 .req v15 + WORD8 .req v16 + WORD9 .req v17 + WORD10 .req v18 + WORD11 .req v19 + WORD12 .req v20 + WORD13 .req v21 + WORD14 .req v22 + WORD15 .req v23 + vT0 .req v24 + vT1 .req v25 + vT2 .req v26 + vAA .req v27 + vBB .req v28 + vCC .req v29 + vDD .req v30 + vEE .req v31 + TT .req v0 + sha1key_adr .req x15 + +.macro SWAP_STATES + // shifted VB is held in VT after each step + .unreq TT + TT .req VE + .unreq VE + VE .req VD + .unreq VD + VD .req VC + .unreq VC + VC .req VT + .unreq VT + VT .req VB + .unreq VB + VB .req VA + .unreq VA + VA .req TT +.endm + +.altmacro +.macro SHA1_STEP_16_79_WRAPPER windex:req,func_f:req,idx3:req,idx8:req,idx14:req,idx16:req + SHA1_STEP_16_79 \windex,\func_f,WORD\idx3\(),WORD\idx8\(),WORD\idx14\(),WORD\idx16\() +.endm + +.macro exec_step windex:req + .if \windex <= 15 + SHA1_STEP_00_15_F0 windex + .else + idx14=((\windex - 14) & 15) + idx8=((\windex - 8) & 15) + idx3=((\windex - 3) & 15) + idx16=(\windex & 15) + .if \windex <= 19 + SHA1_STEP_16_79_WRAPPER \windex,FUNC_F0,%idx3,%idx8,%idx14,%idx16 + .endif + .if \windex >= 20 && \windex <= 39 + SHA1_STEP_16_79_WRAPPER \windex,FUNC_F1,%idx3,%idx8,%idx14,%idx16 + .endif + .if \windex >= 40 && \windex <= 59 + SHA1_STEP_16_79_WRAPPER \windex,FUNC_F2,%idx3,%idx8,%idx14,%idx16 + .endif + .if \windex >= 60 && \windex <= 79 + SHA1_STEP_16_79_WRAPPER \windex,FUNC_F3,%idx3,%idx8,%idx14,%idx16 + .endif + .endif + + SWAP_STATES + + .if \windex == 79 + // after 80 steps, the registers ABCDET has shifted from + // its orignal order of 012345 to 341520 + // have to swap back for both compile- and run-time correctness + mov v0.16b,v3.16b + .unreq VA + VA .req v0 + + mov vT0.16b,v2.16b + mov v2.16b,v1.16b + mov v1.16b,v4.16b + .unreq VB + VB .req v1 + .unreq VC + VC .req v2 + + mov v3.16b,v5.16b + .unreq VD + VD .req v3 + + mov v4.16b,vT0.16b + .unreq VE + VE .req v4 + + .unreq VT + VT .req v5 + .endif +.endm + +.macro exec_steps idx:req,more:vararg + exec_step 
\idx + .ifnb \more + exec_steps \more + .endif +.endm + +.macro sha1_single + load_x4_word 0 + + mov vAA.16B, VA.16B + mov vBB.16B, VB.16B + mov vCC.16B, VC.16B + mov vDD.16B, VD.16B + mov vEE.16B, VE.16B + + adr sha1key_adr, KEY_0 + ld1 {VK.4s}, [sha1key_adr] + exec_steps 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19 + + // 20 ~ 39 + adr sha1key_adr, KEY_1 + ld1 {VK.4s}, [sha1key_adr] + exec_steps 20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39 + + // 40 ~ 59 + adr sha1key_adr, KEY_2 + ld1 {VK.4s}, [sha1key_adr] + exec_steps 40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59 + + // 60 ~ 79 + adr sha1key_adr, KEY_3 + ld1 {VK.4s}, [sha1key_adr] + exec_steps 60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79 + + add VA.4s, vAA.4s, VA.4s + add VB.4s, vBB.4s, VB.4s + add VC.4s, vCC.4s, VC.4s + add VD.4s, vDD.4s, VD.4s + add VE.4s, vEE.4s, VE.4s +.endm + +.macro sha1_asimd_save_stack + stp d8,d9,[sp, -64]! + stp d10,d11,[sp, 16] + stp d12,d13,[sp, 32] + stp d14,d15,[sp, 48] +.endm + +.macro sha1_asimd_restore_stack + ldp d10,d11,[sp, 16] + ldp d12,d13,[sp, 32] + ldp d14,d15,[sp, 48] + ldp d8,d9,[sp],64 +.endm diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_ctx_asimd.c b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_ctx_asimd.c new file mode 100644 index 000000000..9a9952ff6 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_ctx_asimd.c @@ -0,0 +1,250 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include <stdint.h> +#include <string.h> +#include "sha1_mb.h" +#include "memcpy_inline.h" +#include "endian_helper.h" +void sha1_mb_mgr_init_asimd(SHA1_MB_JOB_MGR * state); +SHA1_JOB *sha1_mb_mgr_submit_asimd(SHA1_MB_JOB_MGR * state, SHA1_JOB * job); +SHA1_JOB *sha1_mb_mgr_flush_asimd(SHA1_MB_JOB_MGR * state); +static inline void hash_init_digest(SHA1_WORD_T * digest); +static inline uint32_t hash_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2], uint64_t total_len); +static SHA1_HASH_CTX *sha1_ctx_mgr_resubmit(SHA1_HASH_CTX_MGR * mgr, SHA1_HASH_CTX * ctx); + +void sha1_ctx_mgr_init_asimd(SHA1_HASH_CTX_MGR * mgr) +{ + sha1_mb_mgr_init_asimd(&mgr->mgr); +} + +SHA1_HASH_CTX *sha1_ctx_mgr_submit_asimd(SHA1_HASH_CTX_MGR * mgr, SHA1_HASH_CTX * ctx, + const void *buffer, uint32_t len, HASH_CTX_FLAG flags) +{ + if (flags & (~HASH_ENTIRE)) { + // User should not pass anything other than FIRST, UPDATE, or LAST + ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; + return ctx; + } + + if (ctx->status & HASH_CTX_STS_PROCESSING) { + // Cannot submit to a currently processing job. + ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING; + return ctx; + } + + if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) { + // Cannot update a finished job. + ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED; + return ctx; + } + + if (flags & HASH_FIRST) { + // Init digest + hash_init_digest(ctx->job.result_digest); + + // Reset byte counter + ctx->total_length = 0; + + // Clear extra blocks + ctx->partial_block_buffer_length = 0; + } + // If we made it here, there were no errors during this call to submit + ctx->error = HASH_CTX_ERROR_NONE; + + // Store buffer ptr info from user + ctx->incoming_buffer = buffer; + ctx->incoming_buffer_length = len; + + // Store the user's request flags and mark this ctx as currently being processed. + ctx->status = (flags & HASH_LAST) ? 
+ (HASH_CTX_STS) (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) : + HASH_CTX_STS_PROCESSING; + + // Advance byte counter + ctx->total_length += len; + + // If there is anything currently buffered in the extra blocks, append to it until it contains a whole block. + // Or if the user's buffer contains less than a whole block, append as much as possible to the extra block. + if ((ctx->partial_block_buffer_length) | (len < SHA1_BLOCK_SIZE)) { + // Compute how many bytes to copy from user buffer into extra block + uint32_t copy_len = SHA1_BLOCK_SIZE - ctx->partial_block_buffer_length; + if (len < copy_len) + copy_len = len; + + if (copy_len) { + // Copy and update relevant pointers and counters + memcpy_fixedlen(&ctx->partial_block_buffer + [ctx->partial_block_buffer_length], buffer, copy_len); + + ctx->partial_block_buffer_length += copy_len; + ctx->incoming_buffer = (const void *)((const char *)buffer + copy_len); + ctx->incoming_buffer_length = len - copy_len; + } + // The extra block should never contain more than 1 block here + assert(ctx->partial_block_buffer_length <= SHA1_BLOCK_SIZE); + + // If the extra block buffer contains exactly 1 block, it can be hashed. + if (ctx->partial_block_buffer_length >= SHA1_BLOCK_SIZE) { + ctx->partial_block_buffer_length = 0; + + ctx->job.buffer = ctx->partial_block_buffer; + ctx->job.len = 1; + + ctx = (SHA1_HASH_CTX *) sha1_mb_mgr_submit_asimd(&mgr->mgr, &ctx->job); + } + } + + return sha1_ctx_mgr_resubmit(mgr, ctx); +} + +SHA1_HASH_CTX *sha1_ctx_mgr_flush_asimd(SHA1_HASH_CTX_MGR * mgr) +{ + SHA1_HASH_CTX *ctx; + + while (1) { + ctx = (SHA1_HASH_CTX *) sha1_mb_mgr_flush_asimd(&mgr->mgr); + + // If flush returned 0, there are no more jobs in flight. + if (!ctx) + return NULL; + + // If flush returned a job, verify that it is safe to return to the user. + // If it is not ready, resubmit the job to finish processing. 
+ ctx = sha1_ctx_mgr_resubmit(mgr, ctx); + + // If sha1_ctx_mgr_resubmit returned a job, it is ready to be returned. + if (ctx) + return ctx; + + // Otherwise, all jobs currently being managed by the SHA1_HASH_CTX_MGR still need processing. Loop. + } +} + +static SHA1_HASH_CTX *sha1_ctx_mgr_resubmit(SHA1_HASH_CTX_MGR * mgr, SHA1_HASH_CTX * ctx) +{ + while (ctx) { + + if (ctx->status & HASH_CTX_STS_COMPLETE) { + ctx->status = HASH_CTX_STS_COMPLETE; // Clear PROCESSING bit + return ctx; + } + // If the extra blocks are empty, begin hashing what remains in the user's buffer. + if (ctx->partial_block_buffer_length == 0 && ctx->incoming_buffer_length) { + const void *buffer = ctx->incoming_buffer; + uint32_t len = ctx->incoming_buffer_length; + + // Only entire blocks can be hashed. Copy remainder to extra blocks buffer. + uint32_t copy_len = len & (SHA1_BLOCK_SIZE - 1); + + if (copy_len) { + len -= copy_len; + memcpy_fixedlen(ctx->partial_block_buffer, + ((const char *)buffer + len), copy_len); + ctx->partial_block_buffer_length = copy_len; + } + + ctx->incoming_buffer_length = 0; + + // len should be a multiple of the block size now + assert((len % SHA1_BLOCK_SIZE) == 0); + + // Set len to the number of blocks to be hashed in the user's buffer + len >>= SHA1_LOG2_BLOCK_SIZE; + + if (len) { + ctx->job.buffer = (uint8_t *) buffer; + ctx->job.len = len; + ctx = (SHA1_HASH_CTX *) sha1_mb_mgr_submit_asimd(&mgr->mgr, + &ctx->job); + continue; + } + } + // If the extra blocks are not empty, then we are either on the last block(s) + // or we need more user input before continuing. 
+ if (ctx->status & HASH_CTX_STS_LAST) { + uint8_t *buf = ctx->partial_block_buffer; + uint32_t n_extra_blocks = hash_pad(buf, ctx->total_length); + + ctx->status = + (HASH_CTX_STS) (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_COMPLETE); + ctx->job.buffer = buf; + ctx->job.len = (uint32_t) n_extra_blocks; + ctx = (SHA1_HASH_CTX *) sha1_mb_mgr_submit_asimd(&mgr->mgr, &ctx->job); + continue; + } + + if (ctx) + ctx->status = HASH_CTX_STS_IDLE; + return ctx; + } + + return NULL; +} + +static inline void hash_init_digest(SHA1_WORD_T * digest) +{ + static const SHA1_WORD_T hash_initial_digest[SHA1_DIGEST_NWORDS] = + { SHA1_INITIAL_DIGEST }; + memcpy_fixedlen(digest, hash_initial_digest, sizeof(hash_initial_digest)); +} + +static inline uint32_t hash_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2], uint64_t total_len) +{ + uint32_t i = (uint32_t) (total_len & (SHA1_BLOCK_SIZE - 1)); + + memclr_fixedlen(&padblock[i], SHA1_BLOCK_SIZE); + padblock[i] = 0x80; + + // Move i to the end of either 1st or 2nd extra block depending on length + i += ((SHA1_BLOCK_SIZE - 1) & (0 - (total_len + SHA1_PADLENGTHFIELD_SIZE + 1))) + 1 + + SHA1_PADLENGTHFIELD_SIZE; + +#if SHA1_PADLENGTHFIELD_SIZE == 16 + *((uint64_t *) & padblock[i - 16]) = 0; +#endif + + *((uint64_t *) & padblock[i - 8]) = to_be64((uint64_t) total_len << 3); + + return i >> SHA1_LOG2_BLOCK_SIZE; // Number of extra blocks to hash +} + +struct slver { + uint16_t snum; + uint8_t ver; + uint8_t core; +}; +struct slver sha1_ctx_mgr_init_asimd_slver_02020142; +struct slver sha1_ctx_mgr_init_asimd_slver = { 0x0142, 0x02, 0x02 }; + +struct slver sha1_ctx_mgr_submit_asimd_slver_02020143; +struct slver sha1_ctx_mgr_submit_asimd_slver = { 0x0143, 0x02, 0x02 }; + +struct slver sha1_ctx_mgr_flush_asimd_slver_02020144; +struct slver sha1_ctx_mgr_flush_asimd_slver = { 0x0144, 0x02, 0x02 }; diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_ctx_ce.c b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_ctx_ce.c new file mode 100644 index 
000000000..e40a344ff --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_ctx_ce.c @@ -0,0 +1,250 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include <stdint.h> +#include <string.h> +#include "sha1_mb.h" +#include "memcpy_inline.h" +#include "endian_helper.h" +void sha1_mb_mgr_init_ce(SHA1_MB_JOB_MGR * state); +SHA1_JOB *sha1_mb_mgr_submit_ce(SHA1_MB_JOB_MGR * state, SHA1_JOB * job); +SHA1_JOB *sha1_mb_mgr_flush_ce(SHA1_MB_JOB_MGR * state); +static inline void hash_init_digest(SHA1_WORD_T * digest); +static inline uint32_t hash_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2], uint64_t total_len); +static SHA1_HASH_CTX *sha1_ctx_mgr_resubmit(SHA1_HASH_CTX_MGR * mgr, SHA1_HASH_CTX * ctx); + +void sha1_ctx_mgr_init_ce(SHA1_HASH_CTX_MGR * mgr) +{ + sha1_mb_mgr_init_ce(&mgr->mgr); +} + +SHA1_HASH_CTX *sha1_ctx_mgr_submit_ce(SHA1_HASH_CTX_MGR * mgr, SHA1_HASH_CTX * ctx, + const void *buffer, uint32_t len, HASH_CTX_FLAG flags) +{ + if (flags & (~HASH_ENTIRE)) { + // User should not pass anything other than FIRST, UPDATE, or LAST + ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; + return ctx; + } + + if (ctx->status & HASH_CTX_STS_PROCESSING) { + // Cannot submit to a currently processing job. + ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING; + return ctx; + } + + if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) { + // Cannot update a finished job. + ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED; + return ctx; + } + + if (flags & HASH_FIRST) { + // Init digest + hash_init_digest(ctx->job.result_digest); + + // Reset byte counter + ctx->total_length = 0; + + // Clear extra blocks + ctx->partial_block_buffer_length = 0; + } + // If we made it here, there were no errors during this call to submit + ctx->error = HASH_CTX_ERROR_NONE; + + // Store buffer ptr info from user + ctx->incoming_buffer = buffer; + ctx->incoming_buffer_length = len; + + // Store the user's request flags and mark this ctx as currently being processed. + ctx->status = (flags & HASH_LAST) ? 
+ (HASH_CTX_STS) (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) : + HASH_CTX_STS_PROCESSING; + + // Advance byte counter + ctx->total_length += len; + + // If there is anything currently buffered in the extra blocks, append to it until it contains a whole block. + // Or if the user's buffer contains less than a whole block, append as much as possible to the extra block. + if ((ctx->partial_block_buffer_length) | (len < SHA1_BLOCK_SIZE)) { + // Compute how many bytes to copy from user buffer into extra block + uint32_t copy_len = SHA1_BLOCK_SIZE - ctx->partial_block_buffer_length; + if (len < copy_len) + copy_len = len; + + if (copy_len) { + // Copy and update relevant pointers and counters + memcpy_fixedlen(&ctx->partial_block_buffer + [ctx->partial_block_buffer_length], buffer, copy_len); + + ctx->partial_block_buffer_length += copy_len; + ctx->incoming_buffer = (const void *)((const char *)buffer + copy_len); + ctx->incoming_buffer_length = len - copy_len; + } + // The extra block should never contain more than 1 block here + assert(ctx->partial_block_buffer_length <= SHA1_BLOCK_SIZE); + + // If the extra block buffer contains exactly 1 block, it can be hashed. + if (ctx->partial_block_buffer_length >= SHA1_BLOCK_SIZE) { + ctx->partial_block_buffer_length = 0; + + ctx->job.buffer = ctx->partial_block_buffer; + ctx->job.len = 1; + + ctx = (SHA1_HASH_CTX *) sha1_mb_mgr_submit_ce(&mgr->mgr, &ctx->job); + } + } + + return sha1_ctx_mgr_resubmit(mgr, ctx); +} + +SHA1_HASH_CTX *sha1_ctx_mgr_flush_ce(SHA1_HASH_CTX_MGR * mgr) +{ + SHA1_HASH_CTX *ctx; + + while (1) { + ctx = (SHA1_HASH_CTX *) sha1_mb_mgr_flush_ce(&mgr->mgr); + + // If flush returned 0, there are no more jobs in flight. + if (!ctx) + return NULL; + + // If flush returned a job, verify that it is safe to return to the user. + // If it is not ready, resubmit the job to finish processing. + ctx = sha1_ctx_mgr_resubmit(mgr, ctx); + + // If sha1_ctx_mgr_resubmit returned a job, it is ready to be returned. 
+ if (ctx) + return ctx; + + // Otherwise, all jobs currently being managed by the SHA1_HASH_CTX_MGR still need processing. Loop. + } +} + +static SHA1_HASH_CTX *sha1_ctx_mgr_resubmit(SHA1_HASH_CTX_MGR * mgr, SHA1_HASH_CTX * ctx) +{ + while (ctx) { + + if (ctx->status & HASH_CTX_STS_COMPLETE) { + ctx->status = HASH_CTX_STS_COMPLETE; // Clear PROCESSING bit + return ctx; + } + // If the extra blocks are empty, begin hashing what remains in the user's buffer. + if (ctx->partial_block_buffer_length == 0 && ctx->incoming_buffer_length) { + const void *buffer = ctx->incoming_buffer; + uint32_t len = ctx->incoming_buffer_length; + + // Only entire blocks can be hashed. Copy remainder to extra blocks buffer. + uint32_t copy_len = len & (SHA1_BLOCK_SIZE - 1); + + if (copy_len) { + len -= copy_len; + memcpy_fixedlen(ctx->partial_block_buffer, + ((const char *)buffer + len), copy_len); + ctx->partial_block_buffer_length = copy_len; + } + + ctx->incoming_buffer_length = 0; + + // len should be a multiple of the block size now + assert((len % SHA1_BLOCK_SIZE) == 0); + + // Set len to the number of blocks to be hashed in the user's buffer + len >>= SHA1_LOG2_BLOCK_SIZE; + + if (len) { + ctx->job.buffer = (uint8_t *) buffer; + ctx->job.len = len; + ctx = (SHA1_HASH_CTX *) sha1_mb_mgr_submit_ce(&mgr->mgr, + &ctx->job); + continue; + } + } + // If the extra blocks are not empty, then we are either on the last block(s) + // or we need more user input before continuing. 
+ if (ctx->status & HASH_CTX_STS_LAST) { + uint8_t *buf = ctx->partial_block_buffer; + uint32_t n_extra_blocks = hash_pad(buf, ctx->total_length); + + ctx->status = + (HASH_CTX_STS) (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_COMPLETE); + ctx->job.buffer = buf; + ctx->job.len = (uint32_t) n_extra_blocks; + ctx = (SHA1_HASH_CTX *) sha1_mb_mgr_submit_ce(&mgr->mgr, &ctx->job); + continue; + } + + if (ctx) + ctx->status = HASH_CTX_STS_IDLE; + return ctx; + } + + return NULL; +} + +static inline void hash_init_digest(SHA1_WORD_T * digest) +{ + static const SHA1_WORD_T hash_initial_digest[SHA1_DIGEST_NWORDS] = + { SHA1_INITIAL_DIGEST }; + memcpy_fixedlen(digest, hash_initial_digest, sizeof(hash_initial_digest)); +} + +static inline uint32_t hash_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2], uint64_t total_len) +{ + uint32_t i = (uint32_t) (total_len & (SHA1_BLOCK_SIZE - 1)); + + memclr_fixedlen(&padblock[i], SHA1_BLOCK_SIZE); + padblock[i] = 0x80; + + // Move i to the end of either 1st or 2nd extra block depending on length + i += ((SHA1_BLOCK_SIZE - 1) & (0 - (total_len + SHA1_PADLENGTHFIELD_SIZE + 1))) + 1 + + SHA1_PADLENGTHFIELD_SIZE; + +#if SHA1_PADLENGTHFIELD_SIZE == 16 + *((uint64_t *) & padblock[i - 16]) = 0; +#endif + + *((uint64_t *) & padblock[i - 8]) = to_be64((uint64_t) total_len << 3); + + return i >> SHA1_LOG2_BLOCK_SIZE; // Number of extra blocks to hash +} + +struct slver { + uint16_t snum; + uint8_t ver; + uint8_t core; +}; +struct slver sha1_ctx_mgr_init_ce_slver_02020142; +struct slver sha1_ctx_mgr_init_ce_slver = { 0x0142, 0x02, 0x02 }; + +struct slver sha1_ctx_mgr_submit_ce_slver_02020143; +struct slver sha1_ctx_mgr_submit_ce_slver = { 0x0143, 0x02, 0x02 }; + +struct slver sha1_ctx_mgr_flush_ce_slver_02020144; +struct slver sha1_ctx_mgr_flush_ce_slver = { 0x0144, 0x02, 0x02 }; diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_aarch64_dispatcher.c b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_aarch64_dispatcher.c new file mode 
100644 index 000000000..0942c1a95 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_aarch64_dispatcher.c @@ -0,0 +1,93 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ +#include <aarch64_multibinary.h> + +DEFINE_INTERFACE_DISPATCHER(sha1_ctx_mgr_submit) +{ + + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_SHA1) + return PROVIDER_INFO(sha1_ctx_mgr_submit_ce); + + if (auxval & HWCAP_ASIMD) { + switch (get_micro_arch_id()) { + case MICRO_ARCH_ID(ARM, NEOVERSE_N1): // fall through + case MICRO_ARCH_ID(ARM, CORTEX_A57): // fall through + case MICRO_ARCH_ID(ARM, CORTEX_A72): // fall through + return PROVIDER_INFO(sha1_ctx_mgr_submit_asimd); + default: + break; + } + } + + return PROVIDER_BASIC(sha1_ctx_mgr_submit); + +} + +DEFINE_INTERFACE_DISPATCHER(sha1_ctx_mgr_init) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_SHA1) + return PROVIDER_INFO(sha1_ctx_mgr_init_ce); + + if (auxval & HWCAP_ASIMD) { + switch (get_micro_arch_id()) { + case MICRO_ARCH_ID(ARM, NEOVERSE_N1): // fall through + case MICRO_ARCH_ID(ARM, CORTEX_A57): // fall through + case MICRO_ARCH_ID(ARM, CORTEX_A72): // fall through + return PROVIDER_INFO(sha1_ctx_mgr_init_asimd); + default: + break; + } + } + + return PROVIDER_BASIC(sha1_ctx_mgr_init); + +} + +DEFINE_INTERFACE_DISPATCHER(sha1_ctx_mgr_flush) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_SHA1) + return PROVIDER_INFO(sha1_ctx_mgr_flush_ce); + + if (auxval & HWCAP_ASIMD) { + switch (get_micro_arch_id()) { + case MICRO_ARCH_ID(ARM, NEOVERSE_N1): // fall through + case MICRO_ARCH_ID(ARM, CORTEX_A57): // fall through + case MICRO_ARCH_ID(ARM, CORTEX_A72): // fall through + return PROVIDER_INFO(sha1_ctx_mgr_flush_asimd); + default: + break; + } + } + + return PROVIDER_BASIC(sha1_ctx_mgr_flush); + +} diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_asimd_x4.S b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_asimd_x4.S new file mode 100644 index 000000000..012b15c14 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_asimd_x4.S @@ -0,0 
+1,192 @@ +/********************************************************************** + Copyright(c) 2021 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + + .arch armv8-a + +#include "sha1_asimd_common.S" + +.macro internal_load windex + // load 64-bytes from each address to maximize usage of cache line + .if \windex == 0 + mov tmp,dataptr + ld1 {WORD0.4s},[data0],16 + ld1 {WORD4.4s},[data0],16 + ld1 {WORD8.4s},[data0],16 + ld1 {WORD12.4s},[data0],16 + + ld1 {WORD1.4s},[data1],16 + ld1 {WORD5.4s},[data1],16 + ld1 {WORD9.4s},[data1],16 + ld1 {WORD13.4s},[data1],16 + + ld1 {WORD2.4s},[data2],16 + ld1 {WORD6.4s},[data2],16 + ld1 {WORD10.4s},[data2],16 + ld1 {WORD14.4s},[data2],16 + + ld1 {WORD3.4s},[data3],16 + ld1 {WORD7.4s},[data3],16 + ld1 {WORD11.4s},[data3],16 + ld1 {WORD15.4s},[data3],16 + + st4 {WORD0.s,WORD1.s,WORD2.s,WORD3.s}[0],[tmp],16 + st4 {WORD0.s,WORD1.s,WORD2.s,WORD3.s}[1],[tmp],16 + st4 {WORD0.s,WORD1.s,WORD2.s,WORD3.s}[2],[tmp],16 + st4 {WORD0.s,WORD1.s,WORD2.s,WORD3.s}[3],[tmp],16 + .endif + + .if \windex == 4 + mov tmp,dataptr + st4 {WORD4.s,WORD5.s,WORD6.s,WORD7.s}[0],[tmp],16 + st4 {WORD4.s,WORD5.s,WORD6.s,WORD7.s}[1],[tmp],16 + st4 {WORD4.s,WORD5.s,WORD6.s,WORD7.s}[2],[tmp],16 + st4 {WORD4.s,WORD5.s,WORD6.s,WORD7.s}[3],[tmp],16 + .endif + + .if \windex == 8 + mov tmp,dataptr + st4 {WORD8.s,WORD9.s,WORD10.s,WORD11.s}[0],[tmp],16 + st4 {WORD8.s,WORD9.s,WORD10.s,WORD11.s}[1],[tmp],16 + st4 {WORD8.s,WORD9.s,WORD10.s,WORD11.s}[2],[tmp],16 + st4 {WORD8.s,WORD9.s,WORD10.s,WORD11.s}[3],[tmp],16 + .endif + + .if \windex == 12 + mov tmp,dataptr + st4 {WORD12.s,WORD13.s,WORD14.s,WORD15.s}[0],[tmp],16 + st4 {WORD12.s,WORD13.s,WORD14.s,WORD15.s}[1],[tmp],16 + st4 {WORD12.s,WORD13.s,WORD14.s,WORD15.s}[2],[tmp],16 + st4 {WORD12.s,WORD13.s,WORD14.s,WORD15.s}[3],[tmp],16 + .endif +.endm + +.macro load_x4_word idx:req + internal_load \idx + ld1 {WORD\idx\().16b},[dataptr],16 +.endm + +/* + * void sha1_mb_asimd_x4(SHA1_JOB *j0, SHA1_JOB*j1, SHA1_JOB*j2, SHA1_JOB *j3, int blocks) + */ + job0 .req x0 + job1 .req x1 + job2 .req x2 + job3 .req x3 
+ num_blocks .req w4 + tmp .req x5 + data0 .req x6 + data1 .req x7 + data2 .req x8 + data3 .req x9 + databuf .req x10 + dataptr .req x11 + savedsp .req x12 + + .global sha1_mb_asimd_x4 + .type sha1_mb_asimd_x4, %function +sha1_mb_asimd_x4: + cmp num_blocks, #0 + beq .return + sha1_asimd_save_stack + mov savedsp,sp + sub databuf,sp,256 + mov tmp,63 + bic databuf,databuf,tmp + mov sp,databuf + + add tmp,job0,64 + ld4 {VA.s,VB.s,VC.s,VD.s}[0],[tmp],#16 + ld1 {VE.s}[0],[tmp] + ldr data0,[job0] + + add tmp,job1,64 + ld4 {VA.s,VB.s,VC.s,VD.s}[1],[tmp],#16 + ld1 {VE.s}[1],[tmp] + ldr data1,[job1] + + add tmp,job2,64 + ld4 {VA.s,VB.s,VC.s,VD.s}[2],[tmp],#16 + ld1 {VE.s}[2],[tmp] + ldr data2,[job2] + + add tmp,job3,64 + ld4 {VA.s,VB.s,VC.s,VD.s}[3],[tmp],#16 + ld1 {VE.s}[3],[tmp] + ldr data3,[job3] + +.block_loop: + mov dataptr,databuf + sha1_single + subs num_blocks, num_blocks, 1 + bne .block_loop + + add tmp,job0,64 + st4 {VA.s,VB.s,VC.s,VD.s}[0],[tmp],#16 + st1 {VE.s}[0],[tmp] + + add tmp,job1,64 + st4 {VA.s,VB.s,VC.s,VD.s}[1],[tmp],#16 + st1 {VE.s}[1],[tmp] + + add tmp,job2,64 + st4 {VA.s,VB.s,VC.s,VD.s}[2],[tmp],#16 + st1 {VE.s}[2],[tmp] + + add tmp,job3,64 + st4 {VA.s,VB.s,VC.s,VD.s}[3],[tmp],#16 + st1 {VE.s}[3],[tmp] + + mov sp,savedsp + sha1_asimd_restore_stack +.return: + ret + + .size sha1_mb_asimd_x4, .-sha1_mb_asimd_x4 + .section .rodata.cst16,"aM",@progbits,16 + .align 16 +KEY_0: + .word 0x5a827999 + .word 0x5a827999 + .word 0x5a827999 + .word 0x5a827999 +KEY_1: + .word 0x6ed9eba1 + .word 0x6ed9eba1 + .word 0x6ed9eba1 + .word 0x6ed9eba1 +KEY_2: + .word 0x8f1bbcdc + .word 0x8f1bbcdc + .word 0x8f1bbcdc + .word 0x8f1bbcdc +KEY_3: + .word 0xca62c1d6 + .word 0xca62c1d6 + .word 0xca62c1d6 + .word 0xca62c1d6 diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_mgr_asimd.c b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_mgr_asimd.c new file mode 100644 index 000000000..4b34e7b53 --- /dev/null +++ 
b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_mgr_asimd.c @@ -0,0 +1,217 @@ +/********************************************************************** + Copyright(c) 2021 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ +#include <stddef.h> +#include <sha1_mb.h> +#include <assert.h> +#include "endian_helper.h" + +extern void sha1_aarch64_x1(const uint8_t * data, int num_blocks, uint32_t digest[]); +static inline void sha1_job_x1(SHA1_JOB * job, int blocks) +{ + sha1_aarch64_x1(job->buffer, blocks, job->result_digest); +} + +#ifndef min +#define min(a,b) (((a) < (b)) ? (a) : (b)) +#endif + +#define SHA1_MB_ASIMD_MAX_LANES 4 +void sha1_mb_asimd_x4(SHA1_JOB *, SHA1_JOB *, SHA1_JOB *, SHA1_JOB *, int); + +#define LANE_IS_NOT_FINISHED(state,i) \ + (((state->lens[i]&(~0xf))!=0) && state->ldata[i].job_in_lane!=NULL) +#define LANE_IS_FINISHED(state,i) \ + (((state->lens[i]&(~0xf))==0) && state->ldata[i].job_in_lane!=NULL) +#define LANE_IS_FREE(state,i) \ + (((state->lens[i]&(~0xf))==0) && state->ldata[i].job_in_lane==NULL) +#define LANE_IS_INVALID(state,i) \ + (((state->lens[i]&(~0xf))!=0) && state->ldata[i].job_in_lane==NULL) + +void sha1_mb_mgr_init_asimd(SHA1_MB_JOB_MGR * state) +{ + unsigned int i; + + state->unused_lanes = 0xf; + state->num_lanes_inuse = 0; + for (i = 0; i < SHA1_MB_ASIMD_MAX_LANES; i++) { + state->unused_lanes <<= 4; + state->unused_lanes |= SHA1_MB_ASIMD_MAX_LANES - 1 - i; + state->lens[i] = i; + state->ldata[i].job_in_lane = 0; + } + + // lanes > SHA1_MB_ASIMD_MAX_LANES is invalid lane + for (; i < SHA1_MAX_LANES; i++) { + state->lens[i] = 0xf; + state->ldata[i].job_in_lane = 0; + } +} + +static int sha1_mb_mgr_do_jobs(SHA1_MB_JOB_MGR * state) +{ + int lane_idx, len, i, lanes, blocks; + int lane_idx_array[SHA1_MAX_LANES]; + + if (state->num_lanes_inuse == 0) { + return -1; + } + lanes = 0, len = 0; + for (i = 0; i < SHA1_MAX_LANES && lanes < state->num_lanes_inuse; i++) { + if (LANE_IS_NOT_FINISHED(state, i)) { + if (lanes) + len = min(len, state->lens[i]); + else + len = state->lens[i]; + lane_idx_array[lanes] = i; + lanes++; + } + } + + if (lanes == 0) + return -1; + lane_idx = len & 0xf; + 
len = len & (~0xf); + blocks = len >> 4; + + /* for less-than-3-lane job, ASIMD really does not have much advantage + * compared to scalar due to wasted >= 50% capacity + * therefore we only run ASIMD for 3/4 lanes of data + */ + if (lanes == SHA1_MB_ASIMD_MAX_LANES) { + sha1_mb_asimd_x4(state->ldata[lane_idx_array[0]].job_in_lane, + state->ldata[lane_idx_array[1]].job_in_lane, + state->ldata[lane_idx_array[2]].job_in_lane, + state->ldata[lane_idx_array[3]].job_in_lane, blocks); + } else if (lanes == 3) { + /* in case of 3 lanes, apparently ASIMD will still operate as if + * there were four lanes of data in processing (waste 25% capacity) + * theoretically we can let ASIMD implementation know the number of lanes + * so that it could "at least" save some memory loading time + * but in practice, we can just pass lane 0 as dummy for similar + * cache performance + */ + SHA1_JOB dummy; + dummy.buffer = state->ldata[lane_idx_array[0]].job_in_lane->buffer; + dummy.len = state->ldata[lane_idx_array[0]].job_in_lane->len; + sha1_mb_asimd_x4(state->ldata[lane_idx_array[0]].job_in_lane, + &dummy, + state->ldata[lane_idx_array[1]].job_in_lane, + state->ldata[lane_idx_array[2]].job_in_lane, blocks); + } else { + sha1_job_x1(state->ldata[lane_idx_array[0]].job_in_lane, blocks); + if (lanes >= 2) { + sha1_job_x1(state->ldata[lane_idx_array[1]].job_in_lane, blocks); + } + } + + // only return the min length job + for (i = 0; i < SHA1_MAX_LANES; i++) { + if (LANE_IS_NOT_FINISHED(state, i)) { + state->lens[i] -= len; + state->ldata[i].job_in_lane->len -= len; + state->ldata[i].job_in_lane->buffer += len << 2; + } + } + return lane_idx; + +} + +static SHA1_JOB *sha1_mb_mgr_free_lane(SHA1_MB_JOB_MGR * state) +{ + int i; + SHA1_JOB *ret = NULL; + + for (i = 0; i < SHA1_MB_ASIMD_MAX_LANES; i++) { + if (LANE_IS_FINISHED(state, i)) { + state->unused_lanes <<= 4; + state->unused_lanes |= i; + state->num_lanes_inuse--; + ret = state->ldata[i].job_in_lane; + ret->status = STS_COMPLETED; + 
state->ldata[i].job_in_lane = NULL; + break; + } + } + return ret; +} + +static void sha1_mb_mgr_insert_job(SHA1_MB_JOB_MGR * state, SHA1_JOB * job) +{ + int lane_idx; + // add job into lanes + lane_idx = state->unused_lanes & 0xf; + // fatal error + assert(lane_idx < SHA1_MB_ASIMD_MAX_LANES); + state->lens[lane_idx] = (job->len << 4) | lane_idx; + state->ldata[lane_idx].job_in_lane = job; + state->unused_lanes >>= 4; + state->num_lanes_inuse++; +} + +SHA1_JOB *sha1_mb_mgr_submit_asimd(SHA1_MB_JOB_MGR * state, SHA1_JOB * job) +{ +#ifndef NDEBUG + int lane_idx; +#endif + SHA1_JOB *ret; + + // add job into lanes + sha1_mb_mgr_insert_job(state, job); + + ret = sha1_mb_mgr_free_lane(state); + if (ret != NULL) { + return ret; + } + // submit will wait all lane has data + if (state->num_lanes_inuse < SHA1_MB_ASIMD_MAX_LANES) + return NULL; +#ifndef NDEBUG + lane_idx = sha1_mb_mgr_do_jobs(state); + assert(lane_idx != -1); +#else + sha1_mb_mgr_do_jobs(state); +#endif + + // ~ i = lane_idx; + ret = sha1_mb_mgr_free_lane(state); + return ret; +} + +SHA1_JOB *sha1_mb_mgr_flush_asimd(SHA1_MB_JOB_MGR * state) +{ + SHA1_JOB *ret; + ret = sha1_mb_mgr_free_lane(state); + if (ret) { + return ret; + } + + sha1_mb_mgr_do_jobs(state); + return sha1_mb_mgr_free_lane(state); + +} diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_mgr_ce.c b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_mgr_ce.c new file mode 100644 index 000000000..1dfd67d0c --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_mgr_ce.c @@ -0,0 +1,208 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ +#include <stddef.h> +#include <sha1_mb.h> +#include <assert.h> + +#ifndef max +#define max(a,b) (((a) > (b)) ? (a) : (b)) +#endif + +#ifndef min +#define min(a,b) (((a) < (b)) ? 
(a) : (b)) +#endif + +#define SHA1_MB_CE_MAX_LANES 2 +#if SHA1_MB_CE_MAX_LANES >=2 +void sha1_mb_ce_x2(SHA1_JOB *, SHA1_JOB *, int); +#endif +void sha1_mb_ce_x1(SHA1_JOB *, int); + +#define LANE_IS_NOT_FINISHED(state,i) \ + (((state->lens[i]&(~0xf))!=0) && state->ldata[i].job_in_lane!=NULL) +#define LANE_IS_FINISHED(state,i) \ + (((state->lens[i]&(~0xf))==0) && state->ldata[i].job_in_lane!=NULL) +#define LANE_IS_FREE(state,i) \ + (((state->lens[i]&(~0xf))==0) && state->ldata[i].job_in_lane==NULL) +#define LANE_IS_INVALID(state,i) \ + (((state->lens[i]&(~0xf))!=0) && state->ldata[i].job_in_lane==NULL) +void sha1_mb_mgr_init_ce(SHA1_MB_JOB_MGR * state) +{ + unsigned int i; + + state->unused_lanes = 0xf; + state->num_lanes_inuse = 0; + for (i = 0; i < SHA1_MB_CE_MAX_LANES; i++) { + state->unused_lanes <<= 4; + state->unused_lanes |= i; + state->lens[i] = i; + state->ldata[i].job_in_lane = 0; + } + + //lanes > SHA1_MB_CE_MAX_LANES is invalid lane + for (; i < SHA1_MAX_LANES; i++) { + state->lens[i] = 0xf; + state->ldata[i].job_in_lane = 0; + } +} + +static int sha1_mb_mgr_do_jobs(SHA1_MB_JOB_MGR * state) +{ + int lane_idx, len, i, lanes; + + int lane_idx_array[SHA1_MAX_LANES]; + + if (state->num_lanes_inuse == 0) { + return -1; + } +#if SHA1_MB_CE_MAX_LANES == 2 + if (state->num_lanes_inuse == 2) { + len = min(state->lens[0], state->lens[1]); + lane_idx = len & 0xf; + len &= ~0xf; + + sha1_mb_ce_x2(state->ldata[0].job_in_lane, + state->ldata[1].job_in_lane, len >> 4); + + } else +#endif + { + lanes = 0, len = 0; + for (i = 0; i < SHA1_MAX_LANES && lanes < state->num_lanes_inuse; i++) { + if (LANE_IS_NOT_FINISHED(state, i)) { + if (lanes) + len = min(len, state->lens[i]); + else + len = state->lens[i]; + lane_idx_array[lanes] = i; + lanes++; + } + } + if (lanes == 0) + return -1; + lane_idx = len & 0xf; + len = len & (~0xf); + +#if SHA1_MB_CE_MAX_LANES >=2 + if (lanes == 2) { + sha1_mb_ce_x2(state->ldata[lane_idx_array[0]].job_in_lane, + 
state->ldata[lane_idx_array[1]].job_in_lane, len >> 4); + } else +#endif + { + sha1_mb_ce_x1(state->ldata[lane_idx_array[0]].job_in_lane, len >> 4); + } + } + //only return the min length job + for (i = 0; i < SHA1_MAX_LANES; i++) { + if (LANE_IS_NOT_FINISHED(state, i)) { + state->lens[i] -= len; + state->ldata[i].job_in_lane->len -= len; + state->ldata[i].job_in_lane->buffer += len << 2; + } + } + + return lane_idx; + +} + +static SHA1_JOB *sha1_mb_mgr_free_lane(SHA1_MB_JOB_MGR * state) +{ + int i; + SHA1_JOB *ret = NULL; + + for (i = 0; i < SHA1_MB_CE_MAX_LANES; i++) { + if (LANE_IS_FINISHED(state, i)) { + + state->unused_lanes <<= 4; + state->unused_lanes |= i; + state->num_lanes_inuse--; + ret = state->ldata[i].job_in_lane; + ret->status = STS_COMPLETED; + state->ldata[i].job_in_lane = NULL; + break; + } + } + return ret; +} + +static void sha1_mb_mgr_insert_job(SHA1_MB_JOB_MGR * state, SHA1_JOB * job) +{ + int lane_idx; + //add job into lanes + lane_idx = state->unused_lanes & 0xf; + //fatal error + assert(lane_idx < SHA1_MB_CE_MAX_LANES); + state->lens[lane_idx] = (job->len << 4) | lane_idx; + state->ldata[lane_idx].job_in_lane = job; + state->unused_lanes >>= 4; + state->num_lanes_inuse++; +} + +SHA1_JOB *sha1_mb_mgr_submit_ce(SHA1_MB_JOB_MGR * state, SHA1_JOB * job) +{ +#ifndef NDEBUG + int lane_idx; +#endif + SHA1_JOB *ret; + + //add job into lanes + sha1_mb_mgr_insert_job(state, job); + + ret = sha1_mb_mgr_free_lane(state); + if (ret != NULL) { + return ret; + } + //submit will wait all lane has data + if (state->num_lanes_inuse < SHA1_MB_CE_MAX_LANES) + return NULL; +#ifndef NDEBUG + lane_idx = sha1_mb_mgr_do_jobs(state); + assert(lane_idx != -1); +#else + sha1_mb_mgr_do_jobs(state); +#endif + + //~ i = lane_idx; + ret = sha1_mb_mgr_free_lane(state); + return ret; +} + +SHA1_JOB *sha1_mb_mgr_flush_ce(SHA1_MB_JOB_MGR * state) +{ + SHA1_JOB *ret; + ret = sha1_mb_mgr_free_lane(state); + if (ret) { + return ret; + } + + sha1_mb_mgr_do_jobs(state); + return 
sha1_mb_mgr_free_lane(state); + +} diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_multibinary.S b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_multibinary.S new file mode 100644 index 000000000..bb1929d76 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_multibinary.S @@ -0,0 +1,36 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + + +#include "aarch64_multibinary.h" + + +mbin_interface sha1_ctx_mgr_submit +mbin_interface sha1_ctx_mgr_init +mbin_interface sha1_ctx_mgr_flush diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_x1_ce.S b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_x1_ce.S new file mode 100644 index 000000000..22f736793 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_x1_ce.S @@ -0,0 +1,194 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + .arch armv8-a+crypto + .text + .align 2 + .p2align 3,,7 + +/* +Macros +*/ + +.macro declare_var_vector_reg name:req,reg:req + \name\()_q .req q\reg + \name\()_v .req v\reg + \name\()_s .req s\reg +.endm + +/** +maros for round 4-67 +*/ +.macro sha1_4_rounds inst:req,msg0:req,msg1:req,msg2:req,msg3:req,abcd:req,e0:req,tmp0:req,e1:req,tmp1:req,k:req + sha1h \e0\()_s, \abcd\()_s + \inst \abcd\()_q,\e1\()_s,\tmp1\()_v.4s + add \tmp1\()_v.4s,\msg3\()_v.4s,\k\()_v.4s + sha1su1 \msg0\()_v.4s,\msg3\()_v.4s + sha1su0 \msg1\()_v.4s,\msg2\()_v.4s,\msg3\()_v.4s +.endm + + +/* +Variable list +*/ + + declare_var_vector_reg key_0,28 + declare_var_vector_reg key_1,29 + declare_var_vector_reg key_2,30 + declare_var_vector_reg key_3,31 + + +/* +digest variables +*/ + declare_var_vector_reg abcd,0 + declare_var_vector_reg e0,1 + declare_var_vector_reg e1,2 + declare_var_vector_reg abcd_saved,3 + declare_var_vector_reg e0_saved,4 +/* +Message variables +*/ + declare_var_vector_reg msg_0,16 + declare_var_vector_reg msg_1,17 + declare_var_vector_reg msg_2,18 + declare_var_vector_reg msg_3,19 +/* +Temporay variables +*/ + declare_var_vector_reg tmp_0,5 + declare_var_vector_reg tmp_1,6 + +/* + void sha1_mb_ce_x1(SHA1_JOB * job, int len); +*/ +/* +Arguements list +*/ + job .req x0 + len .req w1 + data .req x2 + tmp .req x3 + .global sha1_mb_ce_x1 + .type sha1_mb_ce_x1, %function +sha1_mb_ce_x1: + 
ldr data, [job] + ldr abcd_q, [job, 64] + ldr e0_s, [job, 80] + adr tmp, KEY + ld1 {key_0_v.4s-key_3_v.4s},[tmp] + +start_loop: + + //load msgs + ld1 {msg_0_v.4s-msg_3_v.4s},[data] + + //adjust loop parameter + add data,data,64 + sub len, len, #1 + cmp len, 0 + //backup digest + mov abcd_saved_v.16b,abcd_v.16b + mov e0_saved_v.16b,e0_v.16b + + rev32 msg_0_v.16b,msg_0_v.16b + rev32 msg_1_v.16b,msg_1_v.16b + add tmp_0_v.4s,msg_0_v.4s,key_0_v.4s + rev32 msg_2_v.16b,msg_2_v.16b + add tmp_1_v.4s,msg_1_v.4s,key_0_v.4s + rev32 msg_3_v.16b,msg_3_v.16b + + /* rounds 0-3 */ + sha1h e1_s,abcd_s + sha1c abcd_q,e0_s,tmp_0_v.4s + add tmp_0_v.4s,msg_2_v.4s,key_0_v.4s + sha1su0 msg_0_v.4s,msg_1_v.4s,msg_2_v.4s + + sha1_4_rounds sha1c,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp_0,e1,tmp_1,key_0 /* rounds 4-7 */ + sha1_4_rounds sha1c,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp_1,e0,tmp_0,key_0 + sha1_4_rounds sha1c,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp_0,e1,tmp_1,key_1 /* rounds 12-15 */ + sha1_4_rounds sha1c,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp_1,e0,tmp_0,key_1 + sha1_4_rounds sha1p,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp_0,e1,tmp_1,key_1 /* rounds 20-23 */ + sha1_4_rounds sha1p,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp_1,e0,tmp_0,key_1 + sha1_4_rounds sha1p,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp_0,e1,tmp_1,key_1 + sha1_4_rounds sha1p,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp_1,e0,tmp_0,key_2 + sha1_4_rounds sha1p,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp_0,e1,tmp_1,key_2 /* rounds 36-39 */ + sha1_4_rounds sha1m,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp_1,e0,tmp_0,key_2 + sha1_4_rounds sha1m,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp_0,e1,tmp_1,key_2 + sha1_4_rounds sha1m,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp_1,e0,tmp_0,key_2 + sha1_4_rounds sha1m,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp_0,e1,tmp_1,key_3 /* rounds 52-55 */ + sha1_4_rounds sha1m,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp_1,e0,tmp_0,key_3 + sha1_4_rounds sha1p,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp_0,e1,tmp_1,key_3 + sha1_4_rounds sha1p,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp_1,e0,tmp_0,key_3 
+ + /* rounds 68-71 */ + sha1h e0_s,abcd_s + sha1p abcd_q,e1_s,tmp_1_v.4s + add tmp_1_v.4s,msg_3_v.4s,key_3_v.4s + sha1su1 msg_0_v.4s,msg_3_v.4s + + /* rounds 72-75 */ + sha1h e1_s,abcd_s + sha1p abcd_q,e0_s,tmp_0_v.4s + + /* rounds 76-79 */ + sha1h e0_s,abcd_s + sha1p abcd_q,e1_s,tmp_1_v.4s + + + + add abcd_v.4s,abcd_v.4s,abcd_saved_v.4s + add e0_v.2s,e0_v.2s,e0_saved_v.2s + + + bgt start_loop + str abcd_q, [job, 64] + str e0_s, [job, 80] + + ret + + .size sha1_mb_ce_x1, .-sha1_mb_ce_x1 + .section .rodata.cst16,"aM",@progbits,16 + .align 4 +KEY: + .word 0x5a827999 + .word 0x5a827999 + .word 0x5a827999 + .word 0x5a827999 + .word 0x6ed9eba1 + .word 0x6ed9eba1 + .word 0x6ed9eba1 + .word 0x6ed9eba1 + .word 0x8f1bbcdc + .word 0x8f1bbcdc + .word 0x8f1bbcdc + .word 0x8f1bbcdc + .word 0xca62c1d6 + .word 0xca62c1d6 + .word 0xca62c1d6 + .word 0xca62c1d6 diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_x2_ce.S b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_x2_ce.S new file mode 100644 index 000000000..93f653ad2 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_x2_ce.S @@ -0,0 +1,253 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + .arch armv8-a+crypto + .text + .align 2 + .p2align 3,,7 + +/* +Macros +*/ + +.macro declare_var_vector_reg name:req,reg:req + \name\()_q .req q\reg + \name\()_v .req v\reg + \name\()_s .req s\reg +.endm + +/** +maros for round 4-67 +*/ +.macro sha1_4_rounds inst:req,msg0:req,msg1:req,msg2:req,msg3:req,abcd:req,e0:req,tmp0:req,e1:req,tmp1:req,k:req + sha1h l0_\e0\()_s, l0_\abcd\()_s + sha1h l1_\e0\()_s, l1_\abcd\()_s + + \inst l0_\abcd\()_q,l0_\e1\()_s,l0_\tmp1\()_v.4s + \inst l1_\abcd\()_q,l1_\e1\()_s,l1_\tmp1\()_v.4s + + add l0_\tmp1\()_v.4s,l0_\msg3\()_v.4s,\k\()_v.4s + add l1_\tmp1\()_v.4s,l1_\msg3\()_v.4s,\k\()_v.4s + + sha1su1 l0_\msg0\()_v.4s,l0_\msg3\()_v.4s + sha1su1 l1_\msg0\()_v.4s,l1_\msg3\()_v.4s + + sha1su0 l0_\msg1\()_v.4s,l0_\msg2\()_v.4s,l0_\msg3\()_v.4s + sha1su0 l1_\msg1\()_v.4s,l1_\msg2\()_v.4s,l1_\msg3\()_v.4s +.endm + + +/* +Variable list +*/ + + declare_var_vector_reg key_0,28 + declare_var_vector_reg key_1,29 + declare_var_vector_reg key_2,30 + declare_var_vector_reg key_3,31 + + +/* +lane variables +*/ + declare_var_vector_reg l0_abcd,0 + declare_var_vector_reg l0_e0,1 + declare_var_vector_reg l0_e1,2 + 
+	/* lane 0 saved digest, scratch, and message schedule (v3-v6, v16-v19) */
+	declare_var_vector_reg	l0_abcd_saved,3
+	declare_var_vector_reg	l0_e0_saved,4
+	declare_var_vector_reg	l0_tmp_0,5
+	declare_var_vector_reg	l0_tmp_1,6
+	declare_var_vector_reg	l0_msg_0,16
+	declare_var_vector_reg	l0_msg_1,17
+	declare_var_vector_reg	l0_msg_2,18
+	declare_var_vector_reg	l0_msg_3,19
+
+	/* lane 1 equivalents; v8/v9 are callee-saved per AAPCS64, hence the
+	   d8/d9 spill in the prologue below */
+	declare_var_vector_reg	l1_abcd,7
+	declare_var_vector_reg	l1_e0,8
+	declare_var_vector_reg	l1_e1,9
+	declare_var_vector_reg	l1_abcd_saved,24
+	declare_var_vector_reg	l1_e0_saved,25
+	declare_var_vector_reg	l1_tmp_0,26
+	declare_var_vector_reg	l1_tmp_1,27
+	declare_var_vector_reg	l1_msg_0,20
+	declare_var_vector_reg	l1_msg_1,21
+	declare_var_vector_reg	l1_msg_2,22
+	declare_var_vector_reg	l1_msg_3,23
+
+/*
+	void sha1_mb_ce_x2(SHA1_JOB * job_0, SHA1_JOB * job_1,int len);
+
+	Hashes 'len' 64-byte blocks of two independent SHA1 jobs in lock-step
+	using the ARMv8 SHA1 crypto extensions; the two lanes are interleaved
+	to hide instruction latency.  Reads each job's buffer pointer from
+	offset 0 and its digest from offsets 64 (ABCD) and 80 (E), and writes
+	the updated digest back to the same offsets on exit.
+*/
+	l0_job	.req	x0
+	l1_job	.req	x1
+	len	.req	w2
+
+	l0_data	.req	x3
+	l1_data	.req	x4
+	tmp	.req	x5
+	.global	sha1_mb_ce_x2
+	.type	sha1_mb_ce_x2, %function
+sha1_mb_ce_x2:
+	//push d8,d9 to stack
+	//(low halves of v8/v9 are callee-saved; the 256-byte frame keeps sp
+	//16-byte aligned)
+	stp	d8, d9, [sp, -256]!
+
+	//NOTE(review): adr has a +/-1MB reach; assumes .rodata.cst16 (KEY)
+	//is linked within range of .text - confirm for large binaries
+	adr	tmp, KEY
+	ld1	{key_0_v.4s-key_3_v.4s},[tmp]
+	//load buffer pointers (job offset 0) and digests (ABCD at 64, E at 80)
+	ldr	l0_data, [l0_job]
+	ldr	l1_data, [l1_job]
+	ldr	l0_abcd_q, [l0_job, 64]
+	ldr	l0_e0_s, [l0_job, 80]
+	ldr	l1_abcd_q, [l1_job, 64]
+	ldr	l1_e0_s, [l1_job, 80]
+
+start_loop:
+
+	//load msgs
+	ld1	{l0_msg_0_v.4s-l0_msg_3_v.4s},[l0_data]
+	ld1	{l1_msg_0_v.4s-l1_msg_3_v.4s},[l1_data]
+
+	//adjust loop parameter
+	add	l0_data,l0_data,64
+	add	l1_data,l1_data,64
+	sub	len, len, #1
+	//set flags now; everything below is SIMD/crypto and never writes
+	//NZCV, so the bgt at the bottom still sees this comparison
+	cmp	len, 0
+	//backup digest
+	mov	l0_abcd_saved_v.16b, l0_abcd_v.16b
+	mov	l0_e0_saved_v.16b, l0_e0_v.16b
+	mov	l1_abcd_saved_v.16b, l1_abcd_v.16b
+	mov	l1_e0_saved_v.16b, l1_e0_v.16b
+
+	//byte-swap the big-endian message words and stage the first W+K values
+	rev32	l0_msg_0_v.16b, l0_msg_0_v.16b
+	rev32	l0_msg_1_v.16b, l0_msg_1_v.16b
+	add	l0_tmp_0_v.4s, l0_msg_0_v.4s, key_0_v.4s
+	rev32	l0_msg_2_v.16b, l0_msg_2_v.16b
+	add	l0_tmp_1_v.4s, l0_msg_1_v.4s, key_0_v.4s
+	rev32	l0_msg_3_v.16b, l0_msg_3_v.16b
+
+	rev32	l1_msg_0_v.16b, l1_msg_0_v.16b
+	rev32	l1_msg_1_v.16b, l1_msg_1_v.16b
+	add	l1_tmp_0_v.4s, l1_msg_0_v.4s, key_0_v.4s
+	rev32	l1_msg_2_v.16b, l1_msg_2_v.16b
+	add	l1_tmp_1_v.4s, l1_msg_1_v.4s, key_0_v.4s
+	rev32	l1_msg_3_v.16b, l1_msg_3_v.16b
+
+	/* rounds 0-3 */
+	sha1h	l0_e1_s, l0_abcd_s
+	sha1c	l0_abcd_q, l0_e0_s, l0_tmp_0_v.4s
+	add	l0_tmp_0_v.4s, l0_msg_2_v.4s, key_0_v.4s
+	sha1su0	l0_msg_0_v.4s, l0_msg_1_v.4s, l0_msg_2_v.4s
+
+	sha1h	l1_e1_s, l1_abcd_s
+	sha1c	l1_abcd_q, l1_e0_s, l1_tmp_0_v.4s
+	add	l1_tmp_0_v.4s, l1_msg_2_v.4s, key_0_v.4s
+	sha1su0	l1_msg_0_v.4s, l1_msg_1_v.4s, l1_msg_2_v.4s
+
+	//the key passed to each macro call is for the W+K block it stages
+	//ahead, hence the apparent offset from the round numbers
+	sha1_4_rounds	sha1c,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp_0,e1,tmp_1,key_0	/* rounds 4-7 */
+	sha1_4_rounds	sha1c,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp_1,e0,tmp_0,key_0	/* rounds 8-11 */
+	sha1_4_rounds	sha1c,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp_0,e1,tmp_1,key_1	/* rounds 12-15 */
+	sha1_4_rounds	sha1c,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp_1,e0,tmp_0,key_1	/* rounds 16-19 */
+	sha1_4_rounds	sha1p,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp_0,e1,tmp_1,key_1	/* rounds 20-23 */
+	sha1_4_rounds	sha1p,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp_1,e0,tmp_0,key_1	/* rounds 24-27 */
+
+	sha1_4_rounds	sha1p,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp_0,e1,tmp_1,key_1	/* rounds 28-31 */
+	sha1_4_rounds	sha1p,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp_1,e0,tmp_0,key_2	/* rounds 32-35 */
+	sha1_4_rounds	sha1p,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp_0,e1,tmp_1,key_2	/* rounds 36-39 */
+	sha1_4_rounds	sha1m,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp_1,e0,tmp_0,key_2	/* rounds 40-43 */
+	sha1_4_rounds	sha1m,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp_0,e1,tmp_1,key_2	/* rounds 44-47 */
+	sha1_4_rounds	sha1m,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp_1,e0,tmp_0,key_2	/* rounds 48-51 */
+	sha1_4_rounds	sha1m,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp_0,e1,tmp_1,key_3	/* rounds 52-55 */
+	sha1_4_rounds	sha1m,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp_1,e0,tmp_0,key_3	/* rounds 56-59 */
+	sha1_4_rounds	sha1p,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp_0,e1,tmp_1,key_3	/* rounds 60-63 */
+	sha1_4_rounds	sha1p,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp_1,e0,tmp_0,key_3	/* rounds 64-67 */
+
+	/* rounds 68-71 */
+	sha1h	l0_e0_s, l0_abcd_s
+	sha1p	l0_abcd_q, l0_e1_s, l0_tmp_1_v.4s
+	add	l0_tmp_1_v.4s, l0_msg_3_v.4s, key_3_v.4s
+	sha1su1	l0_msg_0_v.4s, l0_msg_3_v.4s
+
+	sha1h	l1_e0_s, l1_abcd_s
+	sha1p	l1_abcd_q, l1_e1_s, l1_tmp_1_v.4s
+	add	l1_tmp_1_v.4s, l1_msg_3_v.4s, key_3_v.4s
+	sha1su1	l1_msg_0_v.4s, l1_msg_3_v.4s
+
+	/* rounds 72-75 */
+	sha1h	l0_e1_s, l0_abcd_s
+	sha1p	l0_abcd_q, l0_e0_s, l0_tmp_0_v.4s
+
+	sha1h	l1_e1_s, l1_abcd_s
+	sha1p	l1_abcd_q, l1_e0_s, l1_tmp_0_v.4s
+
+	/* rounds 76-79 */
+	sha1h	l0_e0_s, l0_abcd_s
+	sha1p	l0_abcd_q, l0_e1_s, l0_tmp_1_v.4s
+
+	sha1h	l1_e0_s, l1_abcd_s
+	sha1p	l1_abcd_q, l1_e1_s, l1_tmp_1_v.4s
+
+
+	//fold the block result into the saved digest; only lane 0 of e0 is
+	//stored back, so a .2s add is sufficient for e
+	add	l0_abcd_v.4s, l0_abcd_v.4s, l0_abcd_saved_v.4s
+	add	l0_e0_v.2s, l0_e0_v.2s, l0_e0_saved_v.2s
+	add	l1_abcd_v.4s, l1_abcd_v.4s, l1_abcd_saved_v.4s
+	add	l1_e0_v.2s, l1_e0_v.2s, l1_e0_saved_v.2s
+
+
+
+	//flags are still those of the 'cmp len, 0' at the top of the loop
+	bgt	start_loop
+
+	//write the final digests back into the jobs
+	str	l0_abcd_q, [l0_job, 64]
+	str	l0_e0_s, [l0_job, 80]
+
+
+	str	l1_abcd_q, [l1_job, 64]
+	str	l1_e0_s, [l1_job, 80]
+
+	//pop d8,d9 from stack
+	ldp	d8, d9, [sp], 256
+	ret
+
+	.size	sha1_mb_ce_x2, .-sha1_mb_ce_x2
+	//SHA1 round constants K0..K3, each replicated across four 32-bit lanes
+	.section	.rodata.cst16,"aM",@progbits,16
+	.align	4
+KEY:
+	.word	0x5a827999
+	.word	0x5a827999
+	.word	0x5a827999
+	.word	0x5a827999
+	.word	0x6ed9eba1
+	.word	0x6ed9eba1
+	.word	0x6ed9eba1
+	.word	0x6ed9eba1
+	.word	0x8f1bbcdc
+	.word	0x8f1bbcdc
+	.word	0x8f1bbcdc
+	.word	0x8f1bbcdc
+	.word	0xca62c1d6
+	.word	0xca62c1d6
+	.word	0xca62c1d6
+	.word	0xca62c1d6