diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/sha1_asimd_common.S | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/sha1_asimd_common.S | 271 |
1 files changed, 271 insertions, 0 deletions
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/sha1_asimd_common.S b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/sha1_asimd_common.S new file mode 100644 index 000000000..ccc66f41a --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/sha1_asimd_common.S @@ -0,0 +1,271 @@ +/********************************************************************** + Copyright(c) 2021 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + + .arch armv8-a + +// macro F = (D ^ (B & (C ^ D))) +.macro FUNC_F0 + eor VF.16b, VC.16b, VD.16b + and VF.16b, VB.16b, VF.16b + eor VF.16b, VD.16b, VF.16b +.endm + +// F = (B ^ C ^ D) +.macro FUNC_F1 + eor VF.16b, VB.16b, VC.16b + eor VF.16b, VF.16b, VD.16b +.endm + +// F = ((B & C) | (B & D) | (C & D)) +.macro FUNC_F2 + and vT0.16b, VB.16b, VC.16b + and vT1.16b, VB.16b, VD.16b + and vT2.16b, VC.16b, VD.16b + orr VF.16b, vT0.16b, vT1.16b + orr VF.16b, VF.16b, vT2.16b +.endm + +// F = (B ^ C ^ D) +.macro FUNC_F3 + FUNC_F1 +.endm + +.altmacro +.macro load_next_word windex + .if \windex < 16 + load_x4_word \windex + .endif +.endm + +// FUNC_F0 is merged into STEP_00_15 for efficiency +.macro SHA1_STEP_00_15_F0 windex:req + rev32 WORD\windex\().16b,WORD\windex\().16b + next_word=\windex+1 + load_next_word %next_word + // e = (a leftrotate 5) + f + e + k + w[i] + ushr VT.4s, VA.4s, 32 - 5 + add VE.4s, VE.4s, VK.4s + sli VT.4s, VA.4s, 5 + eor VF.16b, VC.16b, VD.16b + add VE.4s, VE.4s, WORD\windex\().4s + and VF.16b, VB.16b, VF.16b + add VE.4s, VE.4s, VT.4s + eor VF.16b, VD.16b, VF.16b + ushr VT.4s, VB.4s, 32 - 30 + add VE.4s, VE.4s, VF.4s + sli VT.4s, VB.4s, 30 +.endm + +.macro SHA1_STEP_16_79 windex:req,func_f:req,reg_3:req,reg_8:req,reg_14:req,reg_16:req + eor vT0.16b,\reg_3\().16b,\reg_8\().16b + eor VT.16b,\reg_14\().16b,\reg_16\().16b + sha1_step_16_79_interleave0 \windex + eor vT0.16b,vT0.16b,VT.16b + sha1_step_16_79_interleave1 \windex + // e = (a leftrotate 5) + f + e + k + w[i] + ushr VT.4s, vT0.4s, 32 - 1 + add VE.4s, VE.4s, VK.4s + ushr vT1.4s, VA.4s, 32 - 5 + sli VT.4s, vT0.4s, 1 + add VE.4s, VE.4s, VT.4s + sli vT1.4s, VA.4s, 5 + mov \reg_16\().16b,VT.16b + add VE.4s, VE.4s, vT1.4s + ushr VT.4s, VB.4s, 32 - 30 + \func_f + add VE.4s, VE.4s, VF.4s + sli VT.4s, VB.4s, 30 +.endm + + VA .req v0 + VB .req v1 + VC .req v2 + VD .req v3 + VE .req v4 + VT .req v5 + VF .req v6 + VK .req v7 + WORD0 .req v8 + WORD1 .req v9 + WORD2 .req v10 + WORD3 .req v11 + WORD4 .req v12 + WORD5 .req v13 + WORD6 .req v14 + WORD7 .req v15 + WORD8 .req v16 + WORD9 .req v17 + WORD10 .req v18 + WORD11 .req v19 + WORD12 .req v20 + WORD13 .req v21 + WORD14 .req v22 + WORD15 .req v23 + vT0 .req v24 + vT1 .req v25 + vT2 .req v26 + vAA .req v27 + vBB .req v28 + vCC .req v29 + vDD .req v30 + vEE .req v31 + TT .req v0 + sha1key_adr .req x15 + +.macro SWAP_STATES + // shifted VB is held in VT after each step + .unreq TT + TT .req VE + .unreq VE + VE .req VD + .unreq VD + VD .req VC + .unreq VC + VC .req VT + .unreq VT + VT .req VB + .unreq VB + VB .req VA + .unreq VA + VA .req TT +.endm + +.altmacro +.macro SHA1_STEP_16_79_WRAPPER windex:req,func_f:req,idx3:req,idx8:req,idx14:req,idx16:req + SHA1_STEP_16_79 \windex,\func_f,WORD\idx3\(),WORD\idx8\(),WORD\idx14\(),WORD\idx16\() +.endm + +.macro exec_step windex:req + .if \windex <= 15 + SHA1_STEP_00_15_F0 windex + .else + idx14=((\windex - 14) & 15) + idx8=((\windex - 8) & 15) + idx3=((\windex - 3) & 15) + idx16=(\windex & 15) + .if \windex <= 19 + SHA1_STEP_16_79_WRAPPER \windex,FUNC_F0,%idx3,%idx8,%idx14,%idx16 + .endif + .if \windex >= 20 && \windex <= 39 + SHA1_STEP_16_79_WRAPPER \windex,FUNC_F1,%idx3,%idx8,%idx14,%idx16 + .endif + .if \windex >= 40 && \windex <= 59 + SHA1_STEP_16_79_WRAPPER \windex,FUNC_F2,%idx3,%idx8,%idx14,%idx16 + .endif + .if \windex >= 60 && \windex <= 79 + SHA1_STEP_16_79_WRAPPER \windex,FUNC_F3,%idx3,%idx8,%idx14,%idx16 + .endif + .endif + + SWAP_STATES + + .if \windex == 79 + // after 80 steps, the registers ABCDET has shifted from + // its orignal order of 012345 to 341520 + // have to swap back for both compile- and run-time correctness + mov v0.16b,v3.16b + .unreq VA + VA .req v0 + + mov vT0.16b,v2.16b + mov v2.16b,v1.16b + mov v1.16b,v4.16b + .unreq VB + VB .req v1 + .unreq VC + VC .req v2 + + mov v3.16b,v5.16b + .unreq VD + VD .req v3 + + mov v4.16b,vT0.16b + .unreq VE + VE .req v4 + + .unreq VT + VT .req v5 + .endif +.endm + +.macro exec_steps idx:req,more:vararg + exec_step \idx + .ifnb \more + exec_steps \more + .endif +.endm + +.macro sha1_single + load_x4_word 0 + + mov vAA.16B, VA.16B + mov vBB.16B, VB.16B + mov vCC.16B, VC.16B + mov vDD.16B, VD.16B + mov vEE.16B, VE.16B + + adr sha1key_adr, KEY_0 + ld1 {VK.4s}, [sha1key_adr] + exec_steps 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19 + + // 20 ~ 39 + adr sha1key_adr, KEY_1 + ld1 {VK.4s}, [sha1key_adr] + exec_steps 20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39 + + // 40 ~ 59 + adr sha1key_adr, KEY_2 + ld1 {VK.4s}, [sha1key_adr] + exec_steps 40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59 + + // 60 ~ 79 + adr sha1key_adr, KEY_3 + ld1 {VK.4s}, [sha1key_adr] + exec_steps 60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79 + + add VA.4s, vAA.4s, VA.4s + add VB.4s, vBB.4s, VB.4s + add VC.4s, vCC.4s, VC.4s + add VD.4s, vDD.4s, VD.4s + add VE.4s, vEE.4s, VE.4s +.endm + +.macro sha1_asimd_save_stack + stp d8,d9,[sp, -64]! + stp d10,d11,[sp, 16] + stp d12,d13,[sp, 32] + stp d14,d15,[sp, 48] +.endm + +.macro sha1_asimd_restore_stack + ldp d10,d11,[sp, 16] + ldp d12,d13,[sp, 32] + ldp d14,d15,[sp, 48] + ldp d8,d9,[sp],64 +.endm |