diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/crypto/isa-l/isa-l_crypto/sha256_mb/aarch64/sha256_mb_x3_ce.S | |
parent | Initial commit. (diff) | |
download | ceph-upstream/18.2.2.tar.xz ceph-upstream/18.2.2.zip |
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/crypto/isa-l/isa-l_crypto/sha256_mb/aarch64/sha256_mb_x3_ce.S')
-rw-r--r-- | src/crypto/isa-l/isa-l_crypto/sha256_mb/aarch64/sha256_mb_x3_ce.S | 342 |
1 files changed, 342 insertions, 0 deletions
diff --git a/src/crypto/isa-l/isa-l_crypto/sha256_mb/aarch64/sha256_mb_x3_ce.S b/src/crypto/isa-l/isa-l_crypto/sha256_mb/aarch64/sha256_mb_x3_ce.S new file mode 100644 index 000000000..6ed1591ba --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/sha256_mb/aarch64/sha256_mb_x3_ce.S @@ -0,0 +1,342 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + .arch armv8-a+crypto + .text + .align 2 + .p2align 3,,7 + +/* +Macros +*/ + +.macro declare_var_vector_reg name:req,reg:req + \name\()_q .req q\reg + \name\()_v .req v\reg + \name\()_s .req s\reg +.endm +/** +maros for round 48-63 +*/ +.macro sha256_4_rounds_high msg:req,tmp0:req,tmp1:req + ldr key_q , [tmp] + mov l0_tmp2_v.16b,l0_abcd_v.16b + mov l1_tmp2_v.16b,l1_abcd_v.16b + mov l2_tmp2_v.16b,l2_abcd_v.16b + add tmp,tmp,16 + add l0_\tmp1\()_v.4s,l0_\msg\()_v.4s,key_v.4s + add l1_\tmp1\()_v.4s,l1_\msg\()_v.4s,key_v.4s + add l2_\tmp1\()_v.4s,l2_\msg\()_v.4s,key_v.4s + sha256h l0_abcd_q,l0_efgh_q,l0_\tmp0\()_v.4s + sha256h l1_abcd_q,l1_efgh_q,l1_\tmp0\()_v.4s + sha256h l2_abcd_q,l2_efgh_q,l2_\tmp0\()_v.4s + sha256h2 l0_efgh_q,l0_tmp2_q,l0_\tmp0\()_v.4s + sha256h2 l1_efgh_q,l1_tmp2_q,l1_\tmp0\()_v.4s + sha256h2 l2_efgh_q,l2_tmp2_q,l2_\tmp0\()_v.4s + +.endm +/** +maros for round 0-47 +*/ +.macro sha256_4_rounds_low msg0:req,msg1:req,msg2:req,msg3:req,tmp0:req,tmp1:req + sha256su0 l0_\msg0\()_v.4s,l0_\msg1\()_v.4s + sha256su0 l1_\msg0\()_v.4s,l1_\msg1\()_v.4s + sha256su0 l2_\msg0\()_v.4s,l2_\msg1\()_v.4s + sha256_4_rounds_high \msg1,\tmp0,\tmp1 + sha256su1 l0_\msg0\()_v.4s,l0_\msg2\()_v.4s,l0_\msg3\()_v.4s + sha256su1 l1_\msg0\()_v.4s,l1_\msg2\()_v.4s,l1_\msg3\()_v.4s + sha256su1 l2_\msg0\()_v.4s,l2_\msg2\()_v.4s,l2_\msg3\()_v.4s +.endm + + +/* +Variable list +*/ + + declare_var_vector_reg key,31 + + +/* +digest variables +*/ + declare_var_vector_reg l0_abcd,0 + declare_var_vector_reg l0_efgh,1 + declare_var_vector_reg l1_abcd,2 + declare_var_vector_reg l1_efgh,3 + declare_var_vector_reg l2_abcd,4 + declare_var_vector_reg l2_efgh,5 + declare_var_vector_reg l1_abcd_saved,16 + declare_var_vector_reg l1_efgh_saved,17 + declare_var_vector_reg l0_abcd_saved,20 + declare_var_vector_reg l0_efgh_saved,21 + declare_var_vector_reg l2_abcd_saved,24 + declare_var_vector_reg l2_efgh_saved,25 +/* +Temporay variables +*/ + declare_var_vector_reg l0_tmp0,6 + declare_var_vector_reg l0_tmp1,7 + declare_var_vector_reg l0_tmp2,8 + declare_var_vector_reg l1_tmp0,9 + declare_var_vector_reg l1_tmp1,10 + declare_var_vector_reg l1_tmp2,11 + declare_var_vector_reg l2_tmp0,12 + declare_var_vector_reg l2_tmp1,13 + declare_var_vector_reg l2_tmp2,14 +/* +Message variables +*/ + declare_var_vector_reg l0_msg0,16 + declare_var_vector_reg l0_msg1,17 + declare_var_vector_reg l0_msg2,18 + declare_var_vector_reg l0_msg3,19 + declare_var_vector_reg l1_msg0,20 + declare_var_vector_reg l1_msg1,21 + declare_var_vector_reg l1_msg2,22 + declare_var_vector_reg l1_msg3,23 + declare_var_vector_reg l2_msg0,24 + declare_var_vector_reg l2_msg1,25 + declare_var_vector_reg l2_msg2,26 + declare_var_vector_reg l2_msg3,27 + + + +/* + void sha256_mb_ce_x3(SHA256_JOB *, SHA256_JOB *, SHA256_JOB *, int); +*/ +/* +Arguements list +*/ + l0_job .req x0 + l1_job .req x1 + l2_job .req x2 + len .req w3 + l0_data .req x4 + l1_data .req x5 + l2_data .req x6 + tmp .req x7 + .global sha256_mb_ce_x3 + .type sha256_mb_ce_x3, %function +sha256_mb_ce_x3: + //push d8~d15 + stp d8,d9,[sp,-192]! + stp d10,d11,[sp,16] + stp d12,d13,[sp,32] + stp d14,d15,[sp,48] + ldr l0_data, [l0_job] + ldr l0_abcd_q, [l0_job, 64] + ldr l0_efgh_q, [l0_job, 80] + ldr l1_data, [l1_job] + ldr l1_abcd_q, [l1_job, 64] + ldr l1_efgh_q, [l1_job, 80] + ldr l2_data, [l2_job] + ldr l2_abcd_q, [l2_job, 64] + ldr l2_efgh_q, [l2_job, 80] + + + +start_loop: + + //load key addr + adr tmp, KEY + //load msgs + ld1 {l0_msg0_v.4s-l0_msg3_v.4s},[l0_data] + ld1 {l1_msg0_v.4s-l1_msg3_v.4s},[l1_data] + ld1 {l2_msg0_v.4s-l2_msg3_v.4s},[l2_data] + ldr key_q,[tmp] + add tmp,tmp,16 + //adjust loop parameter + add l0_data,l0_data,64 + add l1_data,l1_data,64 + add l2_data,l2_data,64 + sub len, len, #1 + cmp len, 0 +/* + //backup digest + mov l0_abcd_saved_v.16b,l0_abcd_v.16b + mov l0_efgh_saved_v.16b,l0_efgh_v.16b + mov l1_abcd_saved_v.16b,l1_abcd_v.16b + mov l1_efgh_saved_v.16b,l1_efgh_v.16b + mov l2_abcd_saved_v.16b,l2_abcd_v.16b + mov l2_efgh_saved_v.16b,l2_efgh_v.16b +*/ + + rev32 l0_msg0_v.16b,l0_msg0_v.16b + rev32 l0_msg1_v.16b,l0_msg1_v.16b + add l0_tmp0_v.4s, l0_msg0_v.4s,key_v.4s + rev32 l0_msg2_v.16b,l0_msg2_v.16b + rev32 l0_msg3_v.16b,l0_msg3_v.16b + + rev32 l1_msg0_v.16b,l1_msg0_v.16b + rev32 l1_msg1_v.16b,l1_msg1_v.16b + add l1_tmp0_v.4s, l1_msg0_v.4s,key_v.4s + rev32 l1_msg2_v.16b,l1_msg2_v.16b + rev32 l1_msg3_v.16b,l1_msg3_v.16b + + rev32 l2_msg0_v.16b,l2_msg0_v.16b + rev32 l2_msg1_v.16b,l2_msg1_v.16b + add l2_tmp0_v.4s, l2_msg0_v.4s,key_v.4s + rev32 l2_msg2_v.16b,l2_msg2_v.16b + rev32 l2_msg3_v.16b,l2_msg3_v.16b + + + + sha256_4_rounds_low msg0,msg1,msg2,msg3,tmp0,tmp1 /* rounds 0-3 */ + sha256_4_rounds_low msg1,msg2,msg3,msg0,tmp1,tmp0 + sha256_4_rounds_low msg2,msg3,msg0,msg1,tmp0,tmp1 + sha256_4_rounds_low msg3,msg0,msg1,msg2,tmp1,tmp0 + + sha256_4_rounds_low msg0,msg1,msg2,msg3,tmp0,tmp1 /* rounds 16-19 */ + sha256_4_rounds_low msg1,msg2,msg3,msg0,tmp1,tmp0 + sha256_4_rounds_low msg2,msg3,msg0,msg1,tmp0,tmp1 + sha256_4_rounds_low msg3,msg0,msg1,msg2,tmp1,tmp0 + sha256_4_rounds_low msg0,msg1,msg2,msg3,tmp0,tmp1 /* rounds 32-35 */ + sha256_4_rounds_low msg1,msg2,msg3,msg0,tmp1,tmp0 + sha256_4_rounds_low msg2,msg3,msg0,msg1,tmp0,tmp1 + sha256_4_rounds_low msg3,msg0,msg1,msg2,tmp1,tmp0 + + + + sha256_4_rounds_high msg1,tmp0,tmp1 /* rounds 48-51 */ + + /* msg0 msg1 is free , share with digest regs */ + ldr l0_abcd_saved_q, [l0_job, 64] + ldr l1_abcd_saved_q, [l1_job, 64] + ldr l2_abcd_saved_q, [l2_job, 64] + ldr l0_efgh_saved_q, [l0_job, 80] + ldr l1_efgh_saved_q, [l1_job, 80] + ldr l2_efgh_saved_q, [l2_job, 80] + + sha256_4_rounds_high msg2,tmp1,tmp0 + sha256_4_rounds_high msg3,tmp0,tmp1 + + /* rounds 60-63 */ + mov l0_tmp2_v.16b,l0_abcd_v.16b + sha256h l0_abcd_q,l0_efgh_q,l0_tmp1_v.4s + sha256h2 l0_efgh_q,l0_tmp2_q,l0_tmp1_v.4s + + mov l1_tmp2_v.16b,l1_abcd_v.16b + sha256h l1_abcd_q,l1_efgh_q,l1_tmp1_v.4s + sha256h2 l1_efgh_q,l1_tmp2_q,l1_tmp1_v.4s + + mov l2_tmp2_v.16b,l2_abcd_v.16b + sha256h l2_abcd_q,l2_efgh_q,l2_tmp1_v.4s + sha256h2 l2_efgh_q,l2_tmp2_q,l2_tmp1_v.4s + + /* combine state */ + add l0_abcd_v.4s,l0_abcd_v.4s,l0_abcd_saved_v.4s + add l0_efgh_v.4s,l0_efgh_v.4s,l0_efgh_saved_v.4s + add l1_abcd_v.4s,l1_abcd_v.4s,l1_abcd_saved_v.4s + add l1_efgh_v.4s,l1_efgh_v.4s,l1_efgh_saved_v.4s + add l2_abcd_v.4s,l2_abcd_v.4s,l2_abcd_saved_v.4s + add l2_efgh_v.4s,l2_efgh_v.4s,l2_efgh_saved_v.4s + + str l0_abcd_q, [l0_job, 64] + str l0_efgh_q, [l0_job, 80] + str l1_abcd_q, [l1_job, 64] + str l1_efgh_q, [l1_job, 80] + str l2_abcd_q, [l2_job, 64] + str l2_efgh_q, [l2_job, 80] + + bgt start_loop + + + ldp d10,d11,[sp,16] + ldp d12,d13,[sp,32] + ldp d14,d15,[sp,48] + ldp d8, d9, [sp], 192 + ret + + .size sha256_mb_ce_x3, .-sha256_mb_ce_x3 + .section .rol0_data.cst16,"aM",@progbits,16 + .align 4 +KEY: + .word 0x428A2F98 + .word 0x71374491 + .word 0xB5C0FBCF + .word 0xE9B5DBA5 + .word 0x3956C25B + .word 0x59F111F1 + .word 0x923F82A4 + .word 0xAB1C5ED5 + .word 0xD807AA98 + .word 0x12835B01 + .word 0x243185BE + .word 0x550C7DC3 + .word 0x72BE5D74 + .word 0x80DEB1FE + .word 0x9BDC06A7 + .word 0xC19BF174 + .word 0xE49B69C1 + .word 0xEFBE4786 + .word 0x0FC19DC6 + .word 0x240CA1CC + .word 0x2DE92C6F + .word 0x4A7484AA + .word 0x5CB0A9DC + .word 0x76F988DA + .word 0x983E5152 + .word 0xA831C66D + .word 0xB00327C8 + .word 0xBF597FC7 + .word 0xC6E00BF3 + .word 0xD5A79147 + .word 0x06CA6351 + .word 0x14292967 + .word 0x27B70A85 + .word 0x2E1B2138 + .word 0x4D2C6DFC + .word 0x53380D13 + .word 0x650A7354 + .word 0x766A0ABB + .word 0x81C2C92E + .word 0x92722C85 + .word 0xA2BFE8A1 + .word 0xA81A664B + .word 0xC24B8B70 + .word 0xC76C51A3 + .word 0xD192E819 + .word 0xD6990624 + .word 0xF40E3585 + .word 0x106AA070 + .word 0x19A4C116 + .word 0x1E376C08 + .word 0x2748774C + .word 0x34B0BCB5 + .word 0x391C0CB3 + .word 0x4ED8AA4A + .word 0x5B9CCA4F + .word 0x682E6FF3 + .word 0x748F82EE + .word 0x78A5636F + .word 0x84C87814 + .word 0x8CC70208 + .word 0x90BEFFFA + .word 0xA4506CEB + .word 0xBEF9A3F7 + .word 0xC67178F2 |