diff options
Diffstat (limited to 'src/crypto/isa-l/isa-l_crypto/md5_mb/aarch64/md5_mb_asimd_x4.S')
-rw-r--r-- | src/crypto/isa-l/isa-l_crypto/md5_mb/aarch64/md5_mb_asimd_x4.S | 526 |
1 files changed, 526 insertions, 0 deletions
diff --git a/src/crypto/isa-l/isa-l_crypto/md5_mb/aarch64/md5_mb_asimd_x4.S b/src/crypto/isa-l/isa-l_crypto/md5_mb/aarch64/md5_mb_asimd_x4.S new file mode 100644 index 000000000..53979131d --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/md5_mb/aarch64/md5_mb_asimd_x4.S @@ -0,0 +1,526 @@ +/********************************************************************** + Copyright(c) 2020 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
**********************************************************************/
	.arch armv8-a

/*
 * Four-lane MD5 block function for AArch64 ASIMD (GNU as syntax).
 *
 * Each 128-bit vector register holds one 32-bit word from each of four
 * independent MD5 streams, so every vector instruction advances all
 * four streams at once.
 */

/*
Macros
*/

/* Bind q_<name>, v_<name> and s_<name> to the views of vector register <reg>. */
.macro	declare_var_vector_reg name:req,reg:req
	q_\name	.req	q\reg
	v_\name	.req	v\reg
	s_\name	.req	s\reg
.endm

/*
 * Common tail of every round.
 * In : v_tmp1 = round function value computed by the caller macro.
 * Out: a = b + rotl32(a + tmp1 + k + w, r)       (per 32-bit element)
 * Clobbers v_tmp0 and v_tmp1.
 */
.macro	add_key_rol a:req,b:req,k:req,w:req,r:req
	add	v_tmp0.4s,v_\k\().4s,v_\w\().4s		// tmp0 = k + w
	add	v_tmp1.4s,v_tmp1.4s,v_\a\().4s		// tmp1 = f + a
	add	v_tmp1.4s,v_tmp1.4s,v_tmp0.4s		// tmp1 = f + a + k + w
	shl	v_tmp0.4s,v_tmp1.4s,\r			// rotate left by r is built
	ushr	v_tmp1.4s,v_tmp1.4s,32-\r		// from two shifts and an orr
	orr	v_tmp0.16b,v_tmp1.16b,v_tmp0.16b

	add	v_\a\().4s,v_\b\().4s,v_tmp0.4s
.endm

/*
 * Round macros. Each one computes its boolean function into v_tmp1,
 * loads the NEXT round's key vector into q_<k1> (post-incrementing
 * key_adr by 16) while the current key <k> is consumed, and then runs
 * add_key_rol. Callers alternate k/k1 between consecutive rounds.
 */

/* f = (b & c) | (~b & d), via bsl with b as the select mask. */
.macro	round_0_15 a:req,b:req,c:req,d:req,k:req,k1:req,w:req,r:req
	mov	v_tmp1.16b, v_\b\().16b
	bsl	v_tmp1.16b, v_\c\().16b, v_\d\().16b
	ldr	q_\k1,[key_adr],16
	add_key_rol \a,\b,\k,\w,\r
.endm

/* f = (d & b) | (~d & c), via bsl with d as the select mask. */
.macro	round_16_31 a:req,b:req,c:req,d:req,k:req,k1:req,w:req,r:req
	mov	v_tmp1.16b, v_\d\().16b
	bsl	v_tmp1.16b, v_\b\().16b, v_\c\().16b
	ldr	q_\k1,[key_adr],16
	add_key_rol \a,\b,\k,\w,\r
.endm

/* f = b ^ c ^ d. */
.macro	round_32_47 a:req,b:req,c:req,d:req,k:req,k1:req,w:req,r:req
	eor	v_tmp1.16b,v_\b\().16b,v_\c\().16b
	eor	v_tmp1.16b,v_tmp1.16b,v_\d\().16b
	ldr	q_\k1,[key_adr],16
	add_key_rol \a,\b,\k,\w,\r
.endm

/*
 * f = c ^ (b | ~d).
 * k1 is optional: the very last round passes it blank so that no key
 * is loaded from beyond the end of the constant table.
 */
.macro	round_48_63 a:req,b:req,c:req,d:req,k:req,k1,w:req,r:req
	orn	v_tmp1.16b,v_\b\().16b,v_\d\().16b
	eor	v_tmp1.16b,v_tmp1.16b,v_\c\().16b
	.ifnb	\k1
	ldr	q_\k1,[key_adr],16
	.endif
	add_key_rol \a,\b,\k,\w,\r
.endm

/* Emit one round constant replicated into all four 32-bit lanes. */
.macro	md5_key_x4 val:req
	.word	\val,\val,\val,\val
.endm

/*
	variables
*/
	/* scratch and round-key registers (caller-saved v0-v3) */
	declare_var_vector_reg	tmp0, 0
	declare_var_vector_reg	tmp1, 1
	declare_var_vector_reg	k,    2
	declare_var_vector_reg	k1,   3
	/* working state (caller-saved v4-v7) */
	declare_var_vector_reg	a,    4
	declare_var_vector_reg	b,    5
	declare_var_vector_reg	c,    6
	declare_var_vector_reg	d,    7
	/* state at block entry; v8-v11 are callee-saved (low 64 bits) */
	declare_var_vector_reg	a1,   8
	declare_var_vector_reg	b1,   9
	declare_var_vector_reg	c1,   10
	declare_var_vector_reg	d1,   11

	/* sixteen message words of the current block, one lane per job */
	declare_var_vector_reg	w0,   16
	declare_var_vector_reg	w1,   17
	declare_var_vector_reg	w2,   18
	declare_var_vector_reg	w3,   19
	declare_var_vector_reg	w4,   20
	declare_var_vector_reg	w5,   21
	declare_var_vector_reg	w6,   22
	declare_var_vector_reg	w7,   23
	declare_var_vector_reg	w8,   24
	declare_var_vector_reg	w9,   25
	declare_var_vector_reg	w10,  26
	declare_var_vector_reg	w11,  27
	declare_var_vector_reg	w12,  28
	declare_var_vector_reg	w13,  29
	declare_var_vector_reg	w14,  30
	declare_var_vector_reg	w15,  31

	len	.req	w4	/* 5th argument (int) */
	len_x	.req	x4	/* 64-bit view of len; valid only after sxtw */
	lane0	.req	x5	/* running input pointer of each job */
	lane1	.req	x6
	lane2	.req	x7
	lane3	.req	x9
	end	.req	x4	/* lane0 value at which the loop stops (reuses x4) */
	job0	.req	x0
	job1	.req	x1
	job2	.req	x2
	job3	.req	x3
	key_adr	.req	x10	/* cursor into the round-constant table */

/*
	void md5_mb_asimd_x4(MD5_JOB * job0, MD5_JOB * job1,
		MD5_JOB * job2, MD5_JOB * job3, int len)

	ABI:  AAPCS64
	In:   x0-x3 = job pointers. As used here, the first 8 bytes of a job
	              hold the input data pointer and the four digest words
	              live at byte offset 64.
	      w4    = len, number of 64-byte blocks consumed from EACH job's
	              input. Nothing is processed or stored when len <= 0.
	Out:  the four digest words of every job are updated in place.
	Clobbers: x0-x7, x9, x10, v0-v7, v16-v31, flags.
	          d8-d11 are saved and restored in a 48-byte stack frame.
*/
	.global md5_mb_asimd_x4
	.type md5_mb_asimd_x4, %function
md5_mb_asimd_x4:
	stp	x29,x30,[sp,-48]!
	ldr	lane0,[job0],64		// fetch data pointer, step job to its digest
	stp	d8,d9,[sp,16]
	ldr	lane1,[job1],64
	stp	d10,d11,[sp,32]
	ldr	lane2,[job2],64
	cmp	len,0
	ldr	lane3,[job3],64
	ble	.Lexit

	//load digests: element i of v_a..v_d <- digest words of job i
	ld4	{v_a.s-v_d.s}[0],[job0]
	// len arrives as a 32-bit int: AAPCS64 leaves bits 63:32 of x4
	// unspecified, so widen it before using the 64-bit view.
	sxtw	len_x,len
	add	end,lane0,len_x,lsl 6	// end = lane0 + len * 64
	ld4	{v_a.s-v_d.s}[1],[job1]
	ld4	{v_a.s-v_d.s}[2],[job2]
	ld4	{v_a.s-v_d.s}[3],[job3]

	/* One iteration consumes one 64-byte block from each of the four lanes. */
.Lloop_start:
	ld1	{v_w0.s}[0],[lane0],4
	mov	v_a1.16b,v_a.16b	// keep the entry state for the final add
	ld1	{v_w0.s}[1],[lane1],4
	mov	v_b1.16b,v_b.16b
	ld1	{v_w0.s}[2],[lane2],4
	mov	v_c1.16b,v_c.16b
	ld1	{v_w0.s}[3],[lane3],4
	mov	v_d1.16b,v_d.16b

	ld3	{v_w1.s-v_w3.s}[0],[lane0],12
	adrp	key_adr,.Lkey_consts	// rewind the key cursor every block
	ld3	{v_w1.s-v_w3.s}[1],[lane1],12
	add	key_adr,key_adr,#:lo12:.Lkey_consts
	ld3	{v_w1.s-v_w3.s}[2],[lane2],12
	ldr	q_k,[key_adr],16	// key for round 0
	ld3	{v_w1.s-v_w3.s}[3],[lane3],12

	/* Remaining message loads are interleaved with the first rounds. */
	ld4	{v_w4.s-v_w7.s}[0], [lane0],16
	round_0_15 a,b,c,d,k,k1,w0,7
	ld4	{v_w4.s-v_w7.s}[1], [lane1],16
	round_0_15 d,a,b,c,k1,k,w1,12
	ld4	{v_w4.s-v_w7.s}[2], [lane2],16
	round_0_15 c,d,a,b,k,k1,w2,17
	ld4	{v_w4.s-v_w7.s}[3], [lane3],16
	round_0_15 b,c,d,a,k1,k,w3,22
	ld4	{v_w8.s-v_w11.s}[0],[lane0],16
	round_0_15 a,b,c,d,k,k1,w4,7
	ld4	{v_w8.s-v_w11.s}[1],[lane1],16
	round_0_15 d,a,b,c,k1,k,w5,12
	ld4	{v_w8.s-v_w11.s}[2],[lane2],16
	round_0_15 c,d,a,b,k,k1,w6,17
	ld4	{v_w8.s-v_w11.s}[3],[lane3],16
	round_0_15 b,c,d,a,k1,k,w7,22
	ld4	{v_w12.s-v_w15.s}[0],[lane0],16
	round_0_15 a,b,c,d,k,k1,w8,7
	ld4	{v_w12.s-v_w15.s}[1],[lane1],16
	round_0_15 d,a,b,c,k1,k,w9,12
	ld4	{v_w12.s-v_w15.s}[2],[lane2],16
	round_0_15 c,d,a,b,k,k1,w10,17
	ld4	{v_w12.s-v_w15.s}[3],[lane3],16
	round_0_15 b,c,d,a,k1,k,w11,22
	round_0_15 a,b,c,d,k,k1,w12,7
	round_0_15 d,a,b,c,k1,k,w13,12
	round_0_15 c,d,a,b,k,k1,w14,17
	round_0_15 b,c,d,a,k1,k,w15,22

	round_16_31 a,b,c,d,k,k1,w1,5
	round_16_31 d,a,b,c,k1,k,w6,9
	round_16_31 c,d,a,b,k,k1,w11,14
	round_16_31 b,c,d,a,k1,k,w0,20
	round_16_31 a,b,c,d,k,k1,w5,5
	round_16_31 d,a,b,c,k1,k,w10,9
	round_16_31 c,d,a,b,k,k1,w15,14
	round_16_31 b,c,d,a,k1,k,w4,20
	round_16_31 a,b,c,d,k,k1,w9,5
	round_16_31 d,a,b,c,k1,k,w14,9
	round_16_31 c,d,a,b,k,k1,w3,14
	round_16_31 b,c,d,a,k1,k,w8,20
	round_16_31 a,b,c,d,k,k1,w13,5
	round_16_31 d,a,b,c,k1,k,w2,9
	round_16_31 c,d,a,b,k,k1,w7,14
	round_16_31 b,c,d,a,k1,k,w12,20

	round_32_47 a,b,c,d,k,k1,w5,4
	round_32_47 d,a,b,c,k1,k,w8,11
	round_32_47 c,d,a,b,k,k1,w11,16
	round_32_47 b,c,d,a,k1,k,w14,23
	round_32_47 a,b,c,d,k,k1,w1,4
	round_32_47 d,a,b,c,k1,k,w4,11
	round_32_47 c,d,a,b,k,k1,w7,16
	round_32_47 b,c,d,a,k1,k,w10,23
	round_32_47 a,b,c,d,k,k1,w13,4
	round_32_47 d,a,b,c,k1,k,w0,11
	round_32_47 c,d,a,b,k,k1,w3,16
	round_32_47 b,c,d,a,k1,k,w6,23
	round_32_47 a,b,c,d,k,k1,w9,4
	round_32_47 d,a,b,c,k1,k,w12,11
	round_32_47 c,d,a,b,k,k1,w15,16
	round_32_47 b,c,d,a,k1,k,w2,23

	round_48_63 a,b,c,d,k,k1,w0,6
	round_48_63 d,a,b,c,k1,k,w7,10
	round_48_63 c,d,a,b,k,k1,w14,15
	round_48_63 b,c,d,a,k1,k,w5,21
	round_48_63 a,b,c,d,k,k1,w12,6
	round_48_63 d,a,b,c,k1,k,w3,10
	round_48_63 c,d,a,b,k,k1,w10,15
	round_48_63 b,c,d,a,k1,k,w1,21
	round_48_63 a,b,c,d,k,k1,w8,6
	round_48_63 d,a,b,c,k1,k,w15,10
	round_48_63 c,d,a,b,k,k1,w6,15
	round_48_63 b,c,d,a,k1,k,w13,21
	round_48_63 a,b,c,d,k,k1,w4,6
	round_48_63 d,a,b,c,k1,k,w11,10
	round_48_63 c,d,a,b,k,k1,w2,15
	round_48_63 b,c,d,a,k1, ,w9,21	// last round: blank k1, no further key load

	cmp	lane0,end		// all lanes advance together; lane0 decides
	add	v_a.4s,v_a1.4s,v_a.4s	// fold the block result into the running digest
	add	v_b.4s,v_b1.4s,v_b.4s
	add	v_c.4s,v_c1.4s,v_c.4s
	add	v_d.4s,v_d1.4s,v_d.4s
	bne	.Lloop_start

	/* write the updated digests back, one lane per job */
	st4	{v_a.s-v_d.s}[0],[job0]
	st4	{v_a.s-v_d.s}[1],[job1]
	st4	{v_a.s-v_d.s}[2],[job2]
	st4	{v_a.s-v_d.s}[3],[job3]
.Lexit:
	ldp	d8,d9,[sp,16]
	ldp	d10,d11,[sp,32]
	ldp	x29,x30,[sp],48
	ret

	/*
	 * Round constants in the order the rounds consume them, each one
	 * replicated four times so a single 16-byte load yields a vector
	 * with the constant in every lane. Aligned to 16 bytes so those
	 * loads never straddle an alignment boundary.
	 */
	.p2align 4
.Lkey_consts:
	/* rounds 0-15 */
	md5_key_x4 0xd76aa478
	md5_key_x4 0xe8c7b756
	md5_key_x4 0x242070db
	md5_key_x4 0xc1bdceee
	md5_key_x4 0xf57c0faf
	md5_key_x4 0x4787c62a
	md5_key_x4 0xa8304613
	md5_key_x4 0xfd469501
	md5_key_x4 0x698098d8
	md5_key_x4 0x8b44f7af
	md5_key_x4 0xffff5bb1
	md5_key_x4 0x895cd7be
	md5_key_x4 0x6b901122
	md5_key_x4 0xfd987193
	md5_key_x4 0xa679438e
	md5_key_x4 0x49b40821
	/* rounds 16-31 */
	md5_key_x4 0xf61e2562
	md5_key_x4 0xc040b340
	md5_key_x4 0x265e5a51
	md5_key_x4 0xe9b6c7aa
	md5_key_x4 0xd62f105d
	md5_key_x4 0x02441453
	md5_key_x4 0xd8a1e681
	md5_key_x4 0xe7d3fbc8
	md5_key_x4 0x21e1cde6
	md5_key_x4 0xc33707d6
	md5_key_x4 0xf4d50d87
	md5_key_x4 0x455a14ed
	md5_key_x4 0xa9e3e905
	md5_key_x4 0xfcefa3f8
	md5_key_x4 0x676f02d9
	md5_key_x4 0x8d2a4c8a
	/* rounds 32-47 */
	md5_key_x4 0xfffa3942
	md5_key_x4 0x8771f681
	md5_key_x4 0x6d9d6122
	md5_key_x4 0xfde5380c
	md5_key_x4 0xa4beea44
	md5_key_x4 0x4bdecfa9
	md5_key_x4 0xf6bb4b60
	md5_key_x4 0xbebfbc70
	md5_key_x4 0x289b7ec6
	md5_key_x4 0xeaa127fa
	md5_key_x4 0xd4ef3085
	md5_key_x4 0x04881d05
	md5_key_x4 0xd9d4d039
	md5_key_x4 0xe6db99e5
	md5_key_x4 0x1fa27cf8
	md5_key_x4 0xc4ac5665
	/* rounds 48-63 */
	md5_key_x4 0xf4292244
	md5_key_x4 0x432aff97
	md5_key_x4 0xab9423a7
	md5_key_x4 0xfc93a039
	md5_key_x4 0x655b59c3
	md5_key_x4 0x8f0ccc92
	md5_key_x4 0xffeff47d
	md5_key_x4 0x85845dd1
	md5_key_x4 0x6fa87e4f
	md5_key_x4 0xfe2ce6e0
	md5_key_x4 0xa3014314
	md5_key_x4 0x4e0811a1
	md5_key_x4 0xf7537e82
	md5_key_x4 0xbd3af235
	md5_key_x4 0x2ad7d2bb
	md5_key_x4 0xeb86d391
	.size md5_mb_asimd_x4, .-md5_mb_asimd_x4