summaryrefslogtreecommitdiffstats
path: root/src/crypto/isa-l/isa-l_crypto/md5_mb/aarch64/md5_mb_asimd_x4.S
diff options
context:
space:
mode:
Diffstat (limited to 'src/crypto/isa-l/isa-l_crypto/md5_mb/aarch64/md5_mb_asimd_x4.S')
-rw-r--r--src/crypto/isa-l/isa-l_crypto/md5_mb/aarch64/md5_mb_asimd_x4.S526
1 files changed, 526 insertions, 0 deletions
diff --git a/src/crypto/isa-l/isa-l_crypto/md5_mb/aarch64/md5_mb_asimd_x4.S b/src/crypto/isa-l/isa-l_crypto/md5_mb/aarch64/md5_mb_asimd_x4.S
new file mode 100644
index 000000000..53979131d
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/md5_mb/aarch64/md5_mb_asimd_x4.S
@@ -0,0 +1,526 @@
+/**********************************************************************
+ Copyright(c) 2020 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+ .arch armv8-a
+
+/*
+Macros
+*/
+
+.macro declare_var_vector_reg name:req,reg:req // give SIMD register number reg three aliases built from name
+ q_\name .req q\reg // q_NAME: 128-bit view, the form used by the ldr loads from the constant table
+ v_\name .req v\reg // v_NAME: vector view, the form used with arrangement suffixes such as .4s and .16b
+ s_\name .req s\reg // s_NAME: 32-bit scalar view (not referenced by the code shown here)
+.endm
+
+.macro add_key_rol a:req,b:req,k:req,w:req,r:req // a = b + rol(a + tmp1 + k + w, r) per 32-bit lane; v_tmp1 must hold the round function on entry; clobbers v_tmp0 and v_tmp1
+ add v_tmp0.4s,v_\k\().4s,v_\w\().4s // tmp0 = round constant + message word
+ add v_tmp1.4s,v_tmp1.4s,v_\a\().4s // tmp1 = round function value + a
+ add v_tmp1.4s,v_tmp1.4s,v_tmp0.4s // tmp1 = a + f + k + w
+ shl v_tmp0.4s,v_tmp1.4s,\r // rotate left by r is built from shl, ushr and orr
+ ushr v_tmp1.4s,v_tmp1.4s,32-\r // the bits shifted out by shl, moved down to the bottom
+ orr v_tmp0.16b,v_tmp1.16b,v_tmp0.16b // tmp0 = rol(a + f + k + w, r)
+
+ add v_\a\().4s,v_\b\().4s,v_tmp0.4s // new a = b + rotated sum
+.endm
+.macro round_0_15 a:req,b:req,c:req,d:req,k:req,k1:req,w:req,r:req // one step using the select function (b AND c) OR (NOT b AND d)
+ mov v_tmp1.16b, v_\b\().16b // tmp1 = b, which bsl then uses as the bit-select mask
+ bsl v_tmp1.16b, v_\c\().16b, v_\d\().16b // tmp1 = (b AND c) OR (NOT b AND d)
+ ldr q_\k1,[key_adr],16 // fetch the constant for the following step into k1; key_adr advances by 16 bytes
+ add_key_rol \a,\b,\k,\w,\r // a = b + rol(a + tmp1 + k + w, r)
+.endm
+
+.macro round_16_31 a:req,b:req,c:req,d:req,k:req,k1:req,w:req,r:req // one step using the select function (d AND b) OR (NOT d AND c)
+ mov v_tmp1.16b, v_\d\().16b // tmp1 = d, which bsl then uses as the bit-select mask
+ bsl v_tmp1.16b, v_\b\().16b, v_\c\().16b // tmp1 = (d AND b) OR (NOT d AND c)
+ ldr q_\k1,[key_adr],16 // fetch the constant for the following step into k1; key_adr advances by 16 bytes
+ add_key_rol \a,\b,\k,\w,\r // a = b + rol(a + tmp1 + k + w, r)
+.endm
+
+.macro round_32_47 a:req,b:req,c:req,d:req,k:req,k1:req,w:req,r:req // one step using the parity function b XOR c XOR d
+ eor v_tmp1.16b,v_\b\().16b,v_\c\().16b // tmp1 = b XOR c
+ eor v_tmp1.16b,v_tmp1.16b,v_\d\().16b // tmp1 = b XOR c XOR d
+ ldr q_\k1,[key_adr],16 // fetch the constant for the following step into k1; key_adr advances by 16 bytes
+ add_key_rol \a,\b,\k,\w,\r // a = b + rol(a + tmp1 + k + w, r)
+.endm
+
+.macro round_48_63 a:req,b:req,c:req,d:req,k:req,k1,w:req,r:req // one step using c XOR (b OR NOT d); k1 is optional
+ orn v_tmp1.16b,v_\b\().16b,v_\d\().16b // tmp1 = b OR NOT d
+ eor v_tmp1.16b,v_tmp1.16b,v_\c\().16b // tmp1 = c XOR (b OR NOT d)
+ .ifnb \k1 // a blank k1 skips the fetch, so the final step does not read past the last table entry
+ ldr q_\k1,[key_adr],16 // fetch the constant for the following step into k1; key_adr advances by 16 bytes
+ .endif
+ add_key_rol \a,\b,\k,\w,\r // a = b + rol(a + tmp1 + k + w, r)
+.endm
+/*
+ variables
+*/
+ declare_var_vector_reg tmp0, 0 // scratch, overwritten by every add_key_rol
+ declare_var_vector_reg tmp1, 1 // scratch, carries the round function value into add_key_rol
+ declare_var_vector_reg k, 2 // round constant, alternates with k1 from one step to the next
+ declare_var_vector_reg k1, 3 // round constant fetched one step ahead
+ declare_var_vector_reg a, 4 // working state a, one job per 32-bit lane
+ declare_var_vector_reg b, 5 // working state b
+ declare_var_vector_reg c, 6 // working state c
+ declare_var_vector_reg d, 7 // working state d
+ declare_var_vector_reg a1, 8 // copy of a taken at the top of each block; v8-v11 are why d8-d11 get saved
+ declare_var_vector_reg b1, 9 // copy of b taken at the top of each block
+ declare_var_vector_reg c1, 10 // copy of c taken at the top of each block
+ declare_var_vector_reg d1, 11 // copy of d taken at the top of each block
+
+ declare_var_vector_reg w0, 16 // w0-w15: the sixteen 32-bit message words of the current block, in v16-v31
+ declare_var_vector_reg w1, 17
+ declare_var_vector_reg w2, 18
+ declare_var_vector_reg w3, 19
+ declare_var_vector_reg w4, 20
+ declare_var_vector_reg w5, 21
+ declare_var_vector_reg w6, 22
+ declare_var_vector_reg w7, 23
+ declare_var_vector_reg w8, 24
+ declare_var_vector_reg w9, 25
+ declare_var_vector_reg w10, 26
+ declare_var_vector_reg w11, 27
+ declare_var_vector_reg w12, 28
+ declare_var_vector_reg w13, 29
+ declare_var_vector_reg w14, 30
+ declare_var_vector_reg w15, 31
+
+ len .req w4 // fifth argument (int len), 32-bit view
+ len_x .req x4 // 64-bit view of the same register as len
+ lane0 .req x5 // read pointer into the input data of job0
+ lane1 .req x6 // read pointer into the input data of job1
+ lane2 .req x7 // read pointer into the input data of job2
+ lane3 .req x9 // read pointer into the input data of job3 (x8 is not used)
+ end .req x4 // value of lane0 at which the loop stops; shares x4 with len and len_x
+ job0 .req x0 // first argument
+ job1 .req x1 // second argument
+ job2 .req x2 // third argument
+ job3 .req x3 // fourth argument
+ key_adr .req x10 // cursor into the round constant table
+
+/*
+ void md5_mb_asimd_x4(MD5_JOB * job0, MD5_JOB * job1,
+ MD5_JOB * job2, MD5_JOB * job3, int len)
+
+ Runs the 64-step MD5 block function over four independent jobs at once,
+ one job per 32-bit lane of each vector register.
+
+ In: x0-x3 = job0-job3. The first 8 bytes of each job are loaded as the
+ address of its input data. The four 32-bit digest words of each
+ job are read from, and written back to, byte offset 64 of the job.
+ w4 = len, the number of 64-byte blocks consumed from every lane.
+ Only the low 32 bits of x4 are read. When len <= 0 nothing is
+ hashed and no digest is stored.
+ Out: no return value; the four digests are updated in place.
+ Clobbers: x0-x7, x9, x10, v0-v7, v16-v31 and the condition flags.
+ d8-d11 are saved and restored in the 48-byte stack frame.
+*/
+ .global md5_mb_asimd_x4
+ .type md5_mb_asimd_x4, %function
+md5_mb_asimd_x4:
+ // Prologue, interleaved with fetching the four data pointers. Every
+ // post-indexed load also advances jobN by 64 so that it addresses the
+ // digest words for the ld4/st4 accesses below.
+ stp x29,x30,[sp,-48]!
+ ldr lane0,[job0],64
+ stp d8,d9,[sp,16]
+ ldr lane1,[job1],64
+ stp d10,d11,[sp,32]
+ ldr lane2,[job2],64
+ cmp len,0
+ ldr lane3,[job3],64
+ ble .exit
+
+ // len is a C int, so only w4 is defined on entry and bits 63:32 of x4
+ // are unspecified under AAPCS64. Build the byte count (len * 64) from
+ // the low 32 bits only. len > 0 at this point, so zero-extension is exact.
+ ubfiz len_x,len_x,6,32
+
+ //load digests
+ // Word i of the digest of job n goes to lane n of a, b, c, d (i = 0..3).
+ ld4 {v_a.s-v_d.s}[0],[job0]
+ add end,lane0,len_x // end = lane0 + len * 64; x4 stops being len here
+ ld4 {v_a.s-v_d.s}[1],[job1]
+ ld4 {v_a.s-v_d.s}[2],[job2]
+ ld4 {v_a.s-v_d.s}[3],[job3]
+.loop_start:
+ // One iteration consumes one 64-byte block from each of the four lanes.
+ // Message word i of job n is placed in lane n of v_wi. The loads are
+ // interleaved with the state copies, the table address setup and the
+ // first twelve steps.
+ ld1 {v_w0.s}[0],[lane0],4
+ mov v_a1.16b,v_a.16b // a1..d1 keep the state from before this block
+ ld1 {v_w0.s}[1],[lane1],4
+ mov v_b1.16b,v_b.16b
+ ld1 {v_w0.s}[2],[lane2],4
+ mov v_c1.16b,v_c.16b
+ ld1 {v_w0.s}[3],[lane3],4
+ mov v_d1.16b,v_d.16b
+
+ ld3 {v_w1.s-v_w3.s}[0],[lane0],12
+ adrp key_adr,.key_consts // rewind the constant cursor for every block
+ ld3 {v_w1.s-v_w3.s}[1],[lane1],12
+ add key_adr,key_adr,#:lo12:.key_consts
+ ld3 {v_w1.s-v_w3.s}[2],[lane2],12
+ ldr q_k,[key_adr],16 // constant for step 0; later ones are fetched inside the round macros
+ ld3 {v_w1.s-v_w3.s}[3],[lane3],12
+
+
+ ld4 {v_w4.s-v_w7.s}[0], [lane0],16
+
+ // Steps 0-15. The k and k1 arguments swap on every step because each
+ // macro consumes one of them and fetches the next constant into the other.
+ round_0_15 a,b,c,d,k,k1,w0,7
+
+ ld4 {v_w4.s-v_w7.s}[1], [lane1],16
+ round_0_15 d,a,b,c,k1,k,w1,12
+ ld4 {v_w4.s-v_w7.s}[2], [lane2],16
+ round_0_15 c,d,a,b,k,k1,w2,17
+ ld4 {v_w4.s-v_w7.s}[3], [lane3],16
+ round_0_15 b,c,d,a,k1,k,w3,22
+ ld4 {v_w8.s-v_w11.s}[0],[lane0],16
+ round_0_15 a,b,c,d,k,k1,w4,7
+ ld4 {v_w8.s-v_w11.s}[1],[lane1],16
+ round_0_15 d,a,b,c,k1,k,w5,12
+ ld4 {v_w8.s-v_w11.s}[2],[lane2],16
+ round_0_15 c,d,a,b,k,k1,w6,17
+ ld4 {v_w8.s-v_w11.s}[3],[lane3],16
+ round_0_15 b,c,d,a,k1,k,w7,22
+ ld4 {v_w12.s-v_w15.s}[0],[lane0],16
+ round_0_15 a,b,c,d,k,k1,w8,7
+ ld4 {v_w12.s-v_w15.s}[1],[lane1],16
+ round_0_15 d,a,b,c,k1,k,w9,12
+ ld4 {v_w12.s-v_w15.s}[2],[lane2],16
+ round_0_15 c,d,a,b,k,k1,w10,17
+ ld4 {v_w12.s-v_w15.s}[3],[lane3],16
+ round_0_15 b,c,d,a,k1,k,w11,22
+ round_0_15 a,b,c,d,k,k1,w12,7
+ round_0_15 d,a,b,c,k1,k,w13,12
+ round_0_15 c,d,a,b,k,k1,w14,17
+ round_0_15 b,c,d,a,k1,k,w15,22
+
+ // Steps 16-31
+ round_16_31 a,b,c,d,k,k1,w1,5
+ round_16_31 d,a,b,c,k1,k,w6,9
+ round_16_31 c,d,a,b,k,k1,w11,14
+ round_16_31 b,c,d,a,k1,k,w0,20
+ round_16_31 a,b,c,d,k,k1,w5,5
+ round_16_31 d,a,b,c,k1,k,w10,9
+ round_16_31 c,d,a,b,k,k1,w15,14
+ round_16_31 b,c,d,a,k1,k,w4,20
+ round_16_31 a,b,c,d,k,k1,w9,5
+ round_16_31 d,a,b,c,k1,k,w14,9
+ round_16_31 c,d,a,b,k,k1,w3,14
+ round_16_31 b,c,d,a,k1,k,w8,20
+ round_16_31 a,b,c,d,k,k1,w13,5
+ round_16_31 d,a,b,c,k1,k,w2,9
+ round_16_31 c,d,a,b,k,k1,w7,14
+ round_16_31 b,c,d,a,k1,k,w12,20
+
+ // Steps 32-47
+ round_32_47 a,b,c,d,k,k1,w5,4
+ round_32_47 d,a,b,c,k1,k,w8,11
+ round_32_47 c,d,a,b,k,k1,w11,16
+ round_32_47 b,c,d,a,k1,k,w14,23
+ round_32_47 a,b,c,d,k,k1,w1,4
+ round_32_47 d,a,b,c,k1,k,w4,11
+ round_32_47 c,d,a,b,k,k1,w7,16
+ round_32_47 b,c,d,a,k1,k,w10,23
+ round_32_47 a,b,c,d,k,k1,w13,4
+ round_32_47 d,a,b,c,k1,k,w0,11
+ round_32_47 c,d,a,b,k,k1,w3,16
+ round_32_47 b,c,d,a,k1,k,w6,23
+ round_32_47 a,b,c,d,k,k1,w9,4
+ round_32_47 d,a,b,c,k1,k,w12,11
+ round_32_47 c,d,a,b,k,k1,w15,16
+ round_32_47 b,c,d,a,k1,k,w2,23
+
+ // Steps 48-63. The last invocation leaves the k1 argument blank so that
+ // no constant is fetched beyond the 64th table entry.
+ round_48_63 a,b,c,d,k,k1,w0,6
+ round_48_63 d,a,b,c,k1,k,w7,10
+ round_48_63 c,d,a,b,k,k1,w14,15
+ round_48_63 b,c,d,a,k1,k,w5,21
+ round_48_63 a,b,c,d,k,k1,w12,6
+ round_48_63 d,a,b,c,k1,k,w3,10
+ round_48_63 c,d,a,b,k,k1,w10,15
+ round_48_63 b,c,d,a,k1,k,w1,21
+ round_48_63 a,b,c,d,k,k1,w8,6
+ round_48_63 d,a,b,c,k1,k,w15,10
+ round_48_63 c,d,a,b,k,k1,w6,15
+ round_48_63 b,c,d,a,k1,k,w13,21
+ round_48_63 a,b,c,d,k,k1,w4,6
+ round_48_63 d,a,b,c,k1,k,w11,10
+ round_48_63 c,d,a,b,k,k1,w2,15
+ round_48_63 b,c,d,a,k1, ,w9,21
+
+
+
+
+ // Add the state saved at the top of the block back in. The vector adds
+ // do not touch the condition flags, so the compare can be issued first.
+ cmp lane0,end
+ add v_a.4s,v_a1.4s,v_a.4s
+ add v_b.4s,v_b1.4s,v_b.4s
+ add v_c.4s,v_c1.4s,v_c.4s
+ add v_d.4s,v_d1.4s,v_d.4s
+ bne .loop_start
+
+ // Write the four updated digests back to the jobs.
+ st4 {v_a.s-v_d.s}[0],[job0]
+ st4 {v_a.s-v_d.s}[1],[job1]
+ st4 {v_a.s-v_d.s}[2],[job2]
+ st4 {v_a.s-v_d.s}[3],[job3]
+.exit:
+ ldp d8,d9,[sp,16]
+ ldp d10,d11,[sp,32]
+ ldp x29,x30,[sp],48
+ ret
+ // The symbol size is taken here, directly after the final ret, so that
+ // it covers only the instructions of md5_mb_asimd_x4 and not the table.
+ .size md5_mb_asimd_x4, .-md5_mb_asimd_x4
+
+/*
+ MD5 round constants, one 16-byte entry per step in step order. Each
+ 32-bit constant is repeated four times so that a single 128-bit load
+ yields the constant in all four lanes. 64 entries, 1024 bytes in total.
+
+ The table is read-only data, so it is placed in .rodata rather than in
+ the instruction stream; this also keeps it readable when the text
+ segment is mapped execute-only. It is aligned to 16 bytes to match the
+ 128-bit loads that walk it.
+*/
+ .section .rodata
+ .p2align 4
+.key_consts:
+ .word 0xd76aa478, 0xd76aa478, 0xd76aa478, 0xd76aa478
+ .word 0xe8c7b756, 0xe8c7b756, 0xe8c7b756, 0xe8c7b756
+ .word 0x242070db, 0x242070db, 0x242070db, 0x242070db
+ .word 0xc1bdceee, 0xc1bdceee, 0xc1bdceee, 0xc1bdceee
+ .word 0xf57c0faf, 0xf57c0faf, 0xf57c0faf, 0xf57c0faf
+ .word 0x4787c62a, 0x4787c62a, 0x4787c62a, 0x4787c62a
+ .word 0xa8304613, 0xa8304613, 0xa8304613, 0xa8304613
+ .word 0xfd469501, 0xfd469501, 0xfd469501, 0xfd469501
+ .word 0x698098d8, 0x698098d8, 0x698098d8, 0x698098d8
+ .word 0x8b44f7af, 0x8b44f7af, 0x8b44f7af, 0x8b44f7af
+ .word 0xffff5bb1, 0xffff5bb1, 0xffff5bb1, 0xffff5bb1
+ .word 0x895cd7be, 0x895cd7be, 0x895cd7be, 0x895cd7be
+ .word 0x6b901122, 0x6b901122, 0x6b901122, 0x6b901122
+ .word 0xfd987193, 0xfd987193, 0xfd987193, 0xfd987193
+ .word 0xa679438e, 0xa679438e, 0xa679438e, 0xa679438e
+ .word 0x49b40821, 0x49b40821, 0x49b40821, 0x49b40821
+ .word 0xf61e2562, 0xf61e2562, 0xf61e2562, 0xf61e2562
+ .word 0xc040b340, 0xc040b340, 0xc040b340, 0xc040b340
+ .word 0x265e5a51, 0x265e5a51, 0x265e5a51, 0x265e5a51
+ .word 0xe9b6c7aa, 0xe9b6c7aa, 0xe9b6c7aa, 0xe9b6c7aa
+ .word 0xd62f105d, 0xd62f105d, 0xd62f105d, 0xd62f105d
+ .word 0x02441453, 0x02441453, 0x02441453, 0x02441453
+ .word 0xd8a1e681, 0xd8a1e681, 0xd8a1e681, 0xd8a1e681
+ .word 0xe7d3fbc8, 0xe7d3fbc8, 0xe7d3fbc8, 0xe7d3fbc8
+ .word 0x21e1cde6, 0x21e1cde6, 0x21e1cde6, 0x21e1cde6
+ .word 0xc33707d6, 0xc33707d6, 0xc33707d6, 0xc33707d6
+ .word 0xf4d50d87, 0xf4d50d87, 0xf4d50d87, 0xf4d50d87
+ .word 0x455a14ed, 0x455a14ed, 0x455a14ed, 0x455a14ed
+ .word 0xa9e3e905, 0xa9e3e905, 0xa9e3e905, 0xa9e3e905
+ .word 0xfcefa3f8, 0xfcefa3f8, 0xfcefa3f8, 0xfcefa3f8
+ .word 0x676f02d9, 0x676f02d9, 0x676f02d9, 0x676f02d9
+ .word 0x8d2a4c8a, 0x8d2a4c8a, 0x8d2a4c8a, 0x8d2a4c8a
+ .word 0xfffa3942, 0xfffa3942, 0xfffa3942, 0xfffa3942
+ .word 0x8771f681, 0x8771f681, 0x8771f681, 0x8771f681
+ .word 0x6d9d6122, 0x6d9d6122, 0x6d9d6122, 0x6d9d6122
+ .word 0xfde5380c, 0xfde5380c, 0xfde5380c, 0xfde5380c
+ .word 0xa4beea44, 0xa4beea44, 0xa4beea44, 0xa4beea44
+ .word 0x4bdecfa9, 0x4bdecfa9, 0x4bdecfa9, 0x4bdecfa9
+ .word 0xf6bb4b60, 0xf6bb4b60, 0xf6bb4b60, 0xf6bb4b60
+ .word 0xbebfbc70, 0xbebfbc70, 0xbebfbc70, 0xbebfbc70
+ .word 0x289b7ec6, 0x289b7ec6, 0x289b7ec6, 0x289b7ec6
+ .word 0xeaa127fa, 0xeaa127fa, 0xeaa127fa, 0xeaa127fa
+ .word 0xd4ef3085, 0xd4ef3085, 0xd4ef3085, 0xd4ef3085
+ .word 0x04881d05, 0x04881d05, 0x04881d05, 0x04881d05
+ .word 0xd9d4d039, 0xd9d4d039, 0xd9d4d039, 0xd9d4d039
+ .word 0xe6db99e5, 0xe6db99e5, 0xe6db99e5, 0xe6db99e5
+ .word 0x1fa27cf8, 0x1fa27cf8, 0x1fa27cf8, 0x1fa27cf8
+ .word 0xc4ac5665, 0xc4ac5665, 0xc4ac5665, 0xc4ac5665
+ .word 0xf4292244, 0xf4292244, 0xf4292244, 0xf4292244
+ .word 0x432aff97, 0x432aff97, 0x432aff97, 0x432aff97
+ .word 0xab9423a7, 0xab9423a7, 0xab9423a7, 0xab9423a7
+ .word 0xfc93a039, 0xfc93a039, 0xfc93a039, 0xfc93a039
+ .word 0x655b59c3, 0x655b59c3, 0x655b59c3, 0x655b59c3
+ .word 0x8f0ccc92, 0x8f0ccc92, 0x8f0ccc92, 0x8f0ccc92
+ .word 0xffeff47d, 0xffeff47d, 0xffeff47d, 0xffeff47d
+ .word 0x85845dd1, 0x85845dd1, 0x85845dd1, 0x85845dd1
+ .word 0x6fa87e4f, 0x6fa87e4f, 0x6fa87e4f, 0x6fa87e4f
+ .word 0xfe2ce6e0, 0xfe2ce6e0, 0xfe2ce6e0, 0xfe2ce6e0
+ .word 0xa3014314, 0xa3014314, 0xa3014314, 0xa3014314
+ .word 0x4e0811a1, 0x4e0811a1, 0x4e0811a1, 0x4e0811a1
+ .word 0xf7537e82, 0xf7537e82, 0xf7537e82, 0xf7537e82
+ .word 0xbd3af235, 0xbd3af235, 0xbd3af235, 0xbd3af235
+ .word 0x2ad7d2bb, 0x2ad7d2bb, 0x2ad7d2bb, 0x2ad7d2bb
+ .word 0xeb86d391, 0xeb86d391, 0xeb86d391, 0xeb86d391
+ .previous