diff options
Diffstat (limited to '')
-rw-r--r-- | src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_block_ce.S | 482 |
1 file changed, 482 insertions, 0 deletions
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_block_ce.S b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_block_ce.S new file mode 100644 index 000000000..7f4256e20 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_block_ce.S @@ -0,0 +1,482 @@ +/********************************************************************** + Copyright(c) 2021 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
**********************************************************************/

/*
 * AArch64 (ARMv8 + Crypto Extension) implementation of one mh_sha1 +
 * murmur3_x64_128 block pass.  SHA-1 is computed on four segments at a
 * time (lane0..lane3); lane_loop below runs four times per 1 KB input
 * block, covering 16 interleaved 64-byte SHA-1 segments, while scalar
 * murmur3 operations over the same input are interleaved between the
 * SHA-1 vector instructions.
 */
	.arch armv8-a+crypto
	.text
	.align 2
	.p2align 3,,7

/*
Macros
*/

/* Declare q/v/s register-view aliases (<name>_q, <name>_v, <name>_s)
 * for vector register number \reg. */
.macro declare_var_vector_reg name:req,reg:req
	\name\()_q	.req	q\reg
	\name\()_v	.req	v\reg
	\name\()_s	.req	s\reg
.endm

/*
Variable list
*/

	/* v0-v15: the four 16-byte message words of each of the 4 lanes */
	declare_var_vector_reg lane0_msg_0, 0
	declare_var_vector_reg lane1_msg_0, 1
	declare_var_vector_reg lane2_msg_0, 2
	declare_var_vector_reg lane3_msg_0, 3
	declare_var_vector_reg lane0_msg_1, 4
	declare_var_vector_reg lane1_msg_1, 5
	declare_var_vector_reg lane2_msg_1, 6
	declare_var_vector_reg lane3_msg_1, 7
	declare_var_vector_reg lane0_msg_2, 8
	declare_var_vector_reg lane1_msg_2, 9
	declare_var_vector_reg lane2_msg_2,10
	declare_var_vector_reg lane3_msg_2,11
	declare_var_vector_reg lane0_msg_3,12
	declare_var_vector_reg lane1_msg_3,13
	declare_var_vector_reg lane2_msg_3,14
	declare_var_vector_reg lane3_msg_3,15

	/* v16-v19: SHA-1 working state (A,B,C,D) per lane */
	declare_var_vector_reg lane0_abcd ,16
	declare_var_vector_reg lane1_abcd ,17
	declare_var_vector_reg lane2_abcd ,18
	declare_var_vector_reg lane3_abcd ,19
	/* v20-v27: scratch / msg+key spill values per lane */
	declare_var_vector_reg lane0_tmp0 ,20
	declare_var_vector_reg lane1_tmp0 ,21
	declare_var_vector_reg lane2_tmp0 ,22
	declare_var_vector_reg lane3_tmp0 ,23
	declare_var_vector_reg lane0_tmp1 ,24
	declare_var_vector_reg lane1_tmp1 ,25
	declare_var_vector_reg lane2_tmp1 ,26
	declare_var_vector_reg lane3_tmp1 ,27


	declare_var_vector_reg e0 ,28
	declare_var_vector_reg e1 ,29
	declare_var_vector_reg key ,30
	declare_var_vector_reg tmp ,31

	key_adr		.req x5		// address of the current SHA-1 round constant
	msg_adr		.req x6		// current read position inside the 1 KB block
	block_cnt	.req x7		// byte offset of the current lane group (0,16,32,48)
	offs		.req x8		// load/store stride between segments (64 bytes)
	mur_n1		.req x9		// murmur3 constant N1 (h1 = h1*5 + N1)
	mur_n1_w	.req w9
	mur_n2		.req x10	// murmur3 constant N2 (h2 = h2*5 + N2)
	mur_n2_w	.req w10
	mur_hash1	.req x11	// murmur3 running state h1
	mur_hash2	.req x12	// murmur3 running state h2
	mur_c1		.req x13	// murmur3 multiplicative constant C1
	mur_c2		.req x14	// murmur3 multiplicative constant C2
	mur_data1	.req x15	// low 8 bytes of the current murmur3 input pair

	digest_adr	.req x16	// cursor into the mh_sha1 digest array
	tmp0_adr	.req x17	// frame_buffer + 0: spill area for msg+key vectors
	tmp1_adr	.req x18	// frame_buffer + 128: second spill area
				// NOTE(review): x18 is the platform register on some
				// ABIs (Apple, Windows) -- confirm Linux-only target.
	mur_data2	.req x19	// high 8 bytes of the current murmur3 input pair
	mur_data	.req x20	// murmur3 input pointer (advances 16 B per step)

/* murmur3_00 .. murmur3_03 together form one murmur3_x64_128 16-byte
 * body step, split into four pieces so each piece can be interleaved
 * between SHA-1 instructions inside sha1_4_rounds below. */
.macro murmur3_00
	/* fetch the next 16 input bytes and apply the first multiplies */
	ldp mur_data1, mur_data2, [mur_data], #16
	mul mur_data1, mur_data1, mur_c1
	mul mur_data2, mur_data2, mur_c2
.endm

.macro murmur3_01
	/* rotate left by 31 bits */
	ror mur_data1, mur_data1, #64-31
	/* rotate left by 33 bits */
	ror mur_data2, mur_data2, #64-33
	mul mur_data1, mur_data1, mur_c2
	mul mur_data2, mur_data2, mur_c1
.endm

.macro murmur3_02
	/* fold mur_data1 into h1 */
	eor mur_hash1, mur_hash1, mur_data1
	/* rotate left by 27 bits */
	ror mur_hash1, mur_hash1, #64-27
	add mur_hash1, mur_hash1, mur_hash2
	// mur_hash1 = mur_hash1 * 5 + N1
	add mur_hash1, mur_hash1, mur_hash1, LSL #2
	add mur_hash1, mur_n1, mur_hash1
.endm

.macro murmur3_03
	/* fold mur_data2 into h2 */
	eor mur_hash2, mur_hash2, mur_data2
	/* rotate left by 31 bits */
	ror mur_hash2, mur_hash2, #64-31
	add mur_hash2, mur_hash2, mur_hash1
	// mur_hash2 = mur_hash2 * 5 + N2
	add mur_hash2, mur_hash2, mur_hash2, LSL #2
	add mur_hash2, mur_n2, mur_hash2
.endm

/**
 * macro for rounds 4-67
 * Executed 16 times per 64-byte block pass; each invocation also advances
 * the interleaved murmur3 state by 16 input bytes, so one pass of
 * lane_loop lets the inserted murmur3 operations process 256 bytes.
 *
 * \inst is the round-dependent SHA-1 instruction (sha1c/sha1p/sha1m);
 * \e0/\tmp0 and \e1/\tmp1 alternate roles between consecutive calls.
 */
.macro sha1_4_rounds inst:req,msg0:req,msg1:req,msg2:req,msg3:req,abcd:req,e0:req,tmp0:req,e1:req,tmp1:req
	sha1h	lane0_\tmp0\()_s, lane0_\abcd\()_s
	sha1h	lane1_\tmp0\()_s, lane1_\abcd\()_s
	sha1h	lane2_\tmp0\()_s, lane2_\abcd\()_s
	sha1h	lane3_\tmp0\()_s, lane3_\abcd\()_s
	// gather the four per-lane E values produced by sha1h into \e0
	mov	\e0\()_v.S[0],lane0_\tmp0\()_v.S[0]
	mov	\e0\()_v.S[1],lane1_\tmp0\()_v.S[0]
	mov	\e0\()_v.S[2],lane2_\tmp0\()_v.S[0]
	mov	\e0\()_v.S[3],lane3_\tmp0\()_v.S[0]
	// scatter the previous E values (\e1) back to the per-lane scratch regs
	mov	lane0_\tmp0\()_v.S[0],\e1\()_v.S[0]
	mov	lane1_\tmp0\()_v.S[0],\e1\()_v.S[1]
	mov	lane2_\tmp0\()_v.S[0],\e1\()_v.S[2]
	mov	lane3_\tmp0\()_v.S[0],\e1\()_v.S[3]
	// four SHA-1 rounds per lane, with murmur3 pieces interleaved
	\inst	lane0_\abcd\()_q,lane0_\tmp0\()_s,lane0_\tmp1\()_v.4s
	murmur3_00
	\inst	lane1_\abcd\()_q,lane1_\tmp0\()_s,lane1_\tmp1\()_v.4s
	murmur3_01
	\inst	lane2_\abcd\()_q,lane2_\tmp0\()_s,lane2_\tmp1\()_v.4s
	murmur3_02
	\inst	lane3_\abcd\()_q,lane3_\tmp0\()_s,lane3_\tmp1\()_v.4s
	murmur3_03
	// reload the msg+key values spilled by the previous invocation
	ld1	{lane0_\tmp0\()_v.4s-lane3_\tmp0\()_v.4s},[\tmp0\()_adr]
	// spill msg3+key for use two invocations from now
	add	lane0_\tmp1\()_v.4s,lane0_\msg3\()_v.4s,key_v.4s
	add	lane1_\tmp1\()_v.4s,lane1_\msg3\()_v.4s,key_v.4s
	add	lane2_\tmp1\()_v.4s,lane2_\msg3\()_v.4s,key_v.4s
	add	lane3_\tmp1\()_v.4s,lane3_\msg3\()_v.4s,key_v.4s
	st1	{lane0_\tmp1\()_v.4s-lane3_\tmp1\()_v.4s},[\tmp1\()_adr]
	// SHA-1 message schedule update for the next rounds
	sha1su1	lane0_\msg0\()_v.4s,lane0_\msg3\()_v.4s
	sha1su1	lane1_\msg0\()_v.4s,lane1_\msg3\()_v.4s
	sha1su1	lane2_\msg0\()_v.4s,lane2_\msg3\()_v.4s
	sha1su1	lane3_\msg0\()_v.4s,lane3_\msg3\()_v.4s
	sha1su0	lane0_\msg1\()_v.4s,lane0_\msg2\()_v.4s,lane0_\msg3\()_v.4s
	sha1su0	lane1_\msg1\()_v.4s,lane1_\msg2\()_v.4s,lane1_\msg3\()_v.4s
	sha1su0	lane2_\msg1\()_v.4s,lane2_\msg2\()_v.4s,lane2_\msg3\()_v.4s
	sha1su0	lane3_\msg1\()_v.4s,lane3_\msg2\()_v.4s,lane3_\msg3\()_v.4s
.endm


/*
 * void mh_sha1_murmur3_block_ce (const uint8_t * input_data,
 *				uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
 *				uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
 *				uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
 *				uint32_t num_blocks);
 * arg 0 pointer to input data
 * arg 1 pointer to digests, include segments digests(uint32_t digests[16][5])
 * arg 2 pointer to aligned_frame_buffer which is used to save the big_endian data.
 * arg 3 pointer to murmur3 digest
 * arg 4 number of 1KB blocks
 *
 * AAPCS64; clobbers v0-v31 (d8-d15 saved/restored below) and x5-x18.
 * x19/x20 are callee-saved and preserved on the stack.
 */

/*
Arguments list
*/
	input_data	.req x0
	digests		.req x1
	frame_buffer	.req x2
	mur_digest	.req x3
	num_blocks	.req w4

	.global mh_sha1_murmur3_block_ce
	.type mh_sha1_murmur3_block_ce, %function
mh_sha1_murmur3_block_ce:
	// save temp vector registers (d8-d15 are callee-saved low halves)
	// and the callee-saved GP regs x19/x20; 80 bytes, 16-byte aligned
	stp	d8, d9, [sp, -80]!
	stp	d10, d11, [sp, 16]
	stp	d12, d13, [sp, 32]
	stp	d14, d15, [sp, 48]
	stp	x19, x20, [sp, 64]

	// load the 128-bit murmur3 state as two 64-bit halves,
	// then the murmur3 constants from .rodata
	mov	mur_data, input_data
	ldr	mur_hash1, [mur_digest]
	ldr	mur_hash2, [mur_digest, 8]
	adr	mur_c1, C1
	ldr	mur_c1, [mur_c1]
	adr	mur_c2, C2
	ldr	mur_c2, [mur_c2]
	adr	tmp0_adr, N1
	ldr	mur_n1_w, [tmp0_adr]
	adr	tmp0_adr, N2
	ldr	mur_n2_w, [tmp0_adr]

	// frame_buffer is used as two 128-byte spill areas
	mov	tmp0_adr,frame_buffer
	add	tmp1_adr,tmp0_adr,128


start_loop:	// one iteration per 1 KB input block
	mov	block_cnt,0
	mov	msg_adr,input_data
lane_loop:	// one iteration per group of 4 segments (4 SIMD lanes)
	mov	offs,64
	adr	key_adr,KEY_0
	// load msg 0 (ld4 de-interleaves one 16-byte chunk per segment,
	// segments are 64 bytes apart)
	ld4	{lane0_msg_0_v.S-lane3_msg_0_v.S}[0],[msg_adr],offs
	ld4	{lane0_msg_0_v.S-lane3_msg_0_v.S}[1],[msg_adr],offs
	ld4	{lane0_msg_0_v.S-lane3_msg_0_v.S}[2],[msg_adr],offs
	ld4	{lane0_msg_0_v.S-lane3_msg_0_v.S}[3],[msg_adr],offs

	ld4	{lane0_msg_1_v.S-lane3_msg_1_v.S}[0],[msg_adr],offs
	ld4	{lane0_msg_1_v.S-lane3_msg_1_v.S}[1],[msg_adr],offs
	ld4	{lane0_msg_1_v.S-lane3_msg_1_v.S}[2],[msg_adr],offs
	ld4	{lane0_msg_1_v.S-lane3_msg_1_v.S}[3],[msg_adr],offs

	ld4	{lane0_msg_2_v.S-lane3_msg_2_v.S}[0],[msg_adr],offs
	ld4	{lane0_msg_2_v.S-lane3_msg_2_v.S}[1],[msg_adr],offs
	ld4	{lane0_msg_2_v.S-lane3_msg_2_v.S}[2],[msg_adr],offs
	ld4	{lane0_msg_2_v.S-lane3_msg_2_v.S}[3],[msg_adr],offs

	ld4	{lane0_msg_3_v.S-lane3_msg_3_v.S}[0],[msg_adr],offs
	ld4	{lane0_msg_3_v.S-lane3_msg_3_v.S}[1],[msg_adr],offs
	ld4	{lane0_msg_3_v.S-lane3_msg_3_v.S}[2],[msg_adr],offs
	ld4	{lane0_msg_3_v.S-lane3_msg_3_v.S}[3],[msg_adr],offs

	// load the A..D digest words of the 4 lanes (64-byte row stride),
	// then the four E words as one q-register
	add	digest_adr,digests,block_cnt
	ld4	{lane0_abcd_v.S-lane3_abcd_v.S}[0],[digest_adr],offs
	ld4	{lane0_abcd_v.S-lane3_abcd_v.S}[1],[digest_adr],offs
	ld4	{lane0_abcd_v.S-lane3_abcd_v.S}[2],[digest_adr],offs
	ld4	{lane0_abcd_v.S-lane3_abcd_v.S}[3],[digest_adr],offs
	ldr	e0_q,[digest_adr]

	// load key_0
	ldr	key_q,[key_adr]

	// byte-swap the message words to big endian
	rev32	lane0_msg_0_v.16b,lane0_msg_0_v.16b
	rev32	lane1_msg_0_v.16b,lane1_msg_0_v.16b
	rev32	lane2_msg_0_v.16b,lane2_msg_0_v.16b
	rev32	lane3_msg_0_v.16b,lane3_msg_0_v.16b
	rev32	lane0_msg_1_v.16b,lane0_msg_1_v.16b
	rev32	lane1_msg_1_v.16b,lane1_msg_1_v.16b
	rev32	lane2_msg_1_v.16b,lane2_msg_1_v.16b
	rev32	lane3_msg_1_v.16b,lane3_msg_1_v.16b
	rev32	lane0_msg_2_v.16b,lane0_msg_2_v.16b
	rev32	lane1_msg_2_v.16b,lane1_msg_2_v.16b
	rev32	lane2_msg_2_v.16b,lane2_msg_2_v.16b
	rev32	lane3_msg_2_v.16b,lane3_msg_2_v.16b
	rev32	lane0_msg_3_v.16b,lane0_msg_3_v.16b
	rev32	lane1_msg_3_v.16b,lane1_msg_3_v.16b
	rev32	lane2_msg_3_v.16b,lane2_msg_3_v.16b
	rev32	lane3_msg_3_v.16b,lane3_msg_3_v.16b

	// spill msg_1+key for rounds 4-7
	add	lane0_tmp1_v.4s,lane0_msg_1_v.4s,key_v.4s
	add	lane1_tmp1_v.4s,lane1_msg_1_v.4s,key_v.4s
	add	lane2_tmp1_v.4s,lane2_msg_1_v.4s,key_v.4s
	add	lane3_tmp1_v.4s,lane3_msg_1_v.4s,key_v.4s
	st1	{lane0_tmp1_v.4s-lane3_tmp1_v.4s},[tmp1_adr]

	add	lane0_tmp0_v.4s,lane0_msg_0_v.4s,key_v.4s
	add	lane1_tmp0_v.4s,lane1_msg_0_v.4s,key_v.4s
	add	lane2_tmp0_v.4s,lane2_msg_0_v.4s,key_v.4s
	add	lane3_tmp0_v.4s,lane3_msg_0_v.4s,key_v.4s

	/* rounds 0-3 */
	sha1h	lane0_tmp1_s,lane0_abcd_s
	sha1h	lane1_tmp1_s,lane1_abcd_s
	sha1h	lane2_tmp1_s,lane2_abcd_s
	sha1h	lane3_tmp1_s,lane3_abcd_s
	mov	e1_v.S[0],lane0_tmp1_v.S[0]
	mov	e1_v.S[1],lane1_tmp1_v.S[0]
	mov	e1_v.S[2],lane2_tmp1_v.S[0]
	mov	e1_v.S[3],lane3_tmp1_v.S[0]
	mov	lane0_tmp1_v.S[0],e0_v.S[0]
	mov	lane1_tmp1_v.S[0],e0_v.S[1]
	mov	lane2_tmp1_v.S[0],e0_v.S[2]
	mov	lane3_tmp1_v.S[0],e0_v.S[3]
	sha1c	lane0_abcd_q,lane0_tmp1_s,lane0_tmp0_v.4s
	sha1c	lane1_abcd_q,lane1_tmp1_s,lane1_tmp0_v.4s
	sha1c	lane2_abcd_q,lane2_tmp1_s,lane2_tmp0_v.4s
	sha1c	lane3_abcd_q,lane3_tmp1_s,lane3_tmp0_v.4s
	ld1	{lane0_tmp1_v.4s-lane3_tmp1_v.4s},[tmp1_adr]
	// spill msg_2+key and start the message schedule for rounds 16+
	add	lane0_tmp0_v.4s,lane0_msg_2_v.4s,key_v.4s
	sha1su0	lane0_msg_0_v.4s,lane0_msg_1_v.4s,lane0_msg_2_v.4s
	add	lane1_tmp0_v.4s,lane1_msg_2_v.4s,key_v.4s
	sha1su0	lane1_msg_0_v.4s,lane1_msg_1_v.4s,lane1_msg_2_v.4s
	add	lane2_tmp0_v.4s,lane2_msg_2_v.4s,key_v.4s
	sha1su0	lane2_msg_0_v.4s,lane2_msg_1_v.4s,lane2_msg_2_v.4s
	add	lane3_tmp0_v.4s,lane3_msg_2_v.4s,key_v.4s
	sha1su0	lane3_msg_0_v.4s,lane3_msg_1_v.4s,lane3_msg_2_v.4s
	st1	{lane0_tmp0_v.4s-lane3_tmp0_v.4s},[tmp0_adr]

	sha1_4_rounds	sha1c,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp0,e1,tmp1	/* rounds 4-7 */
	sha1_4_rounds	sha1c,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp1,e0,tmp0


	adr	key_adr,KEY_1
	ldr	key_q,[key_adr]
	sha1_4_rounds	sha1c,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp0,e1,tmp1	/* rounds 12-15 */
	sha1_4_rounds	sha1c,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp1,e0,tmp0
	sha1_4_rounds	sha1p,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp0,e1,tmp1	/* rounds 20-23 */
	sha1_4_rounds	sha1p,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp1,e0,tmp0
	sha1_4_rounds	sha1p,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp0,e1,tmp1

	adr	key_adr,KEY_2
	ldr	key_q,[key_adr]
	sha1_4_rounds	sha1p,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp1,e0,tmp0
	sha1_4_rounds	sha1p,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp0,e1,tmp1	/* rounds 36-39 */
	sha1_4_rounds	sha1m,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp1,e0,tmp0
	sha1_4_rounds	sha1m,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp0,e1,tmp1
	sha1_4_rounds	sha1m,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp1,e0,tmp0

	adr	key_adr,KEY_3
	ldr	key_q,[key_adr]
	sha1_4_rounds	sha1m,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp0,e1,tmp1	/* rounds 52-55 */
	sha1_4_rounds	sha1m,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp1,e0,tmp0
	sha1_4_rounds	sha1p,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp0,e1,tmp1
	sha1_4_rounds	sha1p,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp1,e0,tmp0

	// msg2 and msg1 are free
	mov	lane0_msg_2_v.S[0],e1_v.S[0]
	mov	lane1_msg_2_v.S[0],e1_v.S[1]
	mov	lane2_msg_2_v.S[0],e1_v.S[2]
	mov	lane3_msg_2_v.S[0],e1_v.S[3]

	/* rounds 68-71 */
	sha1h	lane0_msg_1_s,lane0_abcd_s
	sha1h	lane1_msg_1_s,lane1_abcd_s
	sha1h	lane2_msg_1_s,lane2_abcd_s
	sha1h	lane3_msg_1_s,lane3_abcd_s
	sha1p	lane0_abcd_q,lane0_msg_2_s,lane0_tmp1_v.4s
	sha1p	lane1_abcd_q,lane1_msg_2_s,lane1_tmp1_v.4s
	sha1p	lane2_abcd_q,lane2_msg_2_s,lane2_tmp1_v.4s
	sha1p	lane3_abcd_q,lane3_msg_2_s,lane3_tmp1_v.4s
	add	lane0_tmp1_v.4s,lane0_msg_3_v.4s,key_v.4s
	add	lane1_tmp1_v.4s,lane1_msg_3_v.4s,key_v.4s
	add	lane2_tmp1_v.4s,lane2_msg_3_v.4s,key_v.4s
	add	lane3_tmp1_v.4s,lane3_msg_3_v.4s,key_v.4s
	sha1su1	lane0_msg_0_v.4s,lane0_msg_3_v.4s
	sha1su1	lane1_msg_0_v.4s,lane1_msg_3_v.4s
	sha1su1	lane2_msg_0_v.4s,lane2_msg_3_v.4s
	sha1su1	lane3_msg_0_v.4s,lane3_msg_3_v.4s

	/* rounds 72-75 */
	sha1h	lane0_msg_2_s,lane0_abcd_s
	sha1h	lane1_msg_2_s,lane1_abcd_s
	sha1h	lane2_msg_2_s,lane2_abcd_s
	sha1h	lane3_msg_2_s,lane3_abcd_s
	sha1p	lane0_abcd_q,lane0_msg_1_s,lane0_tmp0_v.4s
	sha1p	lane1_abcd_q,lane1_msg_1_s,lane1_tmp0_v.4s
	sha1p	lane2_abcd_q,lane2_msg_1_s,lane2_tmp0_v.4s
	sha1p	lane3_abcd_q,lane3_msg_1_s,lane3_tmp0_v.4s

	/* rounds 76-79 */
	sha1h	lane0_msg_1_s,lane0_abcd_s
	sha1h	lane1_msg_1_s,lane1_abcd_s
	sha1h	lane2_msg_1_s,lane2_abcd_s
	sha1h	lane3_msg_1_s,lane3_abcd_s
	sha1p	lane0_abcd_q,lane0_msg_2_s,lane0_tmp1_v.4s
	sha1p	lane1_abcd_q,lane1_msg_2_s,lane1_tmp1_v.4s
	sha1p	lane2_abcd_q,lane2_msg_2_s,lane2_tmp1_v.4s
	sha1p	lane3_abcd_q,lane3_msg_2_s,lane3_tmp1_v.4s
	// reload the saved input digests and add them in
	// (SHA-1 final state addition)
	add	digest_adr,digests,block_cnt
	ld4	{lane0_msg_0_v.S-lane3_msg_0_v.S}[0],[digest_adr],offs
	ld4	{lane0_msg_0_v.S-lane3_msg_0_v.S}[1],[digest_adr],offs
	ld4	{lane0_msg_0_v.S-lane3_msg_0_v.S}[2],[digest_adr],offs
	ld4	{lane0_msg_0_v.S-lane3_msg_0_v.S}[3],[digest_adr],offs
	ld4	{lane0_msg_3_v.S-lane3_msg_3_v.S}[0],[digest_adr]

	add	lane0_abcd_v.4S,lane0_abcd_v.4S,lane0_msg_0_v.4S
	add	lane1_abcd_v.4S,lane1_abcd_v.4S,lane1_msg_0_v.4S
	add	lane2_abcd_v.4S,lane2_abcd_v.4S,lane2_msg_0_v.4S
	add	lane3_abcd_v.4S,lane3_abcd_v.4S,lane3_msg_0_v.4S

	// msg_1 holds the final E values, msg_3 the saved input E values
	add	lane0_msg_1_v.4S,lane0_msg_1_v.4S,lane0_msg_3_v.4S
	add	lane1_msg_1_v.4S,lane1_msg_1_v.4S,lane1_msg_3_v.4S
	add	lane2_msg_1_v.4S,lane2_msg_1_v.4S,lane2_msg_3_v.4S
	add	lane3_msg_1_v.4S,lane3_msg_1_v.4S,lane3_msg_3_v.4S

	// store the updated digests back (A..D rows, then E row)
	add	digest_adr,digests,block_cnt
	st4	{lane0_abcd_v.S-lane3_abcd_v.S}[0],[digest_adr],offs
	st4	{lane0_abcd_v.S-lane3_abcd_v.S}[1],[digest_adr],offs
	st4	{lane0_abcd_v.S-lane3_abcd_v.S}[2],[digest_adr],offs
	st4	{lane0_abcd_v.S-lane3_abcd_v.S}[3],[digest_adr],offs
	st4	{lane0_msg_1_v.S-lane3_msg_1_v.S}[0],[digest_adr]

	// advance to the next group of 4 segments (4 groups of 16 bytes)
	add	block_cnt,block_cnt,16
	cmp	block_cnt,64
	add	msg_adr,input_data,block_cnt
	add	digest_adr,digests,block_cnt
	bcc	lane_loop

	// next 1 KB block while num_blocks > 0 after the decrement
	subs	num_blocks,num_blocks,1
	add	input_data,input_data,1024
	bhi	start_loop

	/* save murmur-hash digest */
	str	mur_hash1, [mur_digest], #8
	str	mur_hash2, [mur_digest]

exit_func:
	// restore temp register
	ldp	d10, d11, [sp, 16]
	ldp	d12, d13, [sp, 32]
	ldp	d14, d15, [sp, 48]
	ldp	x19, x20, [sp, 64]
	ldp	d8, d9, [sp], 80
	ret

	.size mh_sha1_murmur3_block_ce, .-mh_sha1_murmur3_block_ce
	.section .rodata.cst16,"aM",@progbits,16
	.align 4
/* SHA-1 round constants K0..K3, replicated across the 4 lanes */
KEY_0:
	.word	0x5a827999
	.word	0x5a827999
	.word	0x5a827999
	.word	0x5a827999
KEY_1:
	.word	0x6ed9eba1
	.word	0x6ed9eba1
	.word	0x6ed9eba1
	.word	0x6ed9eba1
KEY_2:
	.word	0x8f1bbcdc
	.word	0x8f1bbcdc
	.word	0x8f1bbcdc
	.word	0x8f1bbcdc
KEY_3:
	.word	0xca62c1d6
	.word	0xca62c1d6
	.word	0xca62c1d6
	.word	0xca62c1d6

/* murmur3_x64_128 additive constants; only the first word of each is
 * loaded (via mur_n1_w / mur_n2_w) */
N1:
	.word	0x52dce729
	.word	0x52dce729
	.word	0x52dce729
	.word	0x52dce729
N2:
	.word	0x38495ab5
	.word	0x38495ab5
	.word	0x38495ab5
	.word	0x38495ab5

/* murmur3_x64_128 multiplicative constants c1/c2; only the first dword
 * of each is loaded */
C1:
	.dword	0x87c37b91114253d5
	.dword	0x87c37b91114253d5
C2:
	.dword	0x4cf5ad432745937f
	.dword	0x4cf5ad432745937f