diff options
Diffstat (limited to 'src/crypto/sha1/sha1block_arm64.s')
-rw-r--r-- | src/crypto/sha1/sha1block_arm64.s | 152 |
1 files changed, 152 insertions, 0 deletions
diff --git a/src/crypto/sha1/sha1block_arm64.s b/src/crypto/sha1/sha1block_arm64.s new file mode 100644 index 0000000..d568384 --- /dev/null +++ b/src/crypto/sha1/sha1block_arm64.s @@ -0,0 +1,152 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +#define HASHUPDATECHOOSE \ + SHA1C V16.S4, V1, V2 \ + SHA1H V3, V1 \ + VMOV V2.B16, V3.B16 + +#define HASHUPDATEPARITY \ + SHA1P V16.S4, V1, V2 \ + SHA1H V3, V1 \ + VMOV V2.B16, V3.B16 + +#define HASHUPDATEMAJ \ + SHA1M V16.S4, V1, V2 \ + SHA1H V3, V1 \ + VMOV V2.B16, V3.B16 + +// func sha1block(h []uint32, p []byte, k []uint32) +TEXT ·sha1block(SB),NOSPLIT,$0 + MOVD h_base+0(FP), R0 // hash value first address + MOVD p_base+24(FP), R1 // message first address + MOVD k_base+48(FP), R2 // k constants first address + MOVD p_len+32(FP), R3 // message length + VLD1.P 16(R0), [V0.S4] + FMOVS (R0), F20 + SUB $16, R0, R0 + +blockloop: + + VLD1.P 16(R1), [V4.B16] // load message + VLD1.P 16(R1), [V5.B16] + VLD1.P 16(R1), [V6.B16] + VLD1.P 16(R1), [V7.B16] + VLD1 (R2), [V19.S4] // load constant k0-k79 + VMOV V0.B16, V2.B16 + VMOV V20.S[0], V1 + VMOV V2.B16, V3.B16 + VDUP V19.S[0], V17.S4 + VREV32 V4.B16, V4.B16 // prepare for using message in Byte format + VREV32 V5.B16, V5.B16 + VREV32 V6.B16, V6.B16 + VREV32 V7.B16, V7.B16 + + + VDUP V19.S[1], V18.S4 + VADD V17.S4, V4.S4, V16.S4 + SHA1SU0 V6.S4, V5.S4, V4.S4 + HASHUPDATECHOOSE + SHA1SU1 V7.S4, V4.S4 + + VADD V17.S4, V5.S4, V16.S4 + SHA1SU0 V7.S4, V6.S4, V5.S4 + HASHUPDATECHOOSE + SHA1SU1 V4.S4, V5.S4 + VADD V17.S4, V6.S4, V16.S4 + SHA1SU0 V4.S4, V7.S4, V6.S4 + HASHUPDATECHOOSE + SHA1SU1 V5.S4, V6.S4 + + VADD V17.S4, V7.S4, V16.S4 + SHA1SU0 V5.S4, V4.S4, V7.S4 + HASHUPDATECHOOSE + SHA1SU1 V6.S4, V7.S4 + + VADD V17.S4, V4.S4, V16.S4 + SHA1SU0 V6.S4, V5.S4, V4.S4 + HASHUPDATECHOOSE + SHA1SU1 V7.S4, V4.S4 + + VDUP V19.S[2], V17.S4 + VADD V18.S4, V5.S4, V16.S4 + SHA1SU0 V7.S4, V6.S4, V5.S4 + HASHUPDATEPARITY + SHA1SU1 V4.S4, V5.S4 + + VADD V18.S4, V6.S4, V16.S4 + SHA1SU0 V4.S4, V7.S4, V6.S4 + HASHUPDATEPARITY + SHA1SU1 V5.S4, V6.S4 + + VADD V18.S4, V7.S4, V16.S4 + SHA1SU0 V5.S4, V4.S4, V7.S4 + HASHUPDATEPARITY + SHA1SU1 V6.S4, V7.S4 + + VADD V18.S4, V4.S4, V16.S4 + SHA1SU0 V6.S4, V5.S4, V4.S4 + HASHUPDATEPARITY + SHA1SU1 V7.S4, V4.S4 + + VADD V18.S4, V5.S4, V16.S4 + SHA1SU0 V7.S4, V6.S4, V5.S4 + HASHUPDATEPARITY + SHA1SU1 V4.S4, V5.S4 + + VDUP V19.S[3], V18.S4 + VADD V17.S4, V6.S4, V16.S4 + SHA1SU0 V4.S4, V7.S4, V6.S4 + HASHUPDATEMAJ + SHA1SU1 V5.S4, V6.S4 + + VADD V17.S4, V7.S4, V16.S4 + SHA1SU0 V5.S4, V4.S4, V7.S4 + HASHUPDATEMAJ + SHA1SU1 V6.S4, V7.S4 + + VADD V17.S4, V4.S4, V16.S4 + SHA1SU0 V6.S4, V5.S4, V4.S4 + HASHUPDATEMAJ + SHA1SU1 V7.S4, V4.S4 + + VADD V17.S4, V5.S4, V16.S4 + SHA1SU0 V7.S4, V6.S4, V5.S4 + HASHUPDATEMAJ + SHA1SU1 V4.S4, V5.S4 + + VADD V17.S4, V6.S4, V16.S4 + SHA1SU0 V4.S4, V7.S4, V6.S4 + HASHUPDATEMAJ + SHA1SU1 V5.S4, V6.S4 + + VADD V18.S4, V7.S4, V16.S4 + SHA1SU0 V5.S4, V4.S4, V7.S4 + HASHUPDATEPARITY + SHA1SU1 V6.S4, V7.S4 + + VADD V18.S4, V4.S4, V16.S4 + HASHUPDATEPARITY + + VADD V18.S4, V5.S4, V16.S4 + HASHUPDATEPARITY + + VADD V18.S4, V6.S4, V16.S4 + HASHUPDATEPARITY + + VADD V18.S4, V7.S4, V16.S4 + HASHUPDATEPARITY + + SUB $64, R3, R3 // message length - 64bytes, then compare with 64bytes + VADD V2.S4, V0.S4, V0.S4 + VADD V1.S4, V20.S4, V20.S4 + CBNZ R3, blockloop + +sha1ret: + + VST1.P [V0.S4], 16(R0) // store hash value H(dcba) + FMOVS F20, (R0) // store hash value H(e) + RET |