summaryrefslogtreecommitdiffstats
path: root/src/crypto/sha1/sha1block_arm.s
diff options
context:
space:
mode:
Diffstat (limited to 'src/crypto/sha1/sha1block_arm.s')
-rw-r--r--src/crypto/sha1/sha1block_arm.s217
1 files changed, 217 insertions, 0 deletions
diff --git a/src/crypto/sha1/sha1block_arm.s b/src/crypto/sha1/sha1block_arm.s
new file mode 100644
index 0000000..2236533
--- /dev/null
+++ b/src/crypto/sha1/sha1block_arm.s
@@ -0,0 +1,217 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// ARM version of md5block.go
+
+#include "textflag.h"
+
+// SHA-1 block routine. See sha1block.go for Go equivalent.
+//
+// There are 80 rounds of 4 types:
+// - rounds 0-15 are type 1 and load data (ROUND1 macro).
+// - rounds 16-19 are type 1 and do not load data (ROUND1x macro).
+// - rounds 20-39 are type 2 and do not load data (ROUND2 macro).
+// - rounds 40-59 are type 3 and do not load data (ROUND3 macro).
+// - rounds 60-79 are type 4 and do not load data (ROUND4 macro).
+//
+// Each round loads or shuffles the data, then computes a per-round
+// function of b, c, d, and then mixes the result into and rotates the
+// five registers a, b, c, d, e holding the intermediate results.
+//
+// The register rotation is implemented by rotating the arguments to
+// the round macros instead of by explicit move instructions.
+
+// Register definitions
+#define Rdata R0 // Pointer to incoming data
+#define Rconst R1 // Current constant for SHA round
+#define Ra R2 // SHA-1 accumulator
+#define Rb R3 // SHA-1 accumulator
+#define Rc R4 // SHA-1 accumulator
+#define Rd R5 // SHA-1 accumulator
+#define Re R6 // SHA-1 accumulator
+#define Rt0 R7 // Temporary
+#define Rt1 R8 // Temporary
+// r9, r10 are forbidden
+// r11 is OK provided you check the assembler that no synthetic instructions use it
+#define Rt2 R11 // Temporary
+#define Rctr R12 // loop counter
+#define Rw R14 // point to w buffer
+
+// func block(dig *digest, p []byte)
+// 0(FP) is *digest
+// 4(FP) is p.array (struct Slice)
+// 8(FP) is p.len
+//12(FP) is p.cap
+//
+// Stack frame
+#define p_end end-4(SP) // pointer to the end of data
+#define p_data data-8(SP) // current data pointer (unused?)
+#define w_buf buf-(8+4*80)(SP) //80 words temporary buffer w uint32[80]
+#define saved abcde-(8+4*80+4*5)(SP) // saved sha1 registers a,b,c,d,e - these must be last (unused?)
+// Total size +4 for saved LR is 352
+
+ // w[i] = p[j]<<24 | p[j+1]<<16 | p[j+2]<<8 | p[j+3]
+ // e += w[i]
+#define LOAD(Re) \
+ MOVBU 2(Rdata), Rt0 ; \
+ MOVBU 3(Rdata), Rt1 ; \
+ MOVBU 1(Rdata), Rt2 ; \
+ ORR Rt0<<8, Rt1, Rt0 ; \
+ MOVBU.P 4(Rdata), Rt1 ; \
+ ORR Rt2<<16, Rt0, Rt0 ; \
+ ORR Rt1<<24, Rt0, Rt0 ; \
+ MOVW.P Rt0, 4(Rw) ; \
+ ADD Rt0, Re, Re
+
+ // tmp := w[(i-3)&0xf] ^ w[(i-8)&0xf] ^ w[(i-14)&0xf] ^ w[(i)&0xf]
+ // w[i&0xf] = tmp<<1 | tmp>>(32-1)
+ // e += w[i&0xf]
+#define SHUFFLE(Re) \
+ MOVW (-16*4)(Rw), Rt0 ; \
+ MOVW (-14*4)(Rw), Rt1 ; \
+ MOVW (-8*4)(Rw), Rt2 ; \
+ EOR Rt0, Rt1, Rt0 ; \
+ MOVW (-3*4)(Rw), Rt1 ; \
+ EOR Rt2, Rt0, Rt0 ; \
+ EOR Rt0, Rt1, Rt0 ; \
+ MOVW Rt0@>(32-1), Rt0 ; \
+ MOVW.P Rt0, 4(Rw) ; \
+ ADD Rt0, Re, Re
+
+ // t1 = (b & c) | ((~b) & d)
+#define FUNC1(Ra, Rb, Rc, Rd, Re) \
+ MVN Rb, Rt1 ; \
+ AND Rb, Rc, Rt0 ; \
+ AND Rd, Rt1, Rt1 ; \
+ ORR Rt0, Rt1, Rt1
+
+ // t1 = b ^ c ^ d
+#define FUNC2(Ra, Rb, Rc, Rd, Re) \
+ EOR Rb, Rc, Rt1 ; \
+ EOR Rd, Rt1, Rt1
+
+ // t1 = (b & c) | (b & d) | (c & d) =
+ // t1 = (b & c) | ((b | c) & d)
+#define FUNC3(Ra, Rb, Rc, Rd, Re) \
+ ORR Rb, Rc, Rt0 ; \
+ AND Rb, Rc, Rt1 ; \
+ AND Rd, Rt0, Rt0 ; \
+ ORR Rt0, Rt1, Rt1
+
+#define FUNC4 FUNC2
+
+ // a5 := a<<5 | a>>(32-5)
+ // b = b<<30 | b>>(32-30)
+ // e = a5 + t1 + e + const
+#define MIX(Ra, Rb, Rc, Rd, Re) \
+ ADD Rt1, Re, Re ; \
+ MOVW Rb@>(32-30), Rb ; \
+ ADD Ra@>(32-5), Re, Re ; \
+ ADD Rconst, Re, Re
+
+#define ROUND1(Ra, Rb, Rc, Rd, Re) \
+ LOAD(Re) ; \
+ FUNC1(Ra, Rb, Rc, Rd, Re) ; \
+ MIX(Ra, Rb, Rc, Rd, Re)
+
+#define ROUND1x(Ra, Rb, Rc, Rd, Re) \
+ SHUFFLE(Re) ; \
+ FUNC1(Ra, Rb, Rc, Rd, Re) ; \
+ MIX(Ra, Rb, Rc, Rd, Re)
+
+#define ROUND2(Ra, Rb, Rc, Rd, Re) \
+ SHUFFLE(Re) ; \
+ FUNC2(Ra, Rb, Rc, Rd, Re) ; \
+ MIX(Ra, Rb, Rc, Rd, Re)
+
+#define ROUND3(Ra, Rb, Rc, Rd, Re) \
+ SHUFFLE(Re) ; \
+ FUNC3(Ra, Rb, Rc, Rd, Re) ; \
+ MIX(Ra, Rb, Rc, Rd, Re)
+
+#define ROUND4(Ra, Rb, Rc, Rd, Re) \
+ SHUFFLE(Re) ; \
+ FUNC4(Ra, Rb, Rc, Rd, Re) ; \
+ MIX(Ra, Rb, Rc, Rd, Re)
+
+
+// func block(dig *digest, p []byte)
+TEXT ·block(SB), 0, $352-16
+ MOVW p+4(FP), Rdata // pointer to the data
+ MOVW p_len+8(FP), Rt0 // number of bytes
+ ADD Rdata, Rt0
+ MOVW Rt0, p_end // pointer to end of data
+
+ // Load up initial SHA-1 accumulator
+ MOVW dig+0(FP), Rt0
+ MOVM.IA (Rt0), [Ra,Rb,Rc,Rd,Re]
+
+loop:
+ // Save registers at SP+4 onwards
+ MOVM.IB [Ra,Rb,Rc,Rd,Re], (R13)
+
+ MOVW $w_buf, Rw
+ MOVW $0x5A827999, Rconst
+ MOVW $3, Rctr
+loop1: ROUND1(Ra, Rb, Rc, Rd, Re)
+ ROUND1(Re, Ra, Rb, Rc, Rd)
+ ROUND1(Rd, Re, Ra, Rb, Rc)
+ ROUND1(Rc, Rd, Re, Ra, Rb)
+ ROUND1(Rb, Rc, Rd, Re, Ra)
+ SUB.S $1, Rctr
+ BNE loop1
+
+ ROUND1(Ra, Rb, Rc, Rd, Re)
+ ROUND1x(Re, Ra, Rb, Rc, Rd)
+ ROUND1x(Rd, Re, Ra, Rb, Rc)
+ ROUND1x(Rc, Rd, Re, Ra, Rb)
+ ROUND1x(Rb, Rc, Rd, Re, Ra)
+
+ MOVW $0x6ED9EBA1, Rconst
+ MOVW $4, Rctr
+loop2: ROUND2(Ra, Rb, Rc, Rd, Re)
+ ROUND2(Re, Ra, Rb, Rc, Rd)
+ ROUND2(Rd, Re, Ra, Rb, Rc)
+ ROUND2(Rc, Rd, Re, Ra, Rb)
+ ROUND2(Rb, Rc, Rd, Re, Ra)
+ SUB.S $1, Rctr
+ BNE loop2
+
+ MOVW $0x8F1BBCDC, Rconst
+ MOVW $4, Rctr
+loop3: ROUND3(Ra, Rb, Rc, Rd, Re)
+ ROUND3(Re, Ra, Rb, Rc, Rd)
+ ROUND3(Rd, Re, Ra, Rb, Rc)
+ ROUND3(Rc, Rd, Re, Ra, Rb)
+ ROUND3(Rb, Rc, Rd, Re, Ra)
+ SUB.S $1, Rctr
+ BNE loop3
+
+ MOVW $0xCA62C1D6, Rconst
+ MOVW $4, Rctr
+loop4: ROUND4(Ra, Rb, Rc, Rd, Re)
+ ROUND4(Re, Ra, Rb, Rc, Rd)
+ ROUND4(Rd, Re, Ra, Rb, Rc)
+ ROUND4(Rc, Rd, Re, Ra, Rb)
+ ROUND4(Rb, Rc, Rd, Re, Ra)
+ SUB.S $1, Rctr
+ BNE loop4
+
+ // Accumulate - restoring registers from SP+4
+ MOVM.IB (R13), [Rt0,Rt1,Rt2,Rctr,Rw]
+ ADD Rt0, Ra
+ ADD Rt1, Rb
+ ADD Rt2, Rc
+ ADD Rctr, Rd
+ ADD Rw, Re
+
+ MOVW p_end, Rt0
+ CMP Rt0, Rdata
+ BLO loop
+
+ // Save final SHA-1 accumulator
+ MOVW dig+0(FP), Rt0
+ MOVM.IA [Ra,Rb,Rc,Rd,Re], (Rt0)
+
+ RET