1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
|
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//
// ARM version of md5block.go
#include "textflag.h"
// Register definitions
//
// Symbolic names for the ARM registers used by ·block. Besides the roles
// listed below, Rtable and Rc0 are borrowed as scratch when the memmove
// arguments are set up for unaligned input, Rc0 briefly holds the digest
// pointer when the state is loaded, and Rc0..Rc3 hold the previous digest
// words when the state is written back at the end of each chunk.
#define Rtable R0 // Pointer into the MD5 constants table; advanced by each 4-word constant load
#define Rdata R1 // Pointer to the 64-byte chunk being hashed (source data or aligned copy)
#define Ra R2 // MD5 accumulator a
#define Rb R3 // MD5 accumulator b
#define Rc R4 // MD5 accumulator c
#define Rd R5 // MD5 accumulator d
#define Rc0 R6 // MD5 constant, 1st of the current group of four
#define Rc1 R7 // MD5 constant, 2nd of the current group of four
#define Rc2 R8 // MD5 constant, 3rd of the current group of four
// r9, r10 are forbidden
// r11 is OK provided you verify that the assembler emits no synthetic instructions that use it
#define Rc3 R11 // MD5 constant, 4th of the current group of four
#define Rt0 R12 // temporary; overwritten by every ROUNDn step
#define Rt1 R14 // temporary; receives the message word X[index] in every ROUNDn step
// func block(dig *digest, p []byte)
// 0(FP) is *digest
// 4(FP) is p.array (struct Slice)
// 8(FP) is p.len
//12(FP) is p.cap
//
// Stack frame
//
// The 84-byte frame declared on the TEXT line is laid out as
// 8 bytes of saved pointers + 64 bytes of buffer + 12 bytes of outgoing
// arguments. The two pointers live in the frame because the memmove call
// corrupts all registers.
#define p_end end-4(SP) // pointer to the end of data
#define p_data data-8(SP) // current data pointer (start of the chunk being hashed)
#define buf buffer-(8+4*16)(SP) // 16-word temporary buffer, target of the memmove for unaligned chunks
// 3 words at 4..12(R13) for called routine parameters
// block folds every complete 64-byte chunk of p into the MD5 state at dig.
//
// For each chunk the four state words are loaded from dig, run through the
// 64 MD5 steps (four rounds of sixteen), added to the previous state and
// stored back to dig. A chunk whose address is not 4-byte aligned is first
// copied into the stack buffer buf with runtime·memmove, and the rounds
// then read their message words from that copy.
//
// Only whole chunks are consumed: the byte count is rounded down to a
// multiple of 64, and when that leaves nothing the function returns without
// reading p or touching dig. Without this the do-while shaped loop below
// would hash one chunk for an empty slice and a trailing partial chunk for
// any other length, reading past the end of p.
//
// Registers: see the Rxxx definitions; Rt0 and Rt1 are clobbered by every
// step, and the memmove call corrupts all registers, which is why the data
// pointers are kept in the frame (p_data, p_end).
TEXT ·block(SB), NOSPLIT, $84-16
	MOVW	p+4(FP), Rdata	// pointer to the data
	MOVW	p_len+8(FP), Rt0	// number of bytes
	BIC	$63, Rt0	// round down to whole 64-byte chunks
	CMP	$0, Rt0
	BEQ	done	// no complete chunk - leave the digest untouched
	ADD	Rdata, Rt0
	MOVW	Rt0, p_end	// pointer to end of data (end of the last whole chunk)

loop:
	MOVW	Rdata, p_data	// Save Rdata
	AND.S	$3, Rdata, Rt0	// TST $3, Rdata not working see issue 5921
	BEQ	aligned	// aligned detected - skip copy

	// Copy the unaligned source data into the aligned temporary buffer
	// memmove(to=4(R13), from=8(R13), n=12(R13)) - Corrupts all registers
	MOVW	$buf, Rtable	// to
	MOVW	$64, Rc0	// n
	MOVM.IB	[Rtable,Rdata,Rc0], (R13)	// store to, from, n at 4, 8, 12(R13)
	BL	runtime·memmove(SB)

	// Point to the local aligned copy of the data
	MOVW	$buf, Rdata

aligned:
	// Point to the table of constants
	// A PC relative add would be cheaper than this
	MOVW	$·table(SB), Rtable

	// Load up initial MD5 accumulator
	MOVW	dig+0(FP), Rc0
	MOVM.IA	(Rc0), [Ra,Rb,Rc,Rd]

	// In the ROUNDn macros the parameters Ra..Rd shadow the register names
	// above, so each call site can rotate which register plays a, b, c, d.
	// Every group of four steps is preceded by a MOVM.IA.W that loads the
	// next four constants into Rc0..Rc3 and advances Rtable past them.

// a += (((c^d)&b)^d) + X[index] + const
// a = a<<shift | a>>(32-shift) + b
// (the rotate-left by shift is done as a rotate-right by 32-shift)
#define ROUND1(Ra, Rb, Rc, Rd, index, shift, Rconst) \
	EOR	Rc, Rd, Rt0 ; \
	AND	Rb, Rt0 ; \
	EOR	Rd, Rt0 ; \
	MOVW	(index<<2)(Rdata), Rt1 ; \
	ADD	Rt1, Rt0 ; \
	ADD	Rconst, Rt0 ; \
	ADD	Rt0, Ra ; \
	ADD	Ra@>(32-shift), Rb, Ra ;

	MOVM.IA.W	(Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND1(Ra, Rb, Rc, Rd, 0, 7, Rc0)
	ROUND1(Rd, Ra, Rb, Rc, 1, 12, Rc1)
	ROUND1(Rc, Rd, Ra, Rb, 2, 17, Rc2)
	ROUND1(Rb, Rc, Rd, Ra, 3, 22, Rc3)

	MOVM.IA.W	(Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND1(Ra, Rb, Rc, Rd, 4, 7, Rc0)
	ROUND1(Rd, Ra, Rb, Rc, 5, 12, Rc1)
	ROUND1(Rc, Rd, Ra, Rb, 6, 17, Rc2)
	ROUND1(Rb, Rc, Rd, Ra, 7, 22, Rc3)

	MOVM.IA.W	(Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND1(Ra, Rb, Rc, Rd, 8, 7, Rc0)
	ROUND1(Rd, Ra, Rb, Rc, 9, 12, Rc1)
	ROUND1(Rc, Rd, Ra, Rb, 10, 17, Rc2)
	ROUND1(Rb, Rc, Rd, Ra, 11, 22, Rc3)

	MOVM.IA.W	(Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND1(Ra, Rb, Rc, Rd, 12, 7, Rc0)
	ROUND1(Rd, Ra, Rb, Rc, 13, 12, Rc1)
	ROUND1(Rc, Rd, Ra, Rb, 14, 17, Rc2)
	ROUND1(Rb, Rc, Rd, Ra, 15, 22, Rc3)

// a += (((b^c)&d)^c) + X[index] + const
// a = a<<shift | a>>(32-shift) + b
#define ROUND2(Ra, Rb, Rc, Rd, index, shift, Rconst) \
	EOR	Rb, Rc, Rt0 ; \
	AND	Rd, Rt0 ; \
	EOR	Rc, Rt0 ; \
	MOVW	(index<<2)(Rdata), Rt1 ; \
	ADD	Rt1, Rt0 ; \
	ADD	Rconst, Rt0 ; \
	ADD	Rt0, Ra ; \
	ADD	Ra@>(32-shift), Rb, Ra ;

	MOVM.IA.W	(Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND2(Ra, Rb, Rc, Rd, 1, 5, Rc0)
	ROUND2(Rd, Ra, Rb, Rc, 6, 9, Rc1)
	ROUND2(Rc, Rd, Ra, Rb, 11, 14, Rc2)
	ROUND2(Rb, Rc, Rd, Ra, 0, 20, Rc3)

	MOVM.IA.W	(Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND2(Ra, Rb, Rc, Rd, 5, 5, Rc0)
	ROUND2(Rd, Ra, Rb, Rc, 10, 9, Rc1)
	ROUND2(Rc, Rd, Ra, Rb, 15, 14, Rc2)
	ROUND2(Rb, Rc, Rd, Ra, 4, 20, Rc3)

	MOVM.IA.W	(Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND2(Ra, Rb, Rc, Rd, 9, 5, Rc0)
	ROUND2(Rd, Ra, Rb, Rc, 14, 9, Rc1)
	ROUND2(Rc, Rd, Ra, Rb, 3, 14, Rc2)
	ROUND2(Rb, Rc, Rd, Ra, 8, 20, Rc3)

	MOVM.IA.W	(Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND2(Ra, Rb, Rc, Rd, 13, 5, Rc0)
	ROUND2(Rd, Ra, Rb, Rc, 2, 9, Rc1)
	ROUND2(Rc, Rd, Ra, Rb, 7, 14, Rc2)
	ROUND2(Rb, Rc, Rd, Ra, 12, 20, Rc3)

// a += (b^c^d) + X[index] + const
// a = a<<shift | a>>(32-shift) + b
#define ROUND3(Ra, Rb, Rc, Rd, index, shift, Rconst) \
	EOR	Rb, Rc, Rt0 ; \
	EOR	Rd, Rt0 ; \
	MOVW	(index<<2)(Rdata), Rt1 ; \
	ADD	Rt1, Rt0 ; \
	ADD	Rconst, Rt0 ; \
	ADD	Rt0, Ra ; \
	ADD	Ra@>(32-shift), Rb, Ra ;

	MOVM.IA.W	(Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND3(Ra, Rb, Rc, Rd, 5, 4, Rc0)
	ROUND3(Rd, Ra, Rb, Rc, 8, 11, Rc1)
	ROUND3(Rc, Rd, Ra, Rb, 11, 16, Rc2)
	ROUND3(Rb, Rc, Rd, Ra, 14, 23, Rc3)

	MOVM.IA.W	(Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND3(Ra, Rb, Rc, Rd, 1, 4, Rc0)
	ROUND3(Rd, Ra, Rb, Rc, 4, 11, Rc1)
	ROUND3(Rc, Rd, Ra, Rb, 7, 16, Rc2)
	ROUND3(Rb, Rc, Rd, Ra, 10, 23, Rc3)

	MOVM.IA.W	(Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND3(Ra, Rb, Rc, Rd, 13, 4, Rc0)
	ROUND3(Rd, Ra, Rb, Rc, 0, 11, Rc1)
	ROUND3(Rc, Rd, Ra, Rb, 3, 16, Rc2)
	ROUND3(Rb, Rc, Rd, Ra, 6, 23, Rc3)

	MOVM.IA.W	(Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND3(Ra, Rb, Rc, Rd, 9, 4, Rc0)
	ROUND3(Rd, Ra, Rb, Rc, 12, 11, Rc1)
	ROUND3(Rc, Rd, Ra, Rb, 15, 16, Rc2)
	ROUND3(Rb, Rc, Rd, Ra, 2, 23, Rc3)

// a += (c^(b|^d)) + X[index] + const
// a = a<<shift | a>>(32-shift) + b
#define ROUND4(Ra, Rb, Rc, Rd, index, shift, Rconst) \
	MVN	Rd, Rt0 ; \
	ORR	Rb, Rt0 ; \
	EOR	Rc, Rt0 ; \
	MOVW	(index<<2)(Rdata), Rt1 ; \
	ADD	Rt1, Rt0 ; \
	ADD	Rconst, Rt0 ; \
	ADD	Rt0, Ra ; \
	ADD	Ra@>(32-shift), Rb, Ra ;

	MOVM.IA.W	(Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND4(Ra, Rb, Rc, Rd, 0, 6, Rc0)
	ROUND4(Rd, Ra, Rb, Rc, 7, 10, Rc1)
	ROUND4(Rc, Rd, Ra, Rb, 14, 15, Rc2)
	ROUND4(Rb, Rc, Rd, Ra, 5, 21, Rc3)

	MOVM.IA.W	(Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND4(Ra, Rb, Rc, Rd, 12, 6, Rc0)
	ROUND4(Rd, Ra, Rb, Rc, 3, 10, Rc1)
	ROUND4(Rc, Rd, Ra, Rb, 10, 15, Rc2)
	ROUND4(Rb, Rc, Rd, Ra, 1, 21, Rc3)

	MOVM.IA.W	(Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND4(Ra, Rb, Rc, Rd, 8, 6, Rc0)
	ROUND4(Rd, Ra, Rb, Rc, 15, 10, Rc1)
	ROUND4(Rc, Rd, Ra, Rb, 6, 15, Rc2)
	ROUND4(Rb, Rc, Rd, Ra, 13, 21, Rc3)

	MOVM.IA.W	(Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND4(Ra, Rb, Rc, Rd, 4, 6, Rc0)
	ROUND4(Rd, Ra, Rb, Rc, 11, 10, Rc1)
	ROUND4(Rc, Rd, Ra, Rb, 2, 15, Rc2)
	ROUND4(Rb, Rc, Rd, Ra, 9, 21, Rc3)

	// Add the state this chunk started from (still stored in dig) to the
	// freshly computed accumulators and write the sum back to dig.
	MOVW	dig+0(FP), Rt0
	MOVM.IA	(Rt0), [Rc0,Rc1,Rc2,Rc3]

	ADD	Rc0, Ra
	ADD	Rc1, Rb
	ADD	Rc2, Rc
	ADD	Rc3, Rd

	MOVM.IA	[Ra,Rb,Rc,Rd], (Rt0)

	// Advance from the saved source pointer (Rdata may point at buf) and
	// continue while another whole chunk remains.
	MOVW	p_data, Rdata
	MOVW	p_end, Rt0
	ADD	$64, Rdata
	CMP	Rt0, Rdata
	BLO	loop

done:
	RET
// ·table: the 64 additive MD5 step constants, stored as consecutive 32-bit
// words in the order the steps consume them. ·block reads the table front
// to back, four words per load, so entry order must match step order.
// The values are the T[1..64] constants of RFC 1321.

// Round 1 (table words 0-15)
DATA	·table+0x00(SB)/4, $0xd76aa478
DATA	·table+0x04(SB)/4, $0xe8c7b756
DATA	·table+0x08(SB)/4, $0x242070db
DATA	·table+0x0c(SB)/4, $0xc1bdceee
DATA	·table+0x10(SB)/4, $0xf57c0faf
DATA	·table+0x14(SB)/4, $0x4787c62a
DATA	·table+0x18(SB)/4, $0xa8304613
DATA	·table+0x1c(SB)/4, $0xfd469501
DATA	·table+0x20(SB)/4, $0x698098d8
DATA	·table+0x24(SB)/4, $0x8b44f7af
DATA	·table+0x28(SB)/4, $0xffff5bb1
DATA	·table+0x2c(SB)/4, $0x895cd7be
DATA	·table+0x30(SB)/4, $0x6b901122
DATA	·table+0x34(SB)/4, $0xfd987193
DATA	·table+0x38(SB)/4, $0xa679438e
DATA	·table+0x3c(SB)/4, $0x49b40821

// Round 2 (table words 16-31)
DATA	·table+0x40(SB)/4, $0xf61e2562
DATA	·table+0x44(SB)/4, $0xc040b340
DATA	·table+0x48(SB)/4, $0x265e5a51
DATA	·table+0x4c(SB)/4, $0xe9b6c7aa
DATA	·table+0x50(SB)/4, $0xd62f105d
DATA	·table+0x54(SB)/4, $0x02441453
DATA	·table+0x58(SB)/4, $0xd8a1e681
DATA	·table+0x5c(SB)/4, $0xe7d3fbc8
DATA	·table+0x60(SB)/4, $0x21e1cde6
DATA	·table+0x64(SB)/4, $0xc33707d6
DATA	·table+0x68(SB)/4, $0xf4d50d87
DATA	·table+0x6c(SB)/4, $0x455a14ed
DATA	·table+0x70(SB)/4, $0xa9e3e905
DATA	·table+0x74(SB)/4, $0xfcefa3f8
DATA	·table+0x78(SB)/4, $0x676f02d9
DATA	·table+0x7c(SB)/4, $0x8d2a4c8a

// Round 3 (table words 32-47)
DATA	·table+0x80(SB)/4, $0xfffa3942
DATA	·table+0x84(SB)/4, $0x8771f681
DATA	·table+0x88(SB)/4, $0x6d9d6122
DATA	·table+0x8c(SB)/4, $0xfde5380c
DATA	·table+0x90(SB)/4, $0xa4beea44
DATA	·table+0x94(SB)/4, $0x4bdecfa9
DATA	·table+0x98(SB)/4, $0xf6bb4b60
DATA	·table+0x9c(SB)/4, $0xbebfbc70
DATA	·table+0xa0(SB)/4, $0x289b7ec6
DATA	·table+0xa4(SB)/4, $0xeaa127fa
DATA	·table+0xa8(SB)/4, $0xd4ef3085
DATA	·table+0xac(SB)/4, $0x04881d05
DATA	·table+0xb0(SB)/4, $0xd9d4d039
DATA	·table+0xb4(SB)/4, $0xe6db99e5
DATA	·table+0xb8(SB)/4, $0x1fa27cf8
DATA	·table+0xbc(SB)/4, $0xc4ac5665

// Round 4 (table words 48-63)
DATA	·table+0xc0(SB)/4, $0xf4292244
DATA	·table+0xc4(SB)/4, $0x432aff97
DATA	·table+0xc8(SB)/4, $0xab9423a7
DATA	·table+0xcc(SB)/4, $0xfc93a039
DATA	·table+0xd0(SB)/4, $0x655b59c3
DATA	·table+0xd4(SB)/4, $0x8f0ccc92
DATA	·table+0xd8(SB)/4, $0xffeff47d
DATA	·table+0xdc(SB)/4, $0x85845dd1
DATA	·table+0xe0(SB)/4, $0x6fa87e4f
DATA	·table+0xe4(SB)/4, $0xfe2ce6e0
DATA	·table+0xe8(SB)/4, $0xa3014314
DATA	·table+0xec(SB)/4, $0x4e0811a1
DATA	·table+0xf0(SB)/4, $0xf7537e82
DATA	·table+0xf4(SB)/4, $0xbd3af235
DATA	·table+0xf8(SB)/4, $0x2ad7d2bb
DATA	·table+0xfc(SB)/4, $0xeb86d391

// Global definition: 64 words * 4 bytes = 256 bytes.
// RODATA comes from textflag.h and has the value 8.
GLOBL	·table(SB), RODATA, $256
|