1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
|
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ppc64 ppc64le
#include "go_asm.h"
#include "textflag.h"
TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56
MOVD a_base+0(FP), R5
MOVD b_base+24(FP), R6
MOVD a_len+8(FP), R3
CMP R5,R6,CR7
MOVD b_len+32(FP), R4
MOVD $ret+48(FP), R7
CMP R3,R4,CR6
BEQ CR7,equal
#ifdef GOARCH_ppc64le
BR cmpbodyLE<>(SB)
#else
BR cmpbodyBE<>(SB)
#endif
equal:
BEQ CR6,done
MOVD $1, R8
BGT CR6,greater
NEG R8
greater:
MOVD R8, (R7)
RET
done:
MOVD $0, (R7)
RET
TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
MOVD a_base+0(FP), R5
MOVD b_base+16(FP), R6
MOVD a_len+8(FP), R3
CMP R5,R6,CR7
MOVD b_len+24(FP), R4
MOVD $ret+32(FP), R7
CMP R3,R4,CR6
BEQ CR7,equal
#ifdef GOARCH_ppc64le
BR cmpbodyLE<>(SB)
#else
BR cmpbodyBE<>(SB)
#endif
equal:
BEQ CR6,done
MOVD $1, R8
BGT CR6,greater
NEG R8
greater:
MOVD R8, (R7)
RET
done:
MOVD $0, (R7)
RET
// Do an efficient memcmp for ppc64le
// R3 = a len
// R4 = b len
// R5 = a addr
// R6 = b addr
// R7 = addr of return value
TEXT cmpbodyLE<>(SB),NOSPLIT|NOFRAME,$0-0
MOVD R3,R8 // set up length
CMP R3,R4,CR2 // unequal?
BC 12,8,setuplen // BLT CR2
MOVD R4,R8 // use R4 for comparison len
setuplen:
MOVD R8,CTR // set up loop counter
CMP R8,$8 // only optimize >=8
BLT simplecheck
DCBT (R5) // cache hint
DCBT (R6)
CMP R8,$32 // optimize >= 32
MOVD R8,R9
BLT setup8a // 8 byte moves only
setup32a:
SRADCC $5,R8,R9 // number of 32 byte chunks
MOVD R9,CTR
// Special processing for 32 bytes or longer.
// Loading this way is faster and correct as long as the
// doublewords being compared are equal. Once they
// are found unequal, reload them in proper byte order
// to determine greater or less than.
loop32a:
MOVD 0(R5),R9 // doublewords to compare
MOVD 0(R6),R10 // get 4 doublewords
MOVD 8(R5),R14
MOVD 8(R6),R15
CMPU R9,R10 // bytes equal?
MOVD $0,R16 // set up for cmpne
BNE cmpne // further compare for LT or GT
MOVD 16(R5),R9 // get next pair of doublewords
MOVD 16(R6),R10
CMPU R14,R15 // bytes match?
MOVD $8,R16 // set up for cmpne
BNE cmpne // further compare for LT or GT
MOVD 24(R5),R14 // get next pair of doublewords
MOVD 24(R6),R15
CMPU R9,R10 // bytes match?
MOVD $16,R16 // set up for cmpne
BNE cmpne // further compare for LT or GT
MOVD $-8,R16 // for cmpne, R5,R6 already inc by 32
ADD $32,R5 // bump up to next 32
ADD $32,R6
CMPU R14,R15 // bytes match?
BC 8,2,loop32a // br ctr and cr
BNE cmpne
ANDCC $24,R8,R9 // Any 8 byte chunks?
BEQ leftover // and result is 0
setup8a:
SRADCC $3,R9,R9 // get the 8 byte count
BEQ leftover // shifted value is 0
MOVD R9,CTR // loop count for doublewords
loop8:
MOVDBR (R5+R0),R9 // doublewords to compare
MOVDBR (R6+R0),R10 // LE compare order
ADD $8,R5
ADD $8,R6
CMPU R9,R10 // match?
BC 8,2,loop8 // bt ctr <> 0 && cr
BGT greater
BLT less
leftover:
ANDCC $7,R8,R9 // check for leftover bytes
MOVD R9,CTR // save the ctr
BNE simple // leftover bytes
BC 12,10,equal // test CR2 for length comparison
BC 12,8,less
BR greater
simplecheck:
CMP R8,$0 // remaining compare length 0
BNE simple // do simple compare
BC 12,10,equal // test CR2 for length comparison
BC 12,8,less // 1st len < 2nd len, result less
BR greater // 1st len > 2nd len must be greater
simple:
MOVBZ 0(R5), R9 // get byte from 1st operand
ADD $1,R5
MOVBZ 0(R6), R10 // get byte from 2nd operand
ADD $1,R6
CMPU R9, R10
BC 8,2,simple // bc ctr <> 0 && cr
BGT greater // 1st > 2nd
BLT less // 1st < 2nd
BC 12,10,equal // test CR2 for length comparison
BC 12,9,greater // 2nd len > 1st len
BR less // must be less
cmpne: // only here is not equal
MOVDBR (R5+R16),R8 // reload in reverse order
MOVDBR (R6+R16),R9
CMPU R8,R9 // compare correct endianness
BGT greater // here only if NE
less:
MOVD $-1,R3
MOVD R3,(R7) // return value if A < B
RET
equal:
MOVD $0,(R7) // return value if A == B
RET
greater:
MOVD $1,R3
MOVD R3,(R7) // return value if A > B
RET
// Do an efficient memcmp for ppc64 (BE)
// R3 = a len
// R4 = b len
// R5 = a addr
// R6 = b addr
// R7 = addr of return value
TEXT cmpbodyBE<>(SB),NOSPLIT|NOFRAME,$0-0
MOVD R3,R8 // set up length
CMP R3,R4,CR2 // unequal?
BC 12,8,setuplen // BLT CR2
MOVD R4,R8 // use R4 for comparison len
setuplen:
MOVD R8,CTR // set up loop counter
CMP R8,$8 // only optimize >=8
BLT simplecheck
DCBT (R5) // cache hint
DCBT (R6)
CMP R8,$32 // optimize >= 32
MOVD R8,R9
BLT setup8a // 8 byte moves only
setup32a:
SRADCC $5,R8,R9 // number of 32 byte chunks
MOVD R9,CTR
loop32a:
MOVD 0(R5),R9 // doublewords to compare
MOVD 0(R6),R10 // get 4 doublewords
MOVD 8(R5),R14
MOVD 8(R6),R15
CMPU R9,R10 // bytes equal?
BLT less // found to be less
BGT greater // found to be greater
MOVD 16(R5),R9 // get next pair of doublewords
MOVD 16(R6),R10
CMPU R14,R15 // bytes match?
BLT less // found less
BGT greater // found greater
MOVD 24(R5),R14 // get next pair of doublewords
MOVD 24(R6),R15
CMPU R9,R10 // bytes match?
BLT less // found to be less
BGT greater // found to be greater
ADD $32,R5 // bump up to next 32
ADD $32,R6
CMPU R14,R15 // bytes match?
BC 8,2,loop32a // br ctr and cr
BLT less // with BE, byte ordering is
BGT greater // good for compare
ANDCC $24,R8,R9 // Any 8 byte chunks?
BEQ leftover // and result is 0
setup8a:
SRADCC $3,R9,R9 // get the 8 byte count
BEQ leftover // shifted value is 0
MOVD R9,CTR // loop count for doublewords
loop8:
MOVD (R5),R9
MOVD (R6),R10
ADD $8,R5
ADD $8,R6
CMPU R9,R10 // match?
BC 8,2,loop8 // bt ctr <> 0 && cr
BGT greater
BLT less
leftover:
ANDCC $7,R8,R9 // check for leftover bytes
MOVD R9,CTR // save the ctr
BNE simple // leftover bytes
BC 12,10,equal // test CR2 for length comparison
BC 12,8,less
BR greater
simplecheck:
CMP R8,$0 // remaining compare length 0
BNE simple // do simple compare
BC 12,10,equal // test CR2 for length comparison
BC 12,8,less // 1st len < 2nd len, result less
BR greater // same len, must be equal
simple:
MOVBZ 0(R5),R9 // get byte from 1st operand
ADD $1,R5
MOVBZ 0(R6),R10 // get byte from 2nd operand
ADD $1,R6
CMPU R9,R10
BC 8,2,simple // bc ctr <> 0 && cr
BGT greater // 1st > 2nd
BLT less // 1st < 2nd
BC 12,10,equal // test CR2 for length comparison
BC 12,9,greater // 2nd len > 1st len
less:
MOVD $-1,R3
MOVD R3,(R7) // return value if A < B
RET
equal:
MOVD $0,(R7) // return value if A == B
RET
greater:
MOVD $1,R3
MOVD R3,(R7) // return value if A > B
RET
|