path: root/src/internal/bytealg/compare_riscv64.s
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "textflag.h"

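// func Compare(a, b []byte) int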
TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
	// X10 = a_base
	// X11 = a_len
	// X12 = a_cap (unused)
	// X13 = b_base (want in X12)
	// X14 = b_len (want in X13)
	// X15 = b_cap (unused)
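	//
	// compare<> expects a_base, a_len, b_base and b_len in X10-X13, the
	// same layout cmpstring receives, so move b_base and b_len down into
	// X12 and X13.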
	MOV	X13, X12
	MOV	X14, X13
	JMP	compare<>(SB)

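// func cmpstring(a, b string) int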
TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
	// X10 = a_base
	// X11 = a_len
	// X12 = b_base
	// X13 = b_len
	JMP	compare<>(SB)

// On entry:
// X10 points to start of a
// X11 length of a
// X12 points to start of b
// X13 length of b
// for non-regabi, X14 points to the address at which to store the return value (-1/0/1)
// for regabi, the return value is placed in X10
TEXT compare<>(SB),NOSPLIT|NOFRAME,$0
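	// If a and b share the same base address the compared bytes are
	// identical and only the lengths can differ.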
	BEQ	X10, X12, cmp_len

	MOV	X11, X5
	BGE	X13, X5, use_a_len // X5 = min(len(a), len(b))
	MOV	X13, X5
use_a_len:
	BEQZ	X5, cmp_len

	MOV	$32, X6
	BLT	X5, X6, check8_unaligned

	// Check alignment - if alignment differs we have to do one byte at a time.
	AND	$7, X10, X7
	AND	$7, X12, X8
	BNE	X7, X8, check8_unaligned
	BEQZ	X7, compare32

	// Check one byte at a time until we reach 8 byte alignment.
	SUB	X7, X0, X7
	ADD	$8, X7, X7
	SUB	X7, X5, X5
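	// X7 now holds 8 - (a_base & 7), the number of bytes needed to reach
	// 8 byte alignment, and that count has already been deducted from X5
	// so the word loops below only see the remaining length.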
align:
	ADD	$-1, X7
	MOVBU	0(X10), X8
	MOVBU	0(X12), X9
	BNE	X8, X9, cmp
	ADD	$1, X10
	ADD	$1, X12
	BNEZ	X7, align

check32:
	// X6 contains $32
	BLT	X5, X6, compare16
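	// Both pointers are 8 byte aligned: compare four 64-bit words per
	// iteration, branching to cmp8a/cmp8b to locate the first differing
	// byte as soon as a pair of words differs.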
compare32:
	MOV	0(X10), X15
	MOV	0(X12), X16
	MOV	8(X10), X17
	MOV	8(X12), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	MOV	16(X10), X15
	MOV	16(X12), X16
	MOV	24(X10), X17
	MOV	24(X12), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	ADD	$32, X10
	ADD	$32, X12
	ADD	$-32, X5
	BGE	X5, X6, compare32
	BEQZ	X5, cmp_len

check16:
	MOV	$16, X6
	BLT	X5, X6, check8_unaligned
compare16:
	MOV	0(X10), X15
	MOV	0(X12), X16
	MOV	8(X10), X17
	MOV	8(X12), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	ADD	$16, X10
	ADD	$16, X12
	ADD	$-16, X5
	BEQZ	X5, cmp_len

check8_unaligned:
	MOV	$8, X6
	BLT	X5, X6, check4_unaligned
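	// Byte-at-a-time comparison, 8 bytes per iteration, used when the
	// pointers are not equally aligned or only a short tail remains.
	// All sixteen loads are issued before any comparison, presumably so
	// they can overlap in the pipeline.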
compare8_unaligned:
	MOVBU	0(X10), X8
	MOVBU	1(X10), X15
	MOVBU	2(X10), X17
	MOVBU	3(X10), X19
	MOVBU	4(X10), X21
	MOVBU	5(X10), X23
	MOVBU	6(X10), X25
	MOVBU	7(X10), X29
	MOVBU	0(X12), X9
	MOVBU	1(X12), X16
	MOVBU	2(X12), X18
	MOVBU	3(X12), X20
	MOVBU	4(X12), X22
	MOVBU	5(X12), X24
	MOVBU	6(X12), X28
	MOVBU	7(X12), X30
	BNE	X8, X9, cmp1a
	BNE	X15, X16, cmp1b
	BNE	X17, X18, cmp1c
	BNE	X19, X20, cmp1d
	BNE	X21, X22, cmp1e
	BNE	X23, X24, cmp1f
	BNE	X25, X28, cmp1g
	BNE	X29, X30, cmp1h
	ADD	$8, X10
	ADD	$8, X12
	ADD	$-8, X5
	BGE	X5, X6, compare8_unaligned
	BEQZ	X5, cmp_len

check4_unaligned:
	MOV	$4, X6
	BLT	X5, X6, compare1
compare4_unaligned:
	MOVBU	0(X10), X8
	MOVBU	1(X10), X15
	MOVBU	2(X10), X17
	MOVBU	3(X10), X19
	MOVBU	0(X12), X9
	MOVBU	1(X12), X16
	MOVBU	2(X12), X18
	MOVBU	3(X12), X20
	BNE	X8, X9, cmp1a
	BNE	X15, X16, cmp1b
	BNE	X17, X18, cmp1c
	BNE	X19, X20, cmp1d
	ADD	$4, X10
	ADD	$4, X12
	ADD	$-4, X5
	BGE	X5, X6, compare4_unaligned

compare1:
	BEQZ	X5, cmp_len
	MOVBU	0(X10), X8
	MOVBU	0(X12), X9
	BNE	X8, X9, cmp
	ADD	$1, X10
	ADD	$1, X12
	ADD	$-1, X5
	JMP	compare1

	// Compare 8 bytes of memory in X15/X16 that are known to differ.
cmp8a:
	MOV	X15, X17
	MOV	X16, X18

	// Compare 8 bytes of memory in X17/X18 that are known to differ.
cmp8b:
	MOV	$0xff, X19
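	// RISC-V is little endian, so the least significant byte of each
	// word is the byte at the lowest address. Walk a one-byte mask
	// upwards until it isolates the first pair of differing bytes.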
cmp8_loop:
	AND	X17, X19, X8
	AND	X18, X19, X9
	BNE	X8, X9, cmp
	SLLI	$8, X19
	JMP	cmp8_loop

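	// Each cmp1x label below converts one differing pair of values into
	// the X5 (less than) / X6 (greater than) pair consumed by cmp_ret.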
cmp1a:
	SLTU	X9, X8, X5
	SLTU	X8, X9, X6
	JMP	cmp_ret
cmp1b:
	SLTU	X16, X15, X5
	SLTU	X15, X16, X6
	JMP	cmp_ret
cmp1c:
	SLTU	X18, X17, X5
	SLTU	X17, X18, X6
	JMP	cmp_ret
cmp1d:
	SLTU	X20, X19, X5
	SLTU	X19, X20, X6
	JMP	cmp_ret
cmp1e:
	SLTU	X22, X21, X5
	SLTU	X21, X22, X6
	JMP	cmp_ret
cmp1f:
	SLTU	X24, X23, X5
	SLTU	X23, X24, X6
	JMP	cmp_ret
cmp1g:
	SLTU	X28, X25, X5
	SLTU	X25, X28, X6
	JMP	cmp_ret
cmp1h:
	SLTU	X30, X29, X5
	SLTU	X29, X30, X6
	JMP	cmp_ret

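	// The common prefix is identical, so the result is decided by the
	// lengths, handled exactly like a differing pair of values.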
cmp_len:
	MOV	X11, X8
	MOV	X13, X9
cmp:
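	// X5 = 1 if the value from a is below the value from b, X6 = 1 for
	// the opposite; cmp_ret turns this into -1, 0 or +1 via X6 - X5.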
	SLTU	X9, X8, X5
	SLTU	X8, X9, X6
cmp_ret:
	SUB	X5, X6, X10
	RET