summaryrefslogtreecommitdiffstats
path: root/src/runtime/memmove_mipsx.s
blob: 6c86558f8d2a1a47a99c90bad47a4a7615cb9246 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build mips mipsle

#include "textflag.h"

// MOVWHI/MOVWLO pair up to perform unaligned word loads and stores.
// They map onto the hardware partial-word instructions (LWL/LWR, SWL/SWR),
// whose roles swap with endianness. Throughout this file MOVWHI is given
// the address of the word's first (lowest-addressed) byte and MOVWLO the
// address of its last byte (offsets 0/+3 forward, -4/-1 backward).
#ifdef GOARCH_mips
#define MOVWHI  MOVWL
#define MOVWLO  MOVWR
#else
#define MOVWHI  MOVWR
#define MOVWLO  MOVWL
#endif

// See memmove Go doc for important implementation constraints.

// func memmove(to, from unsafe.Pointer, n uintptr)
//
// Register use throughout:
//	R1 = to (destination cursor)    R2 = from (source cursor)
//	R3 = n                          R4 = from+n, R5 = to+n (end pointers)
//	R6, R7 = scratch / loop-stop pointers
//	R8-R15 = data in flight
TEXT runtime·memmove(SB),NOSPLIT,$-0-12
	MOVW	n+8(FP), R3
	MOVW	from+4(FP), R2
	MOVW	to+0(FP), R1

	ADDU	R3, R2, R4	// end pointer for source
	ADDU	R3, R1, R5	// end pointer for destination

	// if destination is ahead of source, start at the end of the buffer and go backward.
	SGTU	R1, R2, R6	// R6 = 1 iff to > from
	BNE	R6, backward

	// if less than 4 bytes, use byte by byte copying
	SGTU	$4, R3, R6	// R6 = 1 iff n < 4
	BNE	R6, f_small_copy

	// align destination to 4 bytes
	AND	$3, R1, R6
	BEQ	R6, f_dest_aligned
	SUBU	R1, R0, R6	// R6 = -to
	AND	$3, R6		// R6 = (-to) & 3 = leading byte count to reach alignment (1..3)
	MOVWHI	0(R2), R7	// unaligned load of from[0..3]; the pointer and
	SUBU	R6, R3		// count updates are interleaved between the
	MOVWLO	3(R2), R7	// partial-word memory operations
	ADDU	R6, R2		// from += R6
	MOVWHI	R7, 0(R1)	// partial store: only the bytes up to the word boundary
	ADDU	R6, R1		// to += R6; to is now 4-byte aligned

f_dest_aligned:
	AND	$31, R3, R7	// R7 = n mod 32
	AND	$3, R3, R6	// R6 = n mod 4
	SUBU	R7, R5, R7	// end pointer for 32-byte chunks
	SUBU	R6, R5, R6	// end pointer for 4-byte chunks

	// if source is not aligned, use unaligned reads
	AND	$3, R2, R8
	BNE	R8, f_large_ua

// Forward copy, source and destination both word-aligned:
// 32 bytes per iteration through 8 registers.
f_large:
	BEQ	R1, R7, f_words
	ADDU	$32, R1
	MOVW	0(R2), R8
	MOVW	4(R2), R9
	MOVW	8(R2), R10
	MOVW	12(R2), R11
	MOVW	16(R2), R12
	MOVW	20(R2), R13
	MOVW	24(R2), R14
	MOVW	28(R2), R15
	ADDU	$32, R2
	MOVW	R8, -32(R1)
	MOVW	R9, -28(R1)
	MOVW	R10, -24(R1)
	MOVW	R11, -20(R1)
	MOVW	R12, -16(R1)
	MOVW	R13, -12(R1)
	MOVW	R14, -8(R1)
	MOVW	R15, -4(R1)
	JMP	f_large

// Remaining full words, one word per iteration.
f_words:
	BEQ	R1, R6, f_tail
	ADDU	$4, R1
	MOVW	0(R2), R8
	ADDU	$4, R2
	MOVW	R8, -4(R1)
	JMP	f_words

// Trailing 1-3 bytes, addressed back from the end pointers;
// the MOVWLO pair touches only those bytes.
f_tail:
	BEQ	R1, R5, ret
	MOVWLO	-1(R4), R8
	MOVWLO	R8, -1(R5)

ret:
	RET

// Forward copy, source not word-aligned: each source word is
// assembled with a MOVWHI/MOVWLO pair; destination stores are
// aligned full-word stores. 32 bytes per iteration.
f_large_ua:
	BEQ	R1, R7, f_words_ua
	ADDU	$32, R1
	MOVWHI	0(R2), R8
	MOVWHI	4(R2), R9
	MOVWHI	8(R2), R10
	MOVWHI	12(R2), R11
	MOVWHI	16(R2), R12
	MOVWHI	20(R2), R13
	MOVWHI	24(R2), R14
	MOVWHI	28(R2), R15
	MOVWLO	3(R2), R8
	MOVWLO	7(R2), R9
	MOVWLO	11(R2), R10
	MOVWLO	15(R2), R11
	MOVWLO	19(R2), R12
	MOVWLO	23(R2), R13
	MOVWLO	27(R2), R14
	MOVWLO	31(R2), R15
	ADDU	$32, R2
	MOVW	R8, -32(R1)
	MOVW	R9, -28(R1)
	MOVW	R10, -24(R1)
	MOVW	R11, -20(R1)
	MOVW	R12, -16(R1)
	MOVW	R13, -12(R1)
	MOVW	R14, -8(R1)
	MOVW	R15, -4(R1)
	JMP	f_large_ua

// Remaining full words from the unaligned source.
f_words_ua:
	BEQ	R1, R6, f_tail_ua
	MOVWHI	0(R2), R8
	ADDU	$4, R1
	MOVWLO	3(R2), R8
	ADDU	$4, R2
	MOVW	R8, -4(R1)
	JMP	f_words_ua

// Trailing 1-3 bytes: assemble the word holding the last source
// bytes, then store just those bytes below the destination end.
f_tail_ua:
	BEQ	R1, R5, ret
	MOVWHI	-4(R4), R8
	MOVWLO	-1(R4), R8
	MOVWLO	R8, -1(R5)
	JMP	ret

// n < 4: plain forward byte-by-byte loop.
f_small_copy:
	BEQ	R1, R5, ret
	ADDU	$1, R1
	MOVB	0(R2), R6
	ADDU	$1, R2
	MOVB	R6, -1(R1)
	JMP	f_small_copy

// Backward copy (to > from, so the buffers may overlap):
// walk down from the end pointers R4/R5. Mirror image of the
// forward path above.
backward:
	SGTU	$4, R3, R6	// R6 = 1 iff n < 4
	BNE	R6, b_small_copy

	// align the destination end pointer to 4 bytes
	AND	$3, R5, R6
	BEQ	R6, b_dest_aligned
	MOVWHI	-4(R4), R7	// unaligned load of the last source word
	SUBU	R6, R3		// n -= R6
	MOVWLO	-1(R4), R7
	SUBU	R6, R4		// source end -= R6
	MOVWLO	R7, -1(R5)	// partial store: only the R6 trailing bytes
	SUBU	R6, R5		// destination end -= R6; now 4-byte aligned

b_dest_aligned:
	AND	$31, R3, R7	// R7 = n mod 32
	AND	$3, R3, R6	// R6 = n mod 4
	ADDU	R7, R1, R7	// stop pointer for 32-byte chunks
	ADDU	R6, R1, R6	// stop pointer for 4-byte chunks

	// if source end is not aligned, use unaligned reads
	AND	$3, R4, R8
	BNE	R8, b_large_ua

// Backward copy, both ends word-aligned: 32 bytes per iteration.
b_large:
	BEQ	R5, R7, b_words
	ADDU	$-32, R5
	MOVW	-4(R4), R8
	MOVW	-8(R4), R9
	MOVW	-12(R4), R10
	MOVW	-16(R4), R11
	MOVW	-20(R4), R12
	MOVW	-24(R4), R13
	MOVW	-28(R4), R14
	MOVW	-32(R4), R15
	ADDU	$-32, R4
	MOVW	R8, 28(R5)
	MOVW	R9, 24(R5)
	MOVW	R10, 20(R5)
	MOVW	R11, 16(R5)
	MOVW	R12, 12(R5)
	MOVW	R13, 8(R5)
	MOVW	R14, 4(R5)
	MOVW	R15, 0(R5)
	JMP	b_large

// Remaining full words, one word per iteration, moving down.
b_words:
	BEQ	R5, R6, b_tail
	ADDU	$-4, R5
	MOVW	-4(R4), R8
	ADDU	$-4, R4
	MOVW	R8, 0(R5)
	JMP	b_words

// Leading 1-3 bytes at the very start of the buffers.
b_tail:
	BEQ	R5, R1, ret
	MOVWHI	0(R2), R8	// R2 and R1 have the same alignment so we don't need to load a whole word
	MOVWHI	R8, 0(R1)
	JMP	ret

// Backward copy, source end not word-aligned: assemble each
// source word with a MOVWHI/MOVWLO pair. 32 bytes per iteration.
b_large_ua:
	BEQ	R5, R7, b_words_ua
	ADDU	$-32, R5
	MOVWHI	-4(R4), R8
	MOVWHI	-8(R4), R9
	MOVWHI	-12(R4), R10
	MOVWHI	-16(R4), R11
	MOVWHI	-20(R4), R12
	MOVWHI	-24(R4), R13
	MOVWHI	-28(R4), R14
	MOVWHI	-32(R4), R15
	MOVWLO	-1(R4), R8
	MOVWLO	-5(R4), R9
	MOVWLO	-9(R4), R10
	MOVWLO	-13(R4), R11
	MOVWLO	-17(R4), R12
	MOVWLO	-21(R4), R13
	MOVWLO	-25(R4), R14
	MOVWLO	-29(R4), R15
	ADDU	$-32, R4
	MOVW	R8, 28(R5)
	MOVW	R9, 24(R5)
	MOVW	R10, 20(R5)
	MOVW	R11, 16(R5)
	MOVW	R12, 12(R5)
	MOVW	R13, 8(R5)
	MOVW	R14, 4(R5)
	MOVW	R15, 0(R5)
	JMP	b_large_ua

// Remaining full words from the unaligned source, moving down.
b_words_ua:
	BEQ	R5, R6, b_tail_ua
	MOVWHI	-4(R4), R8
	ADDU	$-4, R5
	MOVWLO	-1(R4), R8
	ADDU	$-4, R4
	MOVW	R8, 0(R5)
	JMP	b_words_ua

// Leading 1-3 bytes: assemble the first source word, then store
// just the leading bytes at the start of the destination.
b_tail_ua:
	BEQ	R5, R1, ret
	MOVWHI	(R2), R8
	MOVWLO	3(R2), R8
	MOVWHI	R8, 0(R1)
	JMP ret

// n < 4: plain backward byte-by-byte loop.
b_small_copy:
	BEQ	R5, R1, ret
	ADDU	$-1, R5
	MOVB	-1(R4), R6
	ADDU	$-1, R4
	MOVB	R6, 0(R5)
	JMP	b_small_copy