1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
|
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// RISC-V's atomic operations have two bits, aq ("acquire") and rl ("release"),
// which may be toggled on and off. Their precise semantics are defined in
// section 6.3 of the specification, but the basic idea is as follows:
//
// - If neither aq nor rl is set, the CPU may reorder the atomic arbitrarily.
// It guarantees only that it will execute atomically.
//
// - If aq is set, the CPU may move the instruction backward, but not forward.
//
// - If rl is set, the CPU may move the instruction forward, but not backward.
//
// - If both are set, the CPU may not reorder the instruction at all.
//
// These four modes correspond to other well-known memory models on other CPUs.
// On ARM, aq corresponds to a dmb ishst and aq+rl corresponds to a dmb ish. On
// Intel, aq corresponds to an lfence, rl to an sfence, and aq+rl to an mfence
// (or a lock prefix).
//
// Go's memory model requires that
// - if a read happens after a write, the read must observe the write, and
// that
// - if a read happens concurrently with a write, the read may observe the
// write.
// aq is sufficient to guarantee this, so that's what we use here. (This jibes
// with ARM, which uses dmb ishst.)
#include "textflag.h"
// func Cas(ptr *uint32, old, new uint32) bool
// Atomically:
//	if *ptr == old {
//		*ptr = new
//		return true
//	} else {
//		return false
//	}
//
// Implemented as an LR/SC retry loop on the 32-bit word at ptr.
TEXT ·Cas(SB), NOSPLIT, $0-17
	MOV	ptr+0(FP), A0
	MOVW	old+8(FP), A1		// 32-bit load, compared against the LRW result below
	MOVW	new+12(FP), A2
cas_again:
	LRW	(A0), A3		// load-reserved: current value of *ptr
	BNE	A3, A1, cas_fail	// *ptr != old: give up without storing
	SCW	A2, (A0), A4		// A4 == 0 iff the conditional store succeeded
	BNE	A4, ZERO, cas_again	// store failed: retry from the load
	MOV	$1, A0
	MOVB	A0, ret+16(FP)
	RET
cas_fail:
	// The result is a 1-byte bool at ret+16 in a 17-byte argument frame,
	// so store exactly one byte (an 8-byte MOV would write past the frame).
	MOVB	ZERO, ret+16(FP)
	RET
// func Cas64(ptr *uint64, old, new uint64) bool
//
// 64-bit compare-and-swap: stores new into *ptr and reports true if *ptr
// held old, otherwise leaves *ptr alone and reports false. Built from an
// LRD/SCD pair that is retried whenever the conditional store fails.
TEXT ·Cas64(SB), NOSPLIT, $0-25
	MOV	ptr+0(FP), T0
	MOV	old+8(FP), T1
	MOV	new+16(FP), T2
retry:
	LRD	(T0), T3		// T3 = current *ptr (load-reserved)
	BNE	T3, T1, mismatch
	SCD	T2, (T0), T4		// T4 != 0 means the store did not happen
	BNE	T4, ZERO, retry
	MOV	$1, T3
	MOVB	T3, ret+24(FP)
	RET
mismatch:
	MOVB	ZERO, ret+24(FP)
	RET
// func Load(ptr *uint32) uint32
//
// Returns the 32-bit word at ptr. The word is read with a load-reserved
// instruction (LRW) rather than a plain MOVW.
TEXT ·Load(SB),NOSPLIT|NOFRAME,$0-12
	MOV	ptr+0(FP), T0
	LRW	(T0), T1
	MOVW	T1, ret+8(FP)
	RET
// func Load8(ptr *uint8) uint8
//
// Returns the byte at ptr. The byte is read with an ordinary MOVBU that is
// bracketed by a FENCE on each side.
TEXT ·Load8(SB),NOSPLIT|NOFRAME,$0-9
	MOV	ptr+0(FP), T0
	FENCE
	MOVBU	(T0), T1
	FENCE
	MOVB	T1, ret+8(FP)
	RET
// func Load64(ptr *uint64) uint64
//
// Returns the 64-bit doubleword at ptr, read with a load-reserved
// instruction (LRD) rather than a plain MOV.
TEXT ·Load64(SB),NOSPLIT|NOFRAME,$0-16
	MOV	ptr+0(FP), T0
	LRD	(T0), T1
	MOV	T1, ret+8(FP)
	RET
// func Store(ptr *uint32, val uint32)
//
// Writes val to *ptr with an atomic swap (AMOSWAPW); the previous contents
// are discarded by targeting the ZERO register.
TEXT ·Store(SB), NOSPLIT, $0-12
	MOV	ptr+0(FP), T0
	MOVW	val+8(FP), T1
	AMOSWAPW	T1, (T0), ZERO
	RET
// func Store8(ptr *uint8, val uint8)
//
// Writes val to the byte at ptr using an ordinary MOVB that is bracketed
// by a FENCE on each side.
TEXT ·Store8(SB), NOSPLIT, $0-9
	MOV	ptr+0(FP), T0
	MOVBU	val+8(FP), T1
	FENCE
	MOVB	T1, (T0)
	FENCE
	RET
// func Store64(ptr *uint64, val uint64)
//
// Writes val to *ptr with an atomic swap (AMOSWAPD); the previous contents
// are discarded by targeting the ZERO register.
TEXT ·Store64(SB), NOSPLIT, $0-16
	MOV	ptr+0(FP), T0
	MOV	val+8(FP), T1
	AMOSWAPD	T1, (T0), ZERO
	RET
// Casp1 is a tail jump to Cas64. It declares the same 25-byte argument
// frame ($0-25), so Cas64 operates directly on this function's arguments
// and result slot.
TEXT ·Casp1(SB), NOSPLIT, $0-25
JMP ·Cas64(SB)
// Casint32 is a tail jump to Cas; both declare a 17-byte argument frame.
TEXT ·Casint32(SB),NOSPLIT,$0-17
JMP ·Cas(SB)
// Casint64 is a tail jump to Cas64; both declare a 25-byte argument frame.
TEXT ·Casint64(SB),NOSPLIT,$0-25
JMP ·Cas64(SB)
// Casuintptr is a tail jump to Cas64; both declare a 25-byte argument
// frame.
TEXT ·Casuintptr(SB),NOSPLIT,$0-25
JMP ·Cas64(SB)
// CasRel is a tail jump to Cas, so in this file it is implemented by
// exactly the same code as Cas.
TEXT ·CasRel(SB), NOSPLIT, $0-17
JMP ·Cas(SB)
// Loaduintptr is a tail jump to Load64; both declare a 16-byte argument
// frame.
TEXT ·Loaduintptr(SB),NOSPLIT,$0-16
JMP ·Load64(SB)
// Storeint32 is a tail jump to Store; both declare a 12-byte argument
// frame.
TEXT ·Storeint32(SB),NOSPLIT,$0-12
JMP ·Store(SB)
// Storeint64 is a tail jump to Store64; both declare a 16-byte argument
// frame.
TEXT ·Storeint64(SB),NOSPLIT,$0-16
JMP ·Store64(SB)
// Storeuintptr is a tail jump to Store64; both declare a 16-byte argument
// frame.
TEXT ·Storeuintptr(SB),NOSPLIT,$0-16
JMP ·Store64(SB)
// Loaduint is a tail jump to Load64 (16-byte argument frame: 8-byte pointer
// plus 8-byte result). It jumps to Load64 directly, the routine that
// Loaduintptr itself forwards to, rather than taking a second hop through
// Loaduintptr.
TEXT ·Loaduint(SB),NOSPLIT,$0-16
	JMP	·Load64(SB)
// Loadint32 is a tail jump to Load; both declare a 12-byte argument frame.
TEXT ·Loadint32(SB),NOSPLIT,$0-12
JMP ·Load(SB)
// Loadint64 is a tail jump to Load64; both declare a 16-byte argument
// frame.
TEXT ·Loadint64(SB),NOSPLIT,$0-16
JMP ·Load64(SB)
// Xaddint32 is a tail jump to Xadd; both declare a 20-byte argument frame.
TEXT ·Xaddint32(SB),NOSPLIT,$0-20
JMP ·Xadd(SB)
// Xaddint64 atomically adds delta to the 64-bit value at ptr and returns
// the new value (old value + delta).
//
// Frame layout ($0-24): ptr at 0, delta at 8, 8-byte result at 16.
TEXT ·Xaddint64(SB),NOSPLIT,$0-24
	MOV	ptr+0(FP), A0
	MOV	delta+8(FP), A1
	AMOADDD	A1, (A0), A2		// A2 = value of *ptr before the add
	ADD	A2, A1, A0		// A0 = old value + delta
	// The result is 64 bits wide, so store all 8 bytes; a MOVW here would
	// write only the low 32 bits of the result slot.
	MOV	A0, ret+16(FP)
	RET
// LoadAcq is a tail jump to Load, so in this file it is implemented by
// exactly the same code as Load.
TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$0-12
JMP ·Load(SB)
// LoadAcq64 is a tail jump to Load64, so in this file it is implemented by
// exactly the same code as Load64.
TEXT ·LoadAcq64(SB),NOSPLIT|NOFRAME,$0-16
JMP ·Load64(SB)
// LoadAcquintptr is a tail jump to Load64; both declare a 16-byte argument
// frame.
TEXT ·LoadAcquintptr(SB),NOSPLIT|NOFRAME,$0-16
JMP ·Load64(SB)
// func Loadp(ptr unsafe.Pointer) unsafe.Pointer
//
// Implemented as a tail jump to Load64: the pointer is loaded as a 64-bit
// value through the shared 16-byte argument frame.
TEXT ·Loadp(SB),NOSPLIT,$0-16
JMP ·Load64(SB)
// func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
//
// Implemented as a tail jump to Store64: the pointer is stored as a 64-bit
// value. Nothing else happens in this routine (presumably "NoWB" means no
// write barrier; confirm against the Go declaration).
TEXT ·StorepNoWB(SB), NOSPLIT, $0-16
JMP ·Store64(SB)
// StoreRel is a tail jump to Store, so in this file it is implemented by
// exactly the same code as Store.
TEXT ·StoreRel(SB), NOSPLIT, $0-12
JMP ·Store(SB)
// StoreRel64 is a tail jump to Store64, so in this file it is implemented
// by exactly the same code as Store64.
TEXT ·StoreRel64(SB), NOSPLIT, $0-16
JMP ·Store64(SB)
// StoreReluintptr is a tail jump to Store64; both declare a 16-byte
// argument frame.
TEXT ·StoreReluintptr(SB), NOSPLIT, $0-16
JMP ·Store64(SB)
// func Xchg(ptr *uint32, new uint32) uint32
//
// Atomically replaces the 32-bit word at ptr with new (AMOSWAPW) and
// returns the value that was there before.
TEXT ·Xchg(SB), NOSPLIT, $0-20
	MOV	ptr+0(FP), T0
	MOVW	new+8(FP), T1
	AMOSWAPW	T1, (T0), T2	// T2 = previous *ptr
	MOVW	T2, ret+16(FP)
	RET
// func Xchg64(ptr *uint64, new uint64) uint64
//
// Atomically replaces the 64-bit doubleword at ptr with new (AMOSWAPD) and
// returns the value that was there before.
TEXT ·Xchg64(SB), NOSPLIT, $0-24
	MOV	ptr+0(FP), T0
	MOV	new+8(FP), T1
	AMOSWAPD	T1, (T0), T2	// T2 = previous *ptr
	MOV	T2, ret+16(FP)
	RET
// func Xadd(ptr *uint32, delta int32) uint32
// Atomically:
//	*ptr += delta
//	return *ptr
//
// AMOADDW yields the value from before the add, so delta is added to it
// once more in a register to form the returned (new) value.
TEXT ·Xadd(SB), NOSPLIT, $0-20
	MOV	ptr+0(FP), T0
	MOVW	delta+8(FP), T1
	AMOADDW	T1, (T0), T2	// T2 = previous *ptr
	ADD	T1, T2		// T2 = previous *ptr + delta
	MOVW	T2, ret+16(FP)
	RET
// func Xadd64(ptr *uint64, delta int64) uint64
//
// Atomically adds delta to the 64-bit value at ptr and returns the new
// value. AMOADDD yields the value from before the add, so delta is added
// to it once more in a register to form the result.
TEXT ·Xadd64(SB), NOSPLIT, $0-24
	MOV	ptr+0(FP), T0
	MOV	delta+8(FP), T1
	AMOADDD	T1, (T0), T2	// T2 = previous *ptr
	ADD	T1, T2		// T2 = previous *ptr + delta
	MOV	T2, ret+16(FP)
	RET
// func Xadduintptr(ptr *uintptr, delta uintptr) uintptr
//
// Implemented as a tail jump to Xadd64; both declare a 24-byte argument
// frame.
TEXT ·Xadduintptr(SB), NOSPLIT, $0-24
JMP ·Xadd64(SB)
// func Xchgint32(ptr *int32, new int32) int32
//
// Implemented as a tail jump to Xchg; both declare a 20-byte argument
// frame.
TEXT ·Xchgint32(SB), NOSPLIT, $0-20
JMP ·Xchg(SB)
// func Xchgint64(ptr *int64, new int64) int64
//
// Implemented as a tail jump to Xchg64; both declare a 24-byte argument
// frame.
TEXT ·Xchgint64(SB), NOSPLIT, $0-24
JMP ·Xchg64(SB)
// func Xchguintptr(ptr *uintptr, new uintptr) uintptr
//
// Implemented as a tail jump to Xchg64; both declare a 24-byte argument
// frame.
TEXT ·Xchguintptr(SB), NOSPLIT, $0-24
JMP ·Xchg64(SB)
// func And8(ptr *uint8, val uint8)
//
// Atomically performs *ptr &= val on a single byte. The operation is done
// with a 32-bit AMOANDW on the aligned word that contains the byte, using a
// mask that holds val in the target byte's position and all ones everywhere
// else, so the neighbouring bytes are ANDed with 1s and keep their values.
TEXT ·And8(SB), NOSPLIT, $0-9
	MOV	ptr+0(FP), T0
	MOVBU	val+8(FP), T1
	AND	$-4, T0, T2		// T2 = ptr rounded down to a 4-byte boundary
	AND	$3, T0, T3		// T3 = byte offset of ptr inside that word
	SLL	$3, T3, T3		// T3 = bit offset (byte offset * 8)
	XOR	$255, T1, T1		// T1 = ^val within the low byte
	SLL	T3, T1, T1		// move it to the target byte's position
	XOR	$-1, T1, T1		// invert everything: val in place, 1s elsewhere
	AMOANDW	T1, (T2), ZERO
	RET
// func Or8(ptr *uint8, val uint8)
//
// Atomically performs *ptr |= val on a single byte. The operation is done
// with a 32-bit AMOORW on the aligned word that contains the byte; val is
// shifted to the target byte's position and the other mask bits are zero,
// so the neighbouring bytes are unchanged.
TEXT ·Or8(SB), NOSPLIT, $0-9
	MOV	ptr+0(FP), T0
	MOVBU	val+8(FP), T1
	AND	$-4, T0, T2		// T2 = ptr rounded down to a 4-byte boundary
	AND	$3, T0, T3		// T3 = byte offset of ptr inside that word
	SLL	$3, T3, T3		// T3 = bit offset (byte offset * 8)
	SLL	T3, T1, T1		// move val to the target byte's position
	AMOORW	T1, (T2), ZERO
	RET
// func And(ptr *uint32, val uint32)
//
// Atomically performs *ptr &= val (AMOANDW); the previous value is
// discarded by targeting the ZERO register.
TEXT ·And(SB), NOSPLIT, $0-12
	MOV	ptr+0(FP), T0
	MOVW	val+8(FP), T1
	AMOANDW	T1, (T0), ZERO
	RET
// func Or(ptr *uint32, val uint32)
//
// Atomically performs *ptr |= val (AMOORW); the previous value is
// discarded by targeting the ZERO register.
TEXT ·Or(SB), NOSPLIT, $0-12
	MOV	ptr+0(FP), T0
	MOVW	val+8(FP), T1
	AMOORW	T1, (T0), ZERO
	RET
// func Or32(ptr *uint32, val uint32) uint32
//
// Atomically performs *ptr |= val (AMOORW) and returns the value *ptr held
// before the operation.
TEXT ·Or32(SB), NOSPLIT, $0-20
	MOV	ptr+0(FP), T0
	MOVW	val+8(FP), T1
	AMOORW	T1, (T0), T2	// T2 = previous *ptr
	MOVW	T2, ret+16(FP)
	RET
// func And32(ptr *uint32, val uint32) uint32
//
// Atomically performs *ptr &= val (AMOANDW) and returns the value *ptr
// held before the operation.
TEXT ·And32(SB), NOSPLIT, $0-20
	MOV	ptr+0(FP), T0
	MOVW	val+8(FP), T1
	AMOANDW	T1, (T0), T2	// T2 = previous *ptr
	MOVW	T2, ret+16(FP)
	RET
// func Or64(ptr *uint64, val uint64) uint64
//
// Atomically performs *ptr |= val (AMOORD) and returns the value *ptr held
// before the operation.
TEXT ·Or64(SB), NOSPLIT, $0-24
	MOV	ptr+0(FP), T0
	MOV	val+8(FP), T1
	AMOORD	T1, (T0), T2	// T2 = previous *ptr
	MOV	T2, ret+16(FP)
	RET
// func And64(ptr *uint64, val uint64) uint64
//
// Atomically performs *ptr &= val (AMOANDD) and returns the value *ptr
// held before the operation.
TEXT ·And64(SB), NOSPLIT, $0-24
	MOV	ptr+0(FP), T0
	MOV	val+8(FP), T1
	AMOANDD	T1, (T0), T2	// T2 = previous *ptr
	MOV	T2, ret+16(FP)
	RET
// func Anduintptr(ptr *uintptr, val uintptr) uintptr
//
// Implemented as a tail jump to And64; both declare a 24-byte argument
// frame.
TEXT ·Anduintptr(SB), NOSPLIT, $0-24
JMP ·And64(SB)
// func Oruintptr(ptr *uintptr, val uintptr) uintptr
//
// Implemented as a tail jump to Or64; both declare a 24-byte argument
// frame.
TEXT ·Oruintptr(SB), NOSPLIT, $0-24
JMP ·Or64(SB)
|