summaryrefslogtreecommitdiffstats
path: root/src/runtime/internal
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-28 13:18:25 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-28 13:18:25 +0000
commit109be507377fe7f6e8819ac94041d3fdcdf6fd2f (patch)
tree2806a689f8fab4a2ec9fc949830ef270a91d667d /src/runtime/internal
parentInitial commit. (diff)
downloadgolang-1.19-upstream.tar.xz
golang-1.19-upstream.zip
Adding upstream version 1.19.8.upstream/1.19.8upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/runtime/internal')
-rw-r--r--src/runtime/internal/atomic/atomic_386.go103
-rw-r--r--src/runtime/internal/atomic/atomic_386.s285
-rw-r--r--src/runtime/internal/atomic/atomic_amd64.go117
-rw-r--r--src/runtime/internal/atomic/atomic_amd64.s225
-rw-r--r--src/runtime/internal/atomic/atomic_arm.go244
-rw-r--r--src/runtime/internal/atomic/atomic_arm.s297
-rw-r--r--src/runtime/internal/atomic/atomic_arm64.go94
-rw-r--r--src/runtime/internal/atomic/atomic_arm64.s333
-rw-r--r--src/runtime/internal/atomic/atomic_loong64.go83
-rw-r--r--src/runtime/internal/atomic/atomic_loong64.s299
-rw-r--r--src/runtime/internal/atomic/atomic_mips64x.go89
-rw-r--r--src/runtime/internal/atomic/atomic_mips64x.s359
-rw-r--r--src/runtime/internal/atomic/atomic_mipsx.go167
-rw-r--r--src/runtime/internal/atomic/atomic_mipsx.s261
-rw-r--r--src/runtime/internal/atomic/atomic_ppc64x.go89
-rw-r--r--src/runtime/internal/atomic/atomic_ppc64x.s362
-rw-r--r--src/runtime/internal/atomic/atomic_riscv64.go85
-rw-r--r--src/runtime/internal/atomic/atomic_riscv64.s284
-rw-r--r--src/runtime/internal/atomic/atomic_s390x.go123
-rw-r--r--src/runtime/internal/atomic/atomic_s390x.s248
-rw-r--r--src/runtime/internal/atomic/atomic_test.go386
-rw-r--r--src/runtime/internal/atomic/atomic_wasm.go341
-rw-r--r--src/runtime/internal/atomic/atomic_wasm.s10
-rw-r--r--src/runtime/internal/atomic/bench_test.go195
-rw-r--r--src/runtime/internal/atomic/doc.go18
-rw-r--r--src/runtime/internal/atomic/stubs.go59
-rw-r--r--src/runtime/internal/atomic/sys_linux_arm.s144
-rw-r--r--src/runtime/internal/atomic/sys_nonlinux_arm.s79
-rw-r--r--src/runtime/internal/atomic/types.go431
-rw-r--r--src/runtime/internal/atomic/types_64bit.go29
-rw-r--r--src/runtime/internal/atomic/unaligned.go9
-rw-r--r--src/runtime/internal/math/math.go40
-rw-r--r--src/runtime/internal/math/math_test.go79
-rw-r--r--src/runtime/internal/sys/consts.go34
-rw-r--r--src/runtime/internal/sys/intrinsics.go91
-rw-r--r--src/runtime/internal/sys/intrinsics_386.s58
-rw-r--r--src/runtime/internal/sys/intrinsics_common.go158
-rw-r--r--src/runtime/internal/sys/intrinsics_stubs.go13
-rw-r--r--src/runtime/internal/sys/intrinsics_test.go38
-rw-r--r--src/runtime/internal/sys/sys.go7
-rw-r--r--src/runtime/internal/syscall/asm_linux_386.s34
-rw-r--r--src/runtime/internal/syscall/asm_linux_amd64.s47
-rw-r--r--src/runtime/internal/syscall/asm_linux_arm.s32
-rw-r--r--src/runtime/internal/syscall/asm_linux_arm64.s29
-rw-r--r--src/runtime/internal/syscall/asm_linux_loong64.s29
-rw-r--r--src/runtime/internal/syscall/asm_linux_mips64x.s29
-rw-r--r--src/runtime/internal/syscall/asm_linux_mipsx.s34
-rw-r--r--src/runtime/internal/syscall/asm_linux_ppc64x.s28
-rw-r--r--src/runtime/internal/syscall/asm_linux_riscv64.s29
-rw-r--r--src/runtime/internal/syscall/asm_linux_s390x.s28
-rw-r--r--src/runtime/internal/syscall/syscall_linux.go39
51 files changed, 6724 insertions, 0 deletions
diff --git a/src/runtime/internal/atomic/atomic_386.go b/src/runtime/internal/atomic/atomic_386.go
new file mode 100644
index 0000000..bf2f4b9
--- /dev/null
+++ b/src/runtime/internal/atomic/atomic_386.go
@@ -0,0 +1,103 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build 386
+
+package atomic
+
+import "unsafe"
+
+// Export some functions via linkname to assembly in sync/atomic.
+//
+//go:linkname Load
+//go:linkname Loadp
+
+//go:nosplit
+//go:noinline
+func Load(ptr *uint32) uint32 {
+ return *ptr
+}
+
+//go:nosplit
+//go:noinline
+func Loadp(ptr unsafe.Pointer) unsafe.Pointer {
+ return *(*unsafe.Pointer)(ptr)
+}
+
+//go:nosplit
+//go:noinline
+func LoadAcq(ptr *uint32) uint32 {
+ return *ptr
+}
+
+//go:nosplit
+//go:noinline
+func LoadAcquintptr(ptr *uintptr) uintptr {
+ return *ptr
+}
+
+//go:noescape
+func Xadd64(ptr *uint64, delta int64) uint64
+
+//go:noescape
+func Xadduintptr(ptr *uintptr, delta uintptr) uintptr
+
+//go:noescape
+func Xadd(ptr *uint32, delta int32) uint32
+
+//go:noescape
+func Xchg64(ptr *uint64, new uint64) uint64
+
+//go:noescape
+func Xchg(ptr *uint32, new uint32) uint32
+
+//go:noescape
+func Xchguintptr(ptr *uintptr, new uintptr) uintptr
+
+//go:noescape
+func Load64(ptr *uint64) uint64
+
+//go:nosplit
+//go:noinline
+func Load8(ptr *uint8) uint8 {
+ return *ptr
+}
+
+//go:noescape
+func And8(ptr *uint8, val uint8)
+
+//go:noescape
+func Or8(ptr *uint8, val uint8)
+
+//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
+// NOTE: Do not add atomicxor8 (XOR is not idempotent).
+
+//go:noescape
+func Cas64(ptr *uint64, old, new uint64) bool
+
+//go:noescape
+func CasRel(ptr *uint32, old, new uint32) bool
+
+//go:noescape
+func Store(ptr *uint32, val uint32)
+
+//go:noescape
+func Store8(ptr *uint8, val uint8)
+
+//go:noescape
+func Store64(ptr *uint64, val uint64)
+
+//go:noescape
+func StoreRel(ptr *uint32, val uint32)
+
+//go:noescape
+func StoreReluintptr(ptr *uintptr, val uintptr)
+
+// NO go:noescape annotation; see atomic_pointer.go.
+func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
diff --git a/src/runtime/internal/atomic/atomic_386.s b/src/runtime/internal/atomic/atomic_386.s
new file mode 100644
index 0000000..724d515
--- /dev/null
+++ b/src/runtime/internal/atomic/atomic_386.s
@@ -0,0 +1,285 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+#include "funcdata.h"
+
+// bool Cas(int32 *val, int32 old, int32 new)
+// Atomically:
+// if(*val == old){
+// *val = new;
+// return 1;
+// }else
+// return 0;
+TEXT ·Cas(SB), NOSPLIT, $0-13
+ MOVL ptr+0(FP), BX
+ MOVL old+4(FP), AX
+ MOVL new+8(FP), CX
+ LOCK
+ CMPXCHGL CX, 0(BX)
+ SETEQ ret+12(FP)
+ RET
+
+TEXT ·Casint32(SB), NOSPLIT, $0-13
+ JMP ·Cas(SB)
+
+TEXT ·Casint64(SB), NOSPLIT, $0-21
+ JMP ·Cas64(SB)
+
+TEXT ·Casuintptr(SB), NOSPLIT, $0-13
+ JMP ·Cas(SB)
+
+TEXT ·CasRel(SB), NOSPLIT, $0-13
+ JMP ·Cas(SB)
+
+TEXT ·Loaduintptr(SB), NOSPLIT, $0-8
+ JMP ·Load(SB)
+
+TEXT ·Loaduint(SB), NOSPLIT, $0-8
+ JMP ·Load(SB)
+
+TEXT ·Storeint32(SB), NOSPLIT, $0-8
+ JMP ·Store(SB)
+
+TEXT ·Storeint64(SB), NOSPLIT, $0-12
+ JMP ·Store64(SB)
+
+TEXT ·Storeuintptr(SB), NOSPLIT, $0-8
+ JMP ·Store(SB)
+
+TEXT ·Xadduintptr(SB), NOSPLIT, $0-12
+ JMP ·Xadd(SB)
+
+TEXT ·Loadint32(SB), NOSPLIT, $0-8
+ JMP ·Load(SB)
+
+TEXT ·Loadint64(SB), NOSPLIT, $0-12
+ JMP ·Load64(SB)
+
+TEXT ·Xaddint32(SB), NOSPLIT, $0-12
+ JMP ·Xadd(SB)
+
+TEXT ·Xaddint64(SB), NOSPLIT, $0-20
+ JMP ·Xadd64(SB)
+
+// bool ·Cas64(uint64 *val, uint64 old, uint64 new)
+// Atomically:
+// if(*val == old){
+// *val = new;
+// return 1;
+// } else {
+// return 0;
+// }
+TEXT ·Cas64(SB), NOSPLIT, $0-21
+ NO_LOCAL_POINTERS
+ MOVL ptr+0(FP), BP
+ TESTL $7, BP
+ JZ 2(PC)
+ CALL ·panicUnaligned(SB)
+ MOVL old_lo+4(FP), AX
+ MOVL old_hi+8(FP), DX
+ MOVL new_lo+12(FP), BX
+ MOVL new_hi+16(FP), CX
+ LOCK
+ CMPXCHG8B 0(BP)
+ SETEQ ret+20(FP)
+ RET
+
+// bool Casp1(void **p, void *old, void *new)
+// Atomically:
+// if(*p == old){
+// *p = new;
+// return 1;
+// }else
+// return 0;
+TEXT ·Casp1(SB), NOSPLIT, $0-13
+ MOVL ptr+0(FP), BX
+ MOVL old+4(FP), AX
+ MOVL new+8(FP), CX
+ LOCK
+ CMPXCHGL CX, 0(BX)
+ SETEQ ret+12(FP)
+ RET
+
+// uint32 Xadd(uint32 volatile *val, int32 delta)
+// Atomically:
+// *val += delta;
+// return *val;
+TEXT ·Xadd(SB), NOSPLIT, $0-12
+ MOVL ptr+0(FP), BX
+ MOVL delta+4(FP), AX
+ MOVL AX, CX
+ LOCK
+ XADDL AX, 0(BX)
+ ADDL CX, AX
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT ·Xadd64(SB), NOSPLIT, $0-20
+ NO_LOCAL_POINTERS
+ // no XADDQ so use CMPXCHG8B loop
+ MOVL ptr+0(FP), BP
+ TESTL $7, BP
+ JZ 2(PC)
+ CALL ·panicUnaligned(SB)
+ // DI:SI = delta
+ MOVL delta_lo+4(FP), SI
+ MOVL delta_hi+8(FP), DI
+ // DX:AX = *addr
+ MOVL 0(BP), AX
+ MOVL 4(BP), DX
+addloop:
+ // CX:BX = DX:AX (*addr) + DI:SI (delta)
+ MOVL AX, BX
+ MOVL DX, CX
+ ADDL SI, BX
+ ADCL DI, CX
+
+ // if *addr == DX:AX {
+ // *addr = CX:BX
+ // } else {
+ // DX:AX = *addr
+ // }
+ // all in one instruction
+ LOCK
+ CMPXCHG8B 0(BP)
+
+ JNZ addloop
+
+ // success
+ // return CX:BX
+ MOVL BX, ret_lo+12(FP)
+ MOVL CX, ret_hi+16(FP)
+ RET
+
+TEXT ·Xchg(SB), NOSPLIT, $0-12
+ MOVL ptr+0(FP), BX
+ MOVL new+4(FP), AX
+ XCHGL AX, 0(BX)
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT ·Xchgint32(SB), NOSPLIT, $0-12
+ JMP ·Xchg(SB)
+
+TEXT ·Xchgint64(SB), NOSPLIT, $0-20
+ JMP ·Xchg64(SB)
+
+TEXT ·Xchguintptr(SB), NOSPLIT, $0-12
+ JMP ·Xchg(SB)
+
+TEXT ·Xchg64(SB),NOSPLIT,$0-20
+ NO_LOCAL_POINTERS
+ // no XCHGQ so use CMPXCHG8B loop
+ MOVL ptr+0(FP), BP
+ TESTL $7, BP
+ JZ 2(PC)
+ CALL ·panicUnaligned(SB)
+ // CX:BX = new
+ MOVL new_lo+4(FP), BX
+ MOVL new_hi+8(FP), CX
+ // DX:AX = *addr
+ MOVL 0(BP), AX
+ MOVL 4(BP), DX
+swaploop:
+ // if *addr == DX:AX
+ // *addr = CX:BX
+ // else
+ // DX:AX = *addr
+ // all in one instruction
+ LOCK
+ CMPXCHG8B 0(BP)
+ JNZ swaploop
+
+ // success
+ // return DX:AX
+ MOVL AX, ret_lo+12(FP)
+ MOVL DX, ret_hi+16(FP)
+ RET
+
+TEXT ·StorepNoWB(SB), NOSPLIT, $0-8
+ MOVL ptr+0(FP), BX
+ MOVL val+4(FP), AX
+ XCHGL AX, 0(BX)
+ RET
+
+TEXT ·Store(SB), NOSPLIT, $0-8
+ MOVL ptr+0(FP), BX
+ MOVL val+4(FP), AX
+ XCHGL AX, 0(BX)
+ RET
+
+TEXT ·StoreRel(SB), NOSPLIT, $0-8
+ JMP ·Store(SB)
+
+TEXT ·StoreReluintptr(SB), NOSPLIT, $0-8
+ JMP ·Store(SB)
+
+// uint64 atomicload64(uint64 volatile* addr);
+TEXT ·Load64(SB), NOSPLIT, $0-12
+ NO_LOCAL_POINTERS
+ MOVL ptr+0(FP), AX
+ TESTL $7, AX
+ JZ 2(PC)
+ CALL ·panicUnaligned(SB)
+ MOVQ (AX), M0
+ MOVQ M0, ret+4(FP)
+ EMMS
+ RET
+
+// void ·Store64(uint64 volatile* addr, uint64 v);
+TEXT ·Store64(SB), NOSPLIT, $0-12
+ NO_LOCAL_POINTERS
+ MOVL ptr+0(FP), AX
+ TESTL $7, AX
+ JZ 2(PC)
+ CALL ·panicUnaligned(SB)
+ // MOVQ and EMMS were introduced on the Pentium MMX.
+ MOVQ val+4(FP), M0
+ MOVQ M0, (AX)
+ EMMS
+ // This is essentially a no-op, but it provides required memory fencing.
+ // It can be replaced with MFENCE, but MFENCE was introduced only on the Pentium4 (SSE2).
+ XORL AX, AX
+ LOCK
+ XADDL AX, (SP)
+ RET
+
+// void ·Or8(byte volatile*, byte);
+TEXT ·Or8(SB), NOSPLIT, $0-5
+ MOVL ptr+0(FP), AX
+ MOVB val+4(FP), BX
+ LOCK
+ ORB BX, (AX)
+ RET
+
+// void ·And8(byte volatile*, byte);
+TEXT ·And8(SB), NOSPLIT, $0-5
+ MOVL ptr+0(FP), AX
+ MOVB val+4(FP), BX
+ LOCK
+ ANDB BX, (AX)
+ RET
+
+TEXT ·Store8(SB), NOSPLIT, $0-5
+ MOVL ptr+0(FP), BX
+ MOVB val+4(FP), AX
+ XCHGB AX, 0(BX)
+ RET
+
+// func Or(addr *uint32, v uint32)
+TEXT ·Or(SB), NOSPLIT, $0-8
+ MOVL ptr+0(FP), AX
+ MOVL val+4(FP), BX
+ LOCK
+ ORL BX, (AX)
+ RET
+
+// func And(addr *uint32, v uint32)
+TEXT ·And(SB), NOSPLIT, $0-8
+ MOVL ptr+0(FP), AX
+ MOVL val+4(FP), BX
+ LOCK
+ ANDL BX, (AX)
+ RET
diff --git a/src/runtime/internal/atomic/atomic_amd64.go b/src/runtime/internal/atomic/atomic_amd64.go
new file mode 100644
index 0000000..52a8362
--- /dev/null
+++ b/src/runtime/internal/atomic/atomic_amd64.go
@@ -0,0 +1,117 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package atomic
+
+import "unsafe"
+
+// Export some functions via linkname to assembly in sync/atomic.
+//
+//go:linkname Load
+//go:linkname Loadp
+//go:linkname Load64
+
+//go:nosplit
+//go:noinline
+func Load(ptr *uint32) uint32 {
+ return *ptr
+}
+
+//go:nosplit
+//go:noinline
+func Loadp(ptr unsafe.Pointer) unsafe.Pointer {
+ return *(*unsafe.Pointer)(ptr)
+}
+
+//go:nosplit
+//go:noinline
+func Load64(ptr *uint64) uint64 {
+ return *ptr
+}
+
+//go:nosplit
+//go:noinline
+func LoadAcq(ptr *uint32) uint32 {
+ return *ptr
+}
+
+//go:nosplit
+//go:noinline
+func LoadAcq64(ptr *uint64) uint64 {
+ return *ptr
+}
+
+//go:nosplit
+//go:noinline
+func LoadAcquintptr(ptr *uintptr) uintptr {
+ return *ptr
+}
+
+//go:noescape
+func Xadd(ptr *uint32, delta int32) uint32
+
+//go:noescape
+func Xadd64(ptr *uint64, delta int64) uint64
+
+//go:noescape
+func Xadduintptr(ptr *uintptr, delta uintptr) uintptr
+
+//go:noescape
+func Xchg(ptr *uint32, new uint32) uint32
+
+//go:noescape
+func Xchg64(ptr *uint64, new uint64) uint64
+
+//go:noescape
+func Xchguintptr(ptr *uintptr, new uintptr) uintptr
+
+//go:nosplit
+//go:noinline
+func Load8(ptr *uint8) uint8 {
+ return *ptr
+}
+
+//go:noescape
+func And8(ptr *uint8, val uint8)
+
+//go:noescape
+func Or8(ptr *uint8, val uint8)
+
+//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
+// NOTE: Do not add atomicxor8 (XOR is not idempotent).
+
+//go:noescape
+func Cas64(ptr *uint64, old, new uint64) bool
+
+//go:noescape
+func CasRel(ptr *uint32, old, new uint32) bool
+
+//go:noescape
+func Store(ptr *uint32, val uint32)
+
+//go:noescape
+func Store8(ptr *uint8, val uint8)
+
+//go:noescape
+func Store64(ptr *uint64, val uint64)
+
+//go:noescape
+func StoreRel(ptr *uint32, val uint32)
+
+//go:noescape
+func StoreRel64(ptr *uint64, val uint64)
+
+//go:noescape
+func StoreReluintptr(ptr *uintptr, val uintptr)
+
+// StorepNoWB performs *ptr = val atomically and without a write
+// barrier.
+//
+// NO go:noescape annotation; see atomic_pointer.go.
+func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
diff --git a/src/runtime/internal/atomic/atomic_amd64.s b/src/runtime/internal/atomic/atomic_amd64.s
new file mode 100644
index 0000000..d21514b
--- /dev/null
+++ b/src/runtime/internal/atomic/atomic_amd64.s
@@ -0,0 +1,225 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Note: some of these functions are semantically inlined
+// by the compiler (in src/cmd/compile/internal/gc/ssa.go).
+
+#include "textflag.h"
+
+TEXT ·Loaduintptr(SB), NOSPLIT, $0-16
+ JMP ·Load64(SB)
+
+TEXT ·Loaduint(SB), NOSPLIT, $0-16
+ JMP ·Load64(SB)
+
+TEXT ·Loadint32(SB), NOSPLIT, $0-12
+ JMP ·Load(SB)
+
+TEXT ·Loadint64(SB), NOSPLIT, $0-16
+ JMP ·Load64(SB)
+
+// bool Cas(int32 *val, int32 old, int32 new)
+// Atomically:
+// if(*val == old){
+// *val = new;
+// return 1;
+// } else
+// return 0;
+TEXT ·Cas(SB),NOSPLIT,$0-17
+ MOVQ ptr+0(FP), BX
+ MOVL old+8(FP), AX
+ MOVL new+12(FP), CX
+ LOCK
+ CMPXCHGL CX, 0(BX)
+ SETEQ ret+16(FP)
+ RET
+
+// bool ·Cas64(uint64 *val, uint64 old, uint64 new)
+// Atomically:
+// if(*val == old){
+// *val = new;
+// return 1;
+// } else {
+// return 0;
+// }
+TEXT ·Cas64(SB), NOSPLIT, $0-25
+ MOVQ ptr+0(FP), BX
+ MOVQ old+8(FP), AX
+ MOVQ new+16(FP), CX
+ LOCK
+ CMPXCHGQ CX, 0(BX)
+ SETEQ ret+24(FP)
+ RET
+
+// bool Casp1(void **val, void *old, void *new)
+// Atomically:
+// if(*val == old){
+// *val = new;
+// return 1;
+// } else
+// return 0;
+TEXT ·Casp1(SB), NOSPLIT, $0-25
+ MOVQ ptr+0(FP), BX
+ MOVQ old+8(FP), AX
+ MOVQ new+16(FP), CX
+ LOCK
+ CMPXCHGQ CX, 0(BX)
+ SETEQ ret+24(FP)
+ RET
+
+TEXT ·Casint32(SB), NOSPLIT, $0-17
+ JMP ·Cas(SB)
+
+TEXT ·Casint64(SB), NOSPLIT, $0-25
+ JMP ·Cas64(SB)
+
+TEXT ·Casuintptr(SB), NOSPLIT, $0-25
+ JMP ·Cas64(SB)
+
+TEXT ·CasRel(SB), NOSPLIT, $0-17
+ JMP ·Cas(SB)
+
+// uint32 Xadd(uint32 volatile *val, int32 delta)
+// Atomically:
+// *val += delta;
+// return *val;
+TEXT ·Xadd(SB), NOSPLIT, $0-20
+ MOVQ ptr+0(FP), BX
+ MOVL delta+8(FP), AX
+ MOVL AX, CX
+ LOCK
+ XADDL AX, 0(BX)
+ ADDL CX, AX
+ MOVL AX, ret+16(FP)
+ RET
+
+// uint64 Xadd64(uint64 volatile *val, int64 delta)
+// Atomically:
+// *val += delta;
+// return *val;
+TEXT ·Xadd64(SB), NOSPLIT, $0-24
+ MOVQ ptr+0(FP), BX
+ MOVQ delta+8(FP), AX
+ MOVQ AX, CX
+ LOCK
+ XADDQ AX, 0(BX)
+ ADDQ CX, AX
+ MOVQ AX, ret+16(FP)
+ RET
+
+TEXT ·Xaddint32(SB), NOSPLIT, $0-20
+ JMP ·Xadd(SB)
+
+TEXT ·Xaddint64(SB), NOSPLIT, $0-24
+ JMP ·Xadd64(SB)
+
+TEXT ·Xadduintptr(SB), NOSPLIT, $0-24
+ JMP ·Xadd64(SB)
+
+// uint32 Xchg(ptr *uint32, new uint32)
+// Atomically:
+// old := *ptr;
+// *ptr = new;
+// return old;
+TEXT ·Xchg(SB), NOSPLIT, $0-20
+ MOVQ ptr+0(FP), BX
+ MOVL new+8(FP), AX
+ XCHGL AX, 0(BX)
+ MOVL AX, ret+16(FP)
+ RET
+
+// uint64 Xchg64(ptr *uint64, new uint64)
+// Atomically:
+// old := *ptr;
+// *ptr = new;
+// return old;
+TEXT ·Xchg64(SB), NOSPLIT, $0-24
+ MOVQ ptr+0(FP), BX
+ MOVQ new+8(FP), AX
+ XCHGQ AX, 0(BX)
+ MOVQ AX, ret+16(FP)
+ RET
+
+TEXT ·Xchgint32(SB), NOSPLIT, $0-20
+ JMP ·Xchg(SB)
+
+TEXT ·Xchgint64(SB), NOSPLIT, $0-24
+ JMP ·Xchg64(SB)
+
+TEXT ·Xchguintptr(SB), NOSPLIT, $0-24
+ JMP ·Xchg64(SB)
+
+TEXT ·StorepNoWB(SB), NOSPLIT, $0-16
+ MOVQ ptr+0(FP), BX
+ MOVQ val+8(FP), AX
+ XCHGQ AX, 0(BX)
+ RET
+
+TEXT ·Store(SB), NOSPLIT, $0-12
+ MOVQ ptr+0(FP), BX
+ MOVL val+8(FP), AX
+ XCHGL AX, 0(BX)
+ RET
+
+TEXT ·Store8(SB), NOSPLIT, $0-9
+ MOVQ ptr+0(FP), BX
+ MOVB val+8(FP), AX
+ XCHGB AX, 0(BX)
+ RET
+
+TEXT ·Store64(SB), NOSPLIT, $0-16
+ MOVQ ptr+0(FP), BX
+ MOVQ val+8(FP), AX
+ XCHGQ AX, 0(BX)
+ RET
+
+TEXT ·Storeint32(SB), NOSPLIT, $0-12
+ JMP ·Store(SB)
+
+TEXT ·Storeint64(SB), NOSPLIT, $0-16
+ JMP ·Store64(SB)
+
+TEXT ·Storeuintptr(SB), NOSPLIT, $0-16
+ JMP ·Store64(SB)
+
+TEXT ·StoreRel(SB), NOSPLIT, $0-12
+ JMP ·Store(SB)
+
+TEXT ·StoreRel64(SB), NOSPLIT, $0-16
+ JMP ·Store64(SB)
+
+TEXT ·StoreReluintptr(SB), NOSPLIT, $0-16
+ JMP ·Store64(SB)
+
+// void ·Or8(byte volatile*, byte);
+TEXT ·Or8(SB), NOSPLIT, $0-9
+ MOVQ ptr+0(FP), AX
+ MOVB val+8(FP), BX
+ LOCK
+ ORB BX, (AX)
+ RET
+
+// void ·And8(byte volatile*, byte);
+TEXT ·And8(SB), NOSPLIT, $0-9
+ MOVQ ptr+0(FP), AX
+ MOVB val+8(FP), BX
+ LOCK
+ ANDB BX, (AX)
+ RET
+
+// func Or(addr *uint32, v uint32)
+TEXT ·Or(SB), NOSPLIT, $0-12
+ MOVQ ptr+0(FP), AX
+ MOVL val+8(FP), BX
+ LOCK
+ ORL BX, (AX)
+ RET
+
+// func And(addr *uint32, v uint32)
+TEXT ·And(SB), NOSPLIT, $0-12
+ MOVQ ptr+0(FP), AX
+ MOVL val+8(FP), BX
+ LOCK
+ ANDL BX, (AX)
+ RET
diff --git a/src/runtime/internal/atomic/atomic_arm.go b/src/runtime/internal/atomic/atomic_arm.go
new file mode 100644
index 0000000..bdb1847
--- /dev/null
+++ b/src/runtime/internal/atomic/atomic_arm.go
@@ -0,0 +1,244 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build arm
+
+package atomic
+
+import (
+ "internal/cpu"
+ "unsafe"
+)
+
+// Export some functions via linkname to assembly in sync/atomic.
+//
+//go:linkname Xchg
+//go:linkname Xchguintptr
+
+type spinlock struct {
+ v uint32
+}
+
+//go:nosplit
+func (l *spinlock) lock() {
+ for {
+ if Cas(&l.v, 0, 1) {
+ return
+ }
+ }
+}
+
+//go:nosplit
+func (l *spinlock) unlock() {
+ Store(&l.v, 0)
+}
+
+var locktab [57]struct {
+ l spinlock
+ pad [cpu.CacheLinePadSize - unsafe.Sizeof(spinlock{})]byte
+}
+
+func addrLock(addr *uint64) *spinlock {
+ return &locktab[(uintptr(unsafe.Pointer(addr))>>3)%uintptr(len(locktab))].l
+}
+
+// Atomic add and return new value.
+//
+//go:nosplit
+func Xadd(val *uint32, delta int32) uint32 {
+ for {
+ oval := *val
+ nval := oval + uint32(delta)
+ if Cas(val, oval, nval) {
+ return nval
+ }
+ }
+}
+
+//go:noescape
+func Xadduintptr(ptr *uintptr, delta uintptr) uintptr
+
+//go:nosplit
+func Xchg(addr *uint32, v uint32) uint32 {
+ for {
+ old := *addr
+ if Cas(addr, old, v) {
+ return old
+ }
+ }
+}
+
+//go:nosplit
+func Xchguintptr(addr *uintptr, v uintptr) uintptr {
+ return uintptr(Xchg((*uint32)(unsafe.Pointer(addr)), uint32(v)))
+}
+
+// Not noescape -- it installs a pointer to addr.
+func StorepNoWB(addr unsafe.Pointer, v unsafe.Pointer)
+
+//go:noescape
+func Store(addr *uint32, v uint32)
+
+//go:noescape
+func StoreRel(addr *uint32, v uint32)
+
+//go:noescape
+func StoreReluintptr(addr *uintptr, v uintptr)
+
+//go:nosplit
+func goCas64(addr *uint64, old, new uint64) bool {
+ if uintptr(unsafe.Pointer(addr))&7 != 0 {
+ *(*int)(nil) = 0 // crash on unaligned uint64
+ }
+ _ = *addr // if nil, fault before taking the lock
+ var ok bool
+ addrLock(addr).lock()
+ if *addr == old {
+ *addr = new
+ ok = true
+ }
+ addrLock(addr).unlock()
+ return ok
+}
+
+//go:nosplit
+func goXadd64(addr *uint64, delta int64) uint64 {
+ if uintptr(unsafe.Pointer(addr))&7 != 0 {
+ *(*int)(nil) = 0 // crash on unaligned uint64
+ }
+ _ = *addr // if nil, fault before taking the lock
+ var r uint64
+ addrLock(addr).lock()
+ r = *addr + uint64(delta)
+ *addr = r
+ addrLock(addr).unlock()
+ return r
+}
+
+//go:nosplit
+func goXchg64(addr *uint64, v uint64) uint64 {
+ if uintptr(unsafe.Pointer(addr))&7 != 0 {
+ *(*int)(nil) = 0 // crash on unaligned uint64
+ }
+ _ = *addr // if nil, fault before taking the lock
+ var r uint64
+ addrLock(addr).lock()
+ r = *addr
+ *addr = v
+ addrLock(addr).unlock()
+ return r
+}
+
+//go:nosplit
+func goLoad64(addr *uint64) uint64 {
+ if uintptr(unsafe.Pointer(addr))&7 != 0 {
+ *(*int)(nil) = 0 // crash on unaligned uint64
+ }
+ _ = *addr // if nil, fault before taking the lock
+ var r uint64
+ addrLock(addr).lock()
+ r = *addr
+ addrLock(addr).unlock()
+ return r
+}
+
+//go:nosplit
+func goStore64(addr *uint64, v uint64) {
+ if uintptr(unsafe.Pointer(addr))&7 != 0 {
+ *(*int)(nil) = 0 // crash on unaligned uint64
+ }
+ _ = *addr // if nil, fault before taking the lock
+ addrLock(addr).lock()
+ *addr = v
+ addrLock(addr).unlock()
+}
+
+//go:nosplit
+func Or8(addr *uint8, v uint8) {
+ // Align down to 4 bytes and use 32-bit CAS.
+ uaddr := uintptr(unsafe.Pointer(addr))
+ addr32 := (*uint32)(unsafe.Pointer(uaddr &^ 3))
+ word := uint32(v) << ((uaddr & 3) * 8) // little endian
+ for {
+ old := *addr32
+ if Cas(addr32, old, old|word) {
+ return
+ }
+ }
+}
+
+//go:nosplit
+func And8(addr *uint8, v uint8) {
+ // Align down to 4 bytes and use 32-bit CAS.
+ uaddr := uintptr(unsafe.Pointer(addr))
+ addr32 := (*uint32)(unsafe.Pointer(uaddr &^ 3))
+ word := uint32(v) << ((uaddr & 3) * 8) // little endian
+ mask := uint32(0xFF) << ((uaddr & 3) * 8) // little endian
+ word |= ^mask
+ for {
+ old := *addr32
+ if Cas(addr32, old, old&word) {
+ return
+ }
+ }
+}
+
+//go:nosplit
+func Or(addr *uint32, v uint32) {
+ for {
+ old := *addr
+ if Cas(addr, old, old|v) {
+ return
+ }
+ }
+}
+
+//go:nosplit
+func And(addr *uint32, v uint32) {
+ for {
+ old := *addr
+ if Cas(addr, old, old&v) {
+ return
+ }
+ }
+}
+
+//go:nosplit
+func armcas(ptr *uint32, old, new uint32) bool
+
+//go:noescape
+func Load(addr *uint32) uint32
+
+// NO go:noescape annotation; *addr escapes if result escapes (#31525)
+func Loadp(addr unsafe.Pointer) unsafe.Pointer
+
+//go:noescape
+func Load8(addr *uint8) uint8
+
+//go:noescape
+func LoadAcq(addr *uint32) uint32
+
+//go:noescape
+func LoadAcquintptr(ptr *uintptr) uintptr
+
+//go:noescape
+func Cas64(addr *uint64, old, new uint64) bool
+
+//go:noescape
+func CasRel(addr *uint32, old, new uint32) bool
+
+//go:noescape
+func Xadd64(addr *uint64, delta int64) uint64
+
+//go:noescape
+func Xchg64(addr *uint64, v uint64) uint64
+
+//go:noescape
+func Load64(addr *uint64) uint64
+
+//go:noescape
+func Store8(addr *uint8, v uint8)
+
+//go:noescape
+func Store64(addr *uint64, v uint64)
diff --git a/src/runtime/internal/atomic/atomic_arm.s b/src/runtime/internal/atomic/atomic_arm.s
new file mode 100644
index 0000000..92cbe8a
--- /dev/null
+++ b/src/runtime/internal/atomic/atomic_arm.s
@@ -0,0 +1,297 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+#include "funcdata.h"
+
+// bool armcas(int32 *val, int32 old, int32 new)
+// Atomically:
+// if(*val == old){
+// *val = new;
+// return 1;
+// }else
+// return 0;
+//
+// To implement ·cas in sys_$GOOS_arm.s
+// using the native instructions, use:
+//
+// TEXT ·cas(SB),NOSPLIT,$0
+// B ·armcas(SB)
+//
+TEXT ·armcas(SB),NOSPLIT,$0-13
+ MOVW ptr+0(FP), R1
+ MOVW old+4(FP), R2
+ MOVW new+8(FP), R3
+casl:
+ LDREX (R1), R0
+ CMP R0, R2
+ BNE casfail
+
+ MOVB runtime·goarm(SB), R8
+ CMP $7, R8
+ BLT 2(PC)
+ DMB MB_ISHST
+
+ STREX R3, (R1), R0
+ CMP $0, R0
+ BNE casl
+ MOVW $1, R0
+
+ CMP $7, R8
+ BLT 2(PC)
+ DMB MB_ISH
+
+ MOVB R0, ret+12(FP)
+ RET
+casfail:
+ MOVW $0, R0
+ MOVB R0, ret+12(FP)
+ RET
+
+// stubs
+
+TEXT ·Loadp(SB),NOSPLIT|NOFRAME,$0-8
+ B ·Load(SB)
+
+TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$0-8
+ B ·Load(SB)
+
+TEXT ·LoadAcquintptr(SB),NOSPLIT|NOFRAME,$0-8
+ B ·Load(SB)
+
+TEXT ·Casint32(SB),NOSPLIT,$0-13
+ B ·Cas(SB)
+
+TEXT ·Casint64(SB),NOSPLIT,$-4-21
+ B ·Cas64(SB)
+
+TEXT ·Casuintptr(SB),NOSPLIT,$0-13
+ B ·Cas(SB)
+
+TEXT ·Casp1(SB),NOSPLIT,$0-13
+ B ·Cas(SB)
+
+TEXT ·CasRel(SB),NOSPLIT,$0-13
+ B ·Cas(SB)
+
+TEXT ·Loadint32(SB),NOSPLIT,$0-8
+ B ·Load(SB)
+
+TEXT ·Loadint64(SB),NOSPLIT,$-4-12
+ B ·Load64(SB)
+
+TEXT ·Loaduintptr(SB),NOSPLIT,$0-8
+ B ·Load(SB)
+
+TEXT ·Loaduint(SB),NOSPLIT,$0-8
+ B ·Load(SB)
+
+TEXT ·Storeint32(SB),NOSPLIT,$0-8
+ B ·Store(SB)
+
+TEXT ·Storeint64(SB),NOSPLIT,$0-12
+ B ·Store64(SB)
+
+TEXT ·Storeuintptr(SB),NOSPLIT,$0-8
+ B ·Store(SB)
+
+TEXT ·StorepNoWB(SB),NOSPLIT,$0-8
+ B ·Store(SB)
+
+TEXT ·StoreRel(SB),NOSPLIT,$0-8
+ B ·Store(SB)
+
+TEXT ·StoreReluintptr(SB),NOSPLIT,$0-8
+ B ·Store(SB)
+
+TEXT ·Xaddint32(SB),NOSPLIT,$0-12
+ B ·Xadd(SB)
+
+TEXT ·Xaddint64(SB),NOSPLIT,$-4-20
+ B ·Xadd64(SB)
+
+TEXT ·Xadduintptr(SB),NOSPLIT,$0-12
+ B ·Xadd(SB)
+
+TEXT ·Xchgint32(SB),NOSPLIT,$0-12
+ B ·Xchg(SB)
+
+TEXT ·Xchgint64(SB),NOSPLIT,$-4-20
+ B ·Xchg64(SB)
+
+// 64-bit atomics
+// The native ARM implementations use LDREXD/STREXD, which are
+// available on ARMv6k or later. We use them only on ARMv7.
+// On older ARM, we use Go implementations which simulate 64-bit
+// atomics with locks.
+TEXT armCas64<>(SB),NOSPLIT,$0-21
+ // addr is already in R1
+ MOVW old_lo+4(FP), R2
+ MOVW old_hi+8(FP), R3
+ MOVW new_lo+12(FP), R4
+ MOVW new_hi+16(FP), R5
+cas64loop:
+ LDREXD (R1), R6 // loads R6 and R7
+ CMP R2, R6
+ BNE cas64fail
+ CMP R3, R7
+ BNE cas64fail
+
+ DMB MB_ISHST
+
+ STREXD R4, (R1), R0 // stores R4 and R5
+ CMP $0, R0
+ BNE cas64loop
+ MOVW $1, R0
+
+ DMB MB_ISH
+
+ MOVBU R0, swapped+20(FP)
+ RET
+cas64fail:
+ MOVW $0, R0
+ MOVBU R0, swapped+20(FP)
+ RET
+
+TEXT armXadd64<>(SB),NOSPLIT,$0-20
+ // addr is already in R1
+ MOVW delta_lo+4(FP), R2
+ MOVW delta_hi+8(FP), R3
+
+add64loop:
+ LDREXD (R1), R4 // loads R4 and R5
+ ADD.S R2, R4
+ ADC R3, R5
+
+ DMB MB_ISHST
+
+ STREXD R4, (R1), R0 // stores R4 and R5
+ CMP $0, R0
+ BNE add64loop
+
+ DMB MB_ISH
+
+ MOVW R4, new_lo+12(FP)
+ MOVW R5, new_hi+16(FP)
+ RET
+
+TEXT armXchg64<>(SB),NOSPLIT,$0-20
+ // addr is already in R1
+ MOVW new_lo+4(FP), R2
+ MOVW new_hi+8(FP), R3
+
+swap64loop:
+ LDREXD (R1), R4 // loads R4 and R5
+
+ DMB MB_ISHST
+
+ STREXD R2, (R1), R0 // stores R2 and R3
+ CMP $0, R0
+ BNE swap64loop
+
+ DMB MB_ISH
+
+ MOVW R4, old_lo+12(FP)
+ MOVW R5, old_hi+16(FP)
+ RET
+
+TEXT armLoad64<>(SB),NOSPLIT,$0-12
+ // addr is already in R1
+
+ LDREXD (R1), R2 // loads R2 and R3
+ DMB MB_ISH
+
+ MOVW R2, val_lo+4(FP)
+ MOVW R3, val_hi+8(FP)
+ RET
+
+TEXT armStore64<>(SB),NOSPLIT,$0-12
+ // addr is already in R1
+ MOVW val_lo+4(FP), R2
+ MOVW val_hi+8(FP), R3
+
+store64loop:
+ LDREXD (R1), R4 // loads R4 and R5
+
+ DMB MB_ISHST
+
+ STREXD R2, (R1), R0 // stores R2 and R3
+ CMP $0, R0
+ BNE store64loop
+
+ DMB MB_ISH
+ RET
+
+// The following functions all panic if their address argument isn't
+// 8-byte aligned. Since we're calling back into Go code to do this,
+// we have to cooperate with stack unwinding. In the normal case, the
+// functions tail-call into the appropriate implementation, which
+// means they must not open a frame. Hence, when they go down the
+// panic path, at that point they push the LR to create a real frame
+// (they don't need to pop it because panic won't return; however, we
+// do need to set the SP delta back).
+
+// Check if R1 is 8-byte aligned, panic if not.
+// Clobbers R2.
+#define CHECK_ALIGN \
+ AND.S $7, R1, R2 \
+ BEQ 4(PC) \
+ MOVW.W R14, -4(R13) /* prepare a real frame */ \
+ BL ·panicUnaligned(SB) \
+ ADD $4, R13 /* compensate SP delta */
+
+TEXT ·Cas64(SB),NOSPLIT,$-4-21
+ NO_LOCAL_POINTERS
+ MOVW addr+0(FP), R1
+ CHECK_ALIGN
+
+ MOVB runtime·goarm(SB), R11
+ CMP $7, R11
+ BLT 2(PC)
+ JMP armCas64<>(SB)
+ JMP ·goCas64(SB)
+
+TEXT ·Xadd64(SB),NOSPLIT,$-4-20
+ NO_LOCAL_POINTERS
+ MOVW addr+0(FP), R1
+ CHECK_ALIGN
+
+ MOVB runtime·goarm(SB), R11
+ CMP $7, R11
+ BLT 2(PC)
+ JMP armXadd64<>(SB)
+ JMP ·goXadd64(SB)
+
+TEXT ·Xchg64(SB),NOSPLIT,$-4-20
+ NO_LOCAL_POINTERS
+ MOVW addr+0(FP), R1
+ CHECK_ALIGN
+
+ MOVB runtime·goarm(SB), R11
+ CMP $7, R11
+ BLT 2(PC)
+ JMP armXchg64<>(SB)
+ JMP ·goXchg64(SB)
+
+TEXT ·Load64(SB),NOSPLIT,$-4-12
+ NO_LOCAL_POINTERS
+ MOVW addr+0(FP), R1
+ CHECK_ALIGN
+
+ MOVB runtime·goarm(SB), R11
+ CMP $7, R11
+ BLT 2(PC)
+ JMP armLoad64<>(SB)
+ JMP ·goLoad64(SB)
+
+TEXT ·Store64(SB),NOSPLIT,$-4-12
+ NO_LOCAL_POINTERS
+ MOVW addr+0(FP), R1
+ CHECK_ALIGN
+
+ MOVB runtime·goarm(SB), R11
+ CMP $7, R11
+ BLT 2(PC)
+ JMP armStore64<>(SB)
+ JMP ·goStore64(SB)
diff --git a/src/runtime/internal/atomic/atomic_arm64.go b/src/runtime/internal/atomic/atomic_arm64.go
new file mode 100644
index 0000000..459fb99
--- /dev/null
+++ b/src/runtime/internal/atomic/atomic_arm64.go
@@ -0,0 +1,94 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build arm64
+
+package atomic
+
+import (
+ "internal/cpu"
+ "unsafe"
+)
+
+const (
+ offsetARM64HasATOMICS = unsafe.Offsetof(cpu.ARM64.HasATOMICS)
+)
+
+//go:noescape
+func Xadd(ptr *uint32, delta int32) uint32
+
+//go:noescape
+func Xadd64(ptr *uint64, delta int64) uint64
+
+//go:noescape
+func Xadduintptr(ptr *uintptr, delta uintptr) uintptr
+
+//go:noescape
+func Xchg(ptr *uint32, new uint32) uint32
+
+//go:noescape
+func Xchg64(ptr *uint64, new uint64) uint64
+
+//go:noescape
+func Xchguintptr(ptr *uintptr, new uintptr) uintptr
+
+//go:noescape
+func Load(ptr *uint32) uint32
+
+//go:noescape
+func Load8(ptr *uint8) uint8
+
+//go:noescape
+func Load64(ptr *uint64) uint64
+
+// NO go:noescape annotation; *ptr escapes if result escapes (#31525)
+func Loadp(ptr unsafe.Pointer) unsafe.Pointer
+
+//go:noescape
+func LoadAcq(addr *uint32) uint32
+
+//go:noescape
+func LoadAcq64(ptr *uint64) uint64
+
+//go:noescape
+func LoadAcquintptr(ptr *uintptr) uintptr
+
+//go:noescape
+func Or8(ptr *uint8, val uint8)
+
+//go:noescape
+func And8(ptr *uint8, val uint8)
+
+//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
+//go:noescape
+func Cas64(ptr *uint64, old, new uint64) bool
+
+//go:noescape
+func CasRel(ptr *uint32, old, new uint32) bool
+
+//go:noescape
+func Store(ptr *uint32, val uint32)
+
+//go:noescape
+func Store8(ptr *uint8, val uint8)
+
+//go:noescape
+func Store64(ptr *uint64, val uint64)
+
+// NO go:noescape annotation; see atomic_pointer.go.
+func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
+
+//go:noescape
+func StoreRel(ptr *uint32, val uint32)
+
+//go:noescape
+func StoreRel64(ptr *uint64, val uint64)
+
+//go:noescape
+func StoreReluintptr(ptr *uintptr, val uintptr)
diff --git a/src/runtime/internal/atomic/atomic_arm64.s b/src/runtime/internal/atomic/atomic_arm64.s
new file mode 100644
index 0000000..5f77d92
--- /dev/null
+++ b/src/runtime/internal/atomic/atomic_arm64.s
@@ -0,0 +1,333 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·Casint32(SB), NOSPLIT, $0-17
+ B ·Cas(SB)
+
+TEXT ·Casint64(SB), NOSPLIT, $0-25
+ B ·Cas64(SB)
+
+TEXT ·Casuintptr(SB), NOSPLIT, $0-25
+ B ·Cas64(SB)
+
+TEXT ·CasRel(SB), NOSPLIT, $0-17
+ B ·Cas(SB)
+
+TEXT ·Loadint32(SB), NOSPLIT, $0-12
+ B ·Load(SB)
+
+TEXT ·Loadint64(SB), NOSPLIT, $0-16
+ B ·Load64(SB)
+
+TEXT ·Loaduintptr(SB), NOSPLIT, $0-16
+ B ·Load64(SB)
+
+TEXT ·Loaduint(SB), NOSPLIT, $0-16
+ B ·Load64(SB)
+
+TEXT ·Storeint32(SB), NOSPLIT, $0-12
+ B ·Store(SB)
+
+TEXT ·Storeint64(SB), NOSPLIT, $0-16
+ B ·Store64(SB)
+
+TEXT ·Storeuintptr(SB), NOSPLIT, $0-16
+ B ·Store64(SB)
+
+TEXT ·Xaddint32(SB), NOSPLIT, $0-20
+ B ·Xadd(SB)
+
+TEXT ·Xaddint64(SB), NOSPLIT, $0-24
+ B ·Xadd64(SB)
+
+TEXT ·Xadduintptr(SB), NOSPLIT, $0-24
+ B ·Xadd64(SB)
+
+TEXT ·Casp1(SB), NOSPLIT, $0-25
+ B ·Cas64(SB)
+
+// uint32 ·Load(uint32 volatile* addr)
+TEXT ·Load(SB),NOSPLIT,$0-12
+ MOVD ptr+0(FP), R0
+ LDARW (R0), R0
+ MOVW R0, ret+8(FP)
+ RET
+
+// uint8 ·Load8(uint8 volatile* addr)
+TEXT ·Load8(SB),NOSPLIT,$0-9
+ MOVD ptr+0(FP), R0
+ LDARB (R0), R0
+ MOVB R0, ret+8(FP)
+ RET
+
+// uint64 ·Load64(uint64 volatile* addr)
+TEXT ·Load64(SB),NOSPLIT,$0-16
+ MOVD ptr+0(FP), R0
+ LDAR (R0), R0
+ MOVD R0, ret+8(FP)
+ RET
+
+// void *·Loadp(void *volatile *addr)
+TEXT ·Loadp(SB),NOSPLIT,$0-16
+ MOVD ptr+0(FP), R0
+ LDAR (R0), R0
+ MOVD R0, ret+8(FP)
+ RET
+
+// uint32 ·LoadAcq(uint32 volatile* addr)
+TEXT ·LoadAcq(SB),NOSPLIT,$0-12
+ B ·Load(SB)
+
+// uint64 ·LoadAcquintptr(uint64 volatile* addr)
+TEXT ·LoadAcq64(SB),NOSPLIT,$0-16
+ B ·Load64(SB)
+
+// uintptr ·LoadAcq64(uintptr volatile* addr)
+TEXT ·LoadAcquintptr(SB),NOSPLIT,$0-16
+ B ·Load64(SB)
+
+TEXT ·StorepNoWB(SB), NOSPLIT, $0-16
+ B ·Store64(SB)
+
+TEXT ·StoreRel(SB), NOSPLIT, $0-12
+ B ·Store(SB)
+
+TEXT ·StoreRel64(SB), NOSPLIT, $0-16
+ B ·Store64(SB)
+
+TEXT ·StoreReluintptr(SB), NOSPLIT, $0-16
+ B ·Store64(SB)
+
+TEXT ·Store(SB), NOSPLIT, $0-12
+ MOVD ptr+0(FP), R0
+ MOVW val+8(FP), R1
+ STLRW R1, (R0)
+ RET
+
+TEXT ·Store8(SB), NOSPLIT, $0-9
+ MOVD ptr+0(FP), R0
+ MOVB val+8(FP), R1
+ STLRB R1, (R0)
+ RET
+
+TEXT ·Store64(SB), NOSPLIT, $0-16
+ MOVD ptr+0(FP), R0
+ MOVD val+8(FP), R1
+ STLR R1, (R0)
+ RET
+
+// uint32 Xchg(ptr *uint32, new uint32)
+// Atomically:
+// old := *ptr;
+// *ptr = new;
+// return old;
+TEXT ·Xchg(SB), NOSPLIT, $0-20
+ MOVD ptr+0(FP), R0
+ MOVW new+8(FP), R1
+ MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
+ CBZ R4, load_store_loop
+ SWPALW R1, (R0), R2
+ MOVW R2, ret+16(FP)
+ RET
+load_store_loop:
+ LDAXRW (R0), R2
+ STLXRW R1, (R0), R3
+ CBNZ R3, load_store_loop
+ MOVW R2, ret+16(FP)
+ RET
+
+// uint64 Xchg64(ptr *uint64, new uint64)
+// Atomically:
+// old := *ptr;
+// *ptr = new;
+// return old;
+TEXT ·Xchg64(SB), NOSPLIT, $0-24
+ MOVD ptr+0(FP), R0
+ MOVD new+8(FP), R1
+ MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
+ CBZ R4, load_store_loop
+ SWPALD R1, (R0), R2
+ MOVD R2, ret+16(FP)
+ RET
+load_store_loop:
+ LDAXR (R0), R2
+ STLXR R1, (R0), R3
+ CBNZ R3, load_store_loop
+ MOVD R2, ret+16(FP)
+ RET
+
+// bool Cas(uint32 *ptr, uint32 old, uint32 new)
+// Atomically:
+// if(*val == old){
+// *val = new;
+// return 1;
+// } else
+// return 0;
+TEXT ·Cas(SB), NOSPLIT, $0-17
+ MOVD ptr+0(FP), R0
+ MOVW old+8(FP), R1
+ MOVW new+12(FP), R2
+ MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
+ CBZ R4, load_store_loop
+ MOVD R1, R3
+ CASALW R3, (R0), R2
+ CMP R1, R3
+ CSET EQ, R0
+ MOVB R0, ret+16(FP)
+ RET
+load_store_loop:
+ LDAXRW (R0), R3
+ CMPW R1, R3
+ BNE ok
+ STLXRW R2, (R0), R3
+ CBNZ R3, load_store_loop
+ok:
+ CSET EQ, R0
+ MOVB R0, ret+16(FP)
+ RET
+
+// bool ·Cas64(uint64 *ptr, uint64 old, uint64 new)
+// Atomically:
+// if(*val == old){
+// *val = new;
+// return 1;
+// } else {
+// return 0;
+// }
+TEXT ·Cas64(SB), NOSPLIT, $0-25
+ MOVD ptr+0(FP), R0
+ MOVD old+8(FP), R1
+ MOVD new+16(FP), R2
+ MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
+ CBZ R4, load_store_loop
+ MOVD R1, R3
+ CASALD R3, (R0), R2
+ CMP R1, R3
+ CSET EQ, R0
+ MOVB R0, ret+24(FP)
+ RET
+load_store_loop:
+ LDAXR (R0), R3
+ CMP R1, R3
+ BNE ok
+ STLXR R2, (R0), R3
+ CBNZ R3, load_store_loop
+ok:
+ CSET EQ, R0
+ MOVB R0, ret+24(FP)
+ RET
+
+// uint32 xadd(uint32 volatile *ptr, int32 delta)
+// Atomically:
+// *val += delta;
+// return *val;
+TEXT ·Xadd(SB), NOSPLIT, $0-20
+ MOVD ptr+0(FP), R0
+ MOVW delta+8(FP), R1
+ MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
+ CBZ R4, load_store_loop
+ LDADDALW R1, (R0), R2
+ ADD R1, R2
+ MOVW R2, ret+16(FP)
+ RET
+load_store_loop:
+ LDAXRW (R0), R2
+ ADDW R2, R1, R2
+ STLXRW R2, (R0), R3
+ CBNZ R3, load_store_loop
+ MOVW R2, ret+16(FP)
+ RET
+
+// uint64 Xadd64(uint64 volatile *ptr, int64 delta)
+// Atomically:
+// *val += delta;
+// return *val;
+TEXT ·Xadd64(SB), NOSPLIT, $0-24
+ MOVD ptr+0(FP), R0
+ MOVD delta+8(FP), R1
+ MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
+ CBZ R4, load_store_loop
+ LDADDALD R1, (R0), R2
+ ADD R1, R2
+ MOVD R2, ret+16(FP)
+ RET
+load_store_loop:
+ LDAXR (R0), R2
+ ADD R2, R1, R2
+ STLXR R2, (R0), R3
+ CBNZ R3, load_store_loop
+ MOVD R2, ret+16(FP)
+ RET
+
+TEXT ·Xchgint32(SB), NOSPLIT, $0-20
+ B ·Xchg(SB)
+
+TEXT ·Xchgint64(SB), NOSPLIT, $0-24
+ B ·Xchg64(SB)
+
+TEXT ·Xchguintptr(SB), NOSPLIT, $0-24
+ B ·Xchg64(SB)
+
+TEXT ·And8(SB), NOSPLIT, $0-9
+ MOVD ptr+0(FP), R0
+ MOVB val+8(FP), R1
+ MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
+ CBZ R4, load_store_loop
+ MVN R1, R2
+ LDCLRALB R2, (R0), R3
+ RET
+load_store_loop:
+ LDAXRB (R0), R2
+ AND R1, R2
+ STLXRB R2, (R0), R3
+ CBNZ R3, load_store_loop
+ RET
+
+TEXT ·Or8(SB), NOSPLIT, $0-9
+ MOVD ptr+0(FP), R0
+ MOVB val+8(FP), R1
+ MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
+ CBZ R4, load_store_loop
+ LDORALB R1, (R0), R2
+ RET
+load_store_loop:
+ LDAXRB (R0), R2
+ ORR R1, R2
+ STLXRB R2, (R0), R3
+ CBNZ R3, load_store_loop
+ RET
+
+// func And(addr *uint32, v uint32)
+TEXT ·And(SB), NOSPLIT, $0-12
+ MOVD ptr+0(FP), R0
+ MOVW val+8(FP), R1
+ MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
+ CBZ R4, load_store_loop
+ MVN R1, R2
+ LDCLRALW R2, (R0), R3
+ RET
+load_store_loop:
+ LDAXRW (R0), R2
+ AND R1, R2
+ STLXRW R2, (R0), R3
+ CBNZ R3, load_store_loop
+ RET
+
+// func Or(addr *uint32, v uint32)
+TEXT ·Or(SB), NOSPLIT, $0-12
+ MOVD ptr+0(FP), R0
+ MOVW val+8(FP), R1
+ MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
+ CBZ R4, load_store_loop
+ LDORALW R1, (R0), R2
+ RET
+load_store_loop:
+ LDAXRW (R0), R2
+ ORR R1, R2
+ STLXRW R2, (R0), R3
+ CBNZ R3, load_store_loop
+ RET
diff --git a/src/runtime/internal/atomic/atomic_loong64.go b/src/runtime/internal/atomic/atomic_loong64.go
new file mode 100644
index 0000000..908a7d6
--- /dev/null
+++ b/src/runtime/internal/atomic/atomic_loong64.go
@@ -0,0 +1,83 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build loong64
+
+package atomic
+
+import "unsafe"
+
+//go:noescape
+func Xadd(ptr *uint32, delta int32) uint32
+
+//go:noescape
+func Xadd64(ptr *uint64, delta int64) uint64
+
+//go:noescape
+func Xadduintptr(ptr *uintptr, delta uintptr) uintptr
+
+//go:noescape
+func Xchg(ptr *uint32, new uint32) uint32
+
+//go:noescape
+func Xchg64(ptr *uint64, new uint64) uint64
+
+//go:noescape
+func Xchguintptr(ptr *uintptr, new uintptr) uintptr
+
+//go:noescape
+func Load(ptr *uint32) uint32
+
+//go:noescape
+func Load8(ptr *uint8) uint8
+
+//go:noescape
+func Load64(ptr *uint64) uint64
+
+// NO go:noescape annotation; *ptr escapes if result escapes (#31525)
+func Loadp(ptr unsafe.Pointer) unsafe.Pointer
+
+//go:noescape
+func LoadAcq(ptr *uint32) uint32
+
+//go:noescape
+func LoadAcquintptr(ptr *uintptr) uintptr
+
+//go:noescape
+func And8(ptr *uint8, val uint8)
+
+//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or8(ptr *uint8, val uint8)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
+// NOTE: Do not add atomicxor8 (XOR is not idempotent).
+
+//go:noescape
+func Cas64(ptr *uint64, old, new uint64) bool
+
+//go:noescape
+func CasRel(ptr *uint32, old, new uint32) bool
+
+//go:noescape
+func Store(ptr *uint32, val uint32)
+
+//go:noescape
+func Store8(ptr *uint8, val uint8)
+
+//go:noescape
+func Store64(ptr *uint64, val uint64)
+
+// NO go:noescape annotation; see atomic_pointer.go.
+func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
+
+//go:noescape
+func StoreRel(ptr *uint32, val uint32)
+
+//go:noescape
+func StoreReluintptr(ptr *uintptr, val uintptr)
diff --git a/src/runtime/internal/atomic/atomic_loong64.s b/src/runtime/internal/atomic/atomic_loong64.s
new file mode 100644
index 0000000..bfb6c7e
--- /dev/null
+++ b/src/runtime/internal/atomic/atomic_loong64.s
@@ -0,0 +1,299 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// bool cas(uint32 *ptr, uint32 old, uint32 new)
+// Atomically:
+// if(*ptr == old){
+// *ptr = new;
+// return 1;
+// } else
+// return 0;
+TEXT ·Cas(SB), NOSPLIT, $0-17
+ MOVV ptr+0(FP), R4
+ MOVW old+8(FP), R5
+ MOVW new+12(FP), R6
+ DBAR
+cas_again:
+ MOVV R6, R7
+ LL (R4), R8
+ BNE R5, R8, cas_fail
+ SC R7, (R4)
+ BEQ R7, cas_again
+ MOVV $1, R4
+ MOVB R4, ret+16(FP)
+ DBAR
+ RET
+cas_fail:
+ MOVV $0, R4
+ JMP -4(PC)
+
+// bool cas64(uint64 *ptr, uint64 old, uint64 new)
+// Atomically:
+// if(*ptr == old){
+// *ptr = new;
+// return 1;
+// } else {
+// return 0;
+// }
+TEXT ·Cas64(SB), NOSPLIT, $0-25
+ MOVV ptr+0(FP), R4
+ MOVV old+8(FP), R5
+ MOVV new+16(FP), R6
+ DBAR
+cas64_again:
+ MOVV R6, R7
+ LLV (R4), R8
+ BNE R5, R8, cas64_fail
+ SCV R7, (R4)
+ BEQ R7, cas64_again
+ MOVV $1, R4
+ MOVB R4, ret+24(FP)
+ DBAR
+ RET
+cas64_fail:
+ MOVV $0, R4
+ JMP -4(PC)
+
+TEXT ·Casuintptr(SB), NOSPLIT, $0-25
+ JMP ·Cas64(SB)
+
+TEXT ·CasRel(SB), NOSPLIT, $0-17
+ JMP ·Cas(SB)
+
+TEXT ·Loaduintptr(SB), NOSPLIT|NOFRAME, $0-16
+ JMP ·Load64(SB)
+
+TEXT ·Loaduint(SB), NOSPLIT|NOFRAME, $0-16
+ JMP ·Load64(SB)
+
+TEXT ·Storeuintptr(SB), NOSPLIT, $0-16
+ JMP ·Store64(SB)
+
+TEXT ·Xadduintptr(SB), NOSPLIT, $0-24
+ JMP ·Xadd64(SB)
+
+TEXT ·Loadint64(SB), NOSPLIT, $0-16
+ JMP ·Load64(SB)
+
+TEXT ·Xaddint64(SB), NOSPLIT, $0-24
+ JMP ·Xadd64(SB)
+
+// bool casp(void **val, void *old, void *new)
+// Atomically:
+// if(*val == old){
+// *val = new;
+// return 1;
+// } else
+// return 0;
+TEXT ·Casp1(SB), NOSPLIT, $0-25
+ JMP runtime∕internal∕atomic·Cas64(SB)
+
+// uint32 xadd(uint32 volatile *ptr, int32 delta)
+// Atomically:
+// *val += delta;
+// return *val;
+TEXT ·Xadd(SB), NOSPLIT, $0-20
+ MOVV ptr+0(FP), R4
+ MOVW delta+8(FP), R5
+ DBAR
+ LL (R4), R6
+ ADDU R6, R5, R7
+ MOVV R7, R6
+ SC R7, (R4)
+ BEQ R7, -4(PC)
+ MOVW R6, ret+16(FP)
+ DBAR
+ RET
+
+TEXT ·Xadd64(SB), NOSPLIT, $0-24
+ MOVV ptr+0(FP), R4
+ MOVV delta+8(FP), R5
+ DBAR
+ LLV (R4), R6
+ ADDVU R6, R5, R7
+ MOVV R7, R6
+ SCV R7, (R4)
+ BEQ R7, -4(PC)
+ MOVV R6, ret+16(FP)
+ DBAR
+ RET
+
+TEXT ·Xchg(SB), NOSPLIT, $0-20
+ MOVV ptr+0(FP), R4
+ MOVW new+8(FP), R5
+
+ DBAR
+ MOVV R5, R6
+ LL (R4), R7
+ SC R6, (R4)
+ BEQ R6, -3(PC)
+ MOVW R7, ret+16(FP)
+ DBAR
+ RET
+
+TEXT ·Xchg64(SB), NOSPLIT, $0-24
+ MOVV ptr+0(FP), R4
+ MOVV new+8(FP), R5
+
+ DBAR
+ MOVV R5, R6
+ LLV (R4), R7
+ SCV R6, (R4)
+ BEQ R6, -3(PC)
+ MOVV R7, ret+16(FP)
+ DBAR
+ RET
+
+TEXT ·Xchguintptr(SB), NOSPLIT, $0-24
+ JMP ·Xchg64(SB)
+
+TEXT ·StorepNoWB(SB), NOSPLIT, $0-16
+ JMP ·Store64(SB)
+
+TEXT ·StoreRel(SB), NOSPLIT, $0-12
+ JMP ·Store(SB)
+
+TEXT ·StoreReluintptr(SB), NOSPLIT, $0-16
+ JMP ·Store64(SB)
+
+TEXT ·Store(SB), NOSPLIT, $0-12
+ MOVV ptr+0(FP), R4
+ MOVW val+8(FP), R5
+ DBAR
+ MOVW R5, 0(R4)
+ DBAR
+ RET
+
+TEXT ·Store8(SB), NOSPLIT, $0-9
+ MOVV ptr+0(FP), R4
+ MOVB val+8(FP), R5
+ DBAR
+ MOVB R5, 0(R4)
+ DBAR
+ RET
+
+TEXT ·Store64(SB), NOSPLIT, $0-16
+ MOVV ptr+0(FP), R4
+ MOVV val+8(FP), R5
+ DBAR
+ MOVV R5, 0(R4)
+ DBAR
+ RET
+
+// void Or8(byte volatile*, byte);
+TEXT ·Or8(SB), NOSPLIT, $0-9
+ MOVV ptr+0(FP), R4
+ MOVBU val+8(FP), R5
+ // Align ptr down to 4 bytes so we can use 32-bit load/store.
+ MOVV $~3, R6
+ AND R4, R6
+ // R7 = ((ptr & 3) * 8)
+ AND $3, R4, R7
+ SLLV $3, R7
+ // Shift val for aligned ptr. R5 = val << R4
+ SLLV R7, R5
+
+ DBAR
+ LL (R6), R7
+ OR R5, R7
+ SC R7, (R6)
+ BEQ R7, -4(PC)
+ DBAR
+ RET
+
+// void And8(byte volatile*, byte);
+TEXT ·And8(SB), NOSPLIT, $0-9
+ MOVV ptr+0(FP), R4
+ MOVBU val+8(FP), R5
+ // Align ptr down to 4 bytes so we can use 32-bit load/store.
+ MOVV $~3, R6
+ AND R4, R6
+ // R7 = ((ptr & 3) * 8)
+ AND $3, R4, R7
+ SLLV $3, R7
+ // Shift val for aligned ptr. R5 = val << R7 | ^(0xFF << R7)
+ MOVV $0xFF, R8
+ SLLV R7, R5
+ SLLV R7, R8
+ NOR R0, R8
+ OR R8, R5
+
+ DBAR
+ LL (R6), R7
+ AND R5, R7
+ SC R7, (R6)
+ BEQ R7, -4(PC)
+ DBAR
+ RET
+
+// func Or(addr *uint32, v uint32)
+TEXT ·Or(SB), NOSPLIT, $0-12
+ MOVV ptr+0(FP), R4
+ MOVW val+8(FP), R5
+ DBAR
+ LL (R4), R6
+ OR R5, R6
+ SC R6, (R4)
+ BEQ R6, -4(PC)
+ DBAR
+ RET
+
+// func And(addr *uint32, v uint32)
+TEXT ·And(SB), NOSPLIT, $0-12
+ MOVV ptr+0(FP), R4
+ MOVW val+8(FP), R5
+ DBAR
+ LL (R4), R6
+ AND R5, R6
+ SC R6, (R4)
+ BEQ R6, -4(PC)
+ DBAR
+ RET
+
+// uint32 runtime∕internal∕atomic·Load(uint32 volatile* ptr)
+TEXT ·Load(SB),NOSPLIT|NOFRAME,$0-12
+ MOVV ptr+0(FP), R19
+ DBAR
+ MOVWU 0(R19), R19
+ DBAR
+ MOVW R19, ret+8(FP)
+ RET
+
+// uint8 runtime∕internal∕atomic·Load8(uint8 volatile* ptr)
+TEXT ·Load8(SB),NOSPLIT|NOFRAME,$0-9
+ MOVV ptr+0(FP), R19
+ DBAR
+ MOVBU 0(R19), R19
+ DBAR
+ MOVB R19, ret+8(FP)
+ RET
+
+// uint64 runtime∕internal∕atomic·Load64(uint64 volatile* ptr)
+TEXT ·Load64(SB),NOSPLIT|NOFRAME,$0-16
+ MOVV ptr+0(FP), R19
+ DBAR
+ MOVV 0(R19), R19
+ DBAR
+ MOVV R19, ret+8(FP)
+ RET
+
+// void *runtime∕internal∕atomic·Loadp(void *volatile *ptr)
+TEXT ·Loadp(SB),NOSPLIT|NOFRAME,$0-16
+ MOVV ptr+0(FP), R19
+ DBAR
+ MOVV 0(R19), R19
+ DBAR
+ MOVV R19, ret+8(FP)
+ RET
+
+// uint32 runtime∕internal∕atomic·LoadAcq(uint32 volatile* ptr)
+TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$0-12
+ JMP atomic·Load(SB)
+
+// uintptr ·LoadAcquintptr(uintptr volatile* ptr)
+TEXT ·LoadAcquintptr(SB),NOSPLIT|NOFRAME,$0-16
+ JMP atomic·Load64(SB)
+
diff --git a/src/runtime/internal/atomic/atomic_mips64x.go b/src/runtime/internal/atomic/atomic_mips64x.go
new file mode 100644
index 0000000..1e12b83
--- /dev/null
+++ b/src/runtime/internal/atomic/atomic_mips64x.go
@@ -0,0 +1,89 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build mips64 || mips64le
+
+package atomic
+
+import "unsafe"
+
+//go:noescape
+func Xadd(ptr *uint32, delta int32) uint32
+
+//go:noescape
+func Xadd64(ptr *uint64, delta int64) uint64
+
+//go:noescape
+func Xadduintptr(ptr *uintptr, delta uintptr) uintptr
+
+//go:noescape
+func Xchg(ptr *uint32, new uint32) uint32
+
+//go:noescape
+func Xchg64(ptr *uint64, new uint64) uint64
+
+//go:noescape
+func Xchguintptr(ptr *uintptr, new uintptr) uintptr
+
+//go:noescape
+func Load(ptr *uint32) uint32
+
+//go:noescape
+func Load8(ptr *uint8) uint8
+
+//go:noescape
+func Load64(ptr *uint64) uint64
+
+// NO go:noescape annotation; *ptr escapes if result escapes (#31525)
+func Loadp(ptr unsafe.Pointer) unsafe.Pointer
+
+//go:noescape
+func LoadAcq(ptr *uint32) uint32
+
+//go:noescape
+func LoadAcq64(ptr *uint64) uint64
+
+//go:noescape
+func LoadAcquintptr(ptr *uintptr) uintptr
+
+//go:noescape
+func And8(ptr *uint8, val uint8)
+
+//go:noescape
+func Or8(ptr *uint8, val uint8)
+
+// NOTE: Do not add atomicxor8 (XOR is not idempotent).
+
+//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
+//go:noescape
+func Cas64(ptr *uint64, old, new uint64) bool
+
+//go:noescape
+func CasRel(ptr *uint32, old, new uint32) bool
+
+//go:noescape
+func Store(ptr *uint32, val uint32)
+
+//go:noescape
+func Store8(ptr *uint8, val uint8)
+
+//go:noescape
+func Store64(ptr *uint64, val uint64)
+
+// NO go:noescape annotation; see atomic_pointer.go.
+func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
+
+//go:noescape
+func StoreRel(ptr *uint32, val uint32)
+
+//go:noescape
+func StoreRel64(ptr *uint64, val uint64)
+
+//go:noescape
+func StoreReluintptr(ptr *uintptr, val uintptr)
diff --git a/src/runtime/internal/atomic/atomic_mips64x.s b/src/runtime/internal/atomic/atomic_mips64x.s
new file mode 100644
index 0000000..b4411d8
--- /dev/null
+++ b/src/runtime/internal/atomic/atomic_mips64x.s
@@ -0,0 +1,359 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build mips64 || mips64le
+
+#include "textflag.h"
+
+#define SYNC WORD $0xf
+
+// bool cas(uint32 *ptr, uint32 old, uint32 new)
+// Atomically:
+// if(*val == old){
+// *val = new;
+// return 1;
+// } else
+// return 0;
+TEXT ·Cas(SB), NOSPLIT, $0-17
+ MOVV ptr+0(FP), R1
+ MOVW old+8(FP), R2
+ MOVW new+12(FP), R5
+ SYNC
+cas_again:
+ MOVV R5, R3
+ LL (R1), R4
+ BNE R2, R4, cas_fail
+ SC R3, (R1)
+ BEQ R3, cas_again
+ MOVV $1, R1
+ MOVB R1, ret+16(FP)
+ SYNC
+ RET
+cas_fail:
+ MOVV $0, R1
+ JMP -4(PC)
+
+// bool cas64(uint64 *ptr, uint64 old, uint64 new)
+// Atomically:
+// if(*val == old){
+// *val = new;
+// return 1;
+// } else {
+// return 0;
+// }
+TEXT ·Cas64(SB), NOSPLIT, $0-25
+ MOVV ptr+0(FP), R1
+ MOVV old+8(FP), R2
+ MOVV new+16(FP), R5
+ SYNC
+cas64_again:
+ MOVV R5, R3
+ LLV (R1), R4
+ BNE R2, R4, cas64_fail
+ SCV R3, (R1)
+ BEQ R3, cas64_again
+ MOVV $1, R1
+ MOVB R1, ret+24(FP)
+ SYNC
+ RET
+cas64_fail:
+ MOVV $0, R1
+ JMP -4(PC)
+
+TEXT ·Casint32(SB), NOSPLIT, $0-17
+ JMP ·Cas(SB)
+
+TEXT ·Casint64(SB), NOSPLIT, $0-25
+ JMP ·Cas64(SB)
+
+TEXT ·Casuintptr(SB), NOSPLIT, $0-25
+ JMP ·Cas64(SB)
+
+TEXT ·CasRel(SB), NOSPLIT, $0-17
+ JMP ·Cas(SB)
+
+TEXT ·Loaduintptr(SB), NOSPLIT|NOFRAME, $0-16
+ JMP ·Load64(SB)
+
+TEXT ·Loaduint(SB), NOSPLIT|NOFRAME, $0-16
+ JMP ·Load64(SB)
+
+TEXT ·Storeint32(SB), NOSPLIT, $0-12
+ JMP ·Store(SB)
+
+TEXT ·Storeint64(SB), NOSPLIT, $0-16
+ JMP ·Store64(SB)
+
+TEXT ·Storeuintptr(SB), NOSPLIT, $0-16
+ JMP ·Store64(SB)
+
+TEXT ·Xadduintptr(SB), NOSPLIT, $0-24
+ JMP ·Xadd64(SB)
+
+TEXT ·Loadint32(SB), NOSPLIT, $0-12
+ JMP ·Load(SB)
+
+TEXT ·Loadint64(SB), NOSPLIT, $0-16
+ JMP ·Load64(SB)
+
+TEXT ·Xaddint32(SB), NOSPLIT, $0-20
+ JMP ·Xadd(SB)
+
+TEXT ·Xaddint64(SB), NOSPLIT, $0-24
+ JMP ·Xadd64(SB)
+
+// bool casp(void **val, void *old, void *new)
+// Atomically:
+// if(*val == old){
+// *val = new;
+// return 1;
+// } else
+// return 0;
+TEXT ·Casp1(SB), NOSPLIT, $0-25
+ JMP ·Cas64(SB)
+
+// uint32 xadd(uint32 volatile *ptr, int32 delta)
+// Atomically:
+// *val += delta;
+// return *val;
+TEXT ·Xadd(SB), NOSPLIT, $0-20
+ MOVV ptr+0(FP), R2
+ MOVW delta+8(FP), R3
+ SYNC
+ LL (R2), R1
+ ADDU R1, R3, R4
+ MOVV R4, R1
+ SC R4, (R2)
+ BEQ R4, -4(PC)
+ MOVW R1, ret+16(FP)
+ SYNC
+ RET
+
+// uint64 Xadd64(uint64 volatile *ptr, int64 delta)
+// Atomically:
+// *val += delta;
+// return *val;
+TEXT ·Xadd64(SB), NOSPLIT, $0-24
+ MOVV ptr+0(FP), R2
+ MOVV delta+8(FP), R3
+ SYNC
+ LLV (R2), R1
+ ADDVU R1, R3, R4
+ MOVV R4, R1
+ SCV R4, (R2)
+ BEQ R4, -4(PC)
+ MOVV R1, ret+16(FP)
+ SYNC
+ RET
+
+// uint32 Xchg(ptr *uint32, new uint32)
+// Atomically:
+// old := *ptr;
+// *ptr = new;
+// return old;
+TEXT ·Xchg(SB), NOSPLIT, $0-20
+ MOVV ptr+0(FP), R2
+ MOVW new+8(FP), R5
+
+ SYNC
+ MOVV R5, R3
+ LL (R2), R1
+ SC R3, (R2)
+ BEQ R3, -3(PC)
+ MOVW R1, ret+16(FP)
+ SYNC
+ RET
+
+// uint64 Xchg64(ptr *uint64, new uint64)
+// Atomically:
+// old := *ptr;
+// *ptr = new;
+// return old;
+TEXT ·Xchg64(SB), NOSPLIT, $0-24
+ MOVV ptr+0(FP), R2
+ MOVV new+8(FP), R5
+
+ SYNC
+ MOVV R5, R3
+ LLV (R2), R1
+ SCV R3, (R2)
+ BEQ R3, -3(PC)
+ MOVV R1, ret+16(FP)
+ SYNC
+ RET
+
+TEXT ·Xchgint32(SB), NOSPLIT, $0-20
+ JMP ·Xchg(SB)
+
+TEXT ·Xchgint64(SB), NOSPLIT, $0-24
+ JMP ·Xchg64(SB)
+
+TEXT ·Xchguintptr(SB), NOSPLIT, $0-24
+ JMP ·Xchg64(SB)
+
+TEXT ·StorepNoWB(SB), NOSPLIT, $0-16
+ JMP ·Store64(SB)
+
+TEXT ·StoreRel(SB), NOSPLIT, $0-12
+ JMP ·Store(SB)
+
+TEXT ·StoreRel64(SB), NOSPLIT, $0-16
+ JMP ·Store64(SB)
+
+TEXT ·StoreReluintptr(SB), NOSPLIT, $0-16
+ JMP ·Store64(SB)
+
+TEXT ·Store(SB), NOSPLIT, $0-12
+ MOVV ptr+0(FP), R1
+ MOVW val+8(FP), R2
+ SYNC
+ MOVW R2, 0(R1)
+ SYNC
+ RET
+
+TEXT ·Store8(SB), NOSPLIT, $0-9
+ MOVV ptr+0(FP), R1
+ MOVB val+8(FP), R2
+ SYNC
+ MOVB R2, 0(R1)
+ SYNC
+ RET
+
+TEXT ·Store64(SB), NOSPLIT, $0-16
+ MOVV ptr+0(FP), R1
+ MOVV val+8(FP), R2
+ SYNC
+ MOVV R2, 0(R1)
+ SYNC
+ RET
+
+// void Or8(byte volatile*, byte);
+TEXT ·Or8(SB), NOSPLIT, $0-9
+ MOVV ptr+0(FP), R1
+ MOVBU val+8(FP), R2
+ // Align ptr down to 4 bytes so we can use 32-bit load/store.
+ MOVV $~3, R3
+ AND R1, R3
+ // Compute val shift.
+#ifdef GOARCH_mips64
+ // Big endian. ptr = ptr ^ 3
+ XOR $3, R1
+#endif
+ // R4 = ((ptr & 3) * 8)
+ AND $3, R1, R4
+ SLLV $3, R4
+ // Shift val for aligned ptr. R2 = val << R4
+ SLLV R4, R2
+
+ SYNC
+ LL (R3), R4
+ OR R2, R4
+ SC R4, (R3)
+ BEQ R4, -4(PC)
+ SYNC
+ RET
+
+// void And8(byte volatile*, byte);
+TEXT ·And8(SB), NOSPLIT, $0-9
+ MOVV ptr+0(FP), R1
+ MOVBU val+8(FP), R2
+ // Align ptr down to 4 bytes so we can use 32-bit load/store.
+ MOVV $~3, R3
+ AND R1, R3
+ // Compute val shift.
+#ifdef GOARCH_mips64
+ // Big endian. ptr = ptr ^ 3
+ XOR $3, R1
+#endif
+ // R4 = ((ptr & 3) * 8)
+ AND $3, R1, R4
+ SLLV $3, R4
+ // Shift val for aligned ptr. R2 = val << R4 | ^(0xFF << R4)
+ MOVV $0xFF, R5
+ SLLV R4, R2
+ SLLV R4, R5
+ NOR R0, R5
+ OR R5, R2
+
+ SYNC
+ LL (R3), R4
+ AND R2, R4
+ SC R4, (R3)
+ BEQ R4, -4(PC)
+ SYNC
+ RET
+
+// func Or(addr *uint32, v uint32)
+TEXT ·Or(SB), NOSPLIT, $0-12
+ MOVV ptr+0(FP), R1
+ MOVW val+8(FP), R2
+
+ SYNC
+ LL (R1), R3
+ OR R2, R3
+ SC R3, (R1)
+ BEQ R3, -4(PC)
+ SYNC
+ RET
+
+// func And(addr *uint32, v uint32)
+TEXT ·And(SB), NOSPLIT, $0-12
+ MOVV ptr+0(FP), R1
+ MOVW val+8(FP), R2
+
+ SYNC
+ LL (R1), R3
+ AND R2, R3
+ SC R3, (R1)
+ BEQ R3, -4(PC)
+ SYNC
+ RET
+
+// uint32 ·Load(uint32 volatile* ptr)
+TEXT ·Load(SB),NOSPLIT|NOFRAME,$0-12
+ MOVV ptr+0(FP), R1
+ SYNC
+ MOVWU 0(R1), R1
+ SYNC
+ MOVW R1, ret+8(FP)
+ RET
+
+// uint8 ·Load8(uint8 volatile* ptr)
+TEXT ·Load8(SB),NOSPLIT|NOFRAME,$0-9
+ MOVV ptr+0(FP), R1
+ SYNC
+ MOVBU 0(R1), R1
+ SYNC
+ MOVB R1, ret+8(FP)
+ RET
+
+// uint64 ·Load64(uint64 volatile* ptr)
+TEXT ·Load64(SB),NOSPLIT|NOFRAME,$0-16
+ MOVV ptr+0(FP), R1
+ SYNC
+ MOVV 0(R1), R1
+ SYNC
+ MOVV R1, ret+8(FP)
+ RET
+
+// void *·Loadp(void *volatile *ptr)
+TEXT ·Loadp(SB),NOSPLIT|NOFRAME,$0-16
+ MOVV ptr+0(FP), R1
+ SYNC
+ MOVV 0(R1), R1
+ SYNC
+ MOVV R1, ret+8(FP)
+ RET
+
+// uint32 ·LoadAcq(uint32 volatile* ptr)
+TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$0-12
+ JMP atomic·Load(SB)
+
+// uint64 ·LoadAcq64(uint64 volatile* ptr)
+TEXT ·LoadAcq64(SB),NOSPLIT|NOFRAME,$0-16
+ JMP atomic·Load64(SB)
+
+// uintptr ·LoadAcquintptr(uintptr volatile* ptr)
+TEXT ·LoadAcquintptr(SB),NOSPLIT|NOFRAME,$0-16
+ JMP atomic·Load64(SB)
diff --git a/src/runtime/internal/atomic/atomic_mipsx.go b/src/runtime/internal/atomic/atomic_mipsx.go
new file mode 100644
index 0000000..5dd15a0
--- /dev/null
+++ b/src/runtime/internal/atomic/atomic_mipsx.go
@@ -0,0 +1,167 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build mips || mipsle
+
+// Export some functions via linkname to assembly in sync/atomic.
+//
+//go:linkname Xadd64
+//go:linkname Xchg64
+//go:linkname Cas64
+//go:linkname Load64
+//go:linkname Store64
+
+package atomic
+
+import (
+ "internal/cpu"
+ "unsafe"
+)
+
+// TODO implement lock striping
+var lock struct {
+ state uint32
+ pad [cpu.CacheLinePadSize - 4]byte
+}
+
+//go:noescape
+func spinLock(state *uint32)
+
+//go:noescape
+func spinUnlock(state *uint32)
+
+//go:nosplit
+func lockAndCheck(addr *uint64) {
+ // ensure 8-byte alignment
+ if uintptr(unsafe.Pointer(addr))&7 != 0 {
+ panicUnaligned()
+ }
+ // force dereference before taking lock
+ _ = *addr
+
+ spinLock(&lock.state)
+}
+
+//go:nosplit
+func unlock() {
+ spinUnlock(&lock.state)
+}
+
+//go:nosplit
+func unlockNoFence() {
+ lock.state = 0
+}
+
+//go:nosplit
+func Xadd64(addr *uint64, delta int64) (new uint64) {
+ lockAndCheck(addr)
+
+ new = *addr + uint64(delta)
+ *addr = new
+
+ unlock()
+ return
+}
+
+//go:nosplit
+func Xchg64(addr *uint64, new uint64) (old uint64) {
+ lockAndCheck(addr)
+
+ old = *addr
+ *addr = new
+
+ unlock()
+ return
+}
+
+//go:nosplit
+func Cas64(addr *uint64, old, new uint64) (swapped bool) {
+ lockAndCheck(addr)
+
+ if (*addr) == old {
+ *addr = new
+ unlock()
+ return true
+ }
+
+ unlockNoFence()
+ return false
+}
+
+//go:nosplit
+func Load64(addr *uint64) (val uint64) {
+ lockAndCheck(addr)
+
+ val = *addr
+
+ unlock()
+ return
+}
+
+//go:nosplit
+func Store64(addr *uint64, val uint64) {
+ lockAndCheck(addr)
+
+ *addr = val
+
+ unlock()
+ return
+}
+
+//go:noescape
+func Xadd(ptr *uint32, delta int32) uint32
+
+//go:noescape
+func Xadduintptr(ptr *uintptr, delta uintptr) uintptr
+
+//go:noescape
+func Xchg(ptr *uint32, new uint32) uint32
+
+//go:noescape
+func Xchguintptr(ptr *uintptr, new uintptr) uintptr
+
+//go:noescape
+func Load(ptr *uint32) uint32
+
+//go:noescape
+func Load8(ptr *uint8) uint8
+
+// NO go:noescape annotation; *ptr escapes if result escapes (#31525)
+func Loadp(ptr unsafe.Pointer) unsafe.Pointer
+
+//go:noescape
+func LoadAcq(ptr *uint32) uint32
+
+//go:noescape
+func LoadAcquintptr(ptr *uintptr) uintptr
+
+//go:noescape
+func And8(ptr *uint8, val uint8)
+
+//go:noescape
+func Or8(ptr *uint8, val uint8)
+
+//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
+//go:noescape
+func Store(ptr *uint32, val uint32)
+
+//go:noescape
+func Store8(ptr *uint8, val uint8)
+
+// NO go:noescape annotation; see atomic_pointer.go.
+func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
+
+//go:noescape
+func StoreRel(ptr *uint32, val uint32)
+
+//go:noescape
+func StoreReluintptr(ptr *uintptr, val uintptr)
+
+//go:noescape
+func CasRel(addr *uint32, old, new uint32) bool
diff --git a/src/runtime/internal/atomic/atomic_mipsx.s b/src/runtime/internal/atomic/atomic_mipsx.s
new file mode 100644
index 0000000..390e9ce
--- /dev/null
+++ b/src/runtime/internal/atomic/atomic_mipsx.s
@@ -0,0 +1,261 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build mips || mipsle
+
+#include "textflag.h"
+
+// bool Cas(int32 *val, int32 old, int32 new)
+// Atomically:
+// if(*val == old){
+// *val = new;
+// return 1;
+// } else
+// return 0;
+TEXT ·Cas(SB),NOSPLIT,$0-13
+ MOVW ptr+0(FP), R1
+ MOVW old+4(FP), R2
+ MOVW new+8(FP), R5
+ SYNC
+try_cas:
+ MOVW R5, R3
+ LL (R1), R4 // R4 = *R1
+ BNE R2, R4, cas_fail
+ SC R3, (R1) // *R1 = R3
+ BEQ R3, try_cas
+ SYNC
+ MOVB R3, ret+12(FP)
+ RET
+cas_fail:
+ MOVB R0, ret+12(FP)
+ RET
+
+TEXT ·Store(SB),NOSPLIT,$0-8
+ MOVW ptr+0(FP), R1
+ MOVW val+4(FP), R2
+ SYNC
+ MOVW R2, 0(R1)
+ SYNC
+ RET
+
+TEXT ·Store8(SB),NOSPLIT,$0-5
+ MOVW ptr+0(FP), R1
+ MOVB val+4(FP), R2
+ SYNC
+ MOVB R2, 0(R1)
+ SYNC
+ RET
+
+TEXT ·Load(SB),NOSPLIT,$0-8
+ MOVW ptr+0(FP), R1
+ SYNC
+ MOVW 0(R1), R1
+ SYNC
+ MOVW R1, ret+4(FP)
+ RET
+
+TEXT ·Load8(SB),NOSPLIT,$0-5
+ MOVW ptr+0(FP), R1
+ SYNC
+ MOVB 0(R1), R1
+ SYNC
+ MOVB R1, ret+4(FP)
+ RET
+
+// uint32 Xadd(uint32 volatile *val, int32 delta)
+// Atomically:
+// *val += delta;
+// return *val;
+TEXT ·Xadd(SB),NOSPLIT,$0-12
+ MOVW ptr+0(FP), R2
+ MOVW delta+4(FP), R3
+ SYNC
+try_xadd:
+ LL (R2), R1 // R1 = *R2
+ ADDU R1, R3, R4
+ MOVW R4, R1
+ SC R4, (R2) // *R2 = R4
+ BEQ R4, try_xadd
+ SYNC
+ MOVW R1, ret+8(FP)
+ RET
+
+// uint32 Xchg(ptr *uint32, new uint32)
+// Atomically:
+// old := *ptr;
+// *ptr = new;
+// return old;
+TEXT ·Xchg(SB),NOSPLIT,$0-12
+ MOVW ptr+0(FP), R2
+ MOVW new+4(FP), R5
+ SYNC
+try_xchg:
+ MOVW R5, R3
+ LL (R2), R1 // R1 = *R2
+ SC R3, (R2) // *R2 = R3
+ BEQ R3, try_xchg
+ SYNC
+ MOVW R1, ret+8(FP)
+ RET
+
+TEXT ·Casint32(SB),NOSPLIT,$0-13
+ JMP ·Cas(SB)
+
+TEXT ·Casint64(SB),NOSPLIT,$0-21
+ JMP ·Cas64(SB)
+
+TEXT ·Casuintptr(SB),NOSPLIT,$0-13
+ JMP ·Cas(SB)
+
+TEXT ·CasRel(SB),NOSPLIT,$0-13
+ JMP ·Cas(SB)
+
+TEXT ·Loaduintptr(SB),NOSPLIT,$0-8
+ JMP ·Load(SB)
+
+TEXT ·Loaduint(SB),NOSPLIT,$0-8
+ JMP ·Load(SB)
+
+TEXT ·Loadp(SB),NOSPLIT,$-0-8
+ JMP ·Load(SB)
+
+TEXT ·Storeint32(SB),NOSPLIT,$0-8
+ JMP ·Store(SB)
+
+TEXT ·Storeint64(SB),NOSPLIT,$0-12
+ JMP ·Store64(SB)
+
+TEXT ·Storeuintptr(SB),NOSPLIT,$0-8
+ JMP ·Store(SB)
+
+TEXT ·Xadduintptr(SB),NOSPLIT,$0-12
+ JMP ·Xadd(SB)
+
+TEXT ·Loadint32(SB),NOSPLIT,$0-8
+ JMP ·Load(SB)
+
+TEXT ·Loadint64(SB),NOSPLIT,$0-12
+ JMP ·Load64(SB)
+
+TEXT ·Xaddint32(SB),NOSPLIT,$0-12
+ JMP ·Xadd(SB)
+
+TEXT ·Xaddint64(SB),NOSPLIT,$0-20
+ JMP ·Xadd64(SB)
+
+TEXT ·Casp1(SB),NOSPLIT,$0-13
+ JMP ·Cas(SB)
+
+TEXT ·Xchgint32(SB),NOSPLIT,$0-12
+ JMP ·Xchg(SB)
+
+TEXT ·Xchgint64(SB),NOSPLIT,$0-20
+ JMP ·Xchg64(SB)
+
+TEXT ·Xchguintptr(SB),NOSPLIT,$0-12
+ JMP ·Xchg(SB)
+
+TEXT ·StorepNoWB(SB),NOSPLIT,$0-8
+ JMP ·Store(SB)
+
+TEXT ·StoreRel(SB),NOSPLIT,$0-8
+ JMP ·Store(SB)
+
+TEXT ·StoreReluintptr(SB),NOSPLIT,$0-8
+ JMP ·Store(SB)
+
+// void Or8(byte volatile*, byte);
+TEXT ·Or8(SB),NOSPLIT,$0-5
+ MOVW ptr+0(FP), R1
+ MOVBU val+4(FP), R2
+ MOVW $~3, R3 // Align ptr down to 4 bytes so we can use 32-bit load/store.
+ AND R1, R3
+#ifdef GOARCH_mips
+ // Big endian. ptr = ptr ^ 3
+ XOR $3, R1
+#endif
+ AND $3, R1, R4 // R4 = ((ptr & 3) * 8)
+ SLL $3, R4
+ SLL R4, R2, R2 // Shift val for aligned ptr. R2 = val << R4
+ SYNC
+try_or8:
+ LL (R3), R4 // R4 = *R3
+ OR R2, R4
+ SC R4, (R3) // *R3 = R4
+ BEQ R4, try_or8
+ SYNC
+ RET
+
+// void And8(byte volatile*, byte);
+TEXT ·And8(SB),NOSPLIT,$0-5
+ MOVW ptr+0(FP), R1
+ MOVBU val+4(FP), R2
+ MOVW $~3, R3
+ AND R1, R3
+#ifdef GOARCH_mips
+ // Big endian. ptr = ptr ^ 3
+ XOR $3, R1
+#endif
+ AND $3, R1, R4 // R4 = ((ptr & 3) * 8)
+ SLL $3, R4
+ MOVW $0xFF, R5
+ SLL R4, R2
+ SLL R4, R5
+ NOR R0, R5
+ OR R5, R2 // Shift val for aligned ptr. R2 = val << R4 | ^(0xFF << R4)
+ SYNC
+try_and8:
+ LL (R3), R4 // R4 = *R3
+ AND R2, R4
+ SC R4, (R3) // *R3 = R4
+ BEQ R4, try_and8
+ SYNC
+ RET
+
+// func Or(addr *uint32, v uint32)
+TEXT ·Or(SB), NOSPLIT, $0-8
+ MOVW ptr+0(FP), R1
+ MOVW val+4(FP), R2
+
+ SYNC
+ LL (R1), R3
+ OR R2, R3
+ SC R3, (R1)
+ BEQ R3, -4(PC)
+ SYNC
+ RET
+
+// func And(addr *uint32, v uint32)
+TEXT ·And(SB), NOSPLIT, $0-8
+ MOVW ptr+0(FP), R1
+ MOVW val+4(FP), R2
+
+ SYNC
+ LL (R1), R3
+ AND R2, R3
+ SC R3, (R1)
+ BEQ R3, -4(PC)
+ SYNC
+ RET
+
+TEXT ·spinLock(SB),NOSPLIT,$0-4
+ MOVW state+0(FP), R1
+ MOVW $1, R2
+ SYNC
+try_lock:
+ MOVW R2, R3
+check_again:
+ LL (R1), R4
+ BNE R4, check_again
+ SC R3, (R1)
+ BEQ R3, try_lock
+ SYNC
+ RET
+
+TEXT ·spinUnlock(SB),NOSPLIT,$0-4
+ MOVW state+0(FP), R1
+ SYNC
+ MOVW R0, (R1)
+ SYNC
+ RET
diff --git a/src/runtime/internal/atomic/atomic_ppc64x.go b/src/runtime/internal/atomic/atomic_ppc64x.go
new file mode 100644
index 0000000..998d16e
--- /dev/null
+++ b/src/runtime/internal/atomic/atomic_ppc64x.go
@@ -0,0 +1,89 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build ppc64 || ppc64le
+
+package atomic
+
+import "unsafe"
+
+//go:noescape
+func Xadd(ptr *uint32, delta int32) uint32
+
+//go:noescape
+func Xadd64(ptr *uint64, delta int64) uint64
+
+//go:noescape
+func Xadduintptr(ptr *uintptr, delta uintptr) uintptr
+
+//go:noescape
+func Xchg(ptr *uint32, new uint32) uint32
+
+//go:noescape
+func Xchg64(ptr *uint64, new uint64) uint64
+
+//go:noescape
+func Xchguintptr(ptr *uintptr, new uintptr) uintptr
+
+//go:noescape
+func Load(ptr *uint32) uint32
+
+//go:noescape
+func Load8(ptr *uint8) uint8
+
+//go:noescape
+func Load64(ptr *uint64) uint64
+
+// NO go:noescape annotation; *ptr escapes if result escapes (#31525)
+func Loadp(ptr unsafe.Pointer) unsafe.Pointer
+
+//go:noescape
+func LoadAcq(ptr *uint32) uint32
+
+//go:noescape
+func LoadAcq64(ptr *uint64) uint64
+
+//go:noescape
+func LoadAcquintptr(ptr *uintptr) uintptr
+
+//go:noescape
+func And8(ptr *uint8, val uint8)
+
+//go:noescape
+func Or8(ptr *uint8, val uint8)
+
+// NOTE: Do not add atomicxor8 (XOR is not idempotent).
+
+//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
+//go:noescape
+func Cas64(ptr *uint64, old, new uint64) bool
+
+//go:noescape
+func CasRel(ptr *uint32, old, new uint32) bool
+
+//go:noescape
+func Store(ptr *uint32, val uint32)
+
+//go:noescape
+func Store8(ptr *uint8, val uint8)
+
+//go:noescape
+func Store64(ptr *uint64, val uint64)
+
+//go:noescape
+func StoreRel(ptr *uint32, val uint32)
+
+//go:noescape
+func StoreRel64(ptr *uint64, val uint64)
+
+//go:noescape
+func StoreReluintptr(ptr *uintptr, val uintptr)
+
+// NO go:noescape annotation; see atomic_pointer.go.
+func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
diff --git a/src/runtime/internal/atomic/atomic_ppc64x.s b/src/runtime/internal/atomic/atomic_ppc64x.s
new file mode 100644
index 0000000..04f0ead
--- /dev/null
+++ b/src/runtime/internal/atomic/atomic_ppc64x.s
@@ -0,0 +1,362 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build ppc64 || ppc64le
+
+#include "textflag.h"
+
+// For more details about how various memory models are
+// enforced on POWER, the following paper provides more
+// details about how they enforce C/C++ like models. This
+// gives context about why the strange looking code
+// sequences below work.
+//
+// http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
+
+// uint32 ·Load(uint32 volatile* ptr)
+TEXT ·Load(SB),NOSPLIT|NOFRAME,$-8-12
+ MOVD ptr+0(FP), R3
+ SYNC
+ MOVWZ 0(R3), R3
+ CMPW R3, R3, CR7
+ BC 4, 30, 1(PC) // bne- cr7,0x4
+ ISYNC
+ MOVW R3, ret+8(FP)
+ RET
+
+// uint8 ·Load8(uint8 volatile* ptr)
+TEXT ·Load8(SB),NOSPLIT|NOFRAME,$-8-9
+ MOVD ptr+0(FP), R3
+ SYNC
+ MOVBZ 0(R3), R3
+ CMP R3, R3, CR7
+ BC 4, 30, 1(PC) // bne- cr7,0x4
+ ISYNC
+ MOVB R3, ret+8(FP)
+ RET
+
+// uint64 ·Load64(uint64 volatile* ptr)
+TEXT ·Load64(SB),NOSPLIT|NOFRAME,$-8-16
+ MOVD ptr+0(FP), R3
+ SYNC
+ MOVD 0(R3), R3
+ CMP R3, R3, CR7
+ BC 4, 30, 1(PC) // bne- cr7,0x4
+ ISYNC
+ MOVD R3, ret+8(FP)
+ RET
+
+// void *·Loadp(void *volatile *ptr)
+TEXT ·Loadp(SB),NOSPLIT|NOFRAME,$-8-16
+ MOVD ptr+0(FP), R3
+ SYNC
+ MOVD 0(R3), R3
+ CMP R3, R3, CR7
+ BC 4, 30, 1(PC) // bne- cr7,0x4
+ ISYNC
+ MOVD R3, ret+8(FP)
+ RET
+
+// uint32 ·LoadAcq(uint32 volatile* ptr)
+TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$-8-12
+ MOVD ptr+0(FP), R3
+ MOVWZ 0(R3), R3
+ CMPW R3, R3, CR7
+ BC 4, 30, 1(PC) // bne- cr7, 0x4
+ ISYNC
+ MOVW R3, ret+8(FP)
+ RET
+
+// uint64 ·LoadAcq64(uint64 volatile* ptr)
+TEXT ·LoadAcq64(SB),NOSPLIT|NOFRAME,$-8-16
+ MOVD ptr+0(FP), R3
+ MOVD 0(R3), R3
+ CMP R3, R3, CR7
+ BC 4, 30, 1(PC) // bne- cr7, 0x4
+ ISYNC
+ MOVD R3, ret+8(FP)
+ RET
+
+// bool cas(uint32 *ptr, uint32 old, uint32 new)
+// Atomically:
+// if(*val == old){
+// *val = new;
+// return 1;
+// } else
+// return 0;
+TEXT ·Cas(SB), NOSPLIT, $0-17
+ MOVD ptr+0(FP), R3
+ MOVWZ old+8(FP), R4
+ MOVWZ new+12(FP), R5
+ LWSYNC
+cas_again:
+ LWAR (R3), R6
+ CMPW R6, R4
+ BNE cas_fail
+ STWCCC R5, (R3)
+ BNE cas_again
+ MOVD $1, R3
+ LWSYNC
+ MOVB R3, ret+16(FP)
+ RET
+cas_fail:
+ MOVB R0, ret+16(FP)
+ RET
+
+// bool ·Cas64(uint64 *ptr, uint64 old, uint64 new)
+// Atomically:
+// if(*val == old){
+// *val = new;
+// return 1;
+// } else {
+// return 0;
+// }
+TEXT ·Cas64(SB), NOSPLIT, $0-25
+ MOVD ptr+0(FP), R3
+ MOVD old+8(FP), R4
+ MOVD new+16(FP), R5
+ LWSYNC
+cas64_again:
+ LDAR (R3), R6
+ CMP R6, R4
+ BNE cas64_fail
+ STDCCC R5, (R3)
+ BNE cas64_again
+ MOVD $1, R3
+ LWSYNC
+ MOVB R3, ret+24(FP)
+ RET
+cas64_fail:
+ MOVB R0, ret+24(FP)
+ RET
+
+TEXT ·CasRel(SB), NOSPLIT, $0-17
+ MOVD ptr+0(FP), R3
+ MOVWZ old+8(FP), R4
+ MOVWZ new+12(FP), R5
+ LWSYNC
+cas_again:
+ LWAR (R3), $0, R6 // 0 = Mutex release hint
+ CMPW R6, R4
+ BNE cas_fail
+ STWCCC R5, (R3)
+ BNE cas_again
+ MOVD $1, R3
+ MOVB R3, ret+16(FP)
+ RET
+cas_fail:
+ MOVB R0, ret+16(FP)
+ RET
+
+TEXT ·Casint32(SB), NOSPLIT, $0-17
+ BR ·Cas(SB)
+
+TEXT ·Casint64(SB), NOSPLIT, $0-25
+ BR ·Cas64(SB)
+
+TEXT ·Casuintptr(SB), NOSPLIT, $0-25
+ BR ·Cas64(SB)
+
+TEXT ·Loaduintptr(SB), NOSPLIT|NOFRAME, $0-16
+ BR ·Load64(SB)
+
+TEXT ·LoadAcquintptr(SB), NOSPLIT|NOFRAME, $0-16
+ BR ·LoadAcq64(SB)
+
+TEXT ·Loaduint(SB), NOSPLIT|NOFRAME, $0-16
+ BR ·Load64(SB)
+
+TEXT ·Storeint32(SB), NOSPLIT, $0-12
+ BR ·Store(SB)
+
+TEXT ·Storeint64(SB), NOSPLIT, $0-16
+ BR ·Store64(SB)
+
+TEXT ·Storeuintptr(SB), NOSPLIT, $0-16
+ BR ·Store64(SB)
+
+TEXT ·StoreReluintptr(SB), NOSPLIT, $0-16
+ BR ·StoreRel64(SB)
+
+TEXT ·Xadduintptr(SB), NOSPLIT, $0-24
+ BR ·Xadd64(SB)
+
+TEXT ·Loadint32(SB), NOSPLIT, $0-12
+ BR ·Load(SB)
+
+TEXT ·Loadint64(SB), NOSPLIT, $0-16
+ BR ·Load64(SB)
+
+TEXT ·Xaddint32(SB), NOSPLIT, $0-20
+ BR ·Xadd(SB)
+
+TEXT ·Xaddint64(SB), NOSPLIT, $0-24
+ BR ·Xadd64(SB)
+
+// bool casp(void **val, void *old, void *new)
+// Atomically:
+// if(*val == old){
+// *val = new;
+// return 1;
+// } else
+// return 0;
+TEXT ·Casp1(SB), NOSPLIT, $0-25
+ BR ·Cas64(SB)
+
+// uint32 xadd(uint32 volatile *ptr, int32 delta)
+// Atomically:
+// *val += delta;
+// return *val;
+TEXT ·Xadd(SB), NOSPLIT, $0-20
+ MOVD ptr+0(FP), R4
+ MOVW delta+8(FP), R5
+ LWSYNC
+ LWAR (R4), R3
+ ADD R5, R3
+ STWCCC R3, (R4)
+ BNE -3(PC)
+ MOVW R3, ret+16(FP)
+ RET
+
+// uint64 Xadd64(uint64 volatile *val, int64 delta)
+// Atomically:
+// *val += delta;
+// return *val;
+TEXT ·Xadd64(SB), NOSPLIT, $0-24
+ MOVD ptr+0(FP), R4
+ MOVD delta+8(FP), R5
+ LWSYNC
+ LDAR (R4), R3
+ ADD R5, R3
+ STDCCC R3, (R4)
+ BNE -3(PC)
+ MOVD R3, ret+16(FP)
+ RET
+
+// uint32 Xchg(ptr *uint32, new uint32)
+// Atomically:
+// old := *ptr;
+// *ptr = new;
+// return old;
+TEXT ·Xchg(SB), NOSPLIT, $0-20
+ MOVD ptr+0(FP), R4
+ MOVW new+8(FP), R5
+ LWSYNC
+ LWAR (R4), R3
+ STWCCC R5, (R4)
+ BNE -2(PC)
+ ISYNC
+ MOVW R3, ret+16(FP)
+ RET
+
+// uint64 Xchg64(ptr *uint64, new uint64)
+// Atomically:
+// old := *ptr;
+// *ptr = new;
+// return old;
+TEXT ·Xchg64(SB), NOSPLIT, $0-24
+ MOVD ptr+0(FP), R4
+ MOVD new+8(FP), R5
+ LWSYNC
+ LDAR (R4), R3
+ STDCCC R5, (R4)
+ BNE -2(PC)
+ ISYNC
+ MOVD R3, ret+16(FP)
+ RET
+
+TEXT ·Xchgint32(SB), NOSPLIT, $0-20
+ BR ·Xchg(SB)
+
+TEXT ·Xchgint64(SB), NOSPLIT, $0-24
+ BR ·Xchg64(SB)
+
+TEXT ·Xchguintptr(SB), NOSPLIT, $0-24
+ BR ·Xchg64(SB)
+
+TEXT ·StorepNoWB(SB), NOSPLIT, $0-16
+ BR ·Store64(SB)
+
+TEXT ·Store(SB), NOSPLIT, $0-12
+ MOVD ptr+0(FP), R3
+ MOVW val+8(FP), R4
+ SYNC
+ MOVW R4, 0(R3)
+ RET
+
+TEXT ·Store8(SB), NOSPLIT, $0-9
+ MOVD ptr+0(FP), R3
+ MOVB val+8(FP), R4
+ SYNC
+ MOVB R4, 0(R3)
+ RET
+
+TEXT ·Store64(SB), NOSPLIT, $0-16
+ MOVD ptr+0(FP), R3
+ MOVD val+8(FP), R4
+ SYNC
+ MOVD R4, 0(R3)
+ RET
+
+TEXT ·StoreRel(SB), NOSPLIT, $0-12
+ MOVD ptr+0(FP), R3
+ MOVW val+8(FP), R4
+ LWSYNC
+ MOVW R4, 0(R3)
+ RET
+
+TEXT ·StoreRel64(SB), NOSPLIT, $0-16
+ MOVD ptr+0(FP), R3
+ MOVD val+8(FP), R4
+ LWSYNC
+ MOVD R4, 0(R3)
+ RET
+
+// void ·Or8(byte volatile*, byte);
+TEXT ·Or8(SB), NOSPLIT, $0-9
+ MOVD ptr+0(FP), R3
+ MOVBZ val+8(FP), R4
+ LWSYNC
+again:
+ LBAR (R3), R6
+ OR R4, R6
+ STBCCC R6, (R3)
+ BNE again
+ RET
+
+// void ·And8(byte volatile*, byte);
+TEXT ·And8(SB), NOSPLIT, $0-9
+ MOVD ptr+0(FP), R3
+ MOVBZ val+8(FP), R4
+ LWSYNC
+again:
+ LBAR (R3), R6
+ AND R4, R6
+ STBCCC R6, (R3)
+ BNE again
+ RET
+
+// func Or(addr *uint32, v uint32)
+TEXT ·Or(SB), NOSPLIT, $0-12
+ MOVD ptr+0(FP), R3
+ MOVW val+8(FP), R4
+ LWSYNC
+again:
+ LWAR (R3), R6
+ OR R4, R6
+ STWCCC R6, (R3)
+ BNE again
+ RET
+
+// func And(addr *uint32, v uint32)
+TEXT ·And(SB), NOSPLIT, $0-12
+ MOVD ptr+0(FP), R3
+ MOVW val+8(FP), R4
+ LWSYNC
+again:
+ LWAR (R3),R6
+ AND R4, R6
+ STWCCC R6, (R3)
+ BNE again
+ RET
diff --git a/src/runtime/internal/atomic/atomic_riscv64.go b/src/runtime/internal/atomic/atomic_riscv64.go
new file mode 100644
index 0000000..8f24d61
--- /dev/null
+++ b/src/runtime/internal/atomic/atomic_riscv64.go
@@ -0,0 +1,85 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package atomic
+
+import "unsafe"
+
+//go:noescape
+func Xadd(ptr *uint32, delta int32) uint32
+
+//go:noescape
+func Xadd64(ptr *uint64, delta int64) uint64
+
+//go:noescape
+func Xadduintptr(ptr *uintptr, delta uintptr) uintptr
+
+//go:noescape
+func Xchg(ptr *uint32, new uint32) uint32
+
+//go:noescape
+func Xchg64(ptr *uint64, new uint64) uint64
+
+//go:noescape
+func Xchguintptr(ptr *uintptr, new uintptr) uintptr
+
+//go:noescape
+func Load(ptr *uint32) uint32
+
+//go:noescape
+func Load8(ptr *uint8) uint8
+
+//go:noescape
+func Load64(ptr *uint64) uint64
+
+// NO go:noescape annotation; *ptr escapes if result escapes (#31525)
+func Loadp(ptr unsafe.Pointer) unsafe.Pointer
+
+//go:noescape
+func LoadAcq(ptr *uint32) uint32
+
+//go:noescape
+func LoadAcq64(ptr *uint64) uint64
+
+//go:noescape
+func LoadAcquintptr(ptr *uintptr) uintptr
+
+//go:noescape
+func Or8(ptr *uint8, val uint8)
+
+//go:noescape
+func And8(ptr *uint8, val uint8)
+
+//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
+//go:noescape
+func Cas64(ptr *uint64, old, new uint64) bool
+
+//go:noescape
+func CasRel(ptr *uint32, old, new uint32) bool
+
+//go:noescape
+func Store(ptr *uint32, val uint32)
+
+//go:noescape
+func Store8(ptr *uint8, val uint8)
+
+//go:noescape
+func Store64(ptr *uint64, val uint64)
+
+// NO go:noescape annotation; see atomic_pointer.go.
+func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
+
+//go:noescape
+func StoreRel(ptr *uint32, val uint32)
+
+//go:noescape
+func StoreRel64(ptr *uint64, val uint64)
+
+//go:noescape
+func StoreReluintptr(ptr *uintptr, val uintptr)
diff --git a/src/runtime/internal/atomic/atomic_riscv64.s b/src/runtime/internal/atomic/atomic_riscv64.s
new file mode 100644
index 0000000..21d5adc
--- /dev/null
+++ b/src/runtime/internal/atomic/atomic_riscv64.s
@@ -0,0 +1,284 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// RISC-V's atomic operations have two bits, aq ("acquire") and rl ("release"),
+// which may be toggled on and off. Their precise semantics are defined in
+// section 6.3 of the specification, but the basic idea is as follows:
+//
+// - If neither aq nor rl is set, the CPU may reorder the atomic arbitrarily.
+// It guarantees only that it will execute atomically.
+//
+// - If aq is set, the CPU may move the instruction backward, but not forward.
+//
+// - If rl is set, the CPU may move the instruction forward, but not backward.
+//
+// - If both are set, the CPU may not reorder the instruction at all.
+//
+// These four modes correspond to other well-known memory models on other CPUs.
+// On ARM, aq corresponds to a dmb ishst, aq+rl corresponds to a dmb ish. On
+// Intel, aq corresponds to an lfence, rl to an sfence, and aq+rl to an mfence
+// (or a lock prefix).
+//
+// Go's memory model requires that
+// - if a read happens after a write, the read must observe the write, and
+// that
+// - if a read happens concurrently with a write, the read may observe the
+// write.
+// aq is sufficient to guarantee this, so that's what we use here. (This jibes
+// with ARM, which uses dmb ishst.)
+
+#include "textflag.h"
+
+// func Cas(ptr *uint64, old, new uint64) bool
+// Atomically:
+// if(*val == old){
+// *val = new;
+// return 1;
+// } else {
+// return 0;
+// }
+TEXT ·Cas(SB), NOSPLIT, $0-17
+ MOV ptr+0(FP), A0
+ MOVW old+8(FP), A1
+ MOVW new+12(FP), A2
+cas_again:
+ LRW (A0), A3
+ BNE A3, A1, cas_fail
+ SCW A2, (A0), A4
+ BNE A4, ZERO, cas_again
+ MOV $1, A0
+ MOVB A0, ret+16(FP)
+ RET
+cas_fail:
+ MOV $0, A0
+ MOV A0, ret+16(FP)
+ RET
+
+// func Cas64(ptr *uint64, old, new uint64) bool
+TEXT ·Cas64(SB), NOSPLIT, $0-25
+ MOV ptr+0(FP), A0
+ MOV old+8(FP), A1
+ MOV new+16(FP), A2
+cas_again:
+ LRD (A0), A3
+ BNE A3, A1, cas_fail
+ SCD A2, (A0), A4
+ BNE A4, ZERO, cas_again
+ MOV $1, A0
+ MOVB A0, ret+24(FP)
+ RET
+cas_fail:
+ MOVB ZERO, ret+24(FP)
+ RET
+
+// func Load(ptr *uint32) uint32
+TEXT ·Load(SB),NOSPLIT|NOFRAME,$0-12
+ MOV ptr+0(FP), A0
+ LRW (A0), A0
+ MOVW A0, ret+8(FP)
+ RET
+
+// func Load8(ptr *uint8) uint8
+TEXT ·Load8(SB),NOSPLIT|NOFRAME,$0-9
+ MOV ptr+0(FP), A0
+ FENCE
+ MOVBU (A0), A1
+ FENCE
+ MOVB A1, ret+8(FP)
+ RET
+
+// func Load64(ptr *uint64) uint64
+TEXT ·Load64(SB),NOSPLIT|NOFRAME,$0-16
+ MOV ptr+0(FP), A0
+ LRD (A0), A0
+ MOV A0, ret+8(FP)
+ RET
+
+// func Store(ptr *uint32, val uint32)
+TEXT ·Store(SB), NOSPLIT, $0-12
+ MOV ptr+0(FP), A0
+ MOVW val+8(FP), A1
+ AMOSWAPW A1, (A0), ZERO
+ RET
+
+// func Store8(ptr *uint8, val uint8)
+TEXT ·Store8(SB), NOSPLIT, $0-9
+ MOV ptr+0(FP), A0
+ MOVBU val+8(FP), A1
+ FENCE
+ MOVB A1, (A0)
+ FENCE
+ RET
+
+// func Store64(ptr *uint64, val uint64)
+TEXT ·Store64(SB), NOSPLIT, $0-16
+ MOV ptr+0(FP), A0
+ MOV val+8(FP), A1
+ AMOSWAPD A1, (A0), ZERO
+ RET
+
+TEXT ·Casp1(SB), NOSPLIT, $0-25
+ JMP ·Cas64(SB)
+
+TEXT ·Casint32(SB),NOSPLIT,$0-17
+ JMP ·Cas(SB)
+
+TEXT ·Casint64(SB),NOSPLIT,$0-25
+ JMP ·Cas64(SB)
+
+TEXT ·Casuintptr(SB),NOSPLIT,$0-25
+ JMP ·Cas64(SB)
+
+TEXT ·CasRel(SB), NOSPLIT, $0-17
+ JMP ·Cas(SB)
+
+TEXT ·Loaduintptr(SB),NOSPLIT,$0-16
+ JMP ·Load64(SB)
+
+TEXT ·Storeint32(SB),NOSPLIT,$0-12
+ JMP ·Store(SB)
+
+TEXT ·Storeint64(SB),NOSPLIT,$0-16
+ JMP ·Store64(SB)
+
+TEXT ·Storeuintptr(SB),NOSPLIT,$0-16
+ JMP ·Store64(SB)
+
+TEXT ·Loaduint(SB),NOSPLIT,$0-16
+ JMP ·Loaduintptr(SB)
+
+TEXT ·Loadint32(SB),NOSPLIT,$0-12
+ JMP ·Load(SB)
+
+TEXT ·Loadint64(SB),NOSPLIT,$0-16
+ JMP ·Load64(SB)
+
+TEXT ·Xaddint32(SB),NOSPLIT,$0-20
+ JMP ·Xadd(SB)
+
+TEXT ·Xaddint64(SB),NOSPLIT,$0-24
+ MOV ptr+0(FP), A0
+ MOV delta+8(FP), A1
+ AMOADDD A1, (A0), A0
+ ADD A0, A1, A0
+ MOVW A0, ret+16(FP)
+ RET
+
+TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$0-12
+ JMP ·Load(SB)
+
+TEXT ·LoadAcq64(SB),NOSPLIT|NOFRAME,$0-16
+ JMP ·Load64(SB)
+
+TEXT ·LoadAcquintptr(SB),NOSPLIT|NOFRAME,$0-16
+ JMP ·Load64(SB)
+
+// func Loadp(ptr unsafe.Pointer) unsafe.Pointer
+TEXT ·Loadp(SB),NOSPLIT,$0-16
+ JMP ·Load64(SB)
+
+// func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
+TEXT ·StorepNoWB(SB), NOSPLIT, $0-16
+ JMP ·Store64(SB)
+
+TEXT ·StoreRel(SB), NOSPLIT, $0-12
+ JMP ·Store(SB)
+
+TEXT ·StoreRel64(SB), NOSPLIT, $0-16
+ JMP ·Store64(SB)
+
+TEXT ·StoreReluintptr(SB), NOSPLIT, $0-16
+ JMP ·Store64(SB)
+
+// func Xchg(ptr *uint32, new uint32) uint32
+TEXT ·Xchg(SB), NOSPLIT, $0-20
+ MOV ptr+0(FP), A0
+ MOVW new+8(FP), A1
+ AMOSWAPW A1, (A0), A1
+ MOVW A1, ret+16(FP)
+ RET
+
+// func Xchg64(ptr *uint64, new uint64) uint64
+TEXT ·Xchg64(SB), NOSPLIT, $0-24
+ MOV ptr+0(FP), A0
+ MOV new+8(FP), A1
+ AMOSWAPD A1, (A0), A1
+ MOV A1, ret+16(FP)
+ RET
+
+// Atomically:
+// *val += delta;
+// return *val;
+
+// func Xadd(ptr *uint32, delta int32) uint32
+TEXT ·Xadd(SB), NOSPLIT, $0-20
+ MOV ptr+0(FP), A0
+ MOVW delta+8(FP), A1
+ AMOADDW A1, (A0), A2
+ ADD A2,A1,A0
+ MOVW A0, ret+16(FP)
+ RET
+
+// func Xadd64(ptr *uint64, delta int64) uint64
+TEXT ·Xadd64(SB), NOSPLIT, $0-24
+ MOV ptr+0(FP), A0
+ MOV delta+8(FP), A1
+ AMOADDD A1, (A0), A2
+ ADD A2, A1, A0
+ MOV A0, ret+16(FP)
+ RET
+
+// func Xadduintptr(ptr *uintptr, delta uintptr) uintptr
+TEXT ·Xadduintptr(SB), NOSPLIT, $0-24
+ JMP ·Xadd64(SB)
+
+// func Xchgint32(ptr *int32, new int32) int32
+TEXT ·Xchgint32(SB), NOSPLIT, $0-20
+ JMP ·Xchg(SB)
+
+// func Xchgint64(ptr *int64, new int64) int64
+TEXT ·Xchgint64(SB), NOSPLIT, $0-24
+ JMP ·Xchg64(SB)
+
+// func Xchguintptr(ptr *uintptr, new uintptr) uintptr
+TEXT ·Xchguintptr(SB), NOSPLIT, $0-24
+ JMP ·Xchg64(SB)
+
+// func And8(ptr *uint8, val uint8)
+TEXT ·And8(SB), NOSPLIT, $0-9
+ MOV ptr+0(FP), A0
+ MOVBU val+8(FP), A1
+ AND $3, A0, A2
+ AND $-4, A0
+ SLL $3, A2
+ XOR $255, A1
+ SLL A2, A1
+ XOR $-1, A1
+ AMOANDW A1, (A0), ZERO
+ RET
+
+// func Or8(ptr *uint8, val uint8)
+TEXT ·Or8(SB), NOSPLIT, $0-9
+ MOV ptr+0(FP), A0
+ MOVBU val+8(FP), A1
+ AND $3, A0, A2
+ AND $-4, A0
+ SLL $3, A2
+ SLL A2, A1
+ AMOORW A1, (A0), ZERO
+ RET
+
+// func And(ptr *uint32, val uint32)
+TEXT ·And(SB), NOSPLIT, $0-12
+ MOV ptr+0(FP), A0
+ MOVW val+8(FP), A1
+ AMOANDW A1, (A0), ZERO
+ RET
+
+// func Or(ptr *uint32, val uint32)
+TEXT ·Or(SB), NOSPLIT, $0-12
+ MOV ptr+0(FP), A0
+ MOVW val+8(FP), A1
+ AMOORW A1, (A0), ZERO
+ RET
diff --git a/src/runtime/internal/atomic/atomic_s390x.go b/src/runtime/internal/atomic/atomic_s390x.go
new file mode 100644
index 0000000..9855bf0
--- /dev/null
+++ b/src/runtime/internal/atomic/atomic_s390x.go
@@ -0,0 +1,123 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package atomic
+
+import "unsafe"
+
+// Export some functions via linkname to assembly in sync/atomic.
+//
+//go:linkname Load
+//go:linkname Loadp
+//go:linkname Load64
+
+//go:nosplit
+//go:noinline
+func Load(ptr *uint32) uint32 {
+ return *ptr
+}
+
+//go:nosplit
+//go:noinline
+func Loadp(ptr unsafe.Pointer) unsafe.Pointer {
+ return *(*unsafe.Pointer)(ptr)
+}
+
+//go:nosplit
+//go:noinline
+func Load8(ptr *uint8) uint8 {
+ return *ptr
+}
+
+//go:nosplit
+//go:noinline
+func Load64(ptr *uint64) uint64 {
+ return *ptr
+}
+
+//go:nosplit
+//go:noinline
+func LoadAcq(ptr *uint32) uint32 {
+ return *ptr
+}
+
+//go:nosplit
+//go:noinline
+func LoadAcq64(ptr *uint64) uint64 {
+ return *ptr
+}
+
+//go:nosplit
+//go:noinline
+func LoadAcquintptr(ptr *uintptr) uintptr {
+ return *ptr
+}
+
+//go:noescape
+func Store(ptr *uint32, val uint32)
+
+//go:noescape
+func Store8(ptr *uint8, val uint8)
+
+//go:noescape
+func Store64(ptr *uint64, val uint64)
+
+// NO go:noescape annotation; see atomic_pointer.go.
+func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
+
+//go:nosplit
+//go:noinline
+func StoreRel(ptr *uint32, val uint32) {
+ *ptr = val
+}
+
+//go:nosplit
+//go:noinline
+func StoreRel64(ptr *uint64, val uint64) {
+ *ptr = val
+}
+
+//go:nosplit
+//go:noinline
+func StoreReluintptr(ptr *uintptr, val uintptr) {
+ *ptr = val
+}
+
+//go:noescape
+func And8(ptr *uint8, val uint8)
+
+//go:noescape
+func Or8(ptr *uint8, val uint8)
+
+// NOTE: Do not add atomicxor8 (XOR is not idempotent).
+
+//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
+//go:noescape
+func Xadd(ptr *uint32, delta int32) uint32
+
+//go:noescape
+func Xadd64(ptr *uint64, delta int64) uint64
+
+//go:noescape
+func Xadduintptr(ptr *uintptr, delta uintptr) uintptr
+
+//go:noescape
+func Xchg(ptr *uint32, new uint32) uint32
+
+//go:noescape
+func Xchg64(ptr *uint64, new uint64) uint64
+
+//go:noescape
+func Xchguintptr(ptr *uintptr, new uintptr) uintptr
+
+//go:noescape
+func Cas64(ptr *uint64, old, new uint64) bool
+
+//go:noescape
+func CasRel(ptr *uint32, old, new uint32) bool
diff --git a/src/runtime/internal/atomic/atomic_s390x.s b/src/runtime/internal/atomic/atomic_s390x.s
new file mode 100644
index 0000000..a0c204b
--- /dev/null
+++ b/src/runtime/internal/atomic/atomic_s390x.s
@@ -0,0 +1,248 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// func Store(ptr *uint32, val uint32)
+TEXT ·Store(SB), NOSPLIT, $0
+ MOVD ptr+0(FP), R2
+ MOVWZ val+8(FP), R3
+ MOVW R3, 0(R2)
+ SYNC
+ RET
+
+// func Store8(ptr *uint8, val uint8)
+TEXT ·Store8(SB), NOSPLIT, $0
+ MOVD ptr+0(FP), R2
+ MOVB val+8(FP), R3
+ MOVB R3, 0(R2)
+ SYNC
+ RET
+
+// func Store64(ptr *uint64, val uint64)
+TEXT ·Store64(SB), NOSPLIT, $0
+ MOVD ptr+0(FP), R2
+ MOVD val+8(FP), R3
+ MOVD R3, 0(R2)
+ SYNC
+ RET
+
+// func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
+TEXT ·StorepNoWB(SB), NOSPLIT, $0
+ MOVD ptr+0(FP), R2
+ MOVD val+8(FP), R3
+ MOVD R3, 0(R2)
+ SYNC
+ RET
+
+// func Cas(ptr *uint32, old, new uint32) bool
+// Atomically:
+// if *ptr == old {
+// *val = new
+// return 1
+// } else {
+// return 0
+// }
+TEXT ·Cas(SB), NOSPLIT, $0-17
+ MOVD ptr+0(FP), R3
+ MOVWZ old+8(FP), R4
+ MOVWZ new+12(FP), R5
+ CS R4, R5, 0(R3) // if (R4 == 0(R3)) then 0(R3)= R5
+ BNE cas_fail
+ MOVB $1, ret+16(FP)
+ RET
+cas_fail:
+ MOVB $0, ret+16(FP)
+ RET
+
+// func Cas64(ptr *uint64, old, new uint64) bool
+// Atomically:
+// if *ptr == old {
+// *ptr = new
+// return 1
+// } else {
+// return 0
+// }
+TEXT ·Cas64(SB), NOSPLIT, $0-25
+ MOVD ptr+0(FP), R3
+ MOVD old+8(FP), R4
+ MOVD new+16(FP), R5
+ CSG R4, R5, 0(R3) // if (R4 == 0(R3)) then 0(R3)= R5
+ BNE cas64_fail
+ MOVB $1, ret+24(FP)
+ RET
+cas64_fail:
+ MOVB $0, ret+24(FP)
+ RET
+
+// func Casint32(ptr *int32, old, new int32) bool
+TEXT ·Casint32(SB), NOSPLIT, $0-17
+ BR ·Cas(SB)
+
+// func Casint64(ptr *int64, old, new int64) bool
+TEXT ·Casint64(SB), NOSPLIT, $0-25
+ BR ·Cas64(SB)
+
+// func Casuintptr(ptr *uintptr, old, new uintptr) bool
+TEXT ·Casuintptr(SB), NOSPLIT, $0-25
+ BR ·Cas64(SB)
+
+// func CasRel(ptr *uint32, old, new uint32) bool
+TEXT ·CasRel(SB), NOSPLIT, $0-17
+ BR ·Cas(SB)
+
+// func Loaduintptr(ptr *uintptr) uintptr
+TEXT ·Loaduintptr(SB), NOSPLIT, $0-16
+ BR ·Load64(SB)
+
+// func Loaduint(ptr *uint) uint
+TEXT ·Loaduint(SB), NOSPLIT, $0-16
+ BR ·Load64(SB)
+
+// func Storeint32(ptr *int32, new int32)
+TEXT ·Storeint32(SB), NOSPLIT, $0-12
+ BR ·Store(SB)
+
+// func Storeint64(ptr *int64, new int64)
+TEXT ·Storeint64(SB), NOSPLIT, $0-16
+ BR ·Store64(SB)
+
+// func Storeuintptr(ptr *uintptr, new uintptr)
+TEXT ·Storeuintptr(SB), NOSPLIT, $0-16
+ BR ·Store64(SB)
+
+// func Loadint32(ptr *int32) int32
+TEXT ·Loadint32(SB), NOSPLIT, $0-12
+ BR ·Load(SB)
+
+// func Loadint64(ptr *int64) int64
+TEXT ·Loadint64(SB), NOSPLIT, $0-16
+ BR ·Load64(SB)
+
+// func Xadduintptr(ptr *uintptr, delta uintptr) uintptr
+TEXT ·Xadduintptr(SB), NOSPLIT, $0-24
+ BR ·Xadd64(SB)
+
+// func Xaddint32(ptr *int32, delta int32) int32
+TEXT ·Xaddint32(SB), NOSPLIT, $0-20
+ BR ·Xadd(SB)
+
+// func Xaddint64(ptr *int64, delta int64) int64
+TEXT ·Xaddint64(SB), NOSPLIT, $0-24
+ BR ·Xadd64(SB)
+
+// func Casp1(ptr *unsafe.Pointer, old, new unsafe.Pointer) bool
+// Atomically:
+// if *ptr == old {
+// *ptr = new
+// return 1
+// } else {
+// return 0
+// }
+TEXT ·Casp1(SB), NOSPLIT, $0-25
+ BR ·Cas64(SB)
+
+// func Xadd(ptr *uint32, delta int32) uint32
+// Atomically:
+// *ptr += delta
+// return *ptr
+TEXT ·Xadd(SB), NOSPLIT, $0-20
+ MOVD ptr+0(FP), R4
+ MOVW delta+8(FP), R5
+ MOVW (R4), R3
+repeat:
+ ADD R5, R3, R6
+ CS R3, R6, (R4) // if R3==(R4) then (R4)=R6 else R3=(R4)
+ BNE repeat
+ MOVW R6, ret+16(FP)
+ RET
+
+// func Xadd64(ptr *uint64, delta int64) uint64
+TEXT ·Xadd64(SB), NOSPLIT, $0-24
+ MOVD ptr+0(FP), R4
+ MOVD delta+8(FP), R5
+ MOVD (R4), R3
+repeat:
+ ADD R5, R3, R6
+ CSG R3, R6, (R4) // if R3==(R4) then (R4)=R6 else R3=(R4)
+ BNE repeat
+ MOVD R6, ret+16(FP)
+ RET
+
+// func Xchg(ptr *uint32, new uint32) uint32
+TEXT ·Xchg(SB), NOSPLIT, $0-20
+ MOVD ptr+0(FP), R4
+ MOVW new+8(FP), R3
+ MOVW (R4), R6
+repeat:
+ CS R6, R3, (R4) // if R6==(R4) then (R4)=R3 else R6=(R4)
+ BNE repeat
+ MOVW R6, ret+16(FP)
+ RET
+
+// func Xchg64(ptr *uint64, new uint64) uint64
+TEXT ·Xchg64(SB), NOSPLIT, $0-24
+ MOVD ptr+0(FP), R4
+ MOVD new+8(FP), R3
+ MOVD (R4), R6
+repeat:
+ CSG R6, R3, (R4) // if R6==(R4) then (R4)=R3 else R6=(R4)
+ BNE repeat
+ MOVD R6, ret+16(FP)
+ RET
+
+// func Xchgint32(ptr *int32, new int32) int32
+TEXT ·Xchgint32(SB), NOSPLIT, $0-20
+ BR ·Xchg(SB)
+
+// func Xchgint64(ptr *int64, new int64) int64
+TEXT ·Xchgint64(SB), NOSPLIT, $0-24
+ BR ·Xchg64(SB)
+
+// func Xchguintptr(ptr *uintptr, new uintptr) uintptr
+TEXT ·Xchguintptr(SB), NOSPLIT, $0-24
+ BR ·Xchg64(SB)
+
+// func Or8(addr *uint8, v uint8)
+TEXT ·Or8(SB), NOSPLIT, $0-9
+ MOVD ptr+0(FP), R3
+ MOVBZ val+8(FP), R4
+ // We don't have atomic operations that work on individual bytes so we
+ // need to align addr down to a word boundary and create a mask
+ // containing v to OR with the entire word atomically.
+ MOVD $(3<<3), R5
+ RXSBG $59, $60, $3, R3, R5 // R5 = 24 - ((addr % 4) * 8) = ((addr & 3) << 3) ^ (3 << 3)
+ ANDW $~3, R3 // R3 = floor(addr, 4) = addr &^ 3
+ SLW R5, R4 // R4 = uint32(v) << R5
+ LAO R4, R6, 0(R3) // R6 = *R3; *R3 |= R4; (atomic)
+ RET
+
+// func And8(addr *uint8, v uint8)
+TEXT ·And8(SB), NOSPLIT, $0-9
+ MOVD ptr+0(FP), R3
+ MOVBZ val+8(FP), R4
+ // We don't have atomic operations that work on individual bytes so we
+ // need to align addr down to a word boundary and create a mask
+ // containing v to AND with the entire word atomically.
+ ORW $~0xff, R4 // R4 = uint32(v) | 0xffffff00
+ MOVD $(3<<3), R5
+ RXSBG $59, $60, $3, R3, R5 // R5 = 24 - ((addr % 4) * 8) = ((addr & 3) << 3) ^ (3 << 3)
+ ANDW $~3, R3 // R3 = floor(addr, 4) = addr &^ 3
+ RLL R5, R4, R4 // R4 = rotl(R4, R5)
+ LAN R4, R6, 0(R3) // R6 = *R3; *R3 &= R4; (atomic)
+ RET
+
+// func Or(addr *uint32, v uint32)
+TEXT ·Or(SB), NOSPLIT, $0-12
+ MOVD ptr+0(FP), R3
+ MOVW val+8(FP), R4
+ LAO R4, R6, 0(R3) // R6 = *R3; *R3 |= R4; (atomic)
+ RET
+
+// func And(addr *uint32, v uint32)
+TEXT ·And(SB), NOSPLIT, $0-12
+ MOVD ptr+0(FP), R3
+ MOVW val+8(FP), R4
+ LAN R4, R6, 0(R3) // R6 = *R3; *R3 &= R4; (atomic)
+ RET
diff --git a/src/runtime/internal/atomic/atomic_test.go b/src/runtime/internal/atomic/atomic_test.go
new file mode 100644
index 0000000..2427bfd
--- /dev/null
+++ b/src/runtime/internal/atomic/atomic_test.go
@@ -0,0 +1,386 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package atomic_test
+
+import (
+ "internal/goarch"
+ "runtime"
+ "runtime/internal/atomic"
+ "testing"
+ "unsafe"
+)
+
+func runParallel(N, iter int, f func()) {
+ defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(int(N)))
+ done := make(chan bool)
+ for i := 0; i < N; i++ {
+ go func() {
+ for j := 0; j < iter; j++ {
+ f()
+ }
+ done <- true
+ }()
+ }
+ for i := 0; i < N; i++ {
+ <-done
+ }
+}
+
+func TestXadduintptr(t *testing.T) {
+ N := 20
+ iter := 100000
+ if testing.Short() {
+ N = 10
+ iter = 10000
+ }
+ inc := uintptr(100)
+ total := uintptr(0)
+ runParallel(N, iter, func() {
+ atomic.Xadduintptr(&total, inc)
+ })
+ if want := uintptr(N*iter) * inc; want != total {
+ t.Fatalf("xadduintpr error, want %d, got %d", want, total)
+ }
+ total = 0
+ runParallel(N, iter, func() {
+ atomic.Xadduintptr(&total, inc)
+ atomic.Xadduintptr(&total, uintptr(-int64(inc)))
+ })
+ if total != 0 {
+ t.Fatalf("xadduintpr total error, want %d, got %d", 0, total)
+ }
+}
+
+// Tests that xadduintptr correctly updates 64-bit values. The place where
+// we actually do so is mstats.go, functions mSysStat{Inc,Dec}.
+func TestXadduintptrOnUint64(t *testing.T) {
+ if goarch.BigEndian {
+ // On big endian architectures, we never use xadduintptr to update
+ // 64-bit values and hence we skip the test. (Note that functions
+ // mSysStat{Inc,Dec} in mstats.go have explicit checks for
+ // big-endianness.)
+ t.Skip("skip xadduintptr on big endian architecture")
+ }
+ const inc = 100
+ val := uint64(0)
+ atomic.Xadduintptr((*uintptr)(unsafe.Pointer(&val)), inc)
+ if inc != val {
+ t.Fatalf("xadduintptr should increase lower-order bits, want %d, got %d", inc, val)
+ }
+}
+
+func shouldPanic(t *testing.T, name string, f func()) {
+ defer func() {
+ // Check that all GC maps are sane.
+ runtime.GC()
+
+ err := recover()
+ want := "unaligned 64-bit atomic operation"
+ if err == nil {
+ t.Errorf("%s did not panic", name)
+ } else if s, _ := err.(string); s != want {
+ t.Errorf("%s: wanted panic %q, got %q", name, want, err)
+ }
+ }()
+ f()
+}
+
+// Variant of sync/atomic's TestUnaligned64:
+func TestUnaligned64(t *testing.T) {
+ // Unaligned 64-bit atomics on 32-bit systems are
+ // a continual source of pain. Test that on 32-bit systems they crash
+ // instead of failing silently.
+
+ if unsafe.Sizeof(int(0)) != 4 {
+ t.Skip("test only runs on 32-bit systems")
+ }
+
+ x := make([]uint32, 4)
+ u := unsafe.Pointer(uintptr(unsafe.Pointer(&x[0])) | 4) // force alignment to 4
+
+ up64 := (*uint64)(u) // misaligned
+ p64 := (*int64)(u) // misaligned
+
+ shouldPanic(t, "Load64", func() { atomic.Load64(up64) })
+ shouldPanic(t, "Loadint64", func() { atomic.Loadint64(p64) })
+ shouldPanic(t, "Store64", func() { atomic.Store64(up64, 0) })
+ shouldPanic(t, "Xadd64", func() { atomic.Xadd64(up64, 1) })
+ shouldPanic(t, "Xchg64", func() { atomic.Xchg64(up64, 1) })
+ shouldPanic(t, "Cas64", func() { atomic.Cas64(up64, 1, 2) })
+}
+
+func TestAnd8(t *testing.T) {
+ // Basic sanity check.
+ x := uint8(0xff)
+ for i := uint8(0); i < 8; i++ {
+ atomic.And8(&x, ^(1 << i))
+ if r := uint8(0xff) << (i + 1); x != r {
+ t.Fatalf("clearing bit %#x: want %#x, got %#x", uint8(1<<i), r, x)
+ }
+ }
+
+ // Set every bit in array to 1.
+ a := make([]uint8, 1<<12)
+ for i := range a {
+ a[i] = 0xff
+ }
+
+ // Clear array bit-by-bit in different goroutines.
+ done := make(chan bool)
+ for i := 0; i < 8; i++ {
+ m := ^uint8(1 << i)
+ go func() {
+ for i := range a {
+ atomic.And8(&a[i], m)
+ }
+ done <- true
+ }()
+ }
+ for i := 0; i < 8; i++ {
+ <-done
+ }
+
+ // Check that the array has been totally cleared.
+ for i, v := range a {
+ if v != 0 {
+ t.Fatalf("a[%v] not cleared: want %#x, got %#x", i, uint8(0), v)
+ }
+ }
+}
+
+func TestAnd(t *testing.T) {
+ // Basic sanity check.
+ x := uint32(0xffffffff)
+ for i := uint32(0); i < 32; i++ {
+ atomic.And(&x, ^(1 << i))
+ if r := uint32(0xffffffff) << (i + 1); x != r {
+ t.Fatalf("clearing bit %#x: want %#x, got %#x", uint32(1<<i), r, x)
+ }
+ }
+
+ // Set every bit in array to 1.
+ a := make([]uint32, 1<<12)
+ for i := range a {
+ a[i] = 0xffffffff
+ }
+
+ // Clear array bit-by-bit in different goroutines.
+ done := make(chan bool)
+ for i := 0; i < 32; i++ {
+ m := ^uint32(1 << i)
+ go func() {
+ for i := range a {
+ atomic.And(&a[i], m)
+ }
+ done <- true
+ }()
+ }
+ for i := 0; i < 32; i++ {
+ <-done
+ }
+
+ // Check that the array has been totally cleared.
+ for i, v := range a {
+ if v != 0 {
+ t.Fatalf("a[%v] not cleared: want %#x, got %#x", i, uint32(0), v)
+ }
+ }
+}
+
+func TestOr8(t *testing.T) {
+ // Basic sanity check.
+ x := uint8(0)
+ for i := uint8(0); i < 8; i++ {
+ atomic.Or8(&x, 1<<i)
+ if r := (uint8(1) << (i + 1)) - 1; x != r {
+ t.Fatalf("setting bit %#x: want %#x, got %#x", uint8(1)<<i, r, x)
+ }
+ }
+
+ // Start with every bit in array set to 0.
+ a := make([]uint8, 1<<12)
+
+ // Set every bit in array bit-by-bit in different goroutines.
+ done := make(chan bool)
+ for i := 0; i < 8; i++ {
+ m := uint8(1 << i)
+ go func() {
+ for i := range a {
+ atomic.Or8(&a[i], m)
+ }
+ done <- true
+ }()
+ }
+ for i := 0; i < 8; i++ {
+ <-done
+ }
+
+ // Check that the array has been totally set.
+ for i, v := range a {
+ if v != 0xff {
+ t.Fatalf("a[%v] not fully set: want %#x, got %#x", i, uint8(0xff), v)
+ }
+ }
+}
+
+func TestOr(t *testing.T) {
+ // Basic sanity check.
+ x := uint32(0)
+ for i := uint32(0); i < 32; i++ {
+ atomic.Or(&x, 1<<i)
+ if r := (uint32(1) << (i + 1)) - 1; x != r {
+ t.Fatalf("setting bit %#x: want %#x, got %#x", uint32(1)<<i, r, x)
+ }
+ }
+
+ // Start with every bit in array set to 0.
+ a := make([]uint32, 1<<12)
+
+ // Set every bit in array bit-by-bit in different goroutines.
+ done := make(chan bool)
+ for i := 0; i < 32; i++ {
+ m := uint32(1 << i)
+ go func() {
+ for i := range a {
+ atomic.Or(&a[i], m)
+ }
+ done <- true
+ }()
+ }
+ for i := 0; i < 32; i++ {
+ <-done
+ }
+
+ // Check that the array has been totally set.
+ for i, v := range a {
+ if v != 0xffffffff {
+ t.Fatalf("a[%v] not fully set: want %#x, got %#x", i, uint32(0xffffffff), v)
+ }
+ }
+}
+
+func TestBitwiseContended8(t *testing.T) {
+ // Start with every bit in array set to 0.
+ a := make([]uint8, 16)
+
+ // Iterations to try.
+ N := 1 << 16
+ if testing.Short() {
+ N = 1 << 10
+ }
+
+ // Set and then clear every bit in the array bit-by-bit in different goroutines.
+ done := make(chan bool)
+ for i := 0; i < 8; i++ {
+ m := uint8(1 << i)
+ go func() {
+ for n := 0; n < N; n++ {
+ for i := range a {
+ atomic.Or8(&a[i], m)
+ if atomic.Load8(&a[i])&m != m {
+ t.Errorf("a[%v] bit %#x not set", i, m)
+ }
+ atomic.And8(&a[i], ^m)
+ if atomic.Load8(&a[i])&m != 0 {
+ t.Errorf("a[%v] bit %#x not clear", i, m)
+ }
+ }
+ }
+ done <- true
+ }()
+ }
+ for i := 0; i < 8; i++ {
+ <-done
+ }
+
+ // Check that the array has been totally cleared.
+ for i, v := range a {
+ if v != 0 {
+ t.Fatalf("a[%v] not cleared: want %#x, got %#x", i, uint8(0), v)
+ }
+ }
+}
+
+func TestBitwiseContended(t *testing.T) {
+ // Start with every bit in array set to 0.
+ a := make([]uint32, 16)
+
+ // Iterations to try.
+ N := 1 << 16
+ if testing.Short() {
+ N = 1 << 10
+ }
+
+ // Set and then clear every bit in the array bit-by-bit in different goroutines.
+ done := make(chan bool)
+ for i := 0; i < 32; i++ {
+ m := uint32(1 << i)
+ go func() {
+ for n := 0; n < N; n++ {
+ for i := range a {
+ atomic.Or(&a[i], m)
+ if atomic.Load(&a[i])&m != m {
+ t.Errorf("a[%v] bit %#x not set", i, m)
+ }
+ atomic.And(&a[i], ^m)
+ if atomic.Load(&a[i])&m != 0 {
+ t.Errorf("a[%v] bit %#x not clear", i, m)
+ }
+ }
+ }
+ done <- true
+ }()
+ }
+ for i := 0; i < 32; i++ {
+ <-done
+ }
+
+ // Check that the array has been totally cleared.
+ for i, v := range a {
+ if v != 0 {
+ t.Fatalf("a[%v] not cleared: want %#x, got %#x", i, uint32(0), v)
+ }
+ }
+}
+
+func TestCasRel(t *testing.T) {
+ const _magic = 0x5a5aa5a5
+ var x struct {
+ before uint32
+ i uint32
+ after uint32
+ o uint32
+ n uint32
+ }
+
+ x.before = _magic
+ x.after = _magic
+ for j := 0; j < 32; j += 1 {
+ x.i = (1 << j) + 0
+ x.o = (1 << j) + 0
+ x.n = (1 << j) + 1
+ if !atomic.CasRel(&x.i, x.o, x.n) {
+ t.Fatalf("should have swapped %#x %#x", x.o, x.n)
+ }
+
+ if x.i != x.n {
+ t.Fatalf("wrong x.i after swap: x.i=%#x x.n=%#x", x.i, x.n)
+ }
+
+ if x.before != _magic || x.after != _magic {
+ t.Fatalf("wrong magic: %#x _ %#x != %#x _ %#x", x.before, x.after, _magic, _magic)
+ }
+ }
+}
+
+func TestStorepNoWB(t *testing.T) {
+ var p [2]*int
+ for i := range p {
+ atomic.StorepNoWB(unsafe.Pointer(&p[i]), unsafe.Pointer(new(int)))
+ }
+ if p[0] == p[1] {
+ t.Error("Bad escape analysis of StorepNoWB")
+ }
+}
diff --git a/src/runtime/internal/atomic/atomic_wasm.go b/src/runtime/internal/atomic/atomic_wasm.go
new file mode 100644
index 0000000..835fc43
--- /dev/null
+++ b/src/runtime/internal/atomic/atomic_wasm.go
@@ -0,0 +1,341 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// TODO(neelance): implement with actual atomic operations as soon as threads are available
+// See https://github.com/WebAssembly/design/issues/1073
+
+// Export some functions via linkname to assembly in sync/atomic.
+//
+//go:linkname Load
+//go:linkname Loadp
+//go:linkname Load64
+//go:linkname Loadint32
+//go:linkname Loadint64
+//go:linkname Loaduintptr
+//go:linkname Xadd
+//go:linkname Xaddint32
+//go:linkname Xaddint64
+//go:linkname Xadd64
+//go:linkname Xadduintptr
+//go:linkname Xchg
+//go:linkname Xchg64
+//go:linkname Xchgint32
+//go:linkname Xchgint64
+//go:linkname Xchguintptr
+//go:linkname Cas
+//go:linkname Cas64
+//go:linkname Casint32
+//go:linkname Casint64
+//go:linkname Casuintptr
+//go:linkname Store
+//go:linkname Store64
+//go:linkname Storeint32
+//go:linkname Storeint64
+//go:linkname Storeuintptr
+
+package atomic
+
+import "unsafe"
+
+//go:nosplit
+//go:noinline
+func Load(ptr *uint32) uint32 {
+ return *ptr
+}
+
+//go:nosplit
+//go:noinline
+func Loadp(ptr unsafe.Pointer) unsafe.Pointer {
+ return *(*unsafe.Pointer)(ptr)
+}
+
+//go:nosplit
+//go:noinline
+func LoadAcq(ptr *uint32) uint32 {
+ return *ptr
+}
+
+//go:nosplit
+//go:noinline
+func LoadAcq64(ptr *uint64) uint64 {
+ return *ptr
+}
+
+//go:nosplit
+//go:noinline
+func LoadAcquintptr(ptr *uintptr) uintptr {
+ return *ptr
+}
+
+//go:nosplit
+//go:noinline
+func Load8(ptr *uint8) uint8 {
+ return *ptr
+}
+
+//go:nosplit
+//go:noinline
+func Load64(ptr *uint64) uint64 {
+ return *ptr
+}
+
+//go:nosplit
+//go:noinline
+func Xadd(ptr *uint32, delta int32) uint32 {
+ new := *ptr + uint32(delta)
+ *ptr = new
+ return new
+}
+
+//go:nosplit
+//go:noinline
+func Xadd64(ptr *uint64, delta int64) uint64 {
+ new := *ptr + uint64(delta)
+ *ptr = new
+ return new
+}
+
+//go:nosplit
+//go:noinline
+func Xadduintptr(ptr *uintptr, delta uintptr) uintptr {
+ new := *ptr + delta
+ *ptr = new
+ return new
+}
+
+//go:nosplit
+//go:noinline
+func Xchg(ptr *uint32, new uint32) uint32 {
+ old := *ptr
+ *ptr = new
+ return old
+}
+
+//go:nosplit
+//go:noinline
+func Xchg64(ptr *uint64, new uint64) uint64 {
+ old := *ptr
+ *ptr = new
+ return old
+}
+
+//go:nosplit
+//go:noinline
+func Xchgint32(ptr *int32, new int32) int32 {
+ old := *ptr
+ *ptr = new
+ return old
+}
+
+//go:nosplit
+//go:noinline
+func Xchgint64(ptr *int64, new int64) int64 {
+ old := *ptr
+ *ptr = new
+ return old
+}
+
+//go:nosplit
+//go:noinline
+func Xchguintptr(ptr *uintptr, new uintptr) uintptr {
+ old := *ptr
+ *ptr = new
+ return old
+}
+
+//go:nosplit
+//go:noinline
+func And8(ptr *uint8, val uint8) {
+ *ptr = *ptr & val
+}
+
+//go:nosplit
+//go:noinline
+func Or8(ptr *uint8, val uint8) {
+ *ptr = *ptr | val
+}
+
+// NOTE: Do not add atomicxor8 (XOR is not idempotent).
+
+//go:nosplit
+//go:noinline
+func And(ptr *uint32, val uint32) {
+ *ptr = *ptr & val
+}
+
+//go:nosplit
+//go:noinline
+func Or(ptr *uint32, val uint32) {
+ *ptr = *ptr | val
+}
+
+//go:nosplit
+//go:noinline
+func Cas64(ptr *uint64, old, new uint64) bool {
+ if *ptr == old {
+ *ptr = new
+ return true
+ }
+ return false
+}
+
+//go:nosplit
+//go:noinline
+func Store(ptr *uint32, val uint32) {
+ *ptr = val
+}
+
+//go:nosplit
+//go:noinline
+func StoreRel(ptr *uint32, val uint32) {
+ *ptr = val
+}
+
+//go:nosplit
+//go:noinline
+func StoreRel64(ptr *uint64, val uint64) {
+ *ptr = val
+}
+
+//go:nosplit
+//go:noinline
+func StoreReluintptr(ptr *uintptr, val uintptr) {
+ *ptr = val
+}
+
+//go:nosplit
+//go:noinline
+func Store8(ptr *uint8, val uint8) {
+ *ptr = val
+}
+
+//go:nosplit
+//go:noinline
+func Store64(ptr *uint64, val uint64) {
+ *ptr = val
+}
+
+// StorepNoWB performs *ptr = val atomically and without a write
+// barrier.
+//
+// NO go:noescape annotation; see atomic_pointer.go.
+func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
+
+//go:nosplit
+//go:noinline
+func Casint32(ptr *int32, old, new int32) bool {
+ if *ptr == old {
+ *ptr = new
+ return true
+ }
+ return false
+}
+
+//go:nosplit
+//go:noinline
+func Casint64(ptr *int64, old, new int64) bool {
+ if *ptr == old {
+ *ptr = new
+ return true
+ }
+ return false
+}
+
+//go:nosplit
+//go:noinline
+func Cas(ptr *uint32, old, new uint32) bool {
+ if *ptr == old {
+ *ptr = new
+ return true
+ }
+ return false
+}
+
+//go:nosplit
+//go:noinline
+func Casp1(ptr *unsafe.Pointer, old, new unsafe.Pointer) bool {
+ if *ptr == old {
+ *ptr = new
+ return true
+ }
+ return false
+}
+
+//go:nosplit
+//go:noinline
+func Casuintptr(ptr *uintptr, old, new uintptr) bool {
+ if *ptr == old {
+ *ptr = new
+ return true
+ }
+ return false
+}
+
+//go:nosplit
+//go:noinline
+func CasRel(ptr *uint32, old, new uint32) bool {
+ if *ptr == old {
+ *ptr = new
+ return true
+ }
+ return false
+}
+
+//go:nosplit
+//go:noinline
+func Storeint32(ptr *int32, new int32) {
+ *ptr = new
+}
+
+//go:nosplit
+//go:noinline
+func Storeint64(ptr *int64, new int64) {
+ *ptr = new
+}
+
+//go:nosplit
+//go:noinline
+func Storeuintptr(ptr *uintptr, new uintptr) {
+ *ptr = new
+}
+
+//go:nosplit
+//go:noinline
+func Loaduintptr(ptr *uintptr) uintptr {
+ return *ptr
+}
+
+//go:nosplit
+//go:noinline
+func Loaduint(ptr *uint) uint {
+ return *ptr
+}
+
+//go:nosplit
+//go:noinline
+func Loadint32(ptr *int32) int32 {
+ return *ptr
+}
+
+//go:nosplit
+//go:noinline
+func Loadint64(ptr *int64) int64 {
+ return *ptr
+}
+
+//go:nosplit
+//go:noinline
+func Xaddint32(ptr *int32, delta int32) int32 {
+ new := *ptr + delta
+ *ptr = new
+ return new
+}
+
+//go:nosplit
+//go:noinline
+func Xaddint64(ptr *int64, delta int64) int64 {
+ new := *ptr + delta
+ *ptr = new
+ return new
+}
diff --git a/src/runtime/internal/atomic/atomic_wasm.s b/src/runtime/internal/atomic/atomic_wasm.s
new file mode 100644
index 0000000..1c2d1ce
--- /dev/null
+++ b/src/runtime/internal/atomic/atomic_wasm.s
@@ -0,0 +1,10 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+TEXT ·StorepNoWB(SB), NOSPLIT, $0-16
+ MOVD ptr+0(FP), R0
+ MOVD val+8(FP), 0(R0)
+ RET
diff --git a/src/runtime/internal/atomic/bench_test.go b/src/runtime/internal/atomic/bench_test.go
new file mode 100644
index 0000000..efc0531
--- /dev/null
+++ b/src/runtime/internal/atomic/bench_test.go
@@ -0,0 +1,195 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package atomic_test
+
+import (
+ "runtime/internal/atomic"
+ "testing"
+)
+
+var sink any
+
+func BenchmarkAtomicLoad64(b *testing.B) {
+ var x uint64
+ sink = &x
+ for i := 0; i < b.N; i++ {
+ _ = atomic.Load64(&x)
+ }
+}
+
+func BenchmarkAtomicStore64(b *testing.B) {
+ var x uint64
+ sink = &x
+ for i := 0; i < b.N; i++ {
+ atomic.Store64(&x, 0)
+ }
+}
+
+func BenchmarkAtomicLoad(b *testing.B) {
+ var x uint32
+ sink = &x
+ for i := 0; i < b.N; i++ {
+ _ = atomic.Load(&x)
+ }
+}
+
+func BenchmarkAtomicStore(b *testing.B) {
+ var x uint32
+ sink = &x
+ for i := 0; i < b.N; i++ {
+ atomic.Store(&x, 0)
+ }
+}
+
+func BenchmarkAnd8(b *testing.B) {
+ var x [512]uint8 // give byte its own cache line
+ sink = &x
+ for i := 0; i < b.N; i++ {
+ atomic.And8(&x[255], uint8(i))
+ }
+}
+
+func BenchmarkAnd(b *testing.B) {
+ var x [128]uint32 // give x its own cache line
+ sink = &x
+ for i := 0; i < b.N; i++ {
+ atomic.And(&x[63], uint32(i))
+ }
+}
+
+func BenchmarkAnd8Parallel(b *testing.B) {
+ var x [512]uint8 // give byte its own cache line
+ sink = &x
+ b.RunParallel(func(pb *testing.PB) {
+ i := uint8(0)
+ for pb.Next() {
+ atomic.And8(&x[255], i)
+ i++
+ }
+ })
+}
+
+func BenchmarkAndParallel(b *testing.B) {
+ var x [128]uint32 // give x its own cache line
+ sink = &x
+ b.RunParallel(func(pb *testing.PB) {
+ i := uint32(0)
+ for pb.Next() {
+ atomic.And(&x[63], i)
+ i++
+ }
+ })
+}
+
+func BenchmarkOr8(b *testing.B) {
+ var x [512]uint8 // give byte its own cache line
+ sink = &x
+ for i := 0; i < b.N; i++ {
+ atomic.Or8(&x[255], uint8(i))
+ }
+}
+
+func BenchmarkOr(b *testing.B) {
+ var x [128]uint32 // give x its own cache line
+ sink = &x
+ for i := 0; i < b.N; i++ {
+ atomic.Or(&x[63], uint32(i))
+ }
+}
+
+func BenchmarkOr8Parallel(b *testing.B) {
+ var x [512]uint8 // give byte its own cache line
+ sink = &x
+ b.RunParallel(func(pb *testing.PB) {
+ i := uint8(0)
+ for pb.Next() {
+ atomic.Or8(&x[255], i)
+ i++
+ }
+ })
+}
+
+func BenchmarkOrParallel(b *testing.B) {
+ var x [128]uint32 // give x its own cache line
+ sink = &x
+ b.RunParallel(func(pb *testing.PB) {
+ i := uint32(0)
+ for pb.Next() {
+ atomic.Or(&x[63], i)
+ i++
+ }
+ })
+}
+
+func BenchmarkXadd(b *testing.B) {
+ var x uint32
+ ptr := &x
+ b.RunParallel(func(pb *testing.PB) {
+ for pb.Next() {
+ atomic.Xadd(ptr, 1)
+ }
+ })
+}
+
+func BenchmarkXadd64(b *testing.B) {
+ var x uint64
+ ptr := &x
+ b.RunParallel(func(pb *testing.PB) {
+ for pb.Next() {
+ atomic.Xadd64(ptr, 1)
+ }
+ })
+}
+
+func BenchmarkCas(b *testing.B) {
+ var x uint32
+ x = 1
+ ptr := &x
+ b.RunParallel(func(pb *testing.PB) {
+ for pb.Next() {
+ atomic.Cas(ptr, 1, 0)
+ atomic.Cas(ptr, 0, 1)
+ }
+ })
+}
+
+func BenchmarkCas64(b *testing.B) {
+ var x uint64
+ x = 1
+ ptr := &x
+ b.RunParallel(func(pb *testing.PB) {
+ for pb.Next() {
+ atomic.Cas64(ptr, 1, 0)
+ atomic.Cas64(ptr, 0, 1)
+ }
+ })
+}
+func BenchmarkXchg(b *testing.B) {
+ var x uint32
+ x = 1
+ ptr := &x
+ b.RunParallel(func(pb *testing.PB) {
+ var y uint32
+ y = 1
+ for pb.Next() {
+ y = atomic.Xchg(ptr, y)
+ y += 1
+ }
+ })
+}
+
+func BenchmarkXchg64(b *testing.B) {
+ var x uint64
+ x = 1
+ ptr := &x
+ b.RunParallel(func(pb *testing.PB) {
+ var y uint64
+ y = 1
+ for pb.Next() {
+ y = atomic.Xchg64(ptr, y)
+ y += 1
+ }
+ })
+}
diff --git a/src/runtime/internal/atomic/doc.go b/src/runtime/internal/atomic/doc.go
new file mode 100644
index 0000000..08e6b6c
--- /dev/null
+++ b/src/runtime/internal/atomic/doc.go
@@ -0,0 +1,18 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+Package atomic provides atomic operations, independent of sync/atomic,
+to the runtime.
+
+On most platforms, the compiler is aware of the functions defined
+in this package, and they're replaced with platform-specific intrinsics.
+On other platforms, generic implementations are made available.
+
+Unless otherwise noted, operations defined in this package are sequentially
+consistent across threads with respect to the values they manipulate. More
+specifically, operations that happen in a specific order on one thread,
+will always be observed to happen in exactly that order by another thread.
+*/
+package atomic
diff --git a/src/runtime/internal/atomic/stubs.go b/src/runtime/internal/atomic/stubs.go
new file mode 100644
index 0000000..7df8d9c
--- /dev/null
+++ b/src/runtime/internal/atomic/stubs.go
@@ -0,0 +1,59 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !wasm
+
+package atomic
+
+import "unsafe"
+
+//go:noescape
+func Cas(ptr *uint32, old, new uint32) bool
+
+// NO go:noescape annotation; see atomic_pointer.go.
+func Casp1(ptr *unsafe.Pointer, old, new unsafe.Pointer) bool
+
+//go:noescape
+func Casint32(ptr *int32, old, new int32) bool
+
+//go:noescape
+func Casint64(ptr *int64, old, new int64) bool
+
+//go:noescape
+func Casuintptr(ptr *uintptr, old, new uintptr) bool
+
+//go:noescape
+func Storeint32(ptr *int32, new int32)
+
+//go:noescape
+func Storeint64(ptr *int64, new int64)
+
+//go:noescape
+func Storeuintptr(ptr *uintptr, new uintptr)
+
+//go:noescape
+func Loaduintptr(ptr *uintptr) uintptr
+
+//go:noescape
+func Loaduint(ptr *uint) uint
+
+// TODO(matloob): Should these functions have the go:noescape annotation?
+
+//go:noescape
+func Loadint32(ptr *int32) int32
+
+//go:noescape
+func Loadint64(ptr *int64) int64
+
+//go:noescape
+func Xaddint32(ptr *int32, delta int32) int32
+
+//go:noescape
+func Xaddint64(ptr *int64, delta int64) int64
+
+//go:noescape
+func Xchgint32(ptr *int32, new int32) int32
+
+//go:noescape
+func Xchgint64(ptr *int64, new int64) int64
diff --git a/src/runtime/internal/atomic/sys_linux_arm.s b/src/runtime/internal/atomic/sys_linux_arm.s
new file mode 100644
index 0000000..0cc7fa7
--- /dev/null
+++ b/src/runtime/internal/atomic/sys_linux_arm.s
@@ -0,0 +1,144 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// Linux/ARM atomic operations.
+
+// Because there is so much variation in ARM devices,
+// the Linux kernel provides an appropriate compare-and-swap
+// implementation at address 0xffff0fc0. Caller sets:
+// R0 = old value
+// R1 = new value
+// R2 = addr
+// LR = return address
+// The function returns with CS true if the swap happened.
+// http://lxr.linux.no/linux+v2.6.37.2/arch/arm/kernel/entry-armv.S#L850
+// On older kernels (before 2.6.24) the function can incorrectly
+// report a conflict, so we have to double-check the compare ourselves
+// and retry if necessary.
+//
+// https://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=b49c0f24cf6744a3f4fd09289fe7cade349dead5
+//
+TEXT cas<>(SB),NOSPLIT,$0
+ MOVW $0xffff0fc0, R15 // R15 is hardware PC.
+
+TEXT ·Cas(SB),NOSPLIT|NOFRAME,$0
+ MOVB runtime·goarm(SB), R11
+ CMP $7, R11
+ BLT 2(PC)
+ JMP ·armcas(SB)
+ JMP kernelcas<>(SB)
+
+TEXT kernelcas<>(SB),NOSPLIT,$0
+ MOVW ptr+0(FP), R2
+ // trigger potential paging fault here,
+ // because we don't know how to traceback through __kuser_cmpxchg
+ MOVW (R2), R0
+ MOVW old+4(FP), R0
+loop:
+ MOVW new+8(FP), R1
+ BL cas<>(SB)
+ BCC check
+ MOVW $1, R0
+ MOVB R0, ret+12(FP)
+ RET
+check:
+ // Kernel lies; double-check.
+ MOVW ptr+0(FP), R2
+ MOVW old+4(FP), R0
+ MOVW 0(R2), R3
+ CMP R0, R3
+ BEQ loop
+ MOVW $0, R0
+ MOVB R0, ret+12(FP)
+ RET
+
+// As for cas, memory barriers are complicated on ARM, but the kernel
+// provides a user helper. ARMv5 does not support SMP and has no
+// memory barrier instruction at all. ARMv6 added SMP support and has
+// a memory barrier, but it requires writing to a coprocessor
+// register. ARMv7 introduced the DMB instruction, but it's expensive
+// even on single-core devices. The kernel helper takes care of all of
+// this for us.
+
+// Use kernel helper version of memory_barrier, when compiled with GOARM < 7.
+TEXT memory_barrier<>(SB),NOSPLIT|NOFRAME,$0
+ MOVW $0xffff0fa0, R15 // R15 is hardware PC.
+
+TEXT ·Load(SB),NOSPLIT,$0-8
+ MOVW addr+0(FP), R0
+ MOVW (R0), R1
+
+ MOVB runtime·goarm(SB), R11
+ CMP $7, R11
+ BGE native_barrier
+ BL memory_barrier<>(SB)
+ B end
+native_barrier:
+ DMB MB_ISH
+end:
+ MOVW R1, ret+4(FP)
+ RET
+
+TEXT ·Store(SB),NOSPLIT,$0-8
+ MOVW addr+0(FP), R1
+ MOVW v+4(FP), R2
+
+ MOVB runtime·goarm(SB), R8
+ CMP $7, R8
+ BGE native_barrier
+ BL memory_barrier<>(SB)
+ B store
+native_barrier:
+ DMB MB_ISH
+
+store:
+ MOVW R2, (R1)
+
+ CMP $7, R8
+ BGE native_barrier2
+ BL memory_barrier<>(SB)
+ RET
+native_barrier2:
+ DMB MB_ISH
+ RET
+
+TEXT ·Load8(SB),NOSPLIT,$0-5
+ MOVW addr+0(FP), R0
+ MOVB (R0), R1
+
+ MOVB runtime·goarm(SB), R11
+ CMP $7, R11
+ BGE native_barrier
+ BL memory_barrier<>(SB)
+ B end
+native_barrier:
+ DMB MB_ISH
+end:
+ MOVB R1, ret+4(FP)
+ RET
+
+TEXT ·Store8(SB),NOSPLIT,$0-5
+ MOVW addr+0(FP), R1
+ MOVB v+4(FP), R2
+
+ MOVB runtime·goarm(SB), R8
+ CMP $7, R8
+ BGE native_barrier
+ BL memory_barrier<>(SB)
+ B store
+native_barrier:
+ DMB MB_ISH
+
+store:
+ MOVB R2, (R1)
+
+ CMP $7, R8
+ BGE native_barrier2
+ BL memory_barrier<>(SB)
+ RET
+native_barrier2:
+ DMB MB_ISH
+ RET
diff --git a/src/runtime/internal/atomic/sys_nonlinux_arm.s b/src/runtime/internal/atomic/sys_nonlinux_arm.s
new file mode 100644
index 0000000..b55bf90
--- /dev/null
+++ b/src/runtime/internal/atomic/sys_nonlinux_arm.s
@@ -0,0 +1,79 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !linux
+
+#include "textflag.h"
+
+// TODO(minux): this is only valid for ARMv6+
+// bool armcas(int32 *val, int32 old, int32 new)
+// Atomically:
+// if(*val == old){
+// *val = new;
+// return 1;
+// }else
+// return 0;
+TEXT ·Cas(SB),NOSPLIT,$0
+ JMP ·armcas(SB)
+
+// Non-linux OSes support only single processor machines before ARMv7.
+// So we don't need memory barriers if goarm < 7. And we fail loud at
+// startup (runtime.checkgoarm) if it is a multi-processor but goarm < 7.
+
+TEXT ·Load(SB),NOSPLIT|NOFRAME,$0-8
+ MOVW addr+0(FP), R0
+ MOVW (R0), R1
+
+ MOVB runtime·goarm(SB), R11
+ CMP $7, R11
+ BLT 2(PC)
+ DMB MB_ISH
+
+ MOVW R1, ret+4(FP)
+ RET
+
+TEXT ·Store(SB),NOSPLIT,$0-8
+ MOVW addr+0(FP), R1
+ MOVW v+4(FP), R2
+
+ MOVB runtime·goarm(SB), R8
+ CMP $7, R8
+ BLT 2(PC)
+ DMB MB_ISH
+
+ MOVW R2, (R1)
+
+ CMP $7, R8
+ BLT 2(PC)
+ DMB MB_ISH
+ RET
+
+TEXT ·Load8(SB),NOSPLIT|NOFRAME,$0-5
+ MOVW addr+0(FP), R0
+ MOVB (R0), R1
+
+ MOVB runtime·goarm(SB), R11
+ CMP $7, R11
+ BLT 2(PC)
+ DMB MB_ISH
+
+ MOVB R1, ret+4(FP)
+ RET
+
+TEXT ·Store8(SB),NOSPLIT,$0-5
+ MOVW addr+0(FP), R1
+ MOVB v+4(FP), R2
+
+ MOVB runtime·goarm(SB), R8
+ CMP $7, R8
+ BLT 2(PC)
+ DMB MB_ISH
+
+ MOVB R2, (R1)
+
+ CMP $7, R8
+ BLT 2(PC)
+ DMB MB_ISH
+ RET
+
diff --git a/src/runtime/internal/atomic/types.go b/src/runtime/internal/atomic/types.go
new file mode 100644
index 0000000..d346a76
--- /dev/null
+++ b/src/runtime/internal/atomic/types.go
@@ -0,0 +1,431 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package atomic
+
+import "unsafe"
+
+// Int32 is an atomically accessed int32 value.
+//
+// An Int32 must not be copied.
+type Int32 struct {
+ noCopy noCopy
+ value int32
+}
+
+// Load accesses and returns the value atomically.
+func (i *Int32) Load() int32 {
+ return Loadint32(&i.value)
+}
+
+// Store updates the value atomically.
+func (i *Int32) Store(value int32) {
+ Storeint32(&i.value, value)
+}
+
+// CompareAndSwap atomically compares i's value with old,
+// and if they're equal, swaps i's value with new.
+//
+// Returns true if the operation succeeded.
+func (i *Int32) CompareAndSwap(old, new int32) bool {
+ return Casint32(&i.value, old, new)
+}
+
+// Swap replaces i's value with new, returning
+// i's value before the replacement.
+func (i *Int32) Swap(new int32) int32 {
+ return Xchgint32(&i.value, new)
+}
+
+// Add adds delta to i atomically, returning
+// the new updated value.
+//
+// This operation wraps around in the usual
+// two's-complement way.
+func (i *Int32) Add(delta int32) int32 {
+ return Xaddint32(&i.value, delta)
+}
+
+// Int64 is an atomically accessed int64 value.
+//
+// 8-byte aligned on all platforms, unlike a regular int64.
+//
+// An Int64 must not be copied.
+type Int64 struct {
+ noCopy noCopy
+ _ align64
+ value int64
+}
+
+// Load accesses and returns the value atomically.
+func (i *Int64) Load() int64 {
+ return Loadint64(&i.value)
+}
+
+// Store updates the value atomically.
+func (i *Int64) Store(value int64) {
+ Storeint64(&i.value, value)
+}
+
+// CompareAndSwap atomically compares i's value with old,
+// and if they're equal, swaps i's value with new.
+//
+// Returns true if the operation succeeded.
+func (i *Int64) CompareAndSwap(old, new int64) bool {
+ return Casint64(&i.value, old, new)
+}
+
+// Swap replaces i's value with new, returning
+// i's value before the replacement.
+func (i *Int64) Swap(new int64) int64 {
+ return Xchgint64(&i.value, new)
+}
+
+// Add adds delta to i atomically, returning
+// the new updated value.
+//
+// This operation wraps around in the usual
+// two's-complement way.
+func (i *Int64) Add(delta int64) int64 {
+ return Xaddint64(&i.value, delta)
+}
+
+// Uint8 is an atomically accessed uint8 value.
+//
+// A Uint8 must not be copied.
+type Uint8 struct {
+ noCopy noCopy
+ value uint8
+}
+
+// Load accesses and returns the value atomically.
+func (u *Uint8) Load() uint8 {
+ return Load8(&u.value)
+}
+
+// Store updates the value atomically.
+func (u *Uint8) Store(value uint8) {
+ Store8(&u.value, value)
+}
+
+// And takes value and performs a bit-wise
+// "and" operation with the value of u, storing
+// the result into u.
+//
+// The full process is performed atomically.
+func (u *Uint8) And(value uint8) {
+ And8(&u.value, value)
+}
+
+// Or takes value and performs a bit-wise
+// "or" operation with the value of u, storing
+// the result into u.
+//
+// The full process is performed atomically.
+func (u *Uint8) Or(value uint8) {
+ Or8(&u.value, value)
+}
+
+// Bool is an atomically accessed bool value.
+//
+// A Bool must not be copied.
+type Bool struct {
+ // Inherits noCopy from Uint8.
+ u Uint8
+}
+
+// Load accesses and returns the value atomically.
+func (b *Bool) Load() bool {
+ return b.u.Load() != 0
+}
+
+// Store updates the value atomically.
+func (b *Bool) Store(value bool) {
+ s := uint8(0)
+ if value {
+ s = 1
+ }
+ b.u.Store(s)
+}
+
+// Uint32 is an atomically accessed uint32 value.
+//
+// A Uint32 must not be copied.
+type Uint32 struct {
+ noCopy noCopy
+ value uint32
+}
+
+// Load accesses and returns the value atomically.
+func (u *Uint32) Load() uint32 {
+ return Load(&u.value)
+}
+
+// LoadAcquire is a partially unsynchronized version
+// of Load that relaxes ordering constraints. Other threads
+// may observe operations that precede this operation to
+// occur after it, but no operation that occurs after it
+// on this thread can be observed to occur before it.
+//
+// WARNING: Use sparingly and with great care.
+func (u *Uint32) LoadAcquire() uint32 {
+ return LoadAcq(&u.value)
+}
+
+// Store updates the value atomically.
+func (u *Uint32) Store(value uint32) {
+ Store(&u.value, value)
+}
+
+// StoreRelease is a partially unsynchronized version
+// of Store that relaxes ordering constraints. Other threads
+// may observe operations that occur after this operation to
+// precede it, but no operation that precedes it
+// on this thread can be observed to occur after it.
+//
+// WARNING: Use sparingly and with great care.
+func (u *Uint32) StoreRelease(value uint32) {
+ StoreRel(&u.value, value)
+}
+
+// CompareAndSwap atomically compares u's value with old,
+// and if they're equal, swaps u's value with new.
+//
+// Returns true if the operation succeeded.
+func (u *Uint32) CompareAndSwap(old, new uint32) bool {
+ return Cas(&u.value, old, new)
+}
+
+// CompareAndSwapRelease is a partially unsynchronized version
+// of Cas that relaxes ordering constraints. Other threads
+// may observe operations that occur after this operation to
+// precede it, but no operation that precedes it
+// on this thread can be observed to occur after it.
+//
+// Returns true if the operation succeeded.
+//
+// WARNING: Use sparingly and with great care.
+func (u *Uint32) CompareAndSwapRelease(old, new uint32) bool {
+ return CasRel(&u.value, old, new)
+}
+
+// Swap replaces u's value with new, returning
+// u's value before the replacement.
+func (u *Uint32) Swap(value uint32) uint32 {
+ return Xchg(&u.value, value)
+}
+
+// And takes value and performs a bit-wise
+// "and" operation with the value of u, storing
+// the result into u.
+//
+// The full process is performed atomically.
+func (u *Uint32) And(value uint32) {
+ And(&u.value, value)
+}
+
+// Or takes value and performs a bit-wise
+// "or" operation with the value of u, storing
+// the result into u.
+//
+// The full process is performed atomically.
+func (u *Uint32) Or(value uint32) {
+ Or(&u.value, value)
+}
+
+// Add adds delta to u atomically, returning
+// the new updated value.
+//
+// This operation wraps around in the usual
+// two's-complement way.
+func (u *Uint32) Add(delta int32) uint32 {
+ return Xadd(&u.value, delta)
+}
+
+// Uint64 is an atomically accessed uint64 value.
+//
+// 8-byte aligned on all platforms, unlike a regular uint64.
+//
+// A Uint64 must not be copied.
+type Uint64 struct {
+ noCopy noCopy
+ _ align64
+ value uint64
+}
+
+// Load accesses and returns the value atomically.
+func (u *Uint64) Load() uint64 {
+ return Load64(&u.value)
+}
+
+// Store updates the value atomically.
+func (u *Uint64) Store(value uint64) {
+ Store64(&u.value, value)
+}
+
+// CompareAndSwap atomically compares u's value with old,
+// and if they're equal, swaps u's value with new.
+//
+// Returns true if the operation succeeded.
+func (u *Uint64) CompareAndSwap(old, new uint64) bool {
+ return Cas64(&u.value, old, new)
+}
+
+// Swap replaces u's value with new, returning
+// u's value before the replacement.
+func (u *Uint64) Swap(value uint64) uint64 {
+ return Xchg64(&u.value, value)
+}
+
+// Add adds delta to u atomically, returning
+// the new updated value.
+//
+// This operation wraps around in the usual
+// two's-complement way.
+func (u *Uint64) Add(delta int64) uint64 {
+ return Xadd64(&u.value, delta)
+}
+
+// Uintptr is an atomically accessed uintptr value.
+//
+// A Uintptr must not be copied.
+type Uintptr struct {
+ noCopy noCopy
+ value uintptr
+}
+
+// Load accesses and returns the value atomically.
+func (u *Uintptr) Load() uintptr {
+ return Loaduintptr(&u.value)
+}
+
+// LoadAcquire is a partially unsynchronized version
+// of Load that relaxes ordering constraints. Other threads
+// may observe operations that precede this operation to
+// occur after it, but no operation that occurs after it
+// on this thread can be observed to occur before it.
+//
+// WARNING: Use sparingly and with great care.
+func (u *Uintptr) LoadAcquire() uintptr {
+ return LoadAcquintptr(&u.value)
+}
+
+// Store updates the value atomically.
+func (u *Uintptr) Store(value uintptr) {
+ Storeuintptr(&u.value, value)
+}
+
+// StoreRelease is a partially unsynchronized version
+// of Store that relaxes ordering constraints. Other threads
+// may observe operations that occur after this operation to
+// precede it, but no operation that precedes it
+// on this thread can be observed to occur after it.
+//
+// WARNING: Use sparingly and with great care.
+func (u *Uintptr) StoreRelease(value uintptr) {
+ StoreReluintptr(&u.value, value)
+}
+
+// CompareAndSwap atomically compares u's value with old,
+// and if they're equal, swaps u's value with new.
+//
+// Returns true if the operation succeeded.
+func (u *Uintptr) CompareAndSwap(old, new uintptr) bool {
+ return Casuintptr(&u.value, old, new)
+}
+
+// Swap replaces u's value with new, returning
+// u's value before the replacement.
+func (u *Uintptr) Swap(value uintptr) uintptr {
+ return Xchguintptr(&u.value, value)
+}
+
+// Add adds delta to u atomically, returning
+// the new updated value.
+//
+// This operation wraps around in the usual
+// two's-complement way.
+func (u *Uintptr) Add(delta uintptr) uintptr {
+ return Xadduintptr(&u.value, delta)
+}
+
+// Float64 is an atomically accessed float64 value.
+//
+// 8-byte aligned on all platforms, unlike a regular float64.
+//
+// A Float64 must not be copied.
+type Float64 struct {
+ // Inherits noCopy and align64 from Uint64.
+ u Uint64
+}
+
+// Load accesses and returns the value atomically.
+func (f *Float64) Load() float64 {
+ r := f.u.Load()
+ return *(*float64)(unsafe.Pointer(&r))
+}
+
+// Store updates the value atomically.
+func (f *Float64) Store(value float64) {
+ f.u.Store(*(*uint64)(unsafe.Pointer(&value)))
+}
+
+// UnsafePointer is an atomically accessed unsafe.Pointer value.
+//
+// Note that because of the atomicity guarantees, stores to values
+// of this type never trigger a write barrier, and the relevant
+// methods are suffixed with "NoWB" to indicate that explicitly.
+// As a result, this type should be used carefully, and sparingly,
+// mostly with values that do not live in the Go heap anyway.
+//
+// An UnsafePointer must not be copied.
+type UnsafePointer struct {
+ noCopy noCopy
+ value unsafe.Pointer
+}
+
+// Load accesses and returns the value atomically.
+func (u *UnsafePointer) Load() unsafe.Pointer {
+ return Loadp(unsafe.Pointer(&u.value))
+}
+
+// StoreNoWB updates the value atomically.
+//
+// WARNING: As the name implies this operation does *not*
+// perform a write barrier on value, and so this operation may
+// hide pointers from the GC. Use with care and sparingly.
+// It is safe to use with values not found in the Go heap.
+func (u *UnsafePointer) StoreNoWB(value unsafe.Pointer) {
+ StorepNoWB(unsafe.Pointer(&u.value), value)
+}
+
+// CompareAndSwapNoWB atomically (with respect to other methods)
+// compares u's value with old, and if they're equal,
+// swaps u's value with new.
+//
+// Returns true if the operation succeeded.
+//
+// WARNING: As the name implies this operation does *not*
+// perform a write barrier on value, and so this operation may
+// hide pointers from the GC. Use with care and sparingly.
+// It is safe to use with values not found in the Go heap.
+func (u *UnsafePointer) CompareAndSwapNoWB(old, new unsafe.Pointer) bool {
+ return Casp1(&u.value, old, new)
+}
+
+// noCopy may be embedded into structs which must not be copied
+// after the first use.
+//
+// See https://golang.org/issues/8005#issuecomment-190753527
+// for details.
+type noCopy struct{}
+
+// Lock is a no-op used by -copylocks checker from `go vet`.
+func (*noCopy) Lock() {}
+func (*noCopy) Unlock() {}
+
+// align64 may be added to structs that must be 64-bit aligned.
+// This struct is recognized by a special case in the compiler
+// and will not work if copied to any other package.
+type align64 struct{}
diff --git a/src/runtime/internal/atomic/types_64bit.go b/src/runtime/internal/atomic/types_64bit.go
new file mode 100644
index 0000000..43c1ba2
--- /dev/null
+++ b/src/runtime/internal/atomic/types_64bit.go
@@ -0,0 +1,29 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build amd64 || arm64 || mips64 || mips64le || ppc64 || ppc64le || riscv64 || s390x || wasm
+
+package atomic
+
+// LoadAcquire is a partially unsynchronized version
+// of Load that relaxes ordering constraints. Other threads
+// may observe operations that precede this operation to
+// occur after it, but no operation that occurs after it
+// on this thread can be observed to occur before it.
+//
+// WARNING: Use sparingly and with great care.
+func (u *Uint64) LoadAcquire() uint64 {
+ return LoadAcq64(&u.value)
+}
+
+// StoreRelease is a partially unsynchronized version
+// of Store that relaxes ordering constraints. Other threads
+// may observe operations that occur after this operation to
+// precede it, but no operation that precedes it
+// on this thread can be observed to occur after it.
+//
+// WARNING: Use sparingly and with great care.
+func (u *Uint64) StoreRelease(value uint64) {
+ StoreRel64(&u.value, value)
+}
diff --git a/src/runtime/internal/atomic/unaligned.go b/src/runtime/internal/atomic/unaligned.go
new file mode 100644
index 0000000..a859de4
--- /dev/null
+++ b/src/runtime/internal/atomic/unaligned.go
@@ -0,0 +1,9 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package atomic
+
+func panicUnaligned() {
+ panic("unaligned 64-bit atomic operation")
+}
diff --git a/src/runtime/internal/math/math.go b/src/runtime/internal/math/math.go
new file mode 100644
index 0000000..c3fac36
--- /dev/null
+++ b/src/runtime/internal/math/math.go
@@ -0,0 +1,40 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+import "internal/goarch"
+
+const MaxUintptr = ^uintptr(0)
+
+// MulUintptr returns a * b and whether the multiplication overflowed.
+// On supported platforms this is an intrinsic lowered by the compiler.
+func MulUintptr(a, b uintptr) (uintptr, bool) {
+ if a|b < 1<<(4*goarch.PtrSize) || a == 0 {
+ return a * b, false
+ }
+ overflow := b > MaxUintptr/a
+ return a * b, overflow
+}
+
+// Mul64 returns the 128-bit product of x and y: (hi, lo) = x * y
+// with the product bits' upper half returned in hi and the lower
+// half returned in lo.
+// This is a copy from math/bits.Mul64
+// On supported platforms this is an intrinsic lowered by the compiler.
+func Mul64(x, y uint64) (hi, lo uint64) {
+ const mask32 = 1<<32 - 1
+ x0 := x & mask32
+ x1 := x >> 32
+ y0 := y & mask32
+ y1 := y >> 32
+ w0 := x0 * y0
+ t := x1*y0 + w0>>32
+ w1 := t & mask32
+ w2 := t >> 32
+ w1 += x0 * y1
+ hi = x1*y1 + w2 + w1>>32
+ lo = x * y
+ return
+}
diff --git a/src/runtime/internal/math/math_test.go b/src/runtime/internal/math/math_test.go
new file mode 100644
index 0000000..303eb63
--- /dev/null
+++ b/src/runtime/internal/math/math_test.go
@@ -0,0 +1,79 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math_test
+
+import (
+ . "runtime/internal/math"
+ "testing"
+)
+
+const (
+ UintptrSize = 32 << (^uintptr(0) >> 63)
+)
+
+type mulUintptrTest struct {
+ a uintptr
+ b uintptr
+ overflow bool
+}
+
+var mulUintptrTests = []mulUintptrTest{
+ {0, 0, false},
+ {1000, 1000, false},
+ {MaxUintptr, 0, false},
+ {MaxUintptr, 1, false},
+ {MaxUintptr / 2, 2, false},
+ {MaxUintptr / 2, 3, true},
+ {MaxUintptr, 10, true},
+ {MaxUintptr, 100, true},
+ {MaxUintptr / 100, 100, false},
+ {MaxUintptr / 1000, 1001, true},
+ {1<<(UintptrSize/2) - 1, 1<<(UintptrSize/2) - 1, false},
+ {1 << (UintptrSize / 2), 1 << (UintptrSize / 2), true},
+ {MaxUintptr >> 32, MaxUintptr >> 32, false},
+ {MaxUintptr, MaxUintptr, true},
+}
+
+func TestMulUintptr(t *testing.T) {
+ for _, test := range mulUintptrTests {
+ a, b := test.a, test.b
+ for i := 0; i < 2; i++ {
+ mul, overflow := MulUintptr(a, b)
+ if mul != a*b || overflow != test.overflow {
+ t.Errorf("MulUintptr(%v, %v) = %v, %v want %v, %v",
+ a, b, mul, overflow, a*b, test.overflow)
+ }
+ a, b = b, a
+ }
+ }
+}
+
+var SinkUintptr uintptr
+var SinkBool bool
+
+var x, y uintptr
+
+func BenchmarkMulUintptr(b *testing.B) {
+ x, y = 1, 2
+ b.Run("small", func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ var overflow bool
+ SinkUintptr, overflow = MulUintptr(x, y)
+ if overflow {
+ SinkUintptr = 0
+ }
+ }
+ })
+ x, y = MaxUintptr, MaxUintptr-1
+ b.Run("large", func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ var overflow bool
+ SinkUintptr, overflow = MulUintptr(x, y)
+ if overflow {
+ SinkUintptr = 0
+ }
+ }
+ })
+}
diff --git a/src/runtime/internal/sys/consts.go b/src/runtime/internal/sys/consts.go
new file mode 100644
index 0000000..fffcf81
--- /dev/null
+++ b/src/runtime/internal/sys/consts.go
@@ -0,0 +1,34 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package sys
+
+import (
+ "internal/goarch"
+ "internal/goos"
+)
+
+// AIX requires a larger stack for syscalls.
+const StackGuardMultiplier = StackGuardMultiplierDefault*(1-goos.IsAix) + 2*goos.IsAix
+
+// DefaultPhysPageSize is the default physical page size.
+const DefaultPhysPageSize = goarch.DefaultPhysPageSize
+
+// PCQuantum is the minimal unit for a program counter (1 on x86, 4 on most other systems).
+// The various PC tables record PC deltas pre-divided by PCQuantum.
+const PCQuantum = goarch.PCQuantum
+
+// Int64Align is the required alignment for a 64-bit integer (4 on 32-bit systems, 8 on 64-bit).
+const Int64Align = goarch.PtrSize
+
+// MinFrameSize is the size of the system-reserved words at the bottom
+// of a frame (just above the architectural stack pointer).
+// It is zero on x86 and PtrSize on most non-x86 (LR-based) systems.
+// On PowerPC it is larger, to cover three more reserved words:
+// the compiler word, the link editor word, and the TOC save word.
+const MinFrameSize = goarch.MinFrameSize
+
+// StackAlign is the required alignment of the SP register.
+// The stack must be at least word aligned, but some architectures require more.
+const StackAlign = goarch.StackAlign
diff --git a/src/runtime/internal/sys/intrinsics.go b/src/runtime/internal/sys/intrinsics.go
new file mode 100644
index 0000000..5af4901
--- /dev/null
+++ b/src/runtime/internal/sys/intrinsics.go
@@ -0,0 +1,91 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !386
+
+// TODO finish intrinsifying 386, deadcode the assembly, remove build tags, merge w/ intrinsics_common
+// TODO replace all uses of CtzXX with TrailingZerosXX; they are the same.
+
+package sys
+
+// Using techniques from http://supertech.csail.mit.edu/papers/debruijn.pdf
+
+const deBruijn64ctz = 0x0218a392cd3d5dbf
+
+var deBruijnIdx64ctz = [64]byte{
+ 0, 1, 2, 7, 3, 13, 8, 19,
+ 4, 25, 14, 28, 9, 34, 20, 40,
+ 5, 17, 26, 38, 15, 46, 29, 48,
+ 10, 31, 35, 54, 21, 50, 41, 57,
+ 63, 6, 12, 18, 24, 27, 33, 39,
+ 16, 37, 45, 47, 30, 53, 49, 56,
+ 62, 11, 23, 32, 36, 44, 52, 55,
+ 61, 22, 43, 51, 60, 42, 59, 58,
+}
+
+const deBruijn32ctz = 0x04653adf
+
+var deBruijnIdx32ctz = [32]byte{
+ 0, 1, 2, 6, 3, 11, 7, 16,
+ 4, 14, 12, 21, 8, 23, 17, 26,
+ 31, 5, 10, 15, 13, 20, 22, 25,
+ 30, 9, 19, 24, 29, 18, 28, 27,
+}
+
+// Ctz64 counts trailing (low-order) zeroes,
+// and if all are zero, then 64.
+func Ctz64(x uint64) int {
+ x &= -x // isolate low-order bit
+ y := x * deBruijn64ctz >> 58 // extract part of deBruijn sequence
+ i := int(deBruijnIdx64ctz[y]) // convert to bit index
+ z := int((x - 1) >> 57 & 64) // adjustment if zero
+ return i + z
+}
+
+// Ctz32 counts trailing (low-order) zeroes,
+// and if all are zero, then 32.
+func Ctz32(x uint32) int {
+ x &= -x // isolate low-order bit
+ y := x * deBruijn32ctz >> 27 // extract part of deBruijn sequence
+ i := int(deBruijnIdx32ctz[y]) // convert to bit index
+ z := int((x - 1) >> 26 & 32) // adjustment if zero
+ return i + z
+}
+
+// Ctz8 returns the number of trailing zero bits in x; the result is 8 for x == 0.
+func Ctz8(x uint8) int {
+ return int(ntz8tab[x])
+}
+
+// Bswap64 returns its input with byte order reversed
+// 0x0102030405060708 -> 0x0807060504030201
+func Bswap64(x uint64) uint64 {
+ c8 := uint64(0x00ff00ff00ff00ff)
+ a := x >> 8 & c8
+ b := (x & c8) << 8
+ x = a | b
+ c16 := uint64(0x0000ffff0000ffff)
+ a = x >> 16 & c16
+ b = (x & c16) << 16
+ x = a | b
+ c32 := uint64(0x00000000ffffffff)
+ a = x >> 32 & c32
+ b = (x & c32) << 32
+ x = a | b
+ return x
+}
+
+// Bswap32 returns its input with byte order reversed
+// 0x01020304 -> 0x04030201
+func Bswap32(x uint32) uint32 {
+ c8 := uint32(0x00ff00ff)
+ a := x >> 8 & c8
+ b := (x & c8) << 8
+ x = a | b
+ c16 := uint32(0x0000ffff)
+ a = x >> 16 & c16
+ b = (x & c16) << 16
+ x = a | b
+ return x
+}
diff --git a/src/runtime/internal/sys/intrinsics_386.s b/src/runtime/internal/sys/intrinsics_386.s
new file mode 100644
index 0000000..784b246
--- /dev/null
+++ b/src/runtime/internal/sys/intrinsics_386.s
@@ -0,0 +1,58 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+TEXT runtime∕internal∕sys·Ctz64(SB), NOSPLIT, $0-12
+ // Try low 32 bits.
+ MOVL x_lo+0(FP), AX
+ BSFL AX, AX
+ JZ tryhigh
+ MOVL AX, ret+8(FP)
+ RET
+
+tryhigh:
+ // Try high 32 bits.
+ MOVL x_hi+4(FP), AX
+ BSFL AX, AX
+ JZ none
+ ADDL $32, AX
+ MOVL AX, ret+8(FP)
+ RET
+
+none:
+ // No bits are set.
+ MOVL $64, ret+8(FP)
+ RET
+
+TEXT runtime∕internal∕sys·Ctz32(SB), NOSPLIT, $0-8
+ MOVL x+0(FP), AX
+ BSFL AX, AX
+ JNZ 2(PC)
+ MOVL $32, AX
+ MOVL AX, ret+4(FP)
+ RET
+
+TEXT runtime∕internal∕sys·Ctz8(SB), NOSPLIT, $0-8
+ MOVBLZX x+0(FP), AX
+ BSFL AX, AX
+ JNZ 2(PC)
+ MOVL $8, AX
+ MOVL AX, ret+4(FP)
+ RET
+
+TEXT runtime∕internal∕sys·Bswap64(SB), NOSPLIT, $0-16
+ MOVL x_lo+0(FP), AX
+ MOVL x_hi+4(FP), BX
+ BSWAPL AX
+ BSWAPL BX
+ MOVL BX, ret_lo+8(FP)
+ MOVL AX, ret_hi+12(FP)
+ RET
+
+TEXT runtime∕internal∕sys·Bswap32(SB), NOSPLIT, $0-8
+ MOVL x+0(FP), AX
+ BSWAPL AX
+ MOVL AX, ret+4(FP)
+ RET
diff --git a/src/runtime/internal/sys/intrinsics_common.go b/src/runtime/internal/sys/intrinsics_common.go
new file mode 100644
index 0000000..48d9759
--- /dev/null
+++ b/src/runtime/internal/sys/intrinsics_common.go
@@ -0,0 +1,158 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package sys
+
+// Copied from math/bits to avoid dependence.
+
+var len8tab = [256]uint8{
+ 0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
+ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+}
+
+var ntz8tab = [256]uint8{
+ 0x08, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x07, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+}
+
+// len64 returns the minimum number of bits required to represent x; the result is 0 for x == 0.
+func Len64(x uint64) (n int) {
+ if x >= 1<<32 {
+ x >>= 32
+ n = 32
+ }
+ if x >= 1<<16 {
+ x >>= 16
+ n += 16
+ }
+ if x >= 1<<8 {
+ x >>= 8
+ n += 8
+ }
+ return n + int(len8tab[x])
+}
+
+// --- OnesCount ---
+
+const m0 = 0x5555555555555555 // 01010101 ...
+const m1 = 0x3333333333333333 // 00110011 ...
+const m2 = 0x0f0f0f0f0f0f0f0f // 00001111 ...
+
+// OnesCount64 returns the number of one bits ("population count") in x.
+func OnesCount64(x uint64) int {
+ // Implementation: Parallel summing of adjacent bits.
+ // See "Hacker's Delight", Chap. 5: Counting Bits.
+ // The following pattern shows the general approach:
+ //
+ // x = x>>1&(m0&m) + x&(m0&m)
+ // x = x>>2&(m1&m) + x&(m1&m)
+ // x = x>>4&(m2&m) + x&(m2&m)
+ // x = x>>8&(m3&m) + x&(m3&m)
+ // x = x>>16&(m4&m) + x&(m4&m)
+ // x = x>>32&(m5&m) + x&(m5&m)
+ // return int(x)
+ //
+ // Masking (& operations) can be left away when there's no
+ // danger that a field's sum will carry over into the next
+ // field: Since the result cannot be > 64, 8 bits is enough
+ // and we can ignore the masks for the shifts by 8 and up.
+ // Per "Hacker's Delight", the first line can be simplified
+ // more, but it saves at best one instruction, so we leave
+ // it alone for clarity.
+ const m = 1<<64 - 1
+ x = x>>1&(m0&m) + x&(m0&m)
+ x = x>>2&(m1&m) + x&(m1&m)
+ x = (x>>4 + x) & (m2 & m)
+ x += x >> 8
+ x += x >> 16
+ x += x >> 32
+ return int(x) & (1<<7 - 1)
+}
+
+var deBruijn64tab = [64]byte{
+ 0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
+ 62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
+ 63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
+ 54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
+}
+
+const deBruijn64 = 0x03f79d71b4ca8b09
+
+// TrailingZeros64 returns the number of trailing zero bits in x; the result is 64 for x == 0.
+func TrailingZeros64(x uint64) int {
+ if x == 0 {
+ return 64
+ }
+ // If popcount is fast, replace code below with return popcount(^x & (x - 1)).
+ //
+ // x & -x leaves only the right-most bit set in the word. Let k be the
+ // index of that bit. Since only a single bit is set, the value is two
+ // to the power of k. Multiplying by a power of two is equivalent to
+ // left shifting, in this case by k bits. The de Bruijn (64 bit) constant
+ // is such that all six bit, consecutive substrings are distinct.
+ // Therefore, if we have a left shifted version of this constant we can
+ // find by how many bits it was shifted by looking at which six bit
+ // substring ended up at the top of the word.
+ // (Knuth, volume 4, section 7.3.1)
+ return int(deBruijn64tab[(x&-x)*deBruijn64>>(64-6)])
+}
+
+// LeadingZeros64 returns the number of leading zero bits in x; the result is 64 for x == 0.
+func LeadingZeros64(x uint64) int { return 64 - Len64(x) }
+
+// LeadingZeros8 returns the number of leading zero bits in x; the result is 8 for x == 0.
+func LeadingZeros8(x uint8) int { return 8 - Len8(x) }
+
+// TrailingZeros8 returns the number of trailing zero bits in x; the result is 8 for x == 0.
+func TrailingZeros8(x uint8) int {
+ return int(ntz8tab[x])
+}
+
+// Len8 returns the minimum number of bits required to represent x; the result is 0 for x == 0.
+func Len8(x uint8) int {
+ return int(len8tab[x])
+}
+
+// Prefetch prefetches data from memory addr to cache
+//
+// AMD64: Produce PREFETCHT0 instruction
+//
+// ARM64: Produce PRFM instruction with PLDL1KEEP option
+func Prefetch(addr uintptr) {}
+
+// PrefetchStreamed prefetches data from memory addr, with a hint that this data is being streamed.
+// That is, it is likely to be accessed very soon, but only once. If possible, this will avoid polluting the cache.
+//
+// AMD64: Produce PREFETCHNTA instruction
+//
+// ARM64: Produce PRFM instruction with PLDL1STRM option
+func PrefetchStreamed(addr uintptr) {}
diff --git a/src/runtime/internal/sys/intrinsics_stubs.go b/src/runtime/internal/sys/intrinsics_stubs.go
new file mode 100644
index 0000000..a020652
--- /dev/null
+++ b/src/runtime/internal/sys/intrinsics_stubs.go
@@ -0,0 +1,13 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build 386
+
+package sys
+
+func Ctz64(x uint64) int
+func Ctz32(x uint32) int
+func Ctz8(x uint8) int
+func Bswap64(x uint64) uint64
+func Bswap32(x uint32) uint32
diff --git a/src/runtime/internal/sys/intrinsics_test.go b/src/runtime/internal/sys/intrinsics_test.go
new file mode 100644
index 0000000..0444183
--- /dev/null
+++ b/src/runtime/internal/sys/intrinsics_test.go
@@ -0,0 +1,38 @@
+package sys_test
+
+import (
+ "runtime/internal/sys"
+ "testing"
+)
+
+func TestCtz64(t *testing.T) {
+ for i := 0; i <= 64; i++ {
+ x := uint64(5) << uint(i)
+ if got := sys.Ctz64(x); got != i {
+ t.Errorf("Ctz64(%d)=%d, want %d", x, got, i)
+ }
+ }
+}
+func TestCtz32(t *testing.T) {
+ for i := 0; i <= 32; i++ {
+ x := uint32(5) << uint(i)
+ if got := sys.Ctz32(x); got != i {
+ t.Errorf("Ctz32(%d)=%d, want %d", x, got, i)
+ }
+ }
+}
+
+func TestBswap64(t *testing.T) {
+ x := uint64(0x1122334455667788)
+ y := sys.Bswap64(x)
+ if y != 0x8877665544332211 {
+ t.Errorf("Bswap(%x)=%x, want 0x8877665544332211", x, y)
+ }
+}
+func TestBswap32(t *testing.T) {
+ x := uint32(0x11223344)
+ y := sys.Bswap32(x)
+ if y != 0x44332211 {
+ t.Errorf("Bswap(%x)=%x, want 0x44332211", x, y)
+ }
+}
diff --git a/src/runtime/internal/sys/sys.go b/src/runtime/internal/sys/sys.go
new file mode 100644
index 0000000..694101d
--- /dev/null
+++ b/src/runtime/internal/sys/sys.go
@@ -0,0 +1,7 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// package sys contains system- and configuration- and architecture-specific
+// constants used by the runtime.
+package sys
diff --git a/src/runtime/internal/syscall/asm_linux_386.s b/src/runtime/internal/syscall/asm_linux_386.s
new file mode 100644
index 0000000..15aae4d
--- /dev/null
+++ b/src/runtime/internal/syscall/asm_linux_386.s
@@ -0,0 +1,34 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// See ../sys_linux_386.s for the reason why we always use int 0x80
+// instead of the glibc-specific "CALL 0x10(GS)".
+#define INVOKE_SYSCALL INT $0x80
+
+// func Syscall6(num, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, errno uintptr)
+//
+// Syscall # in AX, args in BX CX DX SI DI BP, return in AX
+TEXT ·Syscall6(SB),NOSPLIT,$0-40
+ MOVL num+0(FP), AX // syscall entry
+ MOVL a1+4(FP), BX
+ MOVL a2+8(FP), CX
+ MOVL a3+12(FP), DX
+ MOVL a4+16(FP), SI
+ MOVL a5+20(FP), DI
+ MOVL a6+24(FP), BP
+ INVOKE_SYSCALL
+ CMPL AX, $0xfffff001
+ JLS ok
+ MOVL $-1, r1+28(FP)
+ MOVL $0, r2+32(FP)
+ NEGL AX
+ MOVL AX, errno+36(FP)
+ RET
+ok:
+ MOVL AX, r1+28(FP)
+ MOVL DX, r2+32(FP)
+ MOVL $0, errno+36(FP)
+ RET
diff --git a/src/runtime/internal/syscall/asm_linux_amd64.s b/src/runtime/internal/syscall/asm_linux_amd64.s
new file mode 100644
index 0000000..3740ef1
--- /dev/null
+++ b/src/runtime/internal/syscall/asm_linux_amd64.s
@@ -0,0 +1,47 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// func Syscall6(num, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, errno uintptr)
+//
+// We need to convert to the syscall ABI.
+//
+// arg | ABIInternal | Syscall
+// ---------------------------
+// num | AX | AX
+// a1 | BX | DI
+// a2 | CX | SI
+// a3 | DI | DX
+// a4 | SI | R10
+// a5 | R8 | R8
+// a6 | R9 | R9
+//
+// r1 | AX | AX
+// r2 | BX | DX
+// err | CX | part of AX
+//
+// Note that this differs from "standard" ABI convention, which would pass 4th
+// arg in CX, not R10.
+TEXT ·Syscall6<ABIInternal>(SB),NOSPLIT,$0
+ // a6 already in R9.
+ // a5 already in R8.
+ MOVQ SI, R10 // a4
+ MOVQ DI, DX // a3
+ MOVQ CX, SI // a2
+ MOVQ BX, DI // a1
+ // num already in AX.
+ SYSCALL
+ CMPQ AX, $0xfffffffffffff001
+ JLS ok
+ NEGQ AX
+ MOVQ AX, CX // errno
+ MOVQ $-1, AX // r1
+ MOVQ $0, BX // r2
+ RET
+ok:
+ // r1 already in AX.
+ MOVQ DX, BX // r2
+ MOVQ $0, CX // errno
+ RET
diff --git a/src/runtime/internal/syscall/asm_linux_arm.s b/src/runtime/internal/syscall/asm_linux_arm.s
new file mode 100644
index 0000000..dbf1826
--- /dev/null
+++ b/src/runtime/internal/syscall/asm_linux_arm.s
@@ -0,0 +1,32 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// func Syscall6(num, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, errno uintptr)
+TEXT ·Syscall6(SB),NOSPLIT,$0-40
+ MOVW num+0(FP), R7 // syscall entry
+ MOVW a1+4(FP), R0
+ MOVW a2+8(FP), R1
+ MOVW a3+12(FP), R2
+ MOVW a4+16(FP), R3
+ MOVW a5+20(FP), R4
+ MOVW a6+24(FP), R5
+ SWI $0
+ MOVW $0xfffff001, R6
+ CMP R6, R0
+ BLS ok
+ MOVW $-1, R1
+ MOVW R1, r1+28(FP)
+ MOVW $0, R2
+ MOVW R2, r2+32(FP)
+ RSB $0, R0, R0
+ MOVW R0, errno+36(FP)
+ RET
+ok:
+ MOVW R0, r1+28(FP)
+ MOVW R1, r2+32(FP)
+ MOVW $0, R0
+ MOVW R0, errno+36(FP)
+ RET
diff --git a/src/runtime/internal/syscall/asm_linux_arm64.s b/src/runtime/internal/syscall/asm_linux_arm64.s
new file mode 100644
index 0000000..83e862f
--- /dev/null
+++ b/src/runtime/internal/syscall/asm_linux_arm64.s
@@ -0,0 +1,29 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// func Syscall6(num, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, errno uintptr)
+TEXT ·Syscall6(SB),NOSPLIT,$0-80
+ MOVD num+0(FP), R8 // syscall entry
+ MOVD a1+8(FP), R0
+ MOVD a2+16(FP), R1
+ MOVD a3+24(FP), R2
+ MOVD a4+32(FP), R3
+ MOVD a5+40(FP), R4
+ MOVD a6+48(FP), R5
+ SVC
+ CMN $4095, R0
+ BCC ok
+ MOVD $-1, R4
+ MOVD R4, r1+56(FP)
+ MOVD ZR, r2+64(FP)
+ NEG R0, R0
+ MOVD R0, errno+72(FP)
+ RET
+ok:
+ MOVD R0, r1+56(FP)
+ MOVD R1, r2+64(FP)
+ MOVD ZR, errno+72(FP)
+ RET
diff --git a/src/runtime/internal/syscall/asm_linux_loong64.s b/src/runtime/internal/syscall/asm_linux_loong64.s
new file mode 100644
index 0000000..d6a33f9
--- /dev/null
+++ b/src/runtime/internal/syscall/asm_linux_loong64.s
@@ -0,0 +1,29 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// func Syscall6(num, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, errno uintptr)
+TEXT ·Syscall6(SB),NOSPLIT,$0-80
+ MOVV num+0(FP), R11 // syscall entry
+ MOVV a1+8(FP), R4
+ MOVV a2+16(FP), R5
+ MOVV a3+24(FP), R6
+ MOVV a4+32(FP), R7
+ MOVV a5+40(FP), R8
+ MOVV a6+48(FP), R9
+ SYSCALL
+ MOVW $-4096, R12
+ BGEU R12, R4, ok
+ MOVV $-1, R12
+ MOVV R12, r1+56(FP)
+ MOVV R0, r2+64(FP)
+ SUBVU R4, R0, R4
+ MOVV R4, errno+72(FP)
+ RET
+ok:
+ MOVV R4, r1+56(FP)
+ MOVV R0, r2+64(FP) // r2 is not used. Always set to 0.
+ MOVV R0, errno+72(FP)
+ RET
diff --git a/src/runtime/internal/syscall/asm_linux_mips64x.s b/src/runtime/internal/syscall/asm_linux_mips64x.s
new file mode 100644
index 0000000..0e88a2d
--- /dev/null
+++ b/src/runtime/internal/syscall/asm_linux_mips64x.s
@@ -0,0 +1,29 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build linux && (mips64 || mips64le)
+
+#include "textflag.h"
+
+// func Syscall6(num, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, errno uintptr)
+TEXT ·Syscall6(SB),NOSPLIT,$0-80
+ MOVV num+0(FP), R2 // syscall entry
+ MOVV a1+8(FP), R4
+ MOVV a2+16(FP), R5
+ MOVV a3+24(FP), R6
+ MOVV a4+32(FP), R7
+ MOVV a5+40(FP), R8
+ MOVV a6+48(FP), R9
+ SYSCALL
+ BEQ R7, ok
+ MOVV $-1, R1
+ MOVV R1, r1+56(FP)
+ MOVV R0, r2+64(FP)
+ MOVV R2, errno+72(FP)
+ RET
+ok:
+ MOVV R2, r1+56(FP)
+ MOVV R3, r2+64(FP)
+ MOVV R0, errno+72(FP)
+ RET
diff --git a/src/runtime/internal/syscall/asm_linux_mipsx.s b/src/runtime/internal/syscall/asm_linux_mipsx.s
new file mode 100644
index 0000000..050029e
--- /dev/null
+++ b/src/runtime/internal/syscall/asm_linux_mipsx.s
@@ -0,0 +1,34 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build linux && (mips || mipsle)
+
+#include "textflag.h"
+
+// func Syscall6(num, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, errno uintptr)
+//
+// The 5th and 6th arg go at sp+16, sp+20.
+// Note that frame size of 20 means that 24 bytes gets reserved on stack.
+TEXT ·Syscall6(SB),NOSPLIT,$20-40
+ MOVW num+0(FP), R2 // syscall entry
+ MOVW a1+4(FP), R4
+ MOVW a2+8(FP), R5
+ MOVW a3+12(FP), R6
+ MOVW a4+16(FP), R7
+ MOVW a5+20(FP), R8
+ MOVW a6+24(FP), R9
+ MOVW R8, 16(R29)
+ MOVW R9, 20(R29)
+ SYSCALL
+ BEQ R7, ok
+ MOVW $-1, R1
+ MOVW R1, r1+28(FP)
+ MOVW R0, r2+32(FP)
+ MOVW R2, errno+36(FP)
+ RET
+ok:
+ MOVW R2, r1+28(FP)
+ MOVW R3, r2+32(FP)
+ MOVW R0, errno+36(FP)
+ RET
diff --git a/src/runtime/internal/syscall/asm_linux_ppc64x.s b/src/runtime/internal/syscall/asm_linux_ppc64x.s
new file mode 100644
index 0000000..8cf8737
--- /dev/null
+++ b/src/runtime/internal/syscall/asm_linux_ppc64x.s
@@ -0,0 +1,28 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build linux && (ppc64 || ppc64le)
+
+#include "textflag.h"
+
+// func Syscall6(num, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, errno uintptr)
+TEXT ·Syscall6(SB),NOSPLIT,$0-80
+ MOVD num+0(FP), R9 // syscall entry
+ MOVD a1+8(FP), R3
+ MOVD a2+16(FP), R4
+ MOVD a3+24(FP), R5
+ MOVD a4+32(FP), R6
+ MOVD a5+40(FP), R7
+ MOVD a6+48(FP), R8
+ SYSCALL R9
+ MOVD R0, r2+64(FP) // r2 is not used. Always set to 0.
+ BVC ok
+ MOVD $-1, R4
+ MOVD R4, r1+56(FP)
+ MOVD R3, errno+72(FP)
+ RET
+ok:
+ MOVD R3, r1+56(FP)
+ MOVD R0, errno+72(FP)
+ RET
diff --git a/src/runtime/internal/syscall/asm_linux_riscv64.s b/src/runtime/internal/syscall/asm_linux_riscv64.s
new file mode 100644
index 0000000..a8652fd
--- /dev/null
+++ b/src/runtime/internal/syscall/asm_linux_riscv64.s
@@ -0,0 +1,29 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// func Syscall6(num, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, errno uintptr)
+TEXT ·Syscall6(SB),NOSPLIT,$0-80
+ MOV num+0(FP), A7 // syscall entry
+ MOV a1+8(FP), A0
+ MOV a2+16(FP), A1
+ MOV a3+24(FP), A2
+ MOV a4+32(FP), A3
+ MOV a5+40(FP), A4
+ MOV a6+48(FP), A5
+ ECALL
+ MOV $-4096, T0
+ BLTU T0, A0, err
+ MOV A0, r1+56(FP)
+ MOV A1, r2+64(FP)
+ MOV ZERO, errno+72(FP)
+ RET
+err:
+ MOV $-1, T0
+ MOV T0, r1+56(FP)
+ MOV ZERO, r2+64(FP)
+ SUB A0, ZERO, A0
+ MOV A0, errno+72(FP)
+ RET
diff --git a/src/runtime/internal/syscall/asm_linux_s390x.s b/src/runtime/internal/syscall/asm_linux_s390x.s
new file mode 100644
index 0000000..1b27f29
--- /dev/null
+++ b/src/runtime/internal/syscall/asm_linux_s390x.s
@@ -0,0 +1,28 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// func Syscall6(num, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, errno uintptr)
+TEXT ·Syscall6(SB),NOSPLIT,$0-80
+ MOVD num+0(FP), R1 // syscall entry
+ MOVD a1+8(FP), R2
+ MOVD a2+16(FP), R3
+ MOVD a3+24(FP), R4
+ MOVD a4+32(FP), R5
+ MOVD a5+40(FP), R6
+ MOVD a6+48(FP), R7
+ SYSCALL
+ MOVD $0xfffffffffffff001, R8
+ CMPUBLT R2, R8, ok
+ MOVD $-1, r1+56(FP)
+ MOVD $0, r2+64(FP)
+ NEG R2, R2
+ MOVD R2, errno+72(FP)
+ RET
+ok:
+ MOVD R2, r1+56(FP)
+ MOVD R3, r2+64(FP)
+ MOVD $0, errno+72(FP)
+ RET
diff --git a/src/runtime/internal/syscall/syscall_linux.go b/src/runtime/internal/syscall/syscall_linux.go
new file mode 100644
index 0000000..7f268e8
--- /dev/null
+++ b/src/runtime/internal/syscall/syscall_linux.go
@@ -0,0 +1,39 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package syscall provides the syscall primitives required for the runtime.
+package syscall
+
+import (
+ _ "unsafe" // for go:linkname
+)
+
+// TODO(https://go.dev/issue/51087): This package is incomplete and currently
+// only contains very minimal support for Linux.
+
+// Syscall6 calls system call number 'num' with arguments a1-6.
+func Syscall6(num, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, errno uintptr)
+
+// syscall_RawSyscall6 is a push linkname to export Syscall6 as
+// syscall.RawSyscall6.
+//
+// //go:uintptrkeepalive because the uintptr argument may be converted pointers
+// that need to be kept alive in the caller (this is implied for Syscall6 since
+// it has no body).
+//
+// //go:nosplit because stack copying does not account for uintptrkeepalive, so
+// the stack must not grow. Stack copying cannot blindly assume that all
+// uintptr arguments are pointers, because some values may look like pointers,
+// but not really be pointers, and adjusting their value would break the call.
+//
+// This is a separate wrapper because we can't export one function as two
+// names. The assembly implementations name themselves Syscall6 would not be
+// affected by a linkname.
+//
+//go:uintptrkeepalive
+//go:nosplit
+//go:linkname syscall_RawSyscall6 syscall.RawSyscall6
+func syscall_RawSyscall6(num, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, errno uintptr) {
+ return Syscall6(num, a1, a2, a3, a4, a5, a6)
+}