summaryrefslogtreecommitdiffstats
path: root/src/runtime/internal/atomic/atomic_386.s
diff options
context:
space:
mode:
Diffstat (limited to 'src/runtime/internal/atomic/atomic_386.s')
-rw-r--r--src/runtime/internal/atomic/atomic_386.s285
1 files changed, 285 insertions, 0 deletions
diff --git a/src/runtime/internal/atomic/atomic_386.s b/src/runtime/internal/atomic/atomic_386.s
new file mode 100644
index 0000000..724d515
--- /dev/null
+++ b/src/runtime/internal/atomic/atomic_386.s
@@ -0,0 +1,285 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+#include "funcdata.h"
+
+// bool Cas(int32 *val, int32 old, int32 new)
+// Atomically:
+// if(*val == old){
+// *val = new;
+// return 1;
+// }else
+// return 0;
+TEXT ·Cas(SB), NOSPLIT, $0-13
+ MOVL ptr+0(FP), BX
+ MOVL old+4(FP), AX
+ MOVL new+8(FP), CX
+ LOCK
+ CMPXCHGL CX, 0(BX)
+ SETEQ ret+12(FP)
+ RET
+
+TEXT ·Casint32(SB), NOSPLIT, $0-13
+ JMP ·Cas(SB)
+
+TEXT ·Casint64(SB), NOSPLIT, $0-21
+ JMP ·Cas64(SB)
+
+TEXT ·Casuintptr(SB), NOSPLIT, $0-13
+ JMP ·Cas(SB)
+
+TEXT ·CasRel(SB), NOSPLIT, $0-13
+ JMP ·Cas(SB)
+
+TEXT ·Loaduintptr(SB), NOSPLIT, $0-8
+ JMP ·Load(SB)
+
+TEXT ·Loaduint(SB), NOSPLIT, $0-8
+ JMP ·Load(SB)
+
+TEXT ·Storeint32(SB), NOSPLIT, $0-8
+ JMP ·Store(SB)
+
+TEXT ·Storeint64(SB), NOSPLIT, $0-12
+ JMP ·Store64(SB)
+
+TEXT ·Storeuintptr(SB), NOSPLIT, $0-8
+ JMP ·Store(SB)
+
+TEXT ·Xadduintptr(SB), NOSPLIT, $0-12
+ JMP ·Xadd(SB)
+
+TEXT ·Loadint32(SB), NOSPLIT, $0-8
+ JMP ·Load(SB)
+
+TEXT ·Loadint64(SB), NOSPLIT, $0-12
+ JMP ·Load64(SB)
+
+TEXT ·Xaddint32(SB), NOSPLIT, $0-12
+ JMP ·Xadd(SB)
+
+TEXT ·Xaddint64(SB), NOSPLIT, $0-20
+ JMP ·Xadd64(SB)
+
+// bool ·Cas64(uint64 *val, uint64 old, uint64 new)
+// Atomically:
+// if(*val == old){
+// *val = new;
+// return 1;
+// } else {
+// return 0;
+// }
+TEXT ·Cas64(SB), NOSPLIT, $0-21
+ NO_LOCAL_POINTERS
+ MOVL ptr+0(FP), BP
+ TESTL $7, BP
+ JZ 2(PC)
+ CALL ·panicUnaligned(SB)
+ MOVL old_lo+4(FP), AX
+ MOVL old_hi+8(FP), DX
+ MOVL new_lo+12(FP), BX
+ MOVL new_hi+16(FP), CX
+ LOCK
+ CMPXCHG8B 0(BP)
+ SETEQ ret+20(FP)
+ RET
+
+// bool Casp1(void **p, void *old, void *new)
+// Atomically:
+// if(*p == old){
+// *p = new;
+// return 1;
+// }else
+// return 0;
+TEXT ·Casp1(SB), NOSPLIT, $0-13
+ MOVL ptr+0(FP), BX
+ MOVL old+4(FP), AX
+ MOVL new+8(FP), CX
+ LOCK
+ CMPXCHGL CX, 0(BX)
+ SETEQ ret+12(FP)
+ RET
+
+// uint32 Xadd(uint32 volatile *val, int32 delta)
+// Atomically:
+// *val += delta;
+// return *val;
+TEXT ·Xadd(SB), NOSPLIT, $0-12
+ MOVL ptr+0(FP), BX
+ MOVL delta+4(FP), AX
+ MOVL AX, CX
+ LOCK
+ XADDL AX, 0(BX)
+ ADDL CX, AX
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT ·Xadd64(SB), NOSPLIT, $0-20
+ NO_LOCAL_POINTERS
+ // no XADDQ so use CMPXCHG8B loop
+ MOVL ptr+0(FP), BP
+ TESTL $7, BP
+ JZ 2(PC)
+ CALL ·panicUnaligned(SB)
+ // DI:SI = delta
+ MOVL delta_lo+4(FP), SI
+ MOVL delta_hi+8(FP), DI
+ // DX:AX = *addr
+ MOVL 0(BP), AX
+ MOVL 4(BP), DX
+addloop:
+ // CX:BX = DX:AX (*addr) + DI:SI (delta)
+ MOVL AX, BX
+ MOVL DX, CX
+ ADDL SI, BX
+ ADCL DI, CX
+
+ // if *addr == DX:AX {
+ // *addr = CX:BX
+ // } else {
+ // DX:AX = *addr
+ // }
+ // all in one instruction
+ LOCK
+ CMPXCHG8B 0(BP)
+
+ JNZ addloop
+
+ // success
+ // return CX:BX
+ MOVL BX, ret_lo+12(FP)
+ MOVL CX, ret_hi+16(FP)
+ RET
+
+TEXT ·Xchg(SB), NOSPLIT, $0-12
+ MOVL ptr+0(FP), BX
+ MOVL new+4(FP), AX
+ XCHGL AX, 0(BX)
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT ·Xchgint32(SB), NOSPLIT, $0-12
+ JMP ·Xchg(SB)
+
+TEXT ·Xchgint64(SB), NOSPLIT, $0-20
+ JMP ·Xchg64(SB)
+
+TEXT ·Xchguintptr(SB), NOSPLIT, $0-12
+ JMP ·Xchg(SB)
+
+TEXT ·Xchg64(SB),NOSPLIT,$0-20
+ NO_LOCAL_POINTERS
+ // no XCHGQ so use CMPXCHG8B loop
+ MOVL ptr+0(FP), BP
+ TESTL $7, BP
+ JZ 2(PC)
+ CALL ·panicUnaligned(SB)
+ // CX:BX = new
+ MOVL new_lo+4(FP), BX
+ MOVL new_hi+8(FP), CX
+ // DX:AX = *addr
+ MOVL 0(BP), AX
+ MOVL 4(BP), DX
+swaploop:
+ // if *addr == DX:AX
+ // *addr = CX:BX
+ // else
+ // DX:AX = *addr
+ // all in one instruction
+ LOCK
+ CMPXCHG8B 0(BP)
+ JNZ swaploop
+
+ // success
+ // return DX:AX
+ MOVL AX, ret_lo+12(FP)
+ MOVL DX, ret_hi+16(FP)
+ RET
+
+TEXT ·StorepNoWB(SB), NOSPLIT, $0-8
+ MOVL ptr+0(FP), BX
+ MOVL val+4(FP), AX
+ XCHGL AX, 0(BX)
+ RET
+
+TEXT ·Store(SB), NOSPLIT, $0-8
+ MOVL ptr+0(FP), BX
+ MOVL val+4(FP), AX
+ XCHGL AX, 0(BX)
+ RET
+
+TEXT ·StoreRel(SB), NOSPLIT, $0-8
+ JMP ·Store(SB)
+
+TEXT ·StoreReluintptr(SB), NOSPLIT, $0-8
+ JMP ·Store(SB)
+
+// uint64 atomicload64(uint64 volatile* addr);
+TEXT ·Load64(SB), NOSPLIT, $0-12
+ NO_LOCAL_POINTERS
+ MOVL ptr+0(FP), AX
+ TESTL $7, AX
+ JZ 2(PC)
+ CALL ·panicUnaligned(SB)
+ MOVQ (AX), M0
+ MOVQ M0, ret+4(FP)
+ EMMS
+ RET
+
+// void ·Store64(uint64 volatile* addr, uint64 v);
+TEXT ·Store64(SB), NOSPLIT, $0-12
+ NO_LOCAL_POINTERS
+ MOVL ptr+0(FP), AX
+ TESTL $7, AX
+ JZ 2(PC)
+ CALL ·panicUnaligned(SB)
+ // MOVQ and EMMS were introduced on the Pentium MMX.
+ MOVQ val+4(FP), M0
+ MOVQ M0, (AX)
+ EMMS
+ // This is essentially a no-op, but it provides required memory fencing.
+ // It can be replaced with MFENCE, but MFENCE was introduced only on the Pentium4 (SSE2).
+ XORL AX, AX
+ LOCK
+ XADDL AX, (SP)
+ RET
+
+// void ·Or8(byte volatile*, byte);
+TEXT ·Or8(SB), NOSPLIT, $0-5
+ MOVL ptr+0(FP), AX
+ MOVB val+4(FP), BX
+ LOCK
+ ORB BX, (AX)
+ RET
+
+// void ·And8(byte volatile*, byte);
+TEXT ·And8(SB), NOSPLIT, $0-5
+ MOVL ptr+0(FP), AX
+ MOVB val+4(FP), BX
+ LOCK
+ ANDB BX, (AX)
+ RET
+
+TEXT ·Store8(SB), NOSPLIT, $0-5
+ MOVL ptr+0(FP), BX
+ MOVB val+4(FP), AX
+ XCHGB AX, 0(BX)
+ RET
+
+// func Or(addr *uint32, v uint32)
+TEXT ·Or(SB), NOSPLIT, $0-8
+ MOVL ptr+0(FP), AX
+ MOVL val+4(FP), BX
+ LOCK
+ ORL BX, (AX)
+ RET
+
+// func And(addr *uint32, v uint32)
+TEXT ·And(SB), NOSPLIT, $0-8
+ MOVL ptr+0(FP), AX
+ MOVL val+4(FP), BX
+ LOCK
+ ANDL BX, (AX)
+ RET