summaryrefslogtreecommitdiffstats
path: root/src/runtime/race_arm64.s
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-16 19:19:13 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-16 19:19:13 +0000
commitccd992355df7192993c666236047820244914598 (patch)
treef00fea65147227b7743083c6148396f74cd66935 /src/runtime/race_arm64.s
parentInitial commit. (diff)
downloadgolang-1.21-ccd992355df7192993c666236047820244914598.tar.xz
golang-1.21-ccd992355df7192993c666236047820244914598.zip
Adding upstream version 1.21.8.upstream/1.21.8
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/runtime/race_arm64.s')
-rw-r--r--src/runtime/race_arm64.s498
1 files changed, 498 insertions, 0 deletions
diff --git a/src/runtime/race_arm64.s b/src/runtime/race_arm64.s
new file mode 100644
index 0000000..c818345
--- /dev/null
+++ b/src/runtime/race_arm64.s
@@ -0,0 +1,498 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build race
+
+#include "go_asm.h"
+#include "funcdata.h"
+#include "textflag.h"
+#include "tls_arm64.h"
+#include "cgo/abi_arm64.h"
+
+// The following thunks allow calling the gcc-compiled race runtime directly
+// from Go code without going all the way through cgo.
+// First, it's much faster (up to 50% speedup for real Go programs).
+// Second, it eliminates race-related special cases from cgocall and scheduler.
+// Third, in long-term it will allow to remove cyclic runtime/race dependency on cmd/go.
+
+// A brief recap of the arm64 calling convention.
+// Arguments are passed in R0...R7, the rest is on stack.
+// Callee-saved registers are: R19...R28.
+// Temporary registers are: R9...R15
+// SP must be 16-byte aligned.
+
+// When calling racecalladdr, R9 is the call target address.
+
+// The race ctx, ThreadState *thr below, is passed in R0 and loaded in racecalladdr.
+
+// Darwin may return unaligned thread pointer. Align it. (See tls_arm64.s)
+// No-op on other OSes.
+#ifdef TLS_darwin
+#define TP_ALIGN AND $~7, R0
+#else
+#define TP_ALIGN
+#endif
+
+// Load g from TLS. (See tls_arm64.s)
+#define load_g \
+ MRS_TPIDR_R0 \
+ TP_ALIGN \
+ MOVD runtime·tls_g(SB), R11 \
+ MOVD (R0)(R11), g
+
+// func runtime·raceread(addr uintptr)
+// Called from instrumented code.
+// Defined as ABIInternal so as to avoid introducing a wrapper,
+// which would make caller's PC ineffective.
+TEXT runtime·raceread<ABIInternal>(SB), NOSPLIT, $0-8
+ MOVD R0, R1 // addr
+ MOVD LR, R2
+ // void __tsan_read(ThreadState *thr, void *addr, void *pc);
+ MOVD $__tsan_read(SB), R9
+ JMP racecalladdr<>(SB)
+
+// func runtime·RaceRead(addr uintptr)
+TEXT runtime·RaceRead(SB), NOSPLIT, $0-8
+ // This needs to be a tail call, because raceread reads caller pc.
+ JMP runtime·raceread(SB)
+
+// func runtime·racereadpc(void *addr, void *callpc, void *pc)
+TEXT runtime·racereadpc(SB), NOSPLIT, $0-24
+ MOVD addr+0(FP), R1
+ MOVD callpc+8(FP), R2
+ MOVD pc+16(FP), R3
+ // void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
+ MOVD $__tsan_read_pc(SB), R9
+ JMP racecalladdr<>(SB)
+
+// func runtime·racewrite(addr uintptr)
+// Called from instrumented code.
+// Defined as ABIInternal so as to avoid introducing a wrapper,
+// which would make caller's PC ineffective.
+TEXT runtime·racewrite<ABIInternal>(SB), NOSPLIT, $0-8
+ MOVD R0, R1 // addr
+ MOVD LR, R2
+ // void __tsan_write(ThreadState *thr, void *addr, void *pc);
+ MOVD $__tsan_write(SB), R9
+ JMP racecalladdr<>(SB)
+
+// func runtime·RaceWrite(addr uintptr)
+TEXT runtime·RaceWrite(SB), NOSPLIT, $0-8
+ // This needs to be a tail call, because racewrite reads caller pc.
+ JMP runtime·racewrite(SB)
+
+// func runtime·racewritepc(void *addr, void *callpc, void *pc)
+TEXT runtime·racewritepc(SB), NOSPLIT, $0-24
+ MOVD addr+0(FP), R1
+ MOVD callpc+8(FP), R2
+ MOVD pc+16(FP), R3
+ // void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
+ MOVD $__tsan_write_pc(SB), R9
+ JMP racecalladdr<>(SB)
+
+// func runtime·racereadrange(addr, size uintptr)
+// Called from instrumented code.
+// Defined as ABIInternal so as to avoid introducing a wrapper,
+// which would make caller's PC ineffective.
+TEXT runtime·racereadrange<ABIInternal>(SB), NOSPLIT, $0-16
+ MOVD R1, R2 // size
+ MOVD R0, R1 // addr
+ MOVD LR, R3
+ // void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
+ MOVD $__tsan_read_range(SB), R9
+ JMP racecalladdr<>(SB)
+
+// func runtime·RaceReadRange(addr, size uintptr)
+TEXT runtime·RaceReadRange(SB), NOSPLIT, $0-16
+ // This needs to be a tail call, because racereadrange reads caller pc.
+ JMP runtime·racereadrange(SB)
+
+// func runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
+TEXT runtime·racereadrangepc1(SB), NOSPLIT, $0-24
+ MOVD addr+0(FP), R1
+ MOVD size+8(FP), R2
+ MOVD pc+16(FP), R3
+ ADD $4, R3 // pc is function start, tsan wants return address.
+ // void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
+ MOVD $__tsan_read_range(SB), R9
+ JMP racecalladdr<>(SB)
+
+// func runtime·racewriterange(addr, size uintptr)
+// Called from instrumented code.
+// Defined as ABIInternal so as to avoid introducing a wrapper,
+// which would make caller's PC ineffective.
+TEXT runtime·racewriterange<ABIInternal>(SB), NOSPLIT, $0-16
+ MOVD R1, R2 // size
+ MOVD R0, R1 // addr
+ MOVD LR, R3
+ // void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
+ MOVD $__tsan_write_range(SB), R9
+ JMP racecalladdr<>(SB)
+
+// func runtime·RaceWriteRange(addr, size uintptr)
+TEXT runtime·RaceWriteRange(SB), NOSPLIT, $0-16
+ // This needs to be a tail call, because racewriterange reads caller pc.
+ JMP runtime·racewriterange(SB)
+
+// func runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
+TEXT runtime·racewriterangepc1(SB), NOSPLIT, $0-24
+ MOVD addr+0(FP), R1
+ MOVD size+8(FP), R2
+ MOVD pc+16(FP), R3
+ ADD $4, R3 // pc is function start, tsan wants return address.
+ // void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
+ MOVD $__tsan_write_range(SB), R9
+ JMP racecalladdr<>(SB)
+
+// If addr (R1) is out of range, do nothing.
+// Otherwise, setup goroutine context and invoke racecall. Other arguments already set.
+TEXT racecalladdr<>(SB), NOSPLIT, $0-0
+ load_g
+ MOVD g_racectx(g), R0
+ // Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
+ MOVD runtime·racearenastart(SB), R10
+ CMP R10, R1
+ BLT data
+ MOVD runtime·racearenaend(SB), R10
+ CMP R10, R1
+ BLT call
+data:
+ MOVD runtime·racedatastart(SB), R10
+ CMP R10, R1
+ BLT ret
+ MOVD runtime·racedataend(SB), R10
+ CMP R10, R1
+ BGT ret
+call:
+ JMP racecall<>(SB)
+ret:
+ RET
+
+// func runtime·racefuncenter(pc uintptr)
+// Called from instrumented code.
+TEXT runtime·racefuncenter<ABIInternal>(SB), NOSPLIT, $0-8
+ MOVD R0, R9 // callpc
+ JMP racefuncenter<>(SB)
+
+// Common code for racefuncenter
+// R9 = caller's return address
+TEXT racefuncenter<>(SB), NOSPLIT, $0-0
+ load_g
+ MOVD g_racectx(g), R0 // goroutine racectx
+ MOVD R9, R1
+ // void __tsan_func_enter(ThreadState *thr, void *pc);
+ MOVD $__tsan_func_enter(SB), R9
+ BL racecall<>(SB)
+ RET
+
+// func runtime·racefuncexit()
+// Called from instrumented code.
+TEXT runtime·racefuncexit<ABIInternal>(SB), NOSPLIT, $0-0
+ load_g
+ MOVD g_racectx(g), R0 // race context
+ // void __tsan_func_exit(ThreadState *thr);
+ MOVD $__tsan_func_exit(SB), R9
+ JMP racecall<>(SB)
+
+// Atomic operations for sync/atomic package.
+// R3 = addr of arguments passed to this function, it can
+// be fetched at 40(RSP) in racecallatomic after two times BL
+// R0, R1, R2 set in racecallatomic
+
+// Load
+TEXT sync∕atomic·LoadInt32(SB), NOSPLIT, $0-12
+ GO_ARGS
+ MOVD $__tsan_go_atomic32_load(SB), R9
+ BL racecallatomic<>(SB)
+ RET
+
+TEXT sync∕atomic·LoadInt64(SB), NOSPLIT, $0-16
+ GO_ARGS
+ MOVD $__tsan_go_atomic64_load(SB), R9
+ BL racecallatomic<>(SB)
+ RET
+
+TEXT sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
+ GO_ARGS
+ JMP sync∕atomic·LoadInt32(SB)
+
+TEXT sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
+ GO_ARGS
+ JMP sync∕atomic·LoadInt64(SB)
+
+TEXT sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
+ GO_ARGS
+ JMP sync∕atomic·LoadInt64(SB)
+
+TEXT sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
+ GO_ARGS
+ JMP sync∕atomic·LoadInt64(SB)
+
+// Store
+TEXT sync∕atomic·StoreInt32(SB), NOSPLIT, $0-12
+ GO_ARGS
+ MOVD $__tsan_go_atomic32_store(SB), R9
+ BL racecallatomic<>(SB)
+ RET
+
+TEXT sync∕atomic·StoreInt64(SB), NOSPLIT, $0-16
+ GO_ARGS
+ MOVD $__tsan_go_atomic64_store(SB), R9
+ BL racecallatomic<>(SB)
+ RET
+
+TEXT sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
+ GO_ARGS
+ JMP sync∕atomic·StoreInt32(SB)
+
+TEXT sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
+ GO_ARGS
+ JMP sync∕atomic·StoreInt64(SB)
+
+TEXT sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
+ GO_ARGS
+ JMP sync∕atomic·StoreInt64(SB)
+
+// Swap
+TEXT sync∕atomic·SwapInt32(SB), NOSPLIT, $0-20
+ GO_ARGS
+ MOVD $__tsan_go_atomic32_exchange(SB), R9
+ BL racecallatomic<>(SB)
+ RET
+
+TEXT sync∕atomic·SwapInt64(SB), NOSPLIT, $0-24
+ GO_ARGS
+ MOVD $__tsan_go_atomic64_exchange(SB), R9
+ BL racecallatomic<>(SB)
+ RET
+
+TEXT sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
+ GO_ARGS
+ JMP sync∕atomic·SwapInt32(SB)
+
+TEXT sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
+ GO_ARGS
+ JMP sync∕atomic·SwapInt64(SB)
+
+TEXT sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
+ GO_ARGS
+ JMP sync∕atomic·SwapInt64(SB)
+
+// Add
+TEXT sync∕atomic·AddInt32(SB), NOSPLIT, $0-20
+ GO_ARGS
+ MOVD $__tsan_go_atomic32_fetch_add(SB), R9
+ BL racecallatomic<>(SB)
+ MOVW add+8(FP), R0 // convert fetch_add to add_fetch
+ MOVW ret+16(FP), R1
+ ADD R0, R1, R0
+ MOVW R0, ret+16(FP)
+ RET
+
+TEXT sync∕atomic·AddInt64(SB), NOSPLIT, $0-24
+ GO_ARGS
+ MOVD $__tsan_go_atomic64_fetch_add(SB), R9
+ BL racecallatomic<>(SB)
+ MOVD add+8(FP), R0 // convert fetch_add to add_fetch
+ MOVD ret+16(FP), R1
+ ADD R0, R1, R0
+ MOVD R0, ret+16(FP)
+ RET
+
+TEXT sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
+ GO_ARGS
+ JMP sync∕atomic·AddInt32(SB)
+
+TEXT sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
+ GO_ARGS
+ JMP sync∕atomic·AddInt64(SB)
+
+TEXT sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
+ GO_ARGS
+ JMP sync∕atomic·AddInt64(SB)
+
+// CompareAndSwap
+TEXT sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-17
+ GO_ARGS
+ MOVD $__tsan_go_atomic32_compare_exchange(SB), R9
+ BL racecallatomic<>(SB)
+ RET
+
+TEXT sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-25
+ GO_ARGS
+ MOVD $__tsan_go_atomic64_compare_exchange(SB), R9
+ BL racecallatomic<>(SB)
+ RET
+
+TEXT sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
+ GO_ARGS
+ JMP sync∕atomic·CompareAndSwapInt32(SB)
+
+TEXT sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
+ GO_ARGS
+ JMP sync∕atomic·CompareAndSwapInt64(SB)
+
+TEXT sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
+ GO_ARGS
+ JMP sync∕atomic·CompareAndSwapInt64(SB)
+
+// Generic atomic operation implementation.
+// R9 = addr of target function
+TEXT racecallatomic<>(SB), NOSPLIT, $0
+ // Set up these registers
+ // R0 = *ThreadState
+ // R1 = caller pc
+ // R2 = pc
+ // R3 = addr of incoming arg list
+
+ // Trigger SIGSEGV early.
+ MOVD 40(RSP), R3 // 1st arg is addr. after two times BL, get it at 40(RSP)
+ MOVB (R3), R13 // segv here if addr is bad
+ // Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
+ MOVD runtime·racearenastart(SB), R10
+ CMP R10, R3
+ BLT racecallatomic_data
+ MOVD runtime·racearenaend(SB), R10
+ CMP R10, R3
+ BLT racecallatomic_ok
+racecallatomic_data:
+ MOVD runtime·racedatastart(SB), R10
+ CMP R10, R3
+ BLT racecallatomic_ignore
+ MOVD runtime·racedataend(SB), R10
+ CMP R10, R3
+ BGE racecallatomic_ignore
+racecallatomic_ok:
+ // Addr is within the good range, call the atomic function.
+ load_g
+ MOVD g_racectx(g), R0 // goroutine context
+ MOVD 16(RSP), R1 // caller pc
+ MOVD R9, R2 // pc
+ ADD $40, RSP, R3
+ JMP racecall<>(SB) // does not return
+racecallatomic_ignore:
+ // Addr is outside the good range.
+ // Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
+ // An attempt to synchronize on the address would cause crash.
+ MOVD R9, R21 // remember the original function
+ MOVD $__tsan_go_ignore_sync_begin(SB), R9
+ load_g
+ MOVD g_racectx(g), R0 // goroutine context
+ BL racecall<>(SB)
+ MOVD R21, R9 // restore the original function
+ // Call the atomic function.
+ // racecall will call LLVM race code which might clobber R28 (g)
+ load_g
+ MOVD g_racectx(g), R0 // goroutine context
+ MOVD 16(RSP), R1 // caller pc
+ MOVD R9, R2 // pc
+ ADD $40, RSP, R3 // arguments
+ BL racecall<>(SB)
+ // Call __tsan_go_ignore_sync_end.
+ MOVD $__tsan_go_ignore_sync_end(SB), R9
+ MOVD g_racectx(g), R0 // goroutine context
+ BL racecall<>(SB)
+ RET
+
+// func runtime·racecall(void(*f)(...), ...)
+// Calls C function f from race runtime and passes up to 4 arguments to it.
+// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
+TEXT runtime·racecall(SB), NOSPLIT, $0-0
+ MOVD fn+0(FP), R9
+ MOVD arg0+8(FP), R0
+ MOVD arg1+16(FP), R1
+ MOVD arg2+24(FP), R2
+ MOVD arg3+32(FP), R3
+ JMP racecall<>(SB)
+
+// Switches SP to g0 stack and calls (R9). Arguments already set.
+// Clobbers R19, R20.
+TEXT racecall<>(SB), NOSPLIT|NOFRAME, $0-0
+ MOVD g_m(g), R10
+ // Switch to g0 stack.
+ MOVD RSP, R19 // callee-saved, preserved across the CALL
+ MOVD R30, R20 // callee-saved, preserved across the CALL
+ MOVD m_g0(R10), R11
+ CMP R11, g
+ BEQ call // already on g0
+ MOVD (g_sched+gobuf_sp)(R11), R12
+ MOVD R12, RSP
+call:
+ BL R9
+ MOVD R19, RSP
+ JMP (R20)
+
+// C->Go callback thunk that allows to call runtime·racesymbolize from C code.
+// Direct Go->C race call has only switched SP, finish g->g0 switch by setting correct g.
+// The overall effect of Go->C->Go call chain is similar to that of mcall.
+// R0 contains command code. R1 contains command-specific context.
+// See racecallback for command codes.
+TEXT runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0
+ // Handle command raceGetProcCmd (0) here.
+ // First, code below assumes that we are on curg, while raceGetProcCmd
+ // can be executed on g0. Second, it is called frequently, so will
+ // benefit from this fast path.
+ CBNZ R0, rest
+ MOVD g, R13
+#ifdef TLS_darwin
+ MOVD R27, R12 // save R27 a.k.a. REGTMP (callee-save in C). load_g clobbers it
+#endif
+ load_g
+#ifdef TLS_darwin
+ MOVD R12, R27
+#endif
+ MOVD g_m(g), R0
+ MOVD m_p(R0), R0
+ MOVD p_raceprocctx(R0), R0
+ MOVD R0, (R1)
+ MOVD R13, g
+ JMP (LR)
+rest:
+ // Save callee-saved registers (Go code won't respect that).
+ // 8(RSP) and 16(RSP) are for args passed through racecallback
+ SUB $176, RSP
+ MOVD LR, 0(RSP)
+
+ SAVE_R19_TO_R28(8*3)
+ SAVE_F8_TO_F15(8*13)
+ MOVD R29, (8*21)(RSP)
+ // Set g = g0.
+ // load_g will clobber R0, Save R0
+ MOVD R0, R13
+ load_g
+ // restore R0
+ MOVD R13, R0
+ MOVD g_m(g), R13
+ MOVD m_g0(R13), R14
+ CMP R14, g
+ BEQ noswitch // branch if already on g0
+ MOVD R14, g
+
+ MOVD R0, 8(RSP) // func arg
+ MOVD R1, 16(RSP) // func arg
+ BL runtime·racecallback(SB)
+
+ // All registers are smashed after Go code, reload.
+ MOVD g_m(g), R13
+ MOVD m_curg(R13), g // g = m->curg
+ret:
+ // Restore callee-saved registers.
+ MOVD 0(RSP), LR
+ MOVD (8*21)(RSP), R29
+ RESTORE_F8_TO_F15(8*13)
+ RESTORE_R19_TO_R28(8*3)
+ ADD $176, RSP
+ JMP (LR)
+
+noswitch:
+ // already on g0
+ MOVD R0, 8(RSP) // func arg
+ MOVD R1, 16(RSP) // func arg
+ BL runtime·racecallback(SB)
+ JMP ret
+
+#ifndef TLSG_IS_VARIABLE
+// tls_g, g value for each thread in TLS
+GLOBL runtime·tls_g+0(SB), TLSBSS+DUPOK, $8
+#endif