Diffstat (limited to 'src/runtime/asm_arm64.s')
 src/runtime/asm_arm64.s | 1525 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 1525 insertions(+), 0 deletions(-)
diff --git a/src/runtime/asm_arm64.s b/src/runtime/asm_arm64.s
new file mode 100644
index 0000000..7eb5bcf
--- /dev/null
+++ b/src/runtime/asm_arm64.s
@@ -0,0 +1,1525 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "go_tls.h"
+#include "tls_arm64.h"
+#include "funcdata.h"
+#include "textflag.h"
+
+TEXT runtime·rt0_go(SB),NOSPLIT|TOPFRAME,$0
+ // SP = stack; R0 = argc; R1 = argv
+
+ SUB $32, RSP
+ MOVW R0, 8(RSP) // argc
+ MOVD R1, 16(RSP) // argv
+
+#ifdef TLS_darwin
+ // Initialize TLS.
+ MOVD ZR, g // clear g, make sure it's not junk.
+ SUB $32, RSP
+ MRS_TPIDR_R0
+ AND $~7, R0
+ MOVD R0, 16(RSP) // arg2: TLS base
+ MOVD $runtime·tls_g(SB), R2
+ MOVD R2, 8(RSP) // arg1: &tlsg
+ BL ·tlsinit(SB)
+ ADD $32, RSP
+#endif
+
+ // create istack out of the given (operating system) stack.
+ // _cgo_init may update stackguard.
+ MOVD $runtime·g0(SB), g
+ MOVD RSP, R7
+ MOVD $(-64*1024)(R7), R0
+ MOVD R0, g_stackguard0(g)
+ MOVD R0, g_stackguard1(g)
+ MOVD R0, (g_stack+stack_lo)(g)
+ MOVD R7, (g_stack+stack_hi)(g)
+
+ // if there is a _cgo_init, call it using the gcc ABI.
+ MOVD _cgo_init(SB), R12
+ CBZ R12, nocgo
+
+#ifdef GOOS_android
+ MRS_TPIDR_R0 // load TLS base pointer
+ MOVD R0, R3 // arg 3: TLS base pointer
+ MOVD $runtime·tls_g(SB), R2 // arg 2: &tls_g
+#else
+ MOVD $0, R2 // arg 2: not used when using platform's TLS
+#endif
+ MOVD $setg_gcc<>(SB), R1 // arg 1: setg
+ MOVD g, R0 // arg 0: G
+ SUB $16, RSP // reserve 16 bytes for sp-8 where fp may be saved.
+ BL (R12)
+ ADD $16, RSP
+
+nocgo:
+ BL runtime·save_g(SB)
+ // update stackguard after _cgo_init
+ MOVD (g_stack+stack_lo)(g), R0
+ ADD $const__StackGuard, R0
+ MOVD R0, g_stackguard0(g)
+ MOVD R0, g_stackguard1(g)
+
+ // set the per-goroutine and per-mach "registers"
+ MOVD $runtime·m0(SB), R0
+
+ // save m->g0 = g0
+ MOVD g, m_g0(R0)
+ // save m0 to g0->m
+ MOVD R0, g_m(g)
+
+ BL runtime·check(SB)
+
+#ifdef GOOS_windows
+ BL runtime·wintls(SB)
+#endif
+
+ MOVW 8(RSP), R0 // copy argc
+ MOVW R0, -8(RSP)
+ MOVD 16(RSP), R0 // copy argv
+ MOVD R0, 0(RSP)
+ BL runtime·args(SB)
+ BL runtime·osinit(SB)
+ BL runtime·schedinit(SB)
+
+ // create a new goroutine to start program
+ MOVD $runtime·mainPC(SB), R0 // entry
+ SUB $16, RSP
+ MOVD R0, 8(RSP) // arg
+ MOVD $0, 0(RSP) // dummy LR
+ BL runtime·newproc(SB)
+ ADD $16, RSP
+
+ // start this M
+ BL runtime·mstart(SB)
+
+ // Prevent dead-code elimination of debugCallV2, which is
+ // intended to be called by debuggers.
+ MOVD $runtime·debugCallV2<ABIInternal>(SB), R0
+
+ MOVD $0, R0
+ MOVD R0, (R0) // boom
+ UNDEF
+
+DATA runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)
+GLOBL runtime·mainPC(SB),RODATA,$8
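+
+// mainPC is laid out as a one-word funcval so newproc can treat
+// runtime·main like an ordinary Go function value. A minimal Go-level
+// sketch of the type being emulated (matching runtime2.go):
+//
+//	type funcval struct {
+//		fn uintptr // code pointer; here, runtime·main
+//	}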
+
+// Windows ARM64 needs an immediate 0xf000 argument.
+// See go.dev/issues/53837.
+#ifdef GOOS_windows
+#define BREAK BRK $0xf000
+#else
+#define BREAK BRK
+#endif
+
+TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0
+ BREAK
+ RET
+
+TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0
+ RET
+
+TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME,$0
+ BL runtime·mstart0(SB)
+ RET // not reached
+
+/*
+ * go-routine
+ */
+
+// void gogo(Gobuf*)
+// restore state from Gobuf; longjmp
+TEXT runtime·gogo(SB), NOSPLIT|NOFRAME, $0-8
+ MOVD buf+0(FP), R5
+ MOVD gobuf_g(R5), R6
+ MOVD 0(R6), R4 // make sure g != nil
+ B gogo<>(SB)
+
+TEXT gogo<>(SB), NOSPLIT|NOFRAME, $0
+ MOVD R6, g
+ BL runtime·save_g(SB)
+
+ MOVD gobuf_sp(R5), R0
+ MOVD R0, RSP
+ MOVD gobuf_bp(R5), R29
+ MOVD gobuf_lr(R5), LR
+ MOVD gobuf_ret(R5), R0
+ MOVD gobuf_ctxt(R5), R26
+ MOVD $0, gobuf_sp(R5)
+ MOVD $0, gobuf_bp(R5)
+ MOVD $0, gobuf_ret(R5)
+ MOVD $0, gobuf_lr(R5)
+ MOVD $0, gobuf_ctxt(R5)
+ CMP ZR, ZR // set condition codes for == test, needed by stack split
+ MOVD gobuf_pc(R5), R6
+ B (R6)
+
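+// The gobuf_* offsets used by gogo above come from runtime.gobuf. A
+// Go-level sketch of that layout (field order as in runtime2.go):
+//
+//	type gobuf struct {
+//		sp   uintptr
+//		pc   uintptr
+//		g    guintptr
+//		ctxt unsafe.Pointer
+//		ret  uintptr
+//		lr   uintptr
+//		bp   uintptr // for framepointer-enabled architectures
+//	}
+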
+// void mcall(fn func(*g))
+// Switch to m->g0's stack, call fn(g).
+// Fn must never return. It should gogo(&g->sched)
+// to keep running g.
+TEXT runtime·mcall<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-8
+ MOVD R0, R26 // context
+
+ // Save caller state in g->sched
+ MOVD RSP, R0
+ MOVD R0, (g_sched+gobuf_sp)(g)
+ MOVD R29, (g_sched+gobuf_bp)(g)
+ MOVD LR, (g_sched+gobuf_pc)(g)
+ MOVD $0, (g_sched+gobuf_lr)(g)
+
+ // Switch to m->g0 & its stack, call fn.
+ MOVD g, R3
+ MOVD g_m(g), R8
+ MOVD m_g0(R8), g
+ BL runtime·save_g(SB)
+ CMP g, R3
+ BNE 2(PC)
+ B runtime·badmcall(SB)
+
+ MOVD (g_sched+gobuf_sp)(g), R0
+ MOVD R0, RSP // sp = m->g0->sched.sp
+ MOVD (g_sched+gobuf_bp)(g), R29
+ MOVD R3, R0 // arg = g
+ MOVD $0, -16(RSP) // dummy LR
+ SUB $16, RSP
+ MOVD 0(R26), R4 // code pointer
+ BL (R4)
+ B runtime·badmcall2(SB)
+
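+// A hedged example of a typical mcall user: gopark hands control to
+// the scheduler on g0 via
+//
+//	mcall(park_m) // park_m(gp) runs on g0 and never returns here
+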
+// systemstack_switch is a dummy routine that systemstack leaves at the bottom
+// of the G stack. We need to distinguish the routine that
+// lives at the bottom of the G stack from the one that lives
+// at the top of the system stack because the one at the top of
+// the system stack terminates the stack walk (see topofstack()).
+TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
+ UNDEF
+ BL (LR) // make sure this function is not leaf
+ RET
+
+// func systemstack(fn func())
+TEXT runtime·systemstack(SB), NOSPLIT, $0-8
+ MOVD fn+0(FP), R3 // R3 = fn
+ MOVD R3, R26 // context
+ MOVD g_m(g), R4 // R4 = m
+
+ MOVD m_gsignal(R4), R5 // R5 = gsignal
+ CMP g, R5
+ BEQ noswitch
+
+ MOVD m_g0(R4), R5 // R5 = g0
+ CMP g, R5
+ BEQ noswitch
+
+ MOVD m_curg(R4), R6
+ CMP g, R6
+ BEQ switch
+
+ // Bad: g is not gsignal, not g0, not curg. What is it?
+ // Hide call from linker nosplit analysis.
+ MOVD $runtime·badsystemstack(SB), R3
+ BL (R3)
+ B runtime·abort(SB)
+
+switch:
+ // save our state in g->sched. Pretend to
+ // be systemstack_switch if the G stack is scanned.
+ BL gosave_systemstack_switch<>(SB)
+
+ // switch to g0
+ MOVD R5, g
+ BL runtime·save_g(SB)
+ MOVD (g_sched+gobuf_sp)(g), R3
+ MOVD R3, RSP
+ MOVD (g_sched+gobuf_bp)(g), R29
+
+ // call target function
+ MOVD 0(R26), R3 // code pointer
+ BL (R3)
+
+ // switch back to g
+ MOVD g_m(g), R3
+ MOVD m_curg(R3), g
+ BL runtime·save_g(SB)
+ MOVD (g_sched+gobuf_sp)(g), R0
+ MOVD R0, RSP
+ MOVD (g_sched+gobuf_bp)(g), R29
+ MOVD $0, (g_sched+gobuf_sp)(g)
+ MOVD $0, (g_sched+gobuf_bp)(g)
+ RET
+
+noswitch:
+ // already on m stack, just call directly
+ // Using a tail call here cleans up tracebacks since we won't stop
+ // at an intermediate systemstack.
+ MOVD 0(R26), R3 // code pointer
+ MOVD.P 16(RSP), R30 // restore LR
+ SUB $8, RSP, R29 // restore FP
+ B (R3)
+
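+// A hedged Go-level illustration of the systemstack contract: fn runs
+// on m->g0 (or directly, when already on a system stack), then control
+// returns to the caller.
+//
+//	systemstack(func() {
+//		// Runs with the large g0 stack; must not grow the
+//		// goroutine stack.
+//	})
+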
+/*
+ * support for morestack
+ */
+
+// Called during function prolog when more stack is needed.
+// Caller has already loaded:
+// R3 prolog's LR (R30)
+//
+// The traceback routines see morestack on a g0 as being
+// the top of a stack (for example, morestack calling newstack
+// calling the scheduler calling newm calling gc), so we must
+// record an argument size. For that purpose, it has no arguments.
+TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
+ // Cannot grow scheduler stack (m->g0).
+ MOVD g_m(g), R8
+ MOVD m_g0(R8), R4
+ CMP g, R4
+ BNE 3(PC)
+ BL runtime·badmorestackg0(SB)
+ B runtime·abort(SB)
+
+ // Cannot grow signal stack (m->gsignal).
+ MOVD m_gsignal(R8), R4
+ CMP g, R4
+ BNE 3(PC)
+ BL runtime·badmorestackgsignal(SB)
+ B runtime·abort(SB)
+
+ // Called from f.
+ // Set g->sched to context in f
+ MOVD RSP, R0
+ MOVD R0, (g_sched+gobuf_sp)(g)
+ MOVD R29, (g_sched+gobuf_bp)(g)
+ MOVD LR, (g_sched+gobuf_pc)(g)
+ MOVD R3, (g_sched+gobuf_lr)(g)
+ MOVD R26, (g_sched+gobuf_ctxt)(g)
+
+ // Called from f.
+ // Set m->morebuf to f's callers.
+ MOVD R3, (m_morebuf+gobuf_pc)(R8) // f's caller's PC
+ MOVD RSP, R0
+ MOVD R0, (m_morebuf+gobuf_sp)(R8) // f's caller's RSP
+ MOVD g, (m_morebuf+gobuf_g)(R8)
+
+ // Call newstack on m->g0's stack.
+ MOVD m_g0(R8), g
+ BL runtime·save_g(SB)
+ MOVD (g_sched+gobuf_sp)(g), R0
+ MOVD R0, RSP
+ MOVD (g_sched+gobuf_bp)(g), R29
+ MOVD.W $0, -16(RSP) // create a call frame on g0 (saved LR; keep 16-aligned)
+ BL runtime·newstack(SB)
+
+ // Not reached, but make sure the return PC from the call to newstack
+ // is still in this function, and not the beginning of the next.
+ UNDEF
+
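+// Conceptually, the compiler-generated prologue that ends up here
+// performs the following check (a Go-level sketch; the real test is
+// emitted as machine code comparing RSP against g.stackguard0):
+//
+//	if sp <= g.stackguard0 {
+//		morestack()
+//	}
+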
+TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
+ // Force SPWRITE. This function doesn't actually write SP,
+ // but it is called with a special calling convention where
+ // the caller doesn't save LR on stack but passes it as a
+ // register (R3), and the unwinder currently doesn't understand.
+ // Make it SPWRITE to stop unwinding. (See issue 54332)
+ MOVD RSP, RSP
+
+ MOVW $0, R26
+ B runtime·morestack(SB)
+
+// spillArgs stores return values from registers to a *internal/abi.RegArgs in R20.
+TEXT ·spillArgs(SB),NOSPLIT,$0-0
+ STP (R0, R1), (0*8)(R20)
+ STP (R2, R3), (2*8)(R20)
+ STP (R4, R5), (4*8)(R20)
+ STP (R6, R7), (6*8)(R20)
+ STP (R8, R9), (8*8)(R20)
+ STP (R10, R11), (10*8)(R20)
+ STP (R12, R13), (12*8)(R20)
+ STP (R14, R15), (14*8)(R20)
+ FSTPD (F0, F1), (16*8)(R20)
+ FSTPD (F2, F3), (18*8)(R20)
+ FSTPD (F4, F5), (20*8)(R20)
+ FSTPD (F6, F7), (22*8)(R20)
+ FSTPD (F8, F9), (24*8)(R20)
+ FSTPD (F10, F11), (26*8)(R20)
+ FSTPD (F12, F13), (28*8)(R20)
+ FSTPD (F14, F15), (30*8)(R20)
+ RET
+
+// unspillArgs loads args into registers from a *internal/abi.RegArgs in R20.
+TEXT ·unspillArgs(SB),NOSPLIT,$0-0
+ LDP (0*8)(R20), (R0, R1)
+ LDP (2*8)(R20), (R2, R3)
+ LDP (4*8)(R20), (R4, R5)
+ LDP (6*8)(R20), (R6, R7)
+ LDP (8*8)(R20), (R8, R9)
+ LDP (10*8)(R20), (R10, R11)
+ LDP (12*8)(R20), (R12, R13)
+ LDP (14*8)(R20), (R14, R15)
+ FLDPD (16*8)(R20), (F0, F1)
+ FLDPD (18*8)(R20), (F2, F3)
+ FLDPD (20*8)(R20), (F4, F5)
+ FLDPD (22*8)(R20), (F6, F7)
+ FLDPD (24*8)(R20), (F8, F9)
+ FLDPD (26*8)(R20), (F10, F11)
+ FLDPD (28*8)(R20), (F12, F13)
+ FLDPD (30*8)(R20), (F14, F15)
+ RET
+
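+// Both routines above assume the leading layout of
+// internal/abi.RegArgs on arm64: 16 integer slots followed by 16
+// floating-point slots. An abridged Go-level sketch:
+//
+//	type RegArgs struct {
+//		Ints   [16]uintptr // R0..R15 at offsets 0*8..15*8
+//		Floats [16]uint64  // F0..F15 at offsets 16*8..31*8
+//		// further fields are not touched by spill/unspill
+//	}
+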
+// reflectcall: call a function with the given argument list
+// func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
+// we don't have variable-sized frames, so we use a small number
+// of constant-sized-frame functions to encode a few bits of size in the pc.
+// Caution: ugly multiline assembly macros in your future!
+
+#define DISPATCH(NAME,MAXSIZE) \
+ MOVD $MAXSIZE, R27; \
+ CMP R27, R16; \
+ BGT 3(PC); \
+ MOVD $NAME(SB), R27; \
+ B (R27)
+// Note: can't just "B NAME(SB)" - bad inlining results.
+
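+// In Go terms, the DISPATCH chain below behaves like this hedged
+// sketch (the real selection is a run of compare-and-branch
+// instructions on the frame size in R16):
+//
+//	switch {
+//	case frameSize <= 16:
+//		// tail call runtime·call16
+//	case frameSize <= 32:
+//		// tail call runtime·call32
+//	// ...one case per power of two up to 1<<30...
+//	default:
+//		// runtime·badreflectcall
+//	}
+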
+TEXT ·reflectcall(SB), NOSPLIT|NOFRAME, $0-48
+ MOVWU frameSize+32(FP), R16
+ DISPATCH(runtime·call16, 16)
+ DISPATCH(runtime·call32, 32)
+ DISPATCH(runtime·call64, 64)
+ DISPATCH(runtime·call128, 128)
+ DISPATCH(runtime·call256, 256)
+ DISPATCH(runtime·call512, 512)
+ DISPATCH(runtime·call1024, 1024)
+ DISPATCH(runtime·call2048, 2048)
+ DISPATCH(runtime·call4096, 4096)
+ DISPATCH(runtime·call8192, 8192)
+ DISPATCH(runtime·call16384, 16384)
+ DISPATCH(runtime·call32768, 32768)
+ DISPATCH(runtime·call65536, 65536)
+ DISPATCH(runtime·call131072, 131072)
+ DISPATCH(runtime·call262144, 262144)
+ DISPATCH(runtime·call524288, 524288)
+ DISPATCH(runtime·call1048576, 1048576)
+ DISPATCH(runtime·call2097152, 2097152)
+ DISPATCH(runtime·call4194304, 4194304)
+ DISPATCH(runtime·call8388608, 8388608)
+ DISPATCH(runtime·call16777216, 16777216)
+ DISPATCH(runtime·call33554432, 33554432)
+ DISPATCH(runtime·call67108864, 67108864)
+ DISPATCH(runtime·call134217728, 134217728)
+ DISPATCH(runtime·call268435456, 268435456)
+ DISPATCH(runtime·call536870912, 536870912)
+ DISPATCH(runtime·call1073741824, 1073741824)
+ MOVD $runtime·badreflectcall(SB), R0
+ B (R0)
+
+#define CALLFN(NAME,MAXSIZE) \
+TEXT NAME(SB), WRAPPER, $MAXSIZE-48; \
+ NO_LOCAL_POINTERS; \
+ /* copy arguments to stack */ \
+ MOVD stackArgs+16(FP), R3; \
+ MOVWU stackArgsSize+24(FP), R4; \
+ ADD $8, RSP, R5; \
+ BIC $0xf, R4, R6; \
+ CBZ R6, 6(PC); \
+ /* if R6=(argsize&~15) != 0 */ \
+ ADD R6, R5, R6; \
+ /* copy 16 bytes a time */ \
+ LDP.P 16(R3), (R7, R8); \
+ STP.P (R7, R8), 16(R5); \
+ CMP R5, R6; \
+ BNE -3(PC); \
+ AND $0xf, R4, R6; \
+ CBZ R6, 6(PC); \
+ /* if R6=(argsize&15) != 0 */ \
+ ADD R6, R5, R6; \
+ /* copy 1 byte a time for the rest */ \
+ MOVBU.P 1(R3), R7; \
+ MOVBU.P R7, 1(R5); \
+ CMP R5, R6; \
+ BNE -3(PC); \
+ /* set up argument registers */ \
+ MOVD regArgs+40(FP), R20; \
+ CALL ·unspillArgs(SB); \
+ /* call function */ \
+ MOVD f+8(FP), R26; \
+ MOVD (R26), R20; \
+ PCDATA $PCDATA_StackMapIndex, $0; \
+ BL (R20); \
+ /* copy return values back */ \
+ MOVD regArgs+40(FP), R20; \
+ CALL ·spillArgs(SB); \
+ MOVD stackArgsType+0(FP), R7; \
+ MOVD stackArgs+16(FP), R3; \
+ MOVWU stackArgsSize+24(FP), R4; \
+ MOVWU stackRetOffset+28(FP), R6; \
+ ADD $8, RSP, R5; \
+ ADD R6, R5; \
+ ADD R6, R3; \
+ SUB R6, R4; \
+ BL callRet<>(SB); \
+ RET
+
+// callRet copies return values back at the end of call*. This is a
+// separate function so it can allocate stack space for the arguments
+// to reflectcallmove. It does not follow the Go ABI; it expects its
+// arguments in registers.
+TEXT callRet<>(SB), NOSPLIT, $48-0
+ NO_LOCAL_POINTERS
+ STP (R7, R3), 8(RSP)
+ STP (R5, R4), 24(RSP)
+ MOVD R20, 40(RSP)
+ BL runtime·reflectcallmove(SB)
+ RET
+
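+// The register-to-frame mapping in callRet above targets the Go
+// declaration (see mbarrier.go):
+//
+//	func reflectcallmove(typ *_type, dst, src unsafe.Pointer,
+//		size uintptr, regs *abi.RegArgs)
+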
+CALLFN(·call16, 16)
+CALLFN(·call32, 32)
+CALLFN(·call64, 64)
+CALLFN(·call128, 128)
+CALLFN(·call256, 256)
+CALLFN(·call512, 512)
+CALLFN(·call1024, 1024)
+CALLFN(·call2048, 2048)
+CALLFN(·call4096, 4096)
+CALLFN(·call8192, 8192)
+CALLFN(·call16384, 16384)
+CALLFN(·call32768, 32768)
+CALLFN(·call65536, 65536)
+CALLFN(·call131072, 131072)
+CALLFN(·call262144, 262144)
+CALLFN(·call524288, 524288)
+CALLFN(·call1048576, 1048576)
+CALLFN(·call2097152, 2097152)
+CALLFN(·call4194304, 4194304)
+CALLFN(·call8388608, 8388608)
+CALLFN(·call16777216, 16777216)
+CALLFN(·call33554432, 33554432)
+CALLFN(·call67108864, 67108864)
+CALLFN(·call134217728, 134217728)
+CALLFN(·call268435456, 268435456)
+CALLFN(·call536870912, 536870912)
+CALLFN(·call1073741824, 1073741824)
+
+// func memhash32(p unsafe.Pointer, h uintptr) uintptr
+TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
+ MOVB runtime·useAeshash(SB), R10
+ CBZ R10, noaes
+ MOVD $runtime·aeskeysched+0(SB), R3
+
+ VEOR V0.B16, V0.B16, V0.B16
+ VLD1 (R3), [V2.B16]
+ VLD1 (R0), V0.S[1]
+ VMOV R1, V0.S[0]
+
+ AESE V2.B16, V0.B16
+ AESMC V0.B16, V0.B16
+ AESE V2.B16, V0.B16
+ AESMC V0.B16, V0.B16
+ AESE V2.B16, V0.B16
+
+ VMOV V0.D[0], R0
+ RET
+noaes:
+ B runtime·memhash32Fallback<ABIInternal>(SB)
+
+// func memhash64(p unsafe.Pointer, h uintptr) uintptr
+TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
+ MOVB runtime·useAeshash(SB), R10
+ CBZ R10, noaes
+ MOVD $runtime·aeskeysched+0(SB), R3
+
+ VEOR V0.B16, V0.B16, V0.B16
+ VLD1 (R3), [V2.B16]
+ VLD1 (R0), V0.D[1]
+ VMOV R1, V0.D[0]
+
+ AESE V2.B16, V0.B16
+ AESMC V0.B16, V0.B16
+ AESE V2.B16, V0.B16
+ AESMC V0.B16, V0.B16
+ AESE V2.B16, V0.B16
+
+ VMOV V0.D[0], R0
+ RET
+noaes:
+ B runtime·memhash64Fallback<ABIInternal>(SB)
+
+// func memhash(p unsafe.Pointer, h, size uintptr) uintptr
+TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-32
+ MOVB runtime·useAeshash(SB), R10
+ CBZ R10, noaes
+ B aeshashbody<>(SB)
+noaes:
+ B runtime·memhashFallback<ABIInternal>(SB)
+
+// func strhash(p unsafe.Pointer, h uintptr) uintptr
+TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
+ MOVB runtime·useAeshash(SB), R10
+ CBZ R10, noaes
+ LDP (R0), (R0, R2) // string data / length
+ B aeshashbody<>(SB)
+noaes:
+ B runtime·strhashFallback<ABIInternal>(SB)
+
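+// useAeshash is set during alginit; a hedged sketch of the arm64 gate
+// in alg.go:
+//
+//	if GOARCH == "arm64" && cpu.ARM64.HasAES {
+//		initAlgAES() // sets useAeshash, fills aeskeysched
+//	}
+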
+// R0: data
+// R1: seed data
+// R2: length
+// At return, R0 = return value
+TEXT aeshashbody<>(SB),NOSPLIT|NOFRAME,$0
+ VEOR V30.B16, V30.B16, V30.B16
+ VMOV R1, V30.D[0]
+ VMOV R2, V30.D[1] // load length into seed
+
+ MOVD $runtime·aeskeysched+0(SB), R4
+ VLD1.P 16(R4), [V0.B16]
+ AESE V30.B16, V0.B16
+ AESMC V0.B16, V0.B16
+ CMP $16, R2
+ BLO aes0to15
+ BEQ aes16
+ CMP $32, R2
+ BLS aes17to32
+ CMP $64, R2
+ BLS aes33to64
+ CMP $128, R2
+ BLS aes65to128
+ B aes129plus
+
+aes0to15:
+ CBZ R2, aes0
+ VEOR V2.B16, V2.B16, V2.B16
+ TBZ $3, R2, less_than_8
+ VLD1.P 8(R0), V2.D[0]
+
+less_than_8:
+ TBZ $2, R2, less_than_4
+ VLD1.P 4(R0), V2.S[2]
+
+less_than_4:
+ TBZ $1, R2, less_than_2
+ VLD1.P 2(R0), V2.H[6]
+
+less_than_2:
+ TBZ $0, R2, done
+ VLD1 (R0), V2.B[14]
+done:
+ AESE V0.B16, V2.B16
+ AESMC V2.B16, V2.B16
+ AESE V0.B16, V2.B16
+ AESMC V2.B16, V2.B16
+ AESE V0.B16, V2.B16
+
+ VMOV V2.D[0], R0
+ RET
+
+aes0:
+ VMOV V0.D[0], R0
+ RET
+
+aes16:
+ VLD1 (R0), [V2.B16]
+ B done
+
+aes17to32:
+ // make second seed
+ VLD1 (R4), [V1.B16]
+ AESE V30.B16, V1.B16
+ AESMC V1.B16, V1.B16
+ SUB $16, R2, R10
+ VLD1.P (R0)(R10), [V2.B16]
+ VLD1 (R0), [V3.B16]
+
+ AESE V0.B16, V2.B16
+ AESMC V2.B16, V2.B16
+ AESE V1.B16, V3.B16
+ AESMC V3.B16, V3.B16
+
+ AESE V0.B16, V2.B16
+ AESMC V2.B16, V2.B16
+ AESE V1.B16, V3.B16
+ AESMC V3.B16, V3.B16
+
+ AESE V0.B16, V2.B16
+ AESE V1.B16, V3.B16
+
+ VEOR V3.B16, V2.B16, V2.B16
+
+ VMOV V2.D[0], R0
+ RET
+
+aes33to64:
+ VLD1 (R4), [V1.B16, V2.B16, V3.B16]
+ AESE V30.B16, V1.B16
+ AESMC V1.B16, V1.B16
+ AESE V30.B16, V2.B16
+ AESMC V2.B16, V2.B16
+ AESE V30.B16, V3.B16
+ AESMC V3.B16, V3.B16
+ SUB $32, R2, R10
+
+ VLD1.P (R0)(R10), [V4.B16, V5.B16]
+ VLD1 (R0), [V6.B16, V7.B16]
+
+ AESE V0.B16, V4.B16
+ AESMC V4.B16, V4.B16
+ AESE V1.B16, V5.B16
+ AESMC V5.B16, V5.B16
+ AESE V2.B16, V6.B16
+ AESMC V6.B16, V6.B16
+ AESE V3.B16, V7.B16
+ AESMC V7.B16, V7.B16
+
+ AESE V0.B16, V4.B16
+ AESMC V4.B16, V4.B16
+ AESE V1.B16, V5.B16
+ AESMC V5.B16, V5.B16
+ AESE V2.B16, V6.B16
+ AESMC V6.B16, V6.B16
+ AESE V3.B16, V7.B16
+ AESMC V7.B16, V7.B16
+
+ AESE V0.B16, V4.B16
+ AESE V1.B16, V5.B16
+ AESE V2.B16, V6.B16
+ AESE V3.B16, V7.B16
+
+ VEOR V6.B16, V4.B16, V4.B16
+ VEOR V7.B16, V5.B16, V5.B16
+ VEOR V5.B16, V4.B16, V4.B16
+
+ VMOV V4.D[0], R0
+ RET
+
+aes65to128:
+ VLD1.P 64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
+ VLD1 (R4), [V5.B16, V6.B16, V7.B16]
+ AESE V30.B16, V1.B16
+ AESMC V1.B16, V1.B16
+ AESE V30.B16, V2.B16
+ AESMC V2.B16, V2.B16
+ AESE V30.B16, V3.B16
+ AESMC V3.B16, V3.B16
+ AESE V30.B16, V4.B16
+ AESMC V4.B16, V4.B16
+ AESE V30.B16, V5.B16
+ AESMC V5.B16, V5.B16
+ AESE V30.B16, V6.B16
+ AESMC V6.B16, V6.B16
+ AESE V30.B16, V7.B16
+ AESMC V7.B16, V7.B16
+
+ SUB $64, R2, R10
+ VLD1.P (R0)(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
+ VLD1 (R0), [V12.B16, V13.B16, V14.B16, V15.B16]
+ AESE V0.B16, V8.B16
+ AESMC V8.B16, V8.B16
+ AESE V1.B16, V9.B16
+ AESMC V9.B16, V9.B16
+ AESE V2.B16, V10.B16
+ AESMC V10.B16, V10.B16
+ AESE V3.B16, V11.B16
+ AESMC V11.B16, V11.B16
+ AESE V4.B16, V12.B16
+ AESMC V12.B16, V12.B16
+ AESE V5.B16, V13.B16
+ AESMC V13.B16, V13.B16
+ AESE V6.B16, V14.B16
+ AESMC V14.B16, V14.B16
+ AESE V7.B16, V15.B16
+ AESMC V15.B16, V15.B16
+
+ AESE V0.B16, V8.B16
+ AESMC V8.B16, V8.B16
+ AESE V1.B16, V9.B16
+ AESMC V9.B16, V9.B16
+ AESE V2.B16, V10.B16
+ AESMC V10.B16, V10.B16
+ AESE V3.B16, V11.B16
+ AESMC V11.B16, V11.B16
+ AESE V4.B16, V12.B16
+ AESMC V12.B16, V12.B16
+ AESE V5.B16, V13.B16
+ AESMC V13.B16, V13.B16
+ AESE V6.B16, V14.B16
+ AESMC V14.B16, V14.B16
+ AESE V7.B16, V15.B16
+ AESMC V15.B16, V15.B16
+
+ AESE V0.B16, V8.B16
+ AESE V1.B16, V9.B16
+ AESE V2.B16, V10.B16
+ AESE V3.B16, V11.B16
+ AESE V4.B16, V12.B16
+ AESE V5.B16, V13.B16
+ AESE V6.B16, V14.B16
+ AESE V7.B16, V15.B16
+
+ VEOR V12.B16, V8.B16, V8.B16
+ VEOR V13.B16, V9.B16, V9.B16
+ VEOR V14.B16, V10.B16, V10.B16
+ VEOR V15.B16, V11.B16, V11.B16
+ VEOR V10.B16, V8.B16, V8.B16
+ VEOR V11.B16, V9.B16, V9.B16
+ VEOR V9.B16, V8.B16, V8.B16
+
+ VMOV V8.D[0], R0
+ RET
+
+aes129plus:
+ PRFM (R0), PLDL1KEEP
+ VLD1.P 64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
+ VLD1 (R4), [V5.B16, V6.B16, V7.B16]
+ AESE V30.B16, V1.B16
+ AESMC V1.B16, V1.B16
+ AESE V30.B16, V2.B16
+ AESMC V2.B16, V2.B16
+ AESE V30.B16, V3.B16
+ AESMC V3.B16, V3.B16
+ AESE V30.B16, V4.B16
+ AESMC V4.B16, V4.B16
+ AESE V30.B16, V5.B16
+ AESMC V5.B16, V5.B16
+ AESE V30.B16, V6.B16
+ AESMC V6.B16, V6.B16
+ AESE V30.B16, V7.B16
+ AESMC V7.B16, V7.B16
+ ADD R0, R2, R10
+ SUB $128, R10, R10
+ VLD1.P 64(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
+ VLD1 (R10), [V12.B16, V13.B16, V14.B16, V15.B16]
+ SUB $1, R2, R2
+ LSR $7, R2, R2
+
+aesloop:
+ AESE V8.B16, V0.B16
+ AESMC V0.B16, V0.B16
+ AESE V9.B16, V1.B16
+ AESMC V1.B16, V1.B16
+ AESE V10.B16, V2.B16
+ AESMC V2.B16, V2.B16
+ AESE V11.B16, V3.B16
+ AESMC V3.B16, V3.B16
+ AESE V12.B16, V4.B16
+ AESMC V4.B16, V4.B16
+ AESE V13.B16, V5.B16
+ AESMC V5.B16, V5.B16
+ AESE V14.B16, V6.B16
+ AESMC V6.B16, V6.B16
+ AESE V15.B16, V7.B16
+ AESMC V7.B16, V7.B16
+
+ VLD1.P 64(R0), [V8.B16, V9.B16, V10.B16, V11.B16]
+ AESE V8.B16, V0.B16
+ AESMC V0.B16, V0.B16
+ AESE V9.B16, V1.B16
+ AESMC V1.B16, V1.B16
+ AESE V10.B16, V2.B16
+ AESMC V2.B16, V2.B16
+ AESE V11.B16, V3.B16
+ AESMC V3.B16, V3.B16
+
+ VLD1.P 64(R0), [V12.B16, V13.B16, V14.B16, V15.B16]
+ AESE V12.B16, V4.B16
+ AESMC V4.B16, V4.B16
+ AESE V13.B16, V5.B16
+ AESMC V5.B16, V5.B16
+ AESE V14.B16, V6.B16
+ AESMC V6.B16, V6.B16
+ AESE V15.B16, V7.B16
+ AESMC V7.B16, V7.B16
+ SUB $1, R2, R2
+ CBNZ R2, aesloop
+
+ AESE V8.B16, V0.B16
+ AESMC V0.B16, V0.B16
+ AESE V9.B16, V1.B16
+ AESMC V1.B16, V1.B16
+ AESE V10.B16, V2.B16
+ AESMC V2.B16, V2.B16
+ AESE V11.B16, V3.B16
+ AESMC V3.B16, V3.B16
+ AESE V12.B16, V4.B16
+ AESMC V4.B16, V4.B16
+ AESE V13.B16, V5.B16
+ AESMC V5.B16, V5.B16
+ AESE V14.B16, V6.B16
+ AESMC V6.B16, V6.B16
+ AESE V15.B16, V7.B16
+ AESMC V7.B16, V7.B16
+
+ AESE V8.B16, V0.B16
+ AESMC V0.B16, V0.B16
+ AESE V9.B16, V1.B16
+ AESMC V1.B16, V1.B16
+ AESE V10.B16, V2.B16
+ AESMC V2.B16, V2.B16
+ AESE V11.B16, V3.B16
+ AESMC V3.B16, V3.B16
+ AESE V12.B16, V4.B16
+ AESMC V4.B16, V4.B16
+ AESE V13.B16, V5.B16
+ AESMC V5.B16, V5.B16
+ AESE V14.B16, V6.B16
+ AESMC V6.B16, V6.B16
+ AESE V15.B16, V7.B16
+ AESMC V7.B16, V7.B16
+
+ AESE V8.B16, V0.B16
+ AESE V9.B16, V1.B16
+ AESE V10.B16, V2.B16
+ AESE V11.B16, V3.B16
+ AESE V12.B16, V4.B16
+ AESE V13.B16, V5.B16
+ AESE V14.B16, V6.B16
+ AESE V15.B16, V7.B16
+
+ VEOR V0.B16, V1.B16, V0.B16
+ VEOR V2.B16, V3.B16, V2.B16
+ VEOR V4.B16, V5.B16, V4.B16
+ VEOR V6.B16, V7.B16, V6.B16
+ VEOR V0.B16, V2.B16, V0.B16
+ VEOR V4.B16, V6.B16, V4.B16
+ VEOR V4.B16, V0.B16, V0.B16
+
+ VMOV V0.D[0], R0
+ RET
+
+TEXT runtime·procyield(SB),NOSPLIT,$0-0
+ MOVWU cycles+0(FP), R0
+again:
+ YIELD
+ SUBW $1, R0
+ CBNZ R0, again
+ RET
+
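+// procyield above is the spin hint used by runtime lock loops; a
+// hedged sketch of a caller (the spin count 30 follows lock_futex.go):
+//
+//	for !atomic.Cas(&l.key, mutex_unlocked, mutex_locked) {
+//		procyield(30) // issue 30 YIELD hints before retrying
+//	}
+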
+// Save state of caller into g->sched,
+// but using fake PC from systemstack_switch.
+// Must only be called from functions with no locals ($0)
+// or else unwinding from systemstack_switch is incorrect.
+// Smashes R0.
+TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0
+ MOVD $runtime·systemstack_switch(SB), R0
+ ADD $8, R0 // get past prologue
+ MOVD R0, (g_sched+gobuf_pc)(g)
+ MOVD RSP, R0
+ MOVD R0, (g_sched+gobuf_sp)(g)
+ MOVD R29, (g_sched+gobuf_bp)(g)
+ MOVD $0, (g_sched+gobuf_lr)(g)
+ MOVD $0, (g_sched+gobuf_ret)(g)
+ // Assert ctxt is zero. See func save.
+ MOVD (g_sched+gobuf_ctxt)(g), R0
+ CBZ R0, 2(PC)
+ CALL runtime·abort(SB)
+ RET
+
+// func asmcgocall_no_g(fn, arg unsafe.Pointer)
+// Call fn(arg) aligned appropriately for the gcc ABI.
+// Called on a system stack, and there may be no g yet (during needm).
+TEXT ·asmcgocall_no_g(SB),NOSPLIT,$0-16
+ MOVD fn+0(FP), R1
+ MOVD arg+8(FP), R0
+ SUB $16, RSP // skip over saved frame pointer below RSP
+ BL (R1)
+ ADD $16, RSP // skip over saved frame pointer below RSP
+ RET
+
+// func asmcgocall(fn, arg unsafe.Pointer) int32
+// Call fn(arg) on the scheduler stack,
+// aligned appropriately for the gcc ABI.
+// See cgocall.go for more details.
+TEXT ·asmcgocall(SB),NOSPLIT,$0-20
+ MOVD fn+0(FP), R1
+ MOVD arg+8(FP), R0
+
+ MOVD RSP, R2 // save original stack pointer
+ CBZ g, nosave
+ MOVD g, R4
+
+ // Figure out if we need to switch to m->g0 stack.
+ // We get called to create new OS threads too, and those
+ // come in on the m->g0 stack already. Or we might already
+ // be on the m->gsignal stack.
+ MOVD g_m(g), R8
+ MOVD m_gsignal(R8), R3
+ CMP R3, g
+ BEQ nosave
+ MOVD m_g0(R8), R3
+ CMP R3, g
+ BEQ nosave
+
+ // Switch to system stack.
+ MOVD R0, R9 // gosave_systemstack_switch<> and save_g might clobber R0
+ BL gosave_systemstack_switch<>(SB)
+ MOVD R3, g
+ BL runtime·save_g(SB)
+ MOVD (g_sched+gobuf_sp)(g), R0
+ MOVD R0, RSP
+ MOVD (g_sched+gobuf_bp)(g), R29
+ MOVD R9, R0
+
+ // Now on a scheduling stack (a pthread-created stack).
+ // Save room for two of our pointers.
+ MOVD RSP, R13
+ SUB $16, R13
+ MOVD R13, RSP
+ MOVD R4, 0(RSP) // save old g on stack
+ MOVD (g_stack+stack_hi)(R4), R4
+ SUB R2, R4
+ MOVD R4, 8(RSP) // save depth in old g stack (can't just save SP, as stack might be copied during a callback)
+ BL (R1)
+ MOVD R0, R9
+
+ // Restore g, stack pointer. R0 is errno, so don't touch it
+ MOVD 0(RSP), g
+ BL runtime·save_g(SB)
+ MOVD (g_stack+stack_hi)(g), R5
+ MOVD 8(RSP), R6
+ SUB R6, R5
+ MOVD R9, R0
+ MOVD R5, RSP
+
+ MOVW R0, ret+16(FP)
+ RET
+
+nosave:
+ // Running on a system stack, perhaps even without a g.
+ // Having no g can happen during thread creation or thread teardown
+ // (see needm/dropm on Solaris, for example).
+ // This code is like the above sequence but without saving/restoring g
+ // and without worrying about the stack moving out from under us
+ // (because we're on a system stack, not a goroutine stack).
+ // The above code could be used directly if already on a system stack,
+ // but then the only path through this code would be a rare case on Solaris.
+ // Using this code for all "already on system stack" calls exercises it more,
+ // which should help keep it correct.
+ MOVD RSP, R13
+ SUB $16, R13
+ MOVD R13, RSP
+ MOVD $0, R4
+ MOVD R4, 0(RSP) // Where above code stores g, in case someone looks during debugging.
+ MOVD R2, 8(RSP) // Save original stack pointer.
+ BL (R1)
+ // Restore stack pointer.
+ MOVD 8(RSP), R2
+ MOVD R2, RSP
+ MOVD R0, ret+16(FP)
+ RET
+
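+// A hedged sketch of the Go-side use in cgocall.go:
+//
+//	errno := asmcgocall(fn, arg) // fn follows the C ABI; result is R0/W0
+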
+// cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
+// See cgocall.go for more details.
+TEXT ·cgocallback(SB),NOSPLIT,$24-24
+ NO_LOCAL_POINTERS
+
+ // Load g from thread-local storage.
+ BL runtime·load_g(SB)
+
+ // If g is nil, Go did not create the current thread.
+ // Call needm to obtain one for temporary use.
+ // In this case, we're running on the thread stack, so there's
+ // lots of space, but the linker doesn't know. Hide the call from
+ // the linker analysis by using an indirect call.
+ CBZ g, needm
+
+ MOVD g_m(g), R8
+ MOVD R8, savedm-8(SP)
+ B havem
+
+needm:
+ MOVD g, savedm-8(SP) // g is zero, so is m.
+ MOVD $runtime·needm(SB), R0
+ BL (R0)
+
+ // Set m->g0->sched.sp = SP, so that if a panic happens
+ // during the function we are about to execute, it will
+ // have a valid SP to run on the g0 stack.
+ // The next few lines (after the havem label)
+ // will save this SP onto the stack and then write
+ // the same SP back to m->sched.sp. That seems redundant,
+ // but if an unrecovered panic happens, unwindm will
+ // restore the g->sched.sp from the stack location
+ // and then systemstack will try to use it. If we don't set it here,
+ // that restored SP will be uninitialized (typically 0) and
+ // will not be usable.
+ MOVD g_m(g), R8
+ MOVD m_g0(R8), R3
+ MOVD RSP, R0
+ MOVD R0, (g_sched+gobuf_sp)(R3)
+ MOVD R29, (g_sched+gobuf_bp)(R3)
+
+havem:
+ // Now there's a valid m, and we're running on its m->g0.
+ // Save current m->g0->sched.sp on stack and then set it to SP.
+ // Save current sp in m->g0->sched.sp in preparation for
+ // switch back to m->curg stack.
+ // NOTE: unwindm knows that the saved g->sched.sp is at 16(RSP) aka savedsp-16(SP).
+ // Beware that the frame size is actually 32+16.
+ MOVD m_g0(R8), R3
+ MOVD (g_sched+gobuf_sp)(R3), R4
+ MOVD R4, savedsp-16(SP)
+ MOVD RSP, R0
+ MOVD R0, (g_sched+gobuf_sp)(R3)
+
+ // Switch to m->curg stack and call runtime.cgocallbackg.
+ // Because we are taking over the execution of m->curg
+ // but *not* resuming what had been running, we need to
+ // save that information (m->curg->sched) so we can restore it.
+ // We can restore m->curg->sched.sp easily, because calling
+ // runtime.cgocallbackg leaves SP unchanged upon return.
+ // To save m->curg->sched.pc, we push it onto the curg stack and
+ // open a frame the same size as cgocallback's g0 frame.
+ // Once we switch to the curg stack, the pushed PC will appear
+ // to be the return PC of cgocallback, so that the traceback
+ // will seamlessly trace back into the earlier calls.
+ MOVD m_curg(R8), g
+ BL runtime·save_g(SB)
+ MOVD (g_sched+gobuf_sp)(g), R4 // prepare stack as R4
+ MOVD (g_sched+gobuf_pc)(g), R5
+ MOVD R5, -48(R4)
+ MOVD (g_sched+gobuf_bp)(g), R5
+ MOVD R5, -56(R4)
+ // Gather our arguments into registers.
+ MOVD fn+0(FP), R1
+ MOVD frame+8(FP), R2
+ MOVD ctxt+16(FP), R3
+ MOVD $-48(R4), R0 // maintain 16-byte SP alignment
+ MOVD R0, RSP // switch stack
+ MOVD R1, 8(RSP)
+ MOVD R2, 16(RSP)
+ MOVD R3, 24(RSP)
+ MOVD $runtime·cgocallbackg(SB), R0
+ CALL (R0) // indirect call to bypass nosplit check. We're on a different stack now.
+
+ // Restore g->sched (== m->curg->sched) from saved values.
+ MOVD 0(RSP), R5
+ MOVD R5, (g_sched+gobuf_pc)(g)
+ MOVD RSP, R4
+ ADD $48, R4, R4
+ MOVD R4, (g_sched+gobuf_sp)(g)
+
+ // Switch back to m->g0's stack and restore m->g0->sched.sp.
+ // (Unlike m->curg, the g0 goroutine never uses sched.pc,
+ // so we do not have to restore it.)
+ MOVD g_m(g), R8
+ MOVD m_g0(R8), g
+ BL runtime·save_g(SB)
+ MOVD (g_sched+gobuf_sp)(g), R0
+ MOVD R0, RSP
+ MOVD savedsp-16(SP), R4
+ MOVD R4, (g_sched+gobuf_sp)(g)
+
+ // If the m on entry was nil, we called needm above to borrow an m
+ // for the duration of the call. Since the call is over, return it with dropm.
+ MOVD savedm-8(SP), R6
+ CBNZ R6, droppedm
+ MOVD $runtime·dropm(SB), R0
+ BL (R0)
+droppedm:
+
+ // Done!
+ RET
+
+// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
+// Must obey the gcc calling convention.
+TEXT _cgo_topofstack(SB),NOSPLIT,$24
+ // g (R28) and REGTMP (R27) might be clobbered by load_g. They
+ // are callee-save in the gcc calling convention, so save them.
+ MOVD R27, savedR27-8(SP)
+ MOVD g, saveG-16(SP)
+
+ BL runtime·load_g(SB)
+ MOVD g_m(g), R0
+ MOVD m_curg(R0), R0
+ MOVD (g_stack+stack_hi)(R0), R0
+
+ MOVD saveG-16(SP), g
+ MOVD savedR27-8(SP), R27
+ RET
+
+// void setg(G*); set g. for use by needm.
+TEXT runtime·setg(SB), NOSPLIT, $0-8
+ MOVD gg+0(FP), g
+ // This only happens if iscgo, so jump straight to save_g
+ BL runtime·save_g(SB)
+ RET
+
+// void setg_gcc(G*); set g called from gcc
+TEXT setg_gcc<>(SB),NOSPLIT,$8
+ MOVD R0, g
+ MOVD R27, savedR27-8(SP)
+ BL runtime·save_g(SB)
+ MOVD savedR27-8(SP), R27
+ RET
+
+TEXT runtime·emptyfunc(SB),0,$0-0
+ RET
+
+TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
+ MOVD ZR, R0
+ MOVD (R0), R0
+ UNDEF
+
+TEXT runtime·return0(SB), NOSPLIT, $0
+ MOVW $0, R0
+ RET
+
+// The top-most function running on a goroutine
+// returns to goexit+PCQuantum.
+TEXT runtime·goexit(SB),NOSPLIT|NOFRAME|TOPFRAME,$0-0
+ MOVD R0, R0 // NOP
+ BL runtime·goexit1(SB) // does not return
+
+// This is called from .init_array and follows the platform, not Go, ABI.
+TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
+ SUB $0x10, RSP
+ MOVD R27, 8(RSP) // The access to global variables below implicitly uses R27, which is callee-save
+ MOVD runtime·lastmoduledatap(SB), R1
+ MOVD R0, moduledata_next(R1)
+ MOVD R0, runtime·lastmoduledatap(SB)
+ MOVD 8(RSP), R27
+ ADD $0x10, RSP
+ RET
+
+TEXT ·checkASM(SB),NOSPLIT,$0-1
+ MOVW $1, R3
+ MOVB R3, ret+0(FP)
+ RET
+
+// gcWriteBarrier performs a heap pointer write and informs the GC.
+//
+// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments:
+// - R2 is the destination of the write
+// - R3 is the value being written at R2
+// It clobbers condition codes.
+// It does not clobber any general-purpose registers,
+// but may clobber others (e.g., floating point registers)
+// The act of CALLing gcWriteBarrier will clobber R30 (LR).
+//
+// Defined as ABIInternal since the compiler generates ABIInternal
+// calls to it directly and it does not use the stack-based Go ABI.
+TEXT runtime·gcWriteBarrier<ABIInternal>(SB),NOSPLIT,$200
+ // Save the registers clobbered by the fast path.
+ STP (R0, R1), 184(RSP)
+ MOVD g_m(g), R0
+ MOVD m_p(R0), R0
+ MOVD (p_wbBuf+wbBuf_next)(R0), R1
+ // Increment wbBuf.next position.
+ ADD $16, R1
+ MOVD R1, (p_wbBuf+wbBuf_next)(R0)
+ MOVD (p_wbBuf+wbBuf_end)(R0), R0
+ CMP R1, R0
+ // Record the write.
+ MOVD R3, -16(R1) // Record value
+ MOVD (R2), R0 // TODO: This turns bad writes into bad reads.
+ MOVD R0, -8(R1) // Record *slot
+ // Is the buffer full? (flags set in CMP above)
+ BEQ flush
+ret:
+ LDP 184(RSP), (R0, R1)
+ // Do the write.
+ MOVD R3, (R2)
+ RET
+
+flush:
+ // Save all general purpose registers since these could be
+ // clobbered by wbBufFlush and were not saved by the caller.
+ // R0 and R1 already saved
+ STP (R2, R3), 1*8(RSP) // Also first and second arguments to wbBufFlush
+ STP (R4, R5), 3*8(RSP)
+ STP (R6, R7), 5*8(RSP)
+ STP (R8, R9), 7*8(RSP)
+ STP (R10, R11), 9*8(RSP)
+ STP (R12, R13), 11*8(RSP)
+ STP (R14, R15), 13*8(RSP)
+ // R16, R17 may be clobbered by linker trampoline
+ // R18 is unused.
+ STP (R19, R20), 15*8(RSP)
+ STP (R21, R22), 17*8(RSP)
+ STP (R23, R24), 19*8(RSP)
+ STP (R25, R26), 21*8(RSP)
+ // R27 is temp register.
+ // R28 is g.
+ // R29 is frame pointer (unused).
+ // R30 is LR, which was saved by the prologue.
+ // R31 is SP.
+
+ // This takes arguments R2 and R3.
+ CALL runtime·wbBufFlush(SB)
+ LDP 1*8(RSP), (R2, R3)
+ LDP 3*8(RSP), (R4, R5)
+ LDP 5*8(RSP), (R6, R7)
+ LDP 7*8(RSP), (R8, R9)
+ LDP 9*8(RSP), (R10, R11)
+ LDP 11*8(RSP), (R12, R13)
+ LDP 13*8(RSP), (R14, R15)
+ LDP 15*8(RSP), (R19, R20)
+ LDP 17*8(RSP), (R21, R22)
+ LDP 19*8(RSP), (R23, R24)
+ LDP 21*8(RSP), (R25, R26)
+ JMP ret
+
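+// A hedged Go-level sketch of gcWriteBarrier's fast path above (the
+// real buffer types live in mwbbuf.go):
+//
+//	next := p.wbBuf.next + 16
+//	p.wbBuf.next = next
+//	*(*uintptr)(unsafe.Pointer(next - 16)) = val  // value being written
+//	*(*uintptr)(unsafe.Pointer(next - 8)) = *slot // value overwritten
+//	if next == p.wbBuf.end {
+//		wbBufFlush(slot, val)
+//	}
+//	*slot = val
+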
+DATA debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
+GLOBL debugCallFrameTooLarge<>(SB), RODATA, $20 // Size duplicated below
+
+// debugCallV2 is the entry point for debugger-injected function
+// calls on running goroutines. It informs the runtime that a
+// debug call has been injected and creates a call frame for the
+// debugger to fill in.
+//
+// To inject a function call, a debugger should:
+// 1. Check that the goroutine is in state _Grunning and that
+// there are at least 288 bytes free on the stack.
+// 2. Set SP as SP-16.
+// 3. Store the current LR in (SP) (using the SP after step 2).
+// 4. Store the current PC in the LR register.
+// 5. Write the desired argument frame size at SP-16
+// 6. Save all machine registers (including flags and fpsimd registers)
+// so they can be restored later by the debugger.
+// 7. Set the PC to debugCallV2 and resume execution.
+//
+// If the goroutine is in state _Grunnable, then it's not generally
+// safe to inject a call because it may return out via other runtime
+// operations. Instead, the debugger should unwind the stack to find
+// the return to non-runtime code, add a temporary breakpoint there,
+// and inject the call once that breakpoint is hit.
+//
+// If the goroutine is in any other state, it's not safe to inject a call.
+//
+// This function communicates back to the debugger by setting R20 and
+// invoking BRK to raise a breakpoint signal. Note that the signal PC of
+// the signal triggered by the BRK instruction is the PC where the signal
+// is trapped, not the next PC, so to resume execution, the debugger needs
+// to set the signal PC to PC+4. See the comments in the implementation for
+// the protocol the debugger is expected to follow. InjectDebugCall in the
+// runtime tests demonstrates this protocol.
+//
+// The debugger must ensure that any pointers passed to the function
+// obey escape analysis requirements. Specifically, it must not pass
+// a stack pointer to an escaping argument. debugCallV2 cannot check
+// this invariant.
+//
+// This is ABIInternal because Go code injects its PC directly into new
+// goroutine stacks.
+TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-0
+ STP (R29, R30), -280(RSP)
+ SUB $272, RSP, RSP
+ SUB $8, RSP, R29
+ // Save all registers that may contain pointers so they can be
+ // conservatively scanned.
+ //
+ // We can't do anything that might clobber any of these
+ // registers before this.
+ STP (R27, g), (30*8)(RSP)
+ STP (R25, R26), (28*8)(RSP)
+ STP (R23, R24), (26*8)(RSP)
+ STP (R21, R22), (24*8)(RSP)
+ STP (R19, R20), (22*8)(RSP)
+ STP (R16, R17), (20*8)(RSP)
+ STP (R14, R15), (18*8)(RSP)
+ STP (R12, R13), (16*8)(RSP)
+ STP (R10, R11), (14*8)(RSP)
+ STP (R8, R9), (12*8)(RSP)
+ STP (R6, R7), (10*8)(RSP)
+ STP (R4, R5), (8*8)(RSP)
+ STP (R2, R3), (6*8)(RSP)
+ STP (R0, R1), (4*8)(RSP)
+
+ // Perform a safe-point check.
+ MOVD R30, 8(RSP) // Caller's PC
+ CALL runtime·debugCallCheck(SB)
+ MOVD 16(RSP), R0
+ CBZ R0, good
+
+ // The safety check failed. Put the reason string at the top
+ // of the stack.
+ MOVD R0, 8(RSP)
+ MOVD 24(RSP), R0
+ MOVD R0, 16(RSP)
+
+ // Set R20 to 8 and invoke BRK. The debugger should get the
+ // reason a call can't be injected from SP+8 and resume execution.
+ MOVD $8, R20
+ BREAK
+ JMP restore
+
+good:
+ // Registers are saved and it's safe to make a call.
+ // Open up a call frame, moving the stack if necessary.
+ //
+ // Once the frame is allocated, this will set R20 to 0 and
+ // invoke BRK. The debugger should write the argument
+ // frame for the call at SP+8, set up argument registers,
+ // set the LR as the signal PC + 4, set the PC to the function
+ // to call, set R26 to point to the closure (if a closure call),
+ // and resume execution.
+ //
+ // If the function returns, this will set R20 to 1 and invoke
+ // BRK. The debugger can then inspect any return value saved
+ // on the stack at SP+8 and in registers. To resume execution,
+ // the debugger should restore the LR from (SP).
+ //
+ // If the function panics, this will set R20 to 2 and invoke BRK.
+ // The interface{} value of the panic will be at SP+8. The debugger
+ // can inspect the panic value and resume execution again.
+#define DEBUG_CALL_DISPATCH(NAME,MAXSIZE) \
+ CMP $MAXSIZE, R0; \
+ BGT 5(PC); \
+ MOVD $NAME(SB), R0; \
+ MOVD R0, 8(RSP); \
+ CALL runtime·debugCallWrap(SB); \
+ JMP restore
+
+ MOVD 256(RSP), R0 // the argument frame size
+ DEBUG_CALL_DISPATCH(debugCall32<>, 32)
+ DEBUG_CALL_DISPATCH(debugCall64<>, 64)
+ DEBUG_CALL_DISPATCH(debugCall128<>, 128)
+ DEBUG_CALL_DISPATCH(debugCall256<>, 256)
+ DEBUG_CALL_DISPATCH(debugCall512<>, 512)
+ DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
+ DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
+ DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
+ DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
+ DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
+ DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
+ DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
+ // The frame size is too large. Report the error.
+ MOVD $debugCallFrameTooLarge<>(SB), R0
+ MOVD R0, 8(RSP)
+ MOVD $20, R0
+ MOVD R0, 16(RSP) // length of debugCallFrameTooLarge string
+ MOVD $8, R20
+ BREAK
+ JMP restore
+
+restore:
+ // Calls and failures resume here.
+ //
+ // Set R20 to 16 and invoke BRK. The debugger should restore
+ // all registers except for PC and RSP and resume execution.
+ MOVD $16, R20
+ BREAK
+ // We must not modify flags after this point.
+
+ // Restore pointer-containing registers, which may have been
+ // modified from the debugger's copy by stack copying.
+ LDP (30*8)(RSP), (R27, g)
+ LDP (28*8)(RSP), (R25, R26)
+ LDP (26*8)(RSP), (R23, R24)
+ LDP (24*8)(RSP), (R21, R22)
+ LDP (22*8)(RSP), (R19, R20)
+ LDP (20*8)(RSP), (R16, R17)
+ LDP (18*8)(RSP), (R14, R15)
+ LDP (16*8)(RSP), (R12, R13)
+ LDP (14*8)(RSP), (R10, R11)
+ LDP (12*8)(RSP), (R8, R9)
+ LDP (10*8)(RSP), (R6, R7)
+ LDP (8*8)(RSP), (R4, R5)
+ LDP (6*8)(RSP), (R2, R3)
+ LDP (4*8)(RSP), (R0, R1)
+
+ LDP -8(RSP), (R29, R27)
+ ADD $288, RSP, RSP // Add 16 more bytes, see saveSigContext
+ MOVD -16(RSP), R30 // restore old lr
+ JMP (R27)
+
+// runtime.debugCallCheck assumes that functions defined with the
+// DEBUG_CALL_FN macro are safe points to inject calls.
+#define DEBUG_CALL_FN(NAME,MAXSIZE) \
+TEXT NAME(SB),WRAPPER,$MAXSIZE-0; \
+ NO_LOCAL_POINTERS; \
+ MOVD $0, R20; \
+ BREAK; \
+ MOVD $1, R20; \
+ BREAK; \
+ RET
+DEBUG_CALL_FN(debugCall32<>, 32)
+DEBUG_CALL_FN(debugCall64<>, 64)
+DEBUG_CALL_FN(debugCall128<>, 128)
+DEBUG_CALL_FN(debugCall256<>, 256)
+DEBUG_CALL_FN(debugCall512<>, 512)
+DEBUG_CALL_FN(debugCall1024<>, 1024)
+DEBUG_CALL_FN(debugCall2048<>, 2048)
+DEBUG_CALL_FN(debugCall4096<>, 4096)
+DEBUG_CALL_FN(debugCall8192<>, 8192)
+DEBUG_CALL_FN(debugCall16384<>, 16384)
+DEBUG_CALL_FN(debugCall32768<>, 32768)
+DEBUG_CALL_FN(debugCall65536<>, 65536)
+
+// func debugCallPanicked(val interface{})
+TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
+ // Copy the panic value to the top of stack at SP+8.
+ MOVD val_type+0(FP), R0
+ MOVD R0, 8(RSP)
+ MOVD val_data+8(FP), R0
+ MOVD R0, 16(RSP)
+ MOVD $2, R20
+ BREAK
+ RET
+
+// Note: these functions use a special calling convention to save generated code space.
+// Arguments are passed in registers, but the space for those arguments are allocated
+// in the caller's stack frame. These stubs write the args into that stack space and
+// then tail call to the corresponding runtime handler.
+// The tail call makes these stubs disappear in backtraces.
+//
+// Defined as ABIInternal since the compiler generates ABIInternal
+// calls to it directly and it does not use the stack-based Go ABI.
+TEXT runtime·panicIndex<ABIInternal>(SB),NOSPLIT,$0-16
+ JMP runtime·goPanicIndex<ABIInternal>(SB)
+TEXT runtime·panicIndexU<ABIInternal>(SB),NOSPLIT,$0-16
+ JMP runtime·goPanicIndexU<ABIInternal>(SB)
+TEXT runtime·panicSliceAlen<ABIInternal>(SB),NOSPLIT,$0-16
+ MOVD R1, R0
+ MOVD R2, R1
+ JMP runtime·goPanicSliceAlen<ABIInternal>(SB)
+TEXT runtime·panicSliceAlenU<ABIInternal>(SB),NOSPLIT,$0-16
+ MOVD R1, R0
+ MOVD R2, R1
+ JMP runtime·goPanicSliceAlenU<ABIInternal>(SB)
+TEXT runtime·panicSliceAcap<ABIInternal>(SB),NOSPLIT,$0-16
+ MOVD R1, R0
+ MOVD R2, R1
+ JMP runtime·goPanicSliceAcap<ABIInternal>(SB)
+TEXT runtime·panicSliceAcapU<ABIInternal>(SB),NOSPLIT,$0-16
+ MOVD R1, R0
+ MOVD R2, R1
+ JMP runtime·goPanicSliceAcapU<ABIInternal>(SB)
+TEXT runtime·panicSliceB<ABIInternal>(SB),NOSPLIT,$0-16
+ JMP runtime·goPanicSliceB<ABIInternal>(SB)
+TEXT runtime·panicSliceBU<ABIInternal>(SB),NOSPLIT,$0-16
+ JMP runtime·goPanicSliceBU<ABIInternal>(SB)
+TEXT runtime·panicSlice3Alen<ABIInternal>(SB),NOSPLIT,$0-16
+ MOVD R2, R0
+ MOVD R3, R1
+ JMP runtime·goPanicSlice3Alen<ABIInternal>(SB)
+TEXT runtime·panicSlice3AlenU<ABIInternal>(SB),NOSPLIT,$0-16
+ MOVD R2, R0
+ MOVD R3, R1
+ JMP runtime·goPanicSlice3AlenU<ABIInternal>(SB)
+TEXT runtime·panicSlice3Acap<ABIInternal>(SB),NOSPLIT,$0-16
+ MOVD R2, R0
+ MOVD R3, R1
+ JMP runtime·goPanicSlice3Acap<ABIInternal>(SB)
+TEXT runtime·panicSlice3AcapU<ABIInternal>(SB),NOSPLIT,$0-16
+ MOVD R2, R0
+ MOVD R3, R1
+ JMP runtime·goPanicSlice3AcapU<ABIInternal>(SB)
+TEXT runtime·panicSlice3B<ABIInternal>(SB),NOSPLIT,$0-16
+ MOVD R1, R0
+ MOVD R2, R1
+ JMP runtime·goPanicSlice3B<ABIInternal>(SB)
+TEXT runtime·panicSlice3BU<ABIInternal>(SB),NOSPLIT,$0-16
+ MOVD R1, R0
+ MOVD R2, R1
+ JMP runtime·goPanicSlice3BU<ABIInternal>(SB)
+TEXT runtime·panicSlice3C<ABIInternal>(SB),NOSPLIT,$0-16
+ JMP runtime·goPanicSlice3C<ABIInternal>(SB)
+TEXT runtime·panicSlice3CU<ABIInternal>(SB),NOSPLIT,$0-16
+ JMP runtime·goPanicSlice3CU<ABIInternal>(SB)
+TEXT runtime·panicSliceConvert<ABIInternal>(SB),NOSPLIT,$0-16
+ MOVD R2, R0
+ MOVD R3, R1
+ JMP runtime·goPanicSliceConvert<ABIInternal>(SB)
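+
+// For example, the compiler lowers a bounds-checked s[i] roughly to
+// this hedged sketch, with i in R0 and len(s) in R1 under ABIInternal:
+//
+//	if uint(i) >= uint(len(s)) {
+//		panicIndex(i, len(s)) // tail-calls goPanicIndex
+//	}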