summaryrefslogtreecommitdiffstats
path: root/src/runtime/mkpreempt.go
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-16 19:25:22 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-16 19:25:22 +0000
commitf6ad4dcef54c5ce997a4bad5a6d86de229015700 (patch)
tree7cfa4e31ace5c2bd95c72b154d15af494b2bcbef /src/runtime/mkpreempt.go
parentInitial commit. (diff)
downloadgolang-1.22-f6ad4dcef54c5ce997a4bad5a6d86de229015700.tar.xz
golang-1.22-f6ad4dcef54c5ce997a4bad5a6d86de229015700.zip
Adding upstream version 1.22.1.upstream/1.22.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/runtime/mkpreempt.go')
-rw-r--r--src/runtime/mkpreempt.go630
1 files changed, 630 insertions, 0 deletions
diff --git a/src/runtime/mkpreempt.go b/src/runtime/mkpreempt.go
new file mode 100644
index 0000000..17544d6
--- /dev/null
+++ b/src/runtime/mkpreempt.go
@@ -0,0 +1,630 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build ignore
+
+// mkpreempt generates the asyncPreempt functions for each
+// architecture.
+package main
+
+import (
+ "flag"
+ "fmt"
+ "io"
+ "log"
+ "os"
+ "strings"
+)
+
+// Copied from cmd/compile/internal/ssa/gen/*Ops.go
+
+var regNames386 = []string{
+ "AX",
+ "CX",
+ "DX",
+ "BX",
+ "SP",
+ "BP",
+ "SI",
+ "DI",
+ "X0",
+ "X1",
+ "X2",
+ "X3",
+ "X4",
+ "X5",
+ "X6",
+ "X7",
+}
+
+var regNamesAMD64 = []string{
+ "AX",
+ "CX",
+ "DX",
+ "BX",
+ "SP",
+ "BP",
+ "SI",
+ "DI",
+ "R8",
+ "R9",
+ "R10",
+ "R11",
+ "R12",
+ "R13",
+ "R14",
+ "R15",
+ "X0",
+ "X1",
+ "X2",
+ "X3",
+ "X4",
+ "X5",
+ "X6",
+ "X7",
+ "X8",
+ "X9",
+ "X10",
+ "X11",
+ "X12",
+ "X13",
+ "X14",
+ "X15",
+}
+
+var out io.Writer
+
+var arches = map[string]func(){
+ "386": gen386,
+ "amd64": genAMD64,
+ "arm": genARM,
+ "arm64": genARM64,
+ "loong64": genLoong64,
+ "mips64x": func() { genMIPS(true) },
+ "mipsx": func() { genMIPS(false) },
+ "ppc64x": genPPC64,
+ "riscv64": genRISCV64,
+ "s390x": genS390X,
+ "wasm": genWasm,
+}
+var beLe = map[string]bool{"mips64x": true, "mipsx": true, "ppc64x": true}
+
+func main() {
+ flag.Parse()
+ if flag.NArg() > 0 {
+ out = os.Stdout
+ for _, arch := range flag.Args() {
+ gen, ok := arches[arch]
+ if !ok {
+ log.Fatalf("unknown arch %s", arch)
+ }
+ header(arch)
+ gen()
+ }
+ return
+ }
+
+ for arch, gen := range arches {
+ f, err := os.Create(fmt.Sprintf("preempt_%s.s", arch))
+ if err != nil {
+ log.Fatal(err)
+ }
+ out = f
+ header(arch)
+ gen()
+ if err := f.Close(); err != nil {
+ log.Fatal(err)
+ }
+ }
+}
+
+func header(arch string) {
+ fmt.Fprintf(out, "// Code generated by mkpreempt.go; DO NOT EDIT.\n\n")
+ if beLe[arch] {
+ base := arch[:len(arch)-1]
+ fmt.Fprintf(out, "//go:build %s || %sle\n\n", base, base)
+ }
+ fmt.Fprintf(out, "#include \"go_asm.h\"\n")
+ if arch == "amd64" {
+ fmt.Fprintf(out, "#include \"asm_amd64.h\"\n")
+ }
+ fmt.Fprintf(out, "#include \"textflag.h\"\n\n")
+ fmt.Fprintf(out, "TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0\n")
+}
+
+func p(f string, args ...any) {
+ fmted := fmt.Sprintf(f, args...)
+ fmt.Fprintf(out, "\t%s\n", strings.ReplaceAll(fmted, "\n", "\n\t"))
+}
+
+func label(l string) {
+ fmt.Fprintf(out, "%s\n", l)
+}
+
+type layout struct {
+ stack int
+ regs []regPos
+ sp string // stack pointer register
+}
+
+type regPos struct {
+ pos int
+
+ saveOp string
+ restoreOp string
+ reg string
+
+ // If this register requires special save and restore, these
+ // give those operations with a %d placeholder for the stack
+ // offset.
+ save, restore string
+}
+
+func (l *layout) add(op, reg string, size int) {
+ l.regs = append(l.regs, regPos{saveOp: op, restoreOp: op, reg: reg, pos: l.stack})
+ l.stack += size
+}
+
+func (l *layout) add2(sop, rop, reg string, size int) {
+ l.regs = append(l.regs, regPos{saveOp: sop, restoreOp: rop, reg: reg, pos: l.stack})
+ l.stack += size
+}
+
+func (l *layout) addSpecial(save, restore string, size int) {
+ l.regs = append(l.regs, regPos{save: save, restore: restore, pos: l.stack})
+ l.stack += size
+}
+
+func (l *layout) save() {
+ for _, reg := range l.regs {
+ if reg.save != "" {
+ p(reg.save, reg.pos)
+ } else {
+ p("%s %s, %d(%s)", reg.saveOp, reg.reg, reg.pos, l.sp)
+ }
+ }
+}
+
+func (l *layout) restore() {
+ for i := len(l.regs) - 1; i >= 0; i-- {
+ reg := l.regs[i]
+ if reg.restore != "" {
+ p(reg.restore, reg.pos)
+ } else {
+ p("%s %d(%s), %s", reg.restoreOp, reg.pos, l.sp, reg.reg)
+ }
+ }
+}
+
+func gen386() {
+ p("PUSHFL")
+ // Save general purpose registers.
+ var l = layout{sp: "SP"}
+ for _, reg := range regNames386 {
+ if reg == "SP" || strings.HasPrefix(reg, "X") {
+ continue
+ }
+ l.add("MOVL", reg, 4)
+ }
+
+ softfloat := "GO386_softfloat"
+
+ // Save SSE state only if supported.
+ lSSE := layout{stack: l.stack, sp: "SP"}
+ for i := 0; i < 8; i++ {
+ lSSE.add("MOVUPS", fmt.Sprintf("X%d", i), 16)
+ }
+
+ p("ADJSP $%d", lSSE.stack)
+ p("NOP SP")
+ l.save()
+ p("#ifndef %s", softfloat)
+ lSSE.save()
+ p("#endif")
+ p("CALL ·asyncPreempt2(SB)")
+ p("#ifndef %s", softfloat)
+ lSSE.restore()
+ p("#endif")
+ l.restore()
+ p("ADJSP $%d", -lSSE.stack)
+
+ p("POPFL")
+ p("RET")
+}
+
+func genAMD64() {
+ // Assign stack offsets.
+ var l = layout{sp: "SP"}
+ for _, reg := range regNamesAMD64 {
+ if reg == "SP" || reg == "BP" {
+ continue
+ }
+ if !strings.HasPrefix(reg, "X") {
+ l.add("MOVQ", reg, 8)
+ }
+ }
+ lSSE := layout{stack: l.stack, sp: "SP"}
+ for _, reg := range regNamesAMD64 {
+ if strings.HasPrefix(reg, "X") {
+ lSSE.add("MOVUPS", reg, 16)
+ }
+ }
+
+ // TODO: MXCSR register?
+
+ p("PUSHQ BP")
+ p("MOVQ SP, BP")
+ p("// Save flags before clobbering them")
+ p("PUSHFQ")
+ p("// obj doesn't understand ADD/SUB on SP, but does understand ADJSP")
+ p("ADJSP $%d", lSSE.stack)
+ p("// But vet doesn't know ADJSP, so suppress vet stack checking")
+ p("NOP SP")
+
+ l.save()
+
+ // Apparently, the signal handling code path in darwin kernel leaves
+ // the upper bits of Y registers in a dirty state, which causes
+ // many SSE operations (128-bit and narrower) become much slower.
+ // Clear the upper bits to get to a clean state. See issue #37174.
+ // It is safe here as Go code don't use the upper bits of Y registers.
+ p("#ifdef GOOS_darwin")
+ p("#ifndef hasAVX")
+ p("CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $0")
+ p("JE 2(PC)")
+ p("#endif")
+ p("VZEROUPPER")
+ p("#endif")
+
+ lSSE.save()
+ p("CALL ·asyncPreempt2(SB)")
+ lSSE.restore()
+ l.restore()
+ p("ADJSP $%d", -lSSE.stack)
+ p("POPFQ")
+ p("POPQ BP")
+ p("RET")
+}
+
+func genARM() {
+ // Add integer registers R0-R12.
+ // R13 (SP), R14 (LR), R15 (PC) are special and not saved here.
+ var l = layout{sp: "R13", stack: 4} // add LR slot
+ for i := 0; i <= 12; i++ {
+ reg := fmt.Sprintf("R%d", i)
+ if i == 10 {
+ continue // R10 is g register, no need to save/restore
+ }
+ l.add("MOVW", reg, 4)
+ }
+ // Add flag register.
+ l.addSpecial(
+ "MOVW CPSR, R0\nMOVW R0, %d(R13)",
+ "MOVW %d(R13), R0\nMOVW R0, CPSR",
+ 4)
+
+ // Add floating point registers F0-F15 and flag register.
+ var lfp = layout{stack: l.stack, sp: "R13"}
+ lfp.addSpecial(
+ "MOVW FPCR, R0\nMOVW R0, %d(R13)",
+ "MOVW %d(R13), R0\nMOVW R0, FPCR",
+ 4)
+ for i := 0; i <= 15; i++ {
+ reg := fmt.Sprintf("F%d", i)
+ lfp.add("MOVD", reg, 8)
+ }
+
+ p("MOVW.W R14, -%d(R13)", lfp.stack) // allocate frame, save LR
+ l.save()
+ p("MOVB ·goarmsoftfp(SB), R0\nCMP $0, R0\nBNE nofp") // test goarmsoftfp, and skip FP registers if goarmsoftfp!=0.
+ lfp.save()
+ label("nofp:")
+ p("CALL ·asyncPreempt2(SB)")
+ p("MOVB ·goarmsoftfp(SB), R0\nCMP $0, R0\nBNE nofp2") // test goarmsoftfp, and skip FP registers if goarmsoftfp!=0.
+ lfp.restore()
+ label("nofp2:")
+ l.restore()
+
+ p("MOVW %d(R13), R14", lfp.stack) // sigctxt.pushCall pushes LR on stack, restore it
+ p("MOVW.P %d(R13), R15", lfp.stack+4) // load PC, pop frame (including the space pushed by sigctxt.pushCall)
+ p("UNDEF") // shouldn't get here
+}
+
+func genARM64() {
+ // Add integer registers R0-R26
+ // R27 (REGTMP), R28 (g), R29 (FP), R30 (LR), R31 (SP) are special
+ // and not saved here.
+ var l = layout{sp: "RSP", stack: 8} // add slot to save PC of interrupted instruction
+ for i := 0; i < 26; i += 2 {
+ if i == 18 {
+ i--
+ continue // R18 is not used, skip
+ }
+ reg := fmt.Sprintf("(R%d, R%d)", i, i+1)
+ l.add2("STP", "LDP", reg, 16)
+ }
+ // Add flag registers.
+ l.addSpecial(
+ "MOVD NZCV, R0\nMOVD R0, %d(RSP)",
+ "MOVD %d(RSP), R0\nMOVD R0, NZCV",
+ 8)
+ l.addSpecial(
+ "MOVD FPSR, R0\nMOVD R0, %d(RSP)",
+ "MOVD %d(RSP), R0\nMOVD R0, FPSR",
+ 8)
+ // TODO: FPCR? I don't think we'll change it, so no need to save.
+ // Add floating point registers F0-F31.
+ for i := 0; i < 31; i += 2 {
+ reg := fmt.Sprintf("(F%d, F%d)", i, i+1)
+ l.add2("FSTPD", "FLDPD", reg, 16)
+ }
+ if l.stack%16 != 0 {
+ l.stack += 8 // SP needs 16-byte alignment
+ }
+
+ // allocate frame, save PC of interrupted instruction (in LR)
+ p("MOVD R30, %d(RSP)", -l.stack)
+ p("SUB $%d, RSP", l.stack)
+ p("MOVD R29, -8(RSP)") // save frame pointer (only used on Linux)
+ p("SUB $8, RSP, R29") // set up new frame pointer
+ // On iOS, save the LR again after decrementing SP. We run the
+ // signal handler on the G stack (as it doesn't support sigaltstack),
+ // so any writes below SP may be clobbered.
+ p("#ifdef GOOS_ios")
+ p("MOVD R30, (RSP)")
+ p("#endif")
+
+ l.save()
+ p("CALL ·asyncPreempt2(SB)")
+ l.restore()
+
+ p("MOVD %d(RSP), R30", l.stack) // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
+ p("MOVD -8(RSP), R29") // restore frame pointer
+ p("MOVD (RSP), R27") // load PC to REGTMP
+ p("ADD $%d, RSP", l.stack+16) // pop frame (including the space pushed by sigctxt.pushCall)
+ p("JMP (R27)")
+}
+
+func genMIPS(_64bit bool) {
+ mov := "MOVW"
+ movf := "MOVF"
+ add := "ADD"
+ sub := "SUB"
+ r28 := "R28"
+ regsize := 4
+ softfloat := "GOMIPS_softfloat"
+ if _64bit {
+ mov = "MOVV"
+ movf = "MOVD"
+ add = "ADDV"
+ sub = "SUBV"
+ r28 = "RSB"
+ regsize = 8
+ softfloat = "GOMIPS64_softfloat"
+ }
+
+ // Add integer registers R1-R22, R24-R25, R28
+ // R0 (zero), R23 (REGTMP), R29 (SP), R30 (g), R31 (LR) are special,
+ // and not saved here. R26 and R27 are reserved by kernel and not used.
+ var l = layout{sp: "R29", stack: regsize} // add slot to save PC of interrupted instruction (in LR)
+ for i := 1; i <= 25; i++ {
+ if i == 23 {
+ continue // R23 is REGTMP
+ }
+ reg := fmt.Sprintf("R%d", i)
+ l.add(mov, reg, regsize)
+ }
+ l.add(mov, r28, regsize)
+ l.addSpecial(
+ mov+" HI, R1\n"+mov+" R1, %d(R29)",
+ mov+" %d(R29), R1\n"+mov+" R1, HI",
+ regsize)
+ l.addSpecial(
+ mov+" LO, R1\n"+mov+" R1, %d(R29)",
+ mov+" %d(R29), R1\n"+mov+" R1, LO",
+ regsize)
+
+ // Add floating point control/status register FCR31 (FCR0-FCR30 are irrelevant)
+ var lfp = layout{sp: "R29", stack: l.stack}
+ lfp.addSpecial(
+ mov+" FCR31, R1\n"+mov+" R1, %d(R29)",
+ mov+" %d(R29), R1\n"+mov+" R1, FCR31",
+ regsize)
+ // Add floating point registers F0-F31.
+ for i := 0; i <= 31; i++ {
+ reg := fmt.Sprintf("F%d", i)
+ lfp.add(movf, reg, regsize)
+ }
+
+ // allocate frame, save PC of interrupted instruction (in LR)
+ p(mov+" R31, -%d(R29)", lfp.stack)
+ p(sub+" $%d, R29", lfp.stack)
+
+ l.save()
+ p("#ifndef %s", softfloat)
+ lfp.save()
+ p("#endif")
+ p("CALL ·asyncPreempt2(SB)")
+ p("#ifndef %s", softfloat)
+ lfp.restore()
+ p("#endif")
+ l.restore()
+
+ p(mov+" %d(R29), R31", lfp.stack) // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
+ p(mov + " (R29), R23") // load PC to REGTMP
+ p(add+" $%d, R29", lfp.stack+regsize) // pop frame (including the space pushed by sigctxt.pushCall)
+ p("JMP (R23)")
+}
+
+func genLoong64() {
+ mov := "MOVV"
+ movf := "MOVD"
+ add := "ADDV"
+ sub := "SUBV"
+ regsize := 8
+
+ // Add integer registers r4-r21 r23-r29 r31
+ // R0 (zero), R30 (REGTMP), R2 (tp), R3 (SP), R22 (g), R1 (LR) are special,
+ var l = layout{sp: "R3", stack: regsize} // add slot to save PC of interrupted instruction (in LR)
+ for i := 4; i <= 31; i++ {
+ if i == 22 || i == 30 {
+ continue
+ }
+ reg := fmt.Sprintf("R%d", i)
+ l.add(mov, reg, regsize)
+ }
+
+ // Add floating point registers F0-F31.
+ for i := 0; i <= 31; i++ {
+ reg := fmt.Sprintf("F%d", i)
+ l.add(movf, reg, regsize)
+ }
+
+ // save/restore FCC0
+ l.addSpecial(
+ mov+" FCC0, R4\n"+mov+" R4, %d(R3)",
+ mov+" %d(R3), R4\n"+mov+" R4, FCC0",
+ regsize)
+
+ // allocate frame, save PC of interrupted instruction (in LR)
+ p(mov+" R1, -%d(R3)", l.stack)
+ p(sub+" $%d, R3", l.stack)
+
+ l.save()
+ p("CALL ·asyncPreempt2(SB)")
+ l.restore()
+
+ p(mov+" %d(R3), R1", l.stack) // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
+ p(mov + " (R3), R30") // load PC to REGTMP
+ p(add+" $%d, R3", l.stack+regsize) // pop frame (including the space pushed by sigctxt.pushCall)
+ p("JMP (R30)")
+}
+
+func genPPC64() {
+ // Add integer registers R3-R29
+ // R0 (zero), R1 (SP), R30 (g) are special and not saved here.
+ // R2 (TOC pointer in PIC mode), R12 (function entry address in PIC mode) have been saved in sigctxt.pushCall.
+ // R31 (REGTMP) will be saved manually.
+ var l = layout{sp: "R1", stack: 32 + 8} // MinFrameSize on PPC64, plus one word for saving R31
+ for i := 3; i <= 29; i++ {
+ if i == 12 || i == 13 {
+ // R12 has been saved in sigctxt.pushCall.
+ // R13 is TLS pointer, not used by Go code. we must NOT
+ // restore it, otherwise if we parked and resumed on a
+ // different thread we'll mess up TLS addresses.
+ continue
+ }
+ reg := fmt.Sprintf("R%d", i)
+ l.add("MOVD", reg, 8)
+ }
+ l.addSpecial(
+ "MOVW CR, R31\nMOVW R31, %d(R1)",
+ "MOVW %d(R1), R31\nMOVFL R31, $0xff", // this is MOVW R31, CR
+ 8) // CR is 4-byte wide, but just keep the alignment
+ l.addSpecial(
+ "MOVD XER, R31\nMOVD R31, %d(R1)",
+ "MOVD %d(R1), R31\nMOVD R31, XER",
+ 8)
+ // Add floating point registers F0-F31.
+ for i := 0; i <= 31; i++ {
+ reg := fmt.Sprintf("F%d", i)
+ l.add("FMOVD", reg, 8)
+ }
+ // Add floating point control/status register FPSCR.
+ l.addSpecial(
+ "MOVFL FPSCR, F0\nFMOVD F0, %d(R1)",
+ "FMOVD %d(R1), F0\nMOVFL F0, FPSCR",
+ 8)
+
+ p("MOVD R31, -%d(R1)", l.stack-32) // save R31 first, we'll use R31 for saving LR
+ p("MOVD LR, R31")
+ p("MOVDU R31, -%d(R1)", l.stack) // allocate frame, save PC of interrupted instruction (in LR)
+
+ l.save()
+ p("CALL ·asyncPreempt2(SB)")
+ l.restore()
+
+ p("MOVD %d(R1), R31", l.stack) // sigctxt.pushCall has pushed LR, R2, R12 (at interrupt) on stack, restore them
+ p("MOVD R31, LR")
+ p("MOVD %d(R1), R2", l.stack+8)
+ p("MOVD %d(R1), R12", l.stack+16)
+ p("MOVD (R1), R31") // load PC to CTR
+ p("MOVD R31, CTR")
+ p("MOVD 32(R1), R31") // restore R31
+ p("ADD $%d, R1", l.stack+32) // pop frame (including the space pushed by sigctxt.pushCall)
+ p("JMP (CTR)")
+}
+
+func genRISCV64() {
+ // X0 (zero), X1 (LR), X2 (SP), X3 (GP), X4 (TP), X27 (g), X31 (TMP) are special.
+ var l = layout{sp: "X2", stack: 8}
+
+ // Add integer registers (X5-X26, X28-30).
+ for i := 5; i < 31; i++ {
+ if i == 27 {
+ continue
+ }
+ reg := fmt.Sprintf("X%d", i)
+ l.add("MOV", reg, 8)
+ }
+
+ // Add floating point registers (F0-F31).
+ for i := 0; i <= 31; i++ {
+ reg := fmt.Sprintf("F%d", i)
+ l.add("MOVD", reg, 8)
+ }
+
+ p("MOV X1, -%d(X2)", l.stack)
+ p("SUB $%d, X2", l.stack)
+ l.save()
+ p("CALL ·asyncPreempt2(SB)")
+ l.restore()
+ p("MOV %d(X2), X1", l.stack)
+ p("MOV (X2), X31")
+ p("ADD $%d, X2", l.stack+8)
+ p("JMP (X31)")
+}
+
+func genS390X() {
+ // Add integer registers R0-R12
+ // R13 (g), R14 (LR), R15 (SP) are special, and not saved here.
+ // Saving R10 (REGTMP) is not necessary, but it is saved anyway.
+ var l = layout{sp: "R15", stack: 16} // add slot to save PC of interrupted instruction and flags
+ l.addSpecial(
+ "STMG R0, R12, %d(R15)",
+ "LMG %d(R15), R0, R12",
+ 13*8)
+ // Add floating point registers F0-F31.
+ for i := 0; i <= 15; i++ {
+ reg := fmt.Sprintf("F%d", i)
+ l.add("FMOVD", reg, 8)
+ }
+
+ // allocate frame, save PC of interrupted instruction (in LR) and flags (condition code)
+ p("IPM R10") // save flags upfront, as ADD will clobber flags
+ p("MOVD R14, -%d(R15)", l.stack)
+ p("ADD $-%d, R15", l.stack)
+ p("MOVW R10, 8(R15)") // save flags
+
+ l.save()
+ p("CALL ·asyncPreempt2(SB)")
+ l.restore()
+
+ p("MOVD %d(R15), R14", l.stack) // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
+ p("ADD $%d, R15", l.stack+8) // pop frame (including the space pushed by sigctxt.pushCall)
+ p("MOVWZ -%d(R15), R10", l.stack) // load flags to REGTMP
+ p("TMLH R10, $(3<<12)") // restore flags
+ p("MOVD -%d(R15), R10", l.stack+8) // load PC to REGTMP
+ p("JMP (R10)")
+}
+
+func genWasm() {
+ p("// No async preemption on wasm")
+ p("UNDEF")
+}
+
+func notImplemented() {
+ p("// Not implemented yet")
+ p("JMP ·abort(SB)")
+}