diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:23:18 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:23:18 +0000 |
commit | 43a123c1ae6613b3efeed291fa552ecd909d3acf (patch) | |
tree | fd92518b7024bc74031f78a1cf9e454b65e73665 /src/runtime/mkpreempt.go | |
parent | Initial commit. (diff) | |
download | golang-1.20-43a123c1ae6613b3efeed291fa552ecd909d3acf.tar.xz golang-1.20-43a123c1ae6613b3efeed291fa552ecd909d3acf.zip |
Adding upstream version 1.20.14. (refs: upstream/1.20.14, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/runtime/mkpreempt.go')
-rw-r--r-- | src/runtime/mkpreempt.go | 626 |
1 files changed, 626 insertions, 0 deletions
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build ignore

// mkpreempt generates the asyncPreempt functions for each
// architecture.
//
// Run without arguments, it writes one preempt_<arch>.s file per known
// architecture into the current directory. Run with architecture names
// as arguments, it writes the assembly for just those architectures to
// standard output.
package main

import (
	"flag"
	"fmt"
	"io"
	"log"
	"os"
	"strings"
)

// Copied from cmd/compile/internal/ssa/gen/*Ops.go

// regNames386 lists the 386 register names. gen386 walks it to pick the
// general purpose registers to save, skipping SP and the X registers.
var regNames386 = []string{
	"AX",
	"CX",
	"DX",
	"BX",
	"SP",
	"BP",
	"SI",
	"DI",
	"X0",
	"X1",
	"X2",
	"X3",
	"X4",
	"X5",
	"X6",
	"X7",
}

// regNamesAMD64 lists the amd64 register names. genAMD64 walks it twice:
// once for the non-X registers (skipping SP and BP) and once for the
// X registers.
var regNamesAMD64 = []string{
	"AX",
	"CX",
	"DX",
	"BX",
	"SP",
	"BP",
	"SI",
	"DI",
	"R8",
	"R9",
	"R10",
	"R11",
	"R12",
	"R13",
	"R14",
	"R15",
	"X0",
	"X1",
	"X2",
	"X3",
	"X4",
	"X5",
	"X6",
	"X7",
	"X8",
	"X9",
	"X10",
	"X11",
	"X12",
	"X13",
	"X14",
	"X15",
}

// out is where header, p and label write the generated assembly.
// main points it at os.Stdout or at the file currently being generated.
var out io.Writer

// arches maps an architecture name to the function that emits the body
// of its asyncPreempt routine. The name is also used by main to build
// the output file name preempt_<name>.s.
var arches = map[string]func(){
	"386":     gen386,
	"amd64":   genAMD64,
	"arm":     genARM,
	"arm64":   genARM64,
	"loong64": genLoong64,
	"mips64x": func() { genMIPS(true) },
	"mipsx":   func() { genMIPS(false) },
	"ppc64x":  genPPC64,
	"riscv64": genRISCV64,
	"s390x":   genS390X,
	"wasm":    genWasm,
}

// beLe marks the architecture names for which header emits a build
// constraint covering two GOARCH values: the name with its final
// character removed, and that same base with "le" appended.
var beLe = map[string]bool{"mips64x": true, "mipsx": true, "ppc64x": true}

// main generates assembly for the architectures named on the command
// line (to standard output), or for every entry in arches (to
// preempt_<arch>.s files) when no arguments are given. An unknown
// architecture name, or a failure to create or close an output file,
// terminates the program via log.Fatal.
func main() {
	flag.Parse()
	if flag.NArg() > 0 {
		out = os.Stdout
		for _, arch := range flag.Args() {
			gen, ok := arches[arch]
			if !ok {
				log.Fatalf("unknown arch %s", arch)
			}
			header(arch)
			gen()
		}
		return
	}

	// Map iteration order is unspecified, but each architecture goes to
	// its own file, so the order does not affect the result.
	for arch, gen := range arches {
		f, err := os.Create(fmt.Sprintf("preempt_%s.s", arch))
		if err != nil {
			log.Fatal(err)
		}
		out = f
		header(arch)
		gen()
		if err := f.Close(); err != nil {
			log.Fatal(err)
		}
	}
}

// header writes the preamble shared by every generated file: the
// "Code generated" marker, a build constraint for architectures listed
// in beLe, the #include lines, and the TEXT directive that opens
// ·asyncPreempt.
func header(arch string) {
	fmt.Fprintf(out, "// Code generated by mkpreempt.go; DO NOT EDIT.\n\n")
	if beLe[arch] {
		// Drop the trailing character of the name to get the base
		// GOARCH, then constrain to base || base+"le".
		base := arch[:len(arch)-1]
		fmt.Fprintf(out, "//go:build %s || %sle\n\n", base, base)
	}
	fmt.Fprintf(out, "#include \"go_asm.h\"\n")
	if arch == "amd64" {
		fmt.Fprintf(out, "#include \"asm_amd64.h\"\n")
	}
	fmt.Fprintf(out, "#include \"textflag.h\"\n\n")
	fmt.Fprintf(out, "TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0\n")
}

// p formats f with args and writes the result to out as a tab-indented
// line. Newlines embedded in the formatted text are each followed by a
// tab, so a multi-instruction string stays indented on every line.
func p(f string, args ...any) {
	fmted := fmt.Sprintf(f, args...)
	fmt.Fprintf(out, "\t%s\n", strings.ReplaceAll(fmted, "\n", "\n\t"))
}

// label writes l to out on a line of its own, without indentation.
func label(l string) {
	fmt.Fprintf(out, "%s\n", l)
}

// layout accumulates the registers to spill and the stack offset
// assigned to each of them.
type layout struct {
	// stack is the offset the next added register will receive. Each
	// add advances it by the register's size, so after the last add it
	// is the end of the save area. Generators may start it above zero
	// to reserve leading slots.
	stack int
	regs  []regPos
	sp    string // stack pointer register
}

// regPos describes one saved item and where it lives in the frame.
type regPos struct {
	// pos is the item's offset from the stack pointer register.
	pos int

	// saveOp and restoreOp are the instruction mnemonics used to store
	// reg to, and load reg from, its stack slot.
	saveOp    string
	restoreOp string
	reg       string

	// If this register requires special save and restore, these
	// give those operations with a %d placeholder for the stack
	// offset.
	save, restore string
}

// add appends reg at the current offset, using op for both the save and
// the restore, and advances the offset by size.
func (l *layout) add(op, reg string, size int) {
	l.regs = append(l.regs, regPos{saveOp: op, restoreOp: op, reg: reg, pos: l.stack})
	l.stack += size
}

// add2 is like add but takes distinct save (sop) and restore (rop)
// mnemonics.
func (l *layout) add2(sop, rop, reg string, size int) {
	l.regs = append(l.regs, regPos{saveOp: sop, restoreOp: rop, reg: reg, pos: l.stack})
	l.stack += size
}

// addSpecial appends an item whose save and restore are given as
// complete instruction templates, each containing a %d placeholder for
// the stack offset, and advances the offset by size.
func (l *layout) addSpecial(save, restore string, size int) {
	l.regs = append(l.regs, regPos{save: save, restore: restore, pos: l.stack})
	l.stack += size
}

// save emits the store for every item in l, in the order the items were
// added.
func (l *layout) save() {
	for _, reg := range l.regs {
		if reg.save != "" {
			p(reg.save, reg.pos)
		} else {
			p("%s %s, %d(%s)", reg.saveOp, reg.reg, reg.pos, l.sp)
		}
	}
}

// restore emits the load for every item in l, in the reverse of the
// order the items were added.
func (l *layout) restore() {
	for i := len(l.regs) - 1; i >= 0; i-- {
		reg := l.regs[i]
		if reg.restore != "" {
			p(reg.restore, reg.pos)
		} else {
			p("%s %d(%s), %s", reg.restoreOp, reg.pos, l.sp, reg.reg)
		}
	}
}

// gen386 emits the asyncPreempt body for 386. The general purpose
// registers are always saved; the X0-X7 save and restore are wrapped in
// #ifndef GO386_softfloat so they are assembled only when that macro is
// not defined.
func gen386() {
	p("PUSHFL")
	// Save general purpose registers.
	var l = layout{sp: "SP"}
	for _, reg := range regNames386 {
		if reg == "SP" || strings.HasPrefix(reg, "X") {
			continue
		}
		l.add("MOVL", reg, 4)
	}

	softfloat := "GO386_softfloat"

	// Save SSE state only if supported.
	// lSSE starts where l ends, so lSSE.stack is the size of the whole
	// frame (general purpose area plus SSE area).
	lSSE := layout{stack: l.stack, sp: "SP"}
	for i := 0; i < 8; i++ {
		lSSE.add("MOVUPS", fmt.Sprintf("X%d", i), 16)
	}

	p("ADJSP $%d", lSSE.stack)
	// genAMD64 emits the same NOP SP and documents it as suppressing
	// vet's stack checking.
	p("NOP SP")
	l.save()
	p("#ifndef %s", softfloat)
	lSSE.save()
	p("#endif")
	p("CALL ·asyncPreempt2(SB)")
	p("#ifndef %s", softfloat)
	lSSE.restore()
	p("#endif")
	l.restore()
	p("ADJSP $%d", -lSSE.stack)

	p("POPFL")
	p("RET")
}

// genAMD64 emits the asyncPreempt body for amd64: it sets up BP, pushes
// the flags, spills the general purpose registers (all of regNamesAMD64
// except SP and BP) followed by X0-X15, calls ·asyncPreempt2, and then
// undoes each step in reverse.
func genAMD64() {
	// Assign stack offsets.
	var l = layout{sp: "SP"}
	for _, reg := range regNamesAMD64 {
		if reg == "SP" || reg == "BP" {
			continue
		}
		if !strings.HasPrefix(reg, "X") {
			l.add("MOVQ", reg, 8)
		}
	}
	// lSSE continues at the offset where l ended, so lSSE.stack is the
	// size of the whole save area.
	lSSE := layout{stack: l.stack, sp: "SP"}
	for _, reg := range regNamesAMD64 {
		if strings.HasPrefix(reg, "X") {
			lSSE.add("MOVUPS", reg, 16)
		}
	}

	// TODO: MXCSR register?

	p("PUSHQ BP")
	p("MOVQ SP, BP")
	p("// Save flags before clobbering them")
	p("PUSHFQ")
	p("// obj doesn't understand ADD/SUB on SP, but does understand ADJSP")
	p("ADJSP $%d", lSSE.stack)
	p("// But vet doesn't know ADJSP, so suppress vet stack checking")
	p("NOP SP")

	l.save()

	// Apparently, the signal handling code path in the darwin kernel
	// leaves the upper bits of Y registers in a dirty state, which causes
	// many SSE operations (128-bit and narrower) to become much slower.
	// Clear the upper bits to get to a clean state. See issue #37174.
	// It is safe here as Go code doesn't use the upper bits of Y registers.
	p("#ifdef GOOS_darwin")
	p("#ifndef hasAVX")
	p("CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $0")
	p("JE 2(PC)")
	p("#endif")
	p("VZEROUPPER")
	p("#endif")

	lSSE.save()
	p("CALL ·asyncPreempt2(SB)")
	lSSE.restore()
	l.restore()
	p("ADJSP $%d", -lSSE.stack)
	p("POPFQ")
	p("POPQ BP")
	p("RET")
}

// genARM emits the asyncPreempt body for arm. The integer registers and
// CPSR are always saved. The floating point state (FPCR and F0-F15) is
// saved and restored only when the emitted run-time test of ·goarm finds
// a value of at least 6.
func genARM() {
	// Add integer registers R0-R12.
	// R13 (SP), R14 (LR), R15 (PC) are special and not saved here.
	var l = layout{sp: "R13", stack: 4} // add LR slot
	for i := 0; i <= 12; i++ {
		reg := fmt.Sprintf("R%d", i)
		if i == 10 {
			continue // R10 is g register, no need to save/restore
		}
		l.add("MOVW", reg, 4)
	}
	// Add flag register.
	l.addSpecial(
		"MOVW CPSR, R0\nMOVW R0, %d(R13)",
		"MOVW %d(R13), R0\nMOVW R0, CPSR",
		4)

	// Add floating point registers F0-F15 and flag register.
	// lfp continues at the offset where l ended, so lfp.stack is the
	// size of the whole frame.
	var lfp = layout{stack: l.stack, sp: "R13"}
	lfp.addSpecial(
		"MOVW FPCR, R0\nMOVW R0, %d(R13)",
		"MOVW %d(R13), R0\nMOVW R0, FPCR",
		4)
	for i := 0; i <= 15; i++ {
		reg := fmt.Sprintf("F%d", i)
		lfp.add("MOVD", reg, 8)
	}

	p("MOVW.W R14, -%d(R13)", lfp.stack) // allocate frame, save LR
	l.save()
	p("MOVB ·goarm(SB), R0\nCMP $6, R0\nBLT nofp") // test goarm, and skip FP registers if goarm=5.
	lfp.save()
	label("nofp:")
	p("CALL ·asyncPreempt2(SB)")
	p("MOVB ·goarm(SB), R0\nCMP $6, R0\nBLT nofp2") // test goarm, and skip FP registers if goarm=5.
	lfp.restore()
	label("nofp2:")
	l.restore()

	p("MOVW %d(R13), R14", lfp.stack)     // sigctxt.pushCall pushes LR on stack, restore it
	p("MOVW.P %d(R13), R15", lfp.stack+4) // load PC, pop frame (including the space pushed by sigctxt.pushCall)
	p("UNDEF")                            // shouldn't get here
}

// genARM64 emits the asyncPreempt body for arm64. Integer and floating
// point registers are saved in pairs, the NZCV and FPSR flag registers
// are saved through R0, and the frame size is rounded up to a multiple
// of 16.
func genARM64() {
	// Add integer registers R0-R26
	// R27 (REGTMP), R28 (g), R29 (FP), R30 (LR), R31 (SP) are special
	// and not saved here.
	var l = layout{sp: "RSP", stack: 8} // add slot to save PC of interrupted instruction
	for i := 0; i < 26; i += 2 {
		if i == 18 {
			// Step back by one so that, after the loop's i += 2,
			// pairing resumes at (R19, R20).
			i--
			continue // R18 is not used, skip
		}
		reg := fmt.Sprintf("(R%d, R%d)", i, i+1)
		l.add2("STP", "LDP", reg, 16)
	}
	// Add flag registers.
	l.addSpecial(
		"MOVD NZCV, R0\nMOVD R0, %d(RSP)",
		"MOVD %d(RSP), R0\nMOVD R0, NZCV",
		8)
	l.addSpecial(
		"MOVD FPSR, R0\nMOVD R0, %d(RSP)",
		"MOVD %d(RSP), R0\nMOVD R0, FPSR",
		8)
	// TODO: FPCR? I don't think we'll change it, so no need to save.
	// Add floating point registers F0-F31.
	for i := 0; i < 31; i += 2 {
		reg := fmt.Sprintf("(F%d, F%d)", i, i+1)
		l.add2("FSTPD", "FLDPD", reg, 16)
	}
	if l.stack%16 != 0 {
		l.stack += 8 // SP needs 16-byte alignment
	}

	// allocate frame, save PC of interrupted instruction (in LR)
	p("MOVD R30, %d(RSP)", -l.stack)
	p("SUB $%d, RSP", l.stack)
	p("MOVD R29, -8(RSP)") // save frame pointer (only used on Linux)
	p("SUB $8, RSP, R29")  // set up new frame pointer
	// On iOS, save the LR again after decrementing SP. We run the
	// signal handler on the G stack (as it doesn't support sigaltstack),
	// so any writes below SP may be clobbered.
	p("#ifdef GOOS_ios")
	p("MOVD R30, (RSP)")
	p("#endif")

	l.save()
	p("CALL ·asyncPreempt2(SB)")
	l.restore()

	p("MOVD %d(RSP), R30", l.stack) // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
	p("MOVD -8(RSP), R29")          // restore frame pointer
	p("MOVD (RSP), R27")            // load PC to REGTMP
	p("ADD $%d, RSP", l.stack+16)   // pop frame (including the space pushed by sigctxt.pushCall)
	p("JMP (R27)")
}

// genMIPS emits the asyncPreempt body for the MIPS family. _64bit
// selects the 64-bit mnemonics, an 8-byte register size and the
// GOMIPS64_softfloat macro; otherwise the 32-bit mnemonics, a 4-byte
// register size and GOMIPS_softfloat are used. The floating point save
// and restore are wrapped in #ifndef of the chosen softfloat macro.
func genMIPS(_64bit bool) {
	mov := "MOVW"
	movf := "MOVF"
	add := "ADD"
	sub := "SUB"
	r28 := "R28"
	regsize := 4
	softfloat := "GOMIPS_softfloat"
	if _64bit {
		mov = "MOVV"
		movf = "MOVD"
		add = "ADDV"
		sub = "SUBV"
		r28 = "RSB"
		regsize = 8
		softfloat = "GOMIPS64_softfloat"
	}

	// Add integer registers R1-R22, R24-R25, R28
	// R0 (zero), R23 (REGTMP), R29 (SP), R30 (g), R31 (LR) are special,
	// and not saved here. R26 and R27 are reserved by kernel and not used.
	var l = layout{sp: "R29", stack: regsize} // add slot to save PC of interrupted instruction (in LR)
	for i := 1; i <= 25; i++ {
		if i == 23 {
			continue // R23 is REGTMP
		}
		reg := fmt.Sprintf("R%d", i)
		l.add(mov, reg, regsize)
	}
	l.add(mov, r28, regsize)
	// HI and LO are moved through R1.
	l.addSpecial(
		mov+" HI, R1\n"+mov+" R1, %d(R29)",
		mov+" %d(R29), R1\n"+mov+" R1, HI",
		regsize)
	l.addSpecial(
		mov+" LO, R1\n"+mov+" R1, %d(R29)",
		mov+" %d(R29), R1\n"+mov+" R1, LO",
		regsize)

	// Add floating point control/status register FCR31 (FCR0-FCR30 are irrelevant)
	// lfp continues at the offset where l ended, so lfp.stack is the
	// size of the whole frame.
	var lfp = layout{sp: "R29", stack: l.stack}
	lfp.addSpecial(
		mov+" FCR31, R1\n"+mov+" R1, %d(R29)",
		mov+" %d(R29), R1\n"+mov+" R1, FCR31",
		regsize)
	// Add floating point registers F0-F31.
	for i := 0; i <= 31; i++ {
		reg := fmt.Sprintf("F%d", i)
		lfp.add(movf, reg, regsize)
	}

	// allocate frame, save PC of interrupted instruction (in LR)
	p(mov+" R31, -%d(R29)", lfp.stack)
	p(sub+" $%d, R29", lfp.stack)

	l.save()
	p("#ifndef %s", softfloat)
	lfp.save()
	p("#endif")
	p("CALL ·asyncPreempt2(SB)")
	p("#ifndef %s", softfloat)
	lfp.restore()
	p("#endif")
	l.restore()

	p(mov+" %d(R29), R31", lfp.stack)     // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
	p(mov + " (R29), R23")                // load PC to REGTMP
	p(add+" $%d, R29", lfp.stack+regsize) // pop frame (including the space pushed by sigctxt.pushCall)
	p("JMP (R23)")
}

// genLoong64 emits the asyncPreempt body for loong64. Integer and
// floating point registers share a single layout and are saved
// unconditionally.
func genLoong64() {
	mov := "MOVV"
	movf := "MOVD"
	add := "ADDV"
	sub := "SUBV"
	r31 := "RSB"
	regsize := 8

	// Add integer registers r4-r21 r23-r29 r31
	// R0 (zero), R30 (REGTMP), R2 (tp), R3 (SP), R22 (g), R1 (LR) are special,
	var l = layout{sp: "R3", stack: regsize} // add slot to save PC of interrupted instruction (in LR)
	for i := 4; i <= 29; i++ {
		if i == 22 {
			continue // R22 is g
		}
		reg := fmt.Sprintf("R%d", i)
		l.add(mov, reg, regsize)
	}
	l.add(mov, r31, regsize)

	// Add floating point registers F0-F31.
	for i := 0; i <= 31; i++ {
		reg := fmt.Sprintf("F%d", i)
		l.add(movf, reg, regsize)
	}

	// allocate frame, save PC of interrupted instruction (in LR)
	p(mov+" R1, -%d(R3)", l.stack)
	p(sub+" $%d, R3", l.stack)

	l.save()
	p("CALL ·asyncPreempt2(SB)")
	l.restore()

	p(mov+" %d(R3), R1", l.stack)      // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
	p(mov + " (R3), R30")              // load PC to REGTMP
	p(add+" $%d, R3", l.stack+regsize) // pop frame (including the space pushed by sigctxt.pushCall)
	p("JMP (R30)")
}

// genPPC64 emits the asyncPreempt body for ppc64x. R31 is spilled by
// hand before the frame is allocated because it serves as the scratch
// register for moving LR, CR, XER and CTR.
func genPPC64() {
	// Add integer registers R3-R29
	// R0 (zero), R1 (SP), R30 (g) are special and not saved here.
	// R2 (TOC pointer in PIC mode), R12 (function entry address in PIC mode) have been saved in sigctxt.pushCall.
	// R31 (REGTMP) will be saved manually.
	var l = layout{sp: "R1", stack: 32 + 8} // MinFrameSize on PPC64, plus one word for saving R31
	for i := 3; i <= 29; i++ {
		if i == 12 || i == 13 {
			// R12 has been saved in sigctxt.pushCall.
			// R13 is TLS pointer, not used by Go code. we must NOT
			// restore it, otherwise if we parked and resumed on a
			// different thread we'll mess up TLS addresses.
			continue
		}
		reg := fmt.Sprintf("R%d", i)
		l.add("MOVD", reg, 8)
	}
	l.addSpecial(
		"MOVW CR, R31\nMOVW R31, %d(R1)",
		"MOVW %d(R1), R31\nMOVFL R31, $0xff", // this is MOVW R31, CR
		8) // CR is 4-byte wide, but just keep the alignment
	l.addSpecial(
		"MOVD XER, R31\nMOVD R31, %d(R1)",
		"MOVD %d(R1), R31\nMOVD R31, XER",
		8)
	// Add floating point registers F0-F31.
	for i := 0; i <= 31; i++ {
		reg := fmt.Sprintf("F%d", i)
		l.add("FMOVD", reg, 8)
	}
	// Add floating point control/status register FPSCR.
	l.addSpecial(
		"MOVFL FPSCR, F0\nFMOVD F0, %d(R1)",
		"FMOVD %d(R1), F0\nMOVFL F0, FPSCR",
		8)

	// -(l.stack-32) relative to the old R1 is offset 32 in the new
	// frame, which is where R31 is reloaded from below.
	p("MOVD R31, -%d(R1)", l.stack-32) // save R31 first, we'll use R31 for saving LR
	p("MOVD LR, R31")
	p("MOVDU R31, -%d(R1)", l.stack) // allocate frame, save PC of interrupted instruction (in LR)

	l.save()
	p("CALL ·asyncPreempt2(SB)")
	l.restore()

	p("MOVD %d(R1), R31", l.stack) // sigctxt.pushCall has pushed LR, R2, R12 (at interrupt) on stack, restore them
	p("MOVD R31, LR")
	p("MOVD %d(R1), R2", l.stack+8)
	p("MOVD %d(R1), R12", l.stack+16)
	p("MOVD (R1), R31") // load PC to CTR
	p("MOVD R31, CTR")
	p("MOVD 32(R1), R31")        // restore R31
	p("ADD $%d, R1", l.stack+32) // pop frame (including the space pushed by sigctxt.pushCall)
	p("JMP (CTR)")
}

// genRISCV64 emits the asyncPreempt body for riscv64. The layout starts
// at offset 8, leaving the slot at 0(X2) for the value of X1 stored on
// entry; that slot is reloaded into X31 for the final jump.
func genRISCV64() {
	// X0 (zero), X1 (LR), X2 (SP), X3 (GP), X4 (TP), X27 (g), X31 (TMP) are special.
	var l = layout{sp: "X2", stack: 8}

	// Add integer registers (X5-X26, X28-30).
	for i := 5; i < 31; i++ {
		if i == 27 {
			continue
		}
		reg := fmt.Sprintf("X%d", i)
		l.add("MOV", reg, 8)
	}

	// Add floating point registers (F0-F31).
	for i := 0; i <= 31; i++ {
		reg := fmt.Sprintf("F%d", i)
		l.add("MOVD", reg, 8)
	}

	p("MOV X1, -%d(X2)", l.stack)
	p("ADD $-%d, X2", l.stack)
	l.save()
	p("CALL ·asyncPreempt2(SB)")
	l.restore()
	p("MOV %d(X2), X1", l.stack)
	p("MOV (X2), X31")
	p("ADD $%d, X2", l.stack+8)
	p("JMP (X31)")
}

// genS390X emits the asyncPreempt body for s390x. R0-R12 are saved and
// restored with a single STMG/LMG pair, and the condition code is
// captured with IPM before the frame-allocating ADD and stored at
// 8(R15).
func genS390X() {
	// Add integer registers R0-R12
	// R13 (g), R14 (LR), R15 (SP) are special, and not saved here.
	// Saving R10 (REGTMP) is not necessary, but it is saved anyway.
	var l = layout{sp: "R15", stack: 16} // add slot to save PC of interrupted instruction and flags
	l.addSpecial(
		"STMG R0, R12, %d(R15)",
		"LMG %d(R15), R0, R12",
		13*8)
	// Add floating point registers F0-F15.
	for i := 0; i <= 15; i++ {
		reg := fmt.Sprintf("F%d", i)
		l.add("FMOVD", reg, 8)
	}

	// allocate frame, save PC of interrupted instruction (in LR) and flags (condition code)
	p("IPM R10") // save flags upfront, as ADD will clobber flags
	p("MOVD R14, -%d(R15)", l.stack)
	p("ADD $-%d, R15", l.stack)
	p("MOVW R10, 8(R15)") // save flags

	l.save()
	p("CALL ·asyncPreempt2(SB)")
	l.restore()

	// The frame is popped before the flags and PC are reloaded, so both
	// are read back at negative offsets from the new R15.
	p("MOVD %d(R15), R14", l.stack)    // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
	p("ADD $%d, R15", l.stack+8)       // pop frame (including the space pushed by sigctxt.pushCall)
	p("MOVWZ -%d(R15), R10", l.stack)  // load flags to REGTMP
	p("TMLH R10, $(3<<12)")            // restore flags
	p("MOVD -%d(R15), R10", l.stack+8) // load PC to REGTMP
	p("JMP (R10)")
}

// genWasm emits a placeholder body for wasm consisting of a comment and
// an UNDEF instruction.
func genWasm() {
	p("// No async preemption on wasm")
	p("UNDEF")
}

// notImplemented emits a placeholder body that jumps to ·abort. No entry
// in arches refers to it.
func notImplemented() {
	p("// Not implemented yet")
	p("JMP ·abort(SB)")
}