Diffstat (limited to 'src/cmd/compile/internal/ssa/gen/main.go')
-rw-r--r--   src/cmd/compile/internal/ssa/gen/main.go   541
1 file changed, 541 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ssa/gen/main.go b/src/cmd/compile/internal/ssa/gen/main.go
new file mode 100644
index 0000000..dfa146a
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/gen/main.go
@@ -0,0 +1,541 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+// The gen command generates Go code (in the parent directory) for all
+// the architecture-specific opcodes, blocks, and rewrites.
+package main
+
+import (
+	"bytes"
+	"flag"
+	"fmt"
+	"go/format"
+	"io/ioutil"
+	"log"
+	"os"
+	"path"
+	"regexp"
+	"runtime"
+	"runtime/pprof"
+	"runtime/trace"
+	"sort"
+	"strings"
+	"sync"
+)
+
+// TODO: capitalize these types, so that we can more easily tell variable names
+// apart from type names, and avoid awkward func parameters like "arch arch".
+
+type arch struct {
+	name            string
+	pkg             string // obj package to import for this arch.
+	genfile         string // source file containing opcode code generation.
+	ops             []opData
+	blocks          []blockData
+	regnames        []string
+	gpregmask       regMask
+	fpregmask       regMask
+	fp32regmask     regMask
+	fp64regmask     regMask
+	specialregmask  regMask
+	framepointerreg int8
+	linkreg         int8
+	generic         bool
+	imports         []string
+}
+
+type opData struct {
+	name              string
+	reg               regInfo
+	asm               string
+	typ               string // default result type
+	aux               string
+	rematerializeable bool
+	argLength         int32  // number of arguments, if -1, then this operation has a variable number of arguments
+	commutative       bool   // this operation is commutative on its first 2 arguments (e.g. addition)
+	resultInArg0      bool   // (first, if a tuple) output of v and v.Args[0] must be allocated to the same register
+	resultNotInArgs   bool   // outputs must not be allocated to the same registers as inputs
+	clobberFlags      bool   // this op clobbers flags register
+	call              bool   // is a function call
+	nilCheck          bool   // this op is a nil check on arg0
+	faultOnNilArg0    bool   // this op will fault if arg0 is nil (and aux encodes a small offset)
+	faultOnNilArg1    bool   // this op will fault if arg1 is nil (and aux encodes a small offset)
+	usesScratch       bool   // this op requires scratch memory space
+	hasSideEffects    bool   // for "reasons", not to be eliminated. E.g., atomic store, #19182.
+	zeroWidth         bool   // op never translates into any machine code. example: copy, which may sometimes translate to machine code, is not zero-width.
+	unsafePoint       bool   // this op is an unsafe point, i.e. not safe for async preemption
+	symEffect         string // effect this op has on symbol in aux
+	scale             uint8  // amd64/386 indexed load scale
+}
+
+type blockData struct {
+	name     string // the suffix for this block ("EQ", "LT", etc.)
+	controls int    // the number of control values this type of block requires
+	aux      string // the type of the Aux/AuxInt value, if any
+}
+
+type regInfo struct {
+	// inputs[i] encodes the set of registers allowed for the i'th input.
+	// Inputs that don't use registers (flags, memory, etc.) should be 0.
+	inputs []regMask
+	// clobbers encodes the set of registers that are overwritten by
+	// the instruction (other than the output registers).
+	clobbers regMask
+	// outputs[i] encodes the set of registers allowed for the i'th output.
+	outputs []regMask
+}
+
+type regMask uint64
+
+func (a arch) regMaskComment(r regMask) string {
+	var buf bytes.Buffer
+	for i := uint64(0); r != 0; i++ {
+		if r&1 != 0 {
+			if buf.Len() == 0 {
+				buf.WriteString(" //")
+			}
+			buf.WriteString(" ")
+			buf.WriteString(a.regnames[i])
+		}
+		r >>= 1
+	}
+	return buf.String()
+}
+
+var archs []arch
+
+var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`")
+var memprofile = flag.String("memprofile", "", "write memory profile to `file`")
+var tracefile = flag.String("trace", "", "write trace to `file`")
+
+func main() {
+	flag.Parse()
+	if *cpuprofile != "" {
+		f, err := os.Create(*cpuprofile)
+		if err != nil {
+			log.Fatal("could not create CPU profile: ", err)
+		}
+		defer f.Close()
+		if err := pprof.StartCPUProfile(f); err != nil {
+			log.Fatal("could not start CPU profile: ", err)
+		}
+		defer pprof.StopCPUProfile()
+	}
+	if *tracefile != "" {
+		f, err := os.Create(*tracefile)
+		if err != nil {
+			log.Fatalf("failed to create trace output file: %v", err)
+		}
+		defer func() {
+			if err := f.Close(); err != nil {
+				log.Fatalf("failed to close trace file: %v", err)
+			}
+		}()
+
+		if err := trace.Start(f); err != nil {
+			log.Fatalf("failed to start trace: %v", err)
+		}
+		defer trace.Stop()
+	}
+
+	sort.Sort(ArchsByName(archs))
+
+	// The generate tasks are run concurrently, since they are CPU-intensive
+	// and can easily make use of many cores on a machine.
+	//
+	// Note that there is no limit on the concurrency at the moment. On a
+	// four-core laptop at the time of writing, peak RSS usually reaches
+	// ~200MiB, which seems doable by practically any machine nowadays. If
+	// that stops being the case, we can cap this func to a fixed number of
+	// architectures being generated at once.
+
+	tasks := []func(){
+		genOp,
+	}
+	for _, a := range archs {
+		a := a // the funcs are run concurrently at a later time
+		tasks = append(tasks, func() {
+			genRules(a)
+			genSplitLoadRules(a)
+		})
+	}
+	var wg sync.WaitGroup
+	for _, task := range tasks {
+		task := task
+		wg.Add(1)
+		go func() {
+			task()
+			wg.Done()
+		}()
+	}
+	wg.Wait()
+
+	if *memprofile != "" {
+		f, err := os.Create(*memprofile)
+		if err != nil {
+			log.Fatal("could not create memory profile: ", err)
+		}
+		defer f.Close()
+		runtime.GC() // get up-to-date statistics
+		if err := pprof.WriteHeapProfile(f); err != nil {
+			log.Fatal("could not write memory profile: ", err)
+		}
+	}
+}
+
+func genOp() {
+	w := new(bytes.Buffer)
+	fmt.Fprintf(w, "// Code generated from gen/*Ops.go; DO NOT EDIT.\n")
+	fmt.Fprintln(w)
+	fmt.Fprintln(w, "package ssa")
+
+	fmt.Fprintln(w, "import (")
+	fmt.Fprintln(w, "\"cmd/internal/obj\"")
+	for _, a := range archs {
+		if a.pkg != "" {
+			fmt.Fprintf(w, "%q\n", a.pkg)
+		}
+	}
+	fmt.Fprintln(w, ")")
+
+	// generate Block* declarations
+	fmt.Fprintln(w, "const (")
+	fmt.Fprintln(w, "BlockInvalid BlockKind = iota")
+	for _, a := range archs {
+		fmt.Fprintln(w)
+		for _, d := range a.blocks {
+			fmt.Fprintf(w, "Block%s%s\n", a.Name(), d.name)
+		}
+	}
+	fmt.Fprintln(w, ")")
+
+	// generate block kind string method
+	fmt.Fprintln(w, "var blockString = [...]string{")
+	fmt.Fprintln(w, "BlockInvalid:\"BlockInvalid\",")
+	for _, a := range archs {
+		fmt.Fprintln(w)
+		for _, b := range a.blocks {
+			fmt.Fprintf(w, "Block%s%s:\"%s\",\n", a.Name(), b.name, b.name)
+		}
+	}
+	fmt.Fprintln(w, "}")
+	fmt.Fprintln(w, "func (k BlockKind) String() string {return blockString[k]}")
+
+	// generate block kind auxint method
+	fmt.Fprintln(w, "func (k BlockKind) AuxIntType() string {")
+	fmt.Fprintln(w, "switch k {")
+	for _, a := range archs {
+		for _, b := range a.blocks {
+			if b.auxIntType() == "invalid" {
+				continue
+			}
+			fmt.Fprintf(w, "case Block%s%s: return \"%s\"\n", a.Name(), b.name, b.auxIntType())
+		}
+	}
+	fmt.Fprintln(w, "}")
+	fmt.Fprintln(w, "return \"\"")
+	fmt.Fprintln(w, "}")
+
+	// generate Op* declarations
+	fmt.Fprintln(w, "const (")
+	fmt.Fprintln(w, "OpInvalid Op = iota") // make sure OpInvalid is 0.
+	for _, a := range archs {
+		fmt.Fprintln(w)
+		for _, v := range a.ops {
+			if v.name == "Invalid" {
+				continue
+			}
+			fmt.Fprintf(w, "Op%s%s\n", a.Name(), v.name)
+		}
+	}
+	fmt.Fprintln(w, ")")
+
+	// generate OpInfo table
+	fmt.Fprintln(w, "var opcodeTable = [...]opInfo{")
+	fmt.Fprintln(w, " { name: \"OpInvalid\" },")
+	for _, a := range archs {
+		fmt.Fprintln(w)
+
+		pkg := path.Base(a.pkg)
+		for _, v := range a.ops {
+			if v.name == "Invalid" {
+				continue
+			}
+			fmt.Fprintln(w, "{")
+			fmt.Fprintf(w, "name:\"%s\",\n", v.name)
+
+			// flags
+			if v.aux != "" {
+				fmt.Fprintf(w, "auxType: aux%s,\n", v.aux)
+			}
+			fmt.Fprintf(w, "argLen: %d,\n", v.argLength)
+
+			if v.rematerializeable {
+				if v.reg.clobbers != 0 {
+					log.Fatalf("%s is rematerializeable and clobbers registers", v.name)
+				}
+				if v.clobberFlags {
+					log.Fatalf("%s is rematerializeable and clobbers flags", v.name)
+				}
+				fmt.Fprintln(w, "rematerializeable: true,")
+			}
+			if v.commutative {
+				fmt.Fprintln(w, "commutative: true,")
+			}
+			if v.resultInArg0 {
+				fmt.Fprintln(w, "resultInArg0: true,")
+				// OpConvert's register mask is selected dynamically,
+				// so don't try to check it in the static table.
+				if v.name != "Convert" && v.reg.inputs[0] != v.reg.outputs[0] {
+					log.Fatalf("%s: input[0] and output[0] must use the same registers for %s", a.name, v.name)
+				}
+				if v.name != "Convert" && v.commutative && v.reg.inputs[1] != v.reg.outputs[0] {
+					log.Fatalf("%s: input[1] and output[0] must use the same registers for %s", a.name, v.name)
+				}
+			}
+			if v.resultNotInArgs {
+				fmt.Fprintln(w, "resultNotInArgs: true,")
+			}
+			if v.clobberFlags {
+				fmt.Fprintln(w, "clobberFlags: true,")
+			}
+			if v.call {
+				fmt.Fprintln(w, "call: true,")
+			}
+			if v.nilCheck {
+				fmt.Fprintln(w, "nilCheck: true,")
+			}
+			if v.faultOnNilArg0 {
+				fmt.Fprintln(w, "faultOnNilArg0: true,")
+				if v.aux != "Sym" && v.aux != "SymOff" && v.aux != "SymValAndOff" && v.aux != "Int64" && v.aux != "Int32" && v.aux != "" {
+					log.Fatalf("faultOnNilArg0 with aux %s not allowed", v.aux)
+				}
+			}
+			if v.faultOnNilArg1 {
+				fmt.Fprintln(w, "faultOnNilArg1: true,")
+				if v.aux != "Sym" && v.aux != "SymOff" && v.aux != "SymValAndOff" && v.aux != "Int64" && v.aux != "Int32" && v.aux != "" {
+					log.Fatalf("faultOnNilArg1 with aux %s not allowed", v.aux)
+				}
+			}
+			if v.usesScratch {
+				fmt.Fprintln(w, "usesScratch: true,")
+			}
+			if v.hasSideEffects {
+				fmt.Fprintln(w, "hasSideEffects: true,")
+			}
+			if v.zeroWidth {
+				fmt.Fprintln(w, "zeroWidth: true,")
+			}
+			if v.unsafePoint {
+				fmt.Fprintln(w, "unsafePoint: true,")
+			}
+			needEffect := strings.HasPrefix(v.aux, "Sym")
+			if v.symEffect != "" {
+				if !needEffect {
+					log.Fatalf("symEffect with aux %s not allowed", v.aux)
+				}
+				fmt.Fprintf(w, "symEffect: Sym%s,\n", strings.Replace(v.symEffect, ",", "|Sym", -1))
+			} else if needEffect {
+				log.Fatalf("symEffect needed for aux %s", v.aux)
+			}
+			if a.name == "generic" {
+				fmt.Fprintln(w, "generic:true,")
+				fmt.Fprintln(w, "},") // close op
+				// generic ops have no reg info or asm
+				continue
+			}
+			if v.asm != "" {
+				fmt.Fprintf(w, "asm: %s.A%s,\n", pkg, v.asm)
+			}
+			if v.scale != 0 {
+				fmt.Fprintf(w, "scale: %d,\n", v.scale)
+			}
+			fmt.Fprintln(w, "reg:regInfo{")
+
+			// Compute input allocation order. We allocate from the
+			// most to the least constrained input. This order guarantees
+			// that we will always be able to find a register.
+			var s []intPair
+			for i, r := range v.reg.inputs {
+				if r != 0 {
+					s = append(s, intPair{countRegs(r), i})
+				}
+			}
+			if len(s) > 0 {
+				sort.Sort(byKey(s))
+				fmt.Fprintln(w, "inputs: []inputInfo{")
+				for _, p := range s {
+					r := v.reg.inputs[p.val]
+					fmt.Fprintf(w, "{%d,%d},%s\n", p.val, r, a.regMaskComment(r))
+				}
+				fmt.Fprintln(w, "},")
+			}
+
+			if v.reg.clobbers > 0 {
+				fmt.Fprintf(w, "clobbers: %d,%s\n", v.reg.clobbers, a.regMaskComment(v.reg.clobbers))
+			}
+
+			// reg outputs
+			s = s[:0]
+			for i, r := range v.reg.outputs {
+				s = append(s, intPair{countRegs(r), i})
+			}
+			if len(s) > 0 {
+				sort.Sort(byKey(s))
+				fmt.Fprintln(w, "outputs: []outputInfo{")
+				for _, p := range s {
+					r := v.reg.outputs[p.val]
+					fmt.Fprintf(w, "{%d,%d},%s\n", p.val, r, a.regMaskComment(r))
+				}
+				fmt.Fprintln(w, "},")
+			}
+			fmt.Fprintln(w, "},") // close reg info
+			fmt.Fprintln(w, "},") // close op
+		}
+	}
+	fmt.Fprintln(w, "}")
+
+	fmt.Fprintln(w, "func (o Op) Asm() obj.As {return opcodeTable[o].asm}")
+	fmt.Fprintln(w, "func (o Op) Scale() int16 {return int16(opcodeTable[o].scale)}")
+
+	// generate op string method
+	fmt.Fprintln(w, "func (o Op) String() string {return opcodeTable[o].name }")
+
+	fmt.Fprintln(w, "func (o Op) UsesScratch() bool { return opcodeTable[o].usesScratch }")
+
+	fmt.Fprintln(w, "func (o Op) SymEffect() SymEffect { return opcodeTable[o].symEffect }")
+	fmt.Fprintln(w, "func (o Op) IsCall() bool { return opcodeTable[o].call }")
+	fmt.Fprintln(w, "func (o Op) HasSideEffects() bool { return opcodeTable[o].hasSideEffects }")
+	fmt.Fprintln(w, "func (o Op) UnsafePoint() bool { return opcodeTable[o].unsafePoint }")
+
+	// generate registers
+	for _, a := range archs {
+		if a.generic {
+			continue
+		}
+		fmt.Fprintf(w, "var registers%s = [...]Register {\n", a.name)
+		var gcRegN int
+		for i, r := range a.regnames {
+			pkg := a.pkg[len("cmd/internal/obj/"):]
+			var objname string // name in cmd/internal/obj/$ARCH
+			switch r {
+			case "SB":
+				// SB isn't a real register. cmd/internal/obj expects 0 in this case.
+				objname = "0"
+			case "SP":
+				objname = pkg + ".REGSP"
+			case "g":
+				objname = pkg + ".REGG"
+			default:
+				objname = pkg + ".REG_" + r
+			}
+			// Assign a GC register map index to registers
+			// that may contain pointers.
+			gcRegIdx := -1
+			if a.gpregmask&(1<<uint(i)) != 0 {
+				gcRegIdx = gcRegN
+				gcRegN++
+			}
+			fmt.Fprintf(w, "  {%d, %s, %d, \"%s\"},\n", i, objname, gcRegIdx, r)
+		}
+		if gcRegN > 32 {
+			// Won't fit in a uint32 mask.
+ log.Fatalf("too many GC registers (%d > 32) on %s", gcRegN, a.name) + } + fmt.Fprintln(w, "}") + fmt.Fprintf(w, "var gpRegMask%s = regMask(%d)\n", a.name, a.gpregmask) + fmt.Fprintf(w, "var fpRegMask%s = regMask(%d)\n", a.name, a.fpregmask) + if a.fp32regmask != 0 { + fmt.Fprintf(w, "var fp32RegMask%s = regMask(%d)\n", a.name, a.fp32regmask) + } + if a.fp64regmask != 0 { + fmt.Fprintf(w, "var fp64RegMask%s = regMask(%d)\n", a.name, a.fp64regmask) + } + fmt.Fprintf(w, "var specialRegMask%s = regMask(%d)\n", a.name, a.specialregmask) + fmt.Fprintf(w, "var framepointerReg%s = int8(%d)\n", a.name, a.framepointerreg) + fmt.Fprintf(w, "var linkReg%s = int8(%d)\n", a.name, a.linkreg) + } + + // gofmt result + b := w.Bytes() + var err error + b, err = format.Source(b) + if err != nil { + fmt.Printf("%s\n", w.Bytes()) + panic(err) + } + + if err := ioutil.WriteFile("../opGen.go", b, 0666); err != nil { + log.Fatalf("can't write output: %v\n", err) + } + + // Check that the arch genfile handles all the arch-specific opcodes. + // This is very much a hack, but it is better than nothing. + // + // Do a single regexp pass to record all ops being handled in a map, and + // then compare that with the ops list. This is much faster than one + // regexp pass per opcode. + for _, a := range archs { + if a.genfile == "" { + continue + } + + pattern := fmt.Sprintf(`\Wssa\.Op%s([a-zA-Z0-9_]+)\W`, a.name) + rxOp, err := regexp.Compile(pattern) + if err != nil { + log.Fatalf("bad opcode regexp %s: %v", pattern, err) + } + + src, err := ioutil.ReadFile(a.genfile) + if err != nil { + log.Fatalf("can't read %s: %v", a.genfile, err) + } + seen := make(map[string]bool, len(a.ops)) + for _, m := range rxOp.FindAllSubmatch(src, -1) { + seen[string(m[1])] = true + } + for _, op := range a.ops { + if !seen[op.name] { + log.Fatalf("Op%s%s has no code generation in %s", a.name, op.name, a.genfile) + } + } + } +} + +// Name returns the name of the architecture for use in Op* and Block* enumerations. +func (a arch) Name() string { + s := a.name + if s == "generic" { + s = "" + } + return s +} + +// countRegs returns the number of set bits in the register mask. +func countRegs(r regMask) int { + n := 0 + for r != 0 { + n += int(r & 1) + r >>= 1 + } + return n +} + +// for sorting a pair of integers by key +type intPair struct { + key, val int +} +type byKey []intPair + +func (a byKey) Len() int { return len(a) } +func (a byKey) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a byKey) Less(i, j int) bool { return a[i].key < a[j].key } + +type ArchsByName []arch + +func (x ArchsByName) Len() int { return len(x) } +func (x ArchsByName) Swap(i, j int) { x[i], x[j] = x[j], x[i] } +func (x ArchsByName) Less(i, j int) bool { return x[i].name < x[j].name } |