diff options
Diffstat (limited to 'src/cmd/compile/internal/ssa/stackalloc.go')
-rw-r--r-- | src/cmd/compile/internal/ssa/stackalloc.go | 454 |
1 files changed, 454 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ssa/stackalloc.go b/src/cmd/compile/internal/ssa/stackalloc.go new file mode 100644 index 0000000..3e24b48 --- /dev/null +++ b/src/cmd/compile/internal/ssa/stackalloc.go @@ -0,0 +1,454 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// TODO: live at start of block instead? + +package ssa + +import ( + "cmd/compile/internal/ir" + "cmd/compile/internal/types" + "cmd/internal/src" + "fmt" +) + +type stackAllocState struct { + f *Func + + // live is the output of stackalloc. + // live[b.id] = live values at the end of block b. + live [][]ID + + // The following slices are reused across multiple users + // of stackAllocState. + values []stackValState + interfere [][]ID // interfere[v.id] = values that interfere with v. + names []LocalSlot + + nArgSlot, // Number of Values sourced to arg slot + nNotNeed, // Number of Values not needing a stack slot + nNamedSlot, // Number of Values using a named stack slot + nReuse, // Number of values reusing a stack slot + nAuto, // Number of autos allocated for stack slots. + nSelfInterfere int32 // Number of self-interferences +} + +func newStackAllocState(f *Func) *stackAllocState { + s := f.Cache.stackAllocState + if s == nil { + return new(stackAllocState) + } + if s.f != nil { + f.fe.Fatalf(src.NoXPos, "newStackAllocState called without previous free") + } + return s +} + +func putStackAllocState(s *stackAllocState) { + for i := range s.values { + s.values[i] = stackValState{} + } + for i := range s.interfere { + s.interfere[i] = nil + } + for i := range s.names { + s.names[i] = LocalSlot{} + } + s.f.Cache.stackAllocState = s + s.f = nil + s.live = nil + s.nArgSlot, s.nNotNeed, s.nNamedSlot, s.nReuse, s.nAuto, s.nSelfInterfere = 0, 0, 0, 0, 0, 0 +} + +type stackValState struct { + typ *types.Type + spill *Value + needSlot bool + isArg bool +} + +// stackalloc allocates storage in the stack frame for +// all Values that did not get a register. +// Returns a map from block ID to the stack values live at the end of that block. +func stackalloc(f *Func, spillLive [][]ID) [][]ID { + if f.pass.debug > stackDebug { + fmt.Println("before stackalloc") + fmt.Println(f.String()) + } + s := newStackAllocState(f) + s.init(f, spillLive) + defer putStackAllocState(s) + + s.stackalloc() + if f.pass.stats > 0 { + f.LogStat("stack_alloc_stats", + s.nArgSlot, "arg_slots", s.nNotNeed, "slot_not_needed", + s.nNamedSlot, "named_slots", s.nAuto, "auto_slots", + s.nReuse, "reused_slots", s.nSelfInterfere, "self_interfering") + } + + return s.live +} + +func (s *stackAllocState) init(f *Func, spillLive [][]ID) { + s.f = f + + // Initialize value information. + if n := f.NumValues(); cap(s.values) >= n { + s.values = s.values[:n] + } else { + s.values = make([]stackValState, n) + } + for _, b := range f.Blocks { + for _, v := range b.Values { + s.values[v.ID].typ = v.Type + s.values[v.ID].needSlot = !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && f.getHome(v.ID) == nil && !v.rematerializeable() && !v.OnWasmStack + s.values[v.ID].isArg = hasAnyArgOp(v) + if f.pass.debug > stackDebug && s.values[v.ID].needSlot { + fmt.Printf("%s needs a stack slot\n", v) + } + if v.Op == OpStoreReg { + s.values[v.Args[0].ID].spill = v + } + } + } + + // Compute liveness info for values needing a slot. + s.computeLive(spillLive) + + // Build interference graph among values needing a slot. + s.buildInterferenceGraph() +} + +func (s *stackAllocState) stackalloc() { + f := s.f + + // Build map from values to their names, if any. + // A value may be associated with more than one name (e.g. after + // the assignment i=j). This step picks one name per value arbitrarily. + if n := f.NumValues(); cap(s.names) >= n { + s.names = s.names[:n] + } else { + s.names = make([]LocalSlot, n) + } + names := s.names + empty := LocalSlot{} + for _, name := range f.Names { + // Note: not "range f.NamedValues" above, because + // that would be nondeterministic. + for _, v := range f.NamedValues[*name] { + if v.Op == OpArgIntReg || v.Op == OpArgFloatReg { + aux := v.Aux.(*AuxNameOffset) + // Never let an arg be bound to a differently named thing. + if name.N != aux.Name || name.Off != aux.Offset { + if f.pass.debug > stackDebug { + fmt.Printf("stackalloc register arg %s skipping name %s\n", v, name) + } + continue + } + } else if name.N.Class == ir.PPARAM && v.Op != OpArg { + // PPARAM's only bind to OpArg + if f.pass.debug > stackDebug { + fmt.Printf("stackalloc PPARAM name %s skipping non-Arg %s\n", name, v) + } + continue + } + + if names[v.ID] == empty { + if f.pass.debug > stackDebug { + fmt.Printf("stackalloc value %s to name %s\n", v, *name) + } + names[v.ID] = *name + } + } + } + + // Allocate args to their assigned locations. + for _, v := range f.Entry.Values { + if !hasAnyArgOp(v) { + continue + } + if v.Aux == nil { + f.Fatalf("%s has nil Aux\n", v.LongString()) + } + if v.Op == OpArg { + loc := LocalSlot{N: v.Aux.(*ir.Name), Type: v.Type, Off: v.AuxInt} + if f.pass.debug > stackDebug { + fmt.Printf("stackalloc OpArg %s to %s\n", v, loc) + } + f.setHome(v, loc) + continue + } + // You might think this below would be the right idea, but you would be wrong. + // It almost works; as of 105a6e9518 - 2021-04-23, + // GOSSAHASH=11011011001011111 == cmd/compile/internal/noder.(*noder).embedded + // is compiled incorrectly. I believe the cause is one of those SSA-to-registers + // puzzles that the register allocator untangles; in the event that a register + // parameter does not end up bound to a name, "fixing" it is a bad idea. + // + //if f.DebugTest { + // if v.Op == OpArgIntReg || v.Op == OpArgFloatReg { + // aux := v.Aux.(*AuxNameOffset) + // loc := LocalSlot{N: aux.Name, Type: v.Type, Off: aux.Offset} + // if f.pass.debug > stackDebug { + // fmt.Printf("stackalloc Op%s %s to %s\n", v.Op, v, loc) + // } + // names[v.ID] = loc + // continue + // } + //} + + } + + // For each type, we keep track of all the stack slots we + // have allocated for that type. + // TODO: share slots among equivalent types. We would need to + // only share among types with the same GC signature. See the + // type.Equal calls below for where this matters. + locations := map[*types.Type][]LocalSlot{} + + // Each time we assign a stack slot to a value v, we remember + // the slot we used via an index into locations[v.Type]. + slots := f.Cache.allocIntSlice(f.NumValues()) + defer f.Cache.freeIntSlice(slots) + for i := range slots { + slots[i] = -1 + } + + // Pick a stack slot for each value needing one. + used := f.Cache.allocBoolSlice(f.NumValues()) + defer f.Cache.freeBoolSlice(used) + for _, b := range f.Blocks { + for _, v := range b.Values { + if !s.values[v.ID].needSlot { + s.nNotNeed++ + continue + } + if hasAnyArgOp(v) { + s.nArgSlot++ + continue // already picked + } + + // If this is a named value, try to use the name as + // the spill location. + var name LocalSlot + if v.Op == OpStoreReg { + name = names[v.Args[0].ID] + } else { + name = names[v.ID] + } + if name.N != nil && v.Type.Compare(name.Type) == types.CMPeq { + for _, id := range s.interfere[v.ID] { + h := f.getHome(id) + if h != nil && h.(LocalSlot).N == name.N && h.(LocalSlot).Off == name.Off { + // A variable can interfere with itself. + // It is rare, but it can happen. + s.nSelfInterfere++ + goto noname + } + } + if f.pass.debug > stackDebug { + fmt.Printf("stackalloc %s to %s\n", v, name) + } + s.nNamedSlot++ + f.setHome(v, name) + continue + } + + noname: + // Set of stack slots we could reuse. + locs := locations[v.Type] + // Mark all positions in locs used by interfering values. + for i := 0; i < len(locs); i++ { + used[i] = false + } + for _, xid := range s.interfere[v.ID] { + slot := slots[xid] + if slot >= 0 { + used[slot] = true + } + } + // Find an unused stack slot. + var i int + for i = 0; i < len(locs); i++ { + if !used[i] { + s.nReuse++ + break + } + } + // If there is no unused stack slot, allocate a new one. + if i == len(locs) { + s.nAuto++ + locs = append(locs, LocalSlot{N: f.fe.Auto(v.Pos, v.Type), Type: v.Type, Off: 0}) + locations[v.Type] = locs + } + // Use the stack variable at that index for v. + loc := locs[i] + if f.pass.debug > stackDebug { + fmt.Printf("stackalloc %s to %s\n", v, loc) + } + f.setHome(v, loc) + slots[v.ID] = i + } + } +} + +// computeLive computes a map from block ID to a list of +// stack-slot-needing value IDs live at the end of that block. +// TODO: this could be quadratic if lots of variables are live across lots of +// basic blocks. Figure out a way to make this function (or, more precisely, the user +// of this function) require only linear size & time. +func (s *stackAllocState) computeLive(spillLive [][]ID) { + s.live = make([][]ID, s.f.NumBlocks()) + var phis []*Value + live := s.f.newSparseSet(s.f.NumValues()) + defer s.f.retSparseSet(live) + t := s.f.newSparseSet(s.f.NumValues()) + defer s.f.retSparseSet(t) + + // Instead of iterating over f.Blocks, iterate over their postordering. + // Liveness information flows backward, so starting at the end + // increases the probability that we will stabilize quickly. + po := s.f.postorder() + for { + changed := false + for _, b := range po { + // Start with known live values at the end of the block + live.clear() + live.addAll(s.live[b.ID]) + + // Propagate backwards to the start of the block + phis = phis[:0] + for i := len(b.Values) - 1; i >= 0; i-- { + v := b.Values[i] + live.remove(v.ID) + if v.Op == OpPhi { + // Save phi for later. + // Note: its args might need a stack slot even though + // the phi itself doesn't. So don't use needSlot. + if !v.Type.IsMemory() && !v.Type.IsVoid() { + phis = append(phis, v) + } + continue + } + for _, a := range v.Args { + if s.values[a.ID].needSlot { + live.add(a.ID) + } + } + } + + // for each predecessor of b, expand its list of live-at-end values + // invariant: s contains the values live at the start of b (excluding phi inputs) + for i, e := range b.Preds { + p := e.b + t.clear() + t.addAll(s.live[p.ID]) + t.addAll(live.contents()) + t.addAll(spillLive[p.ID]) + for _, v := range phis { + a := v.Args[i] + if s.values[a.ID].needSlot { + t.add(a.ID) + } + if spill := s.values[a.ID].spill; spill != nil { + //TODO: remove? Subsumed by SpillUse? + t.add(spill.ID) + } + } + if t.size() == len(s.live[p.ID]) { + continue + } + // grow p's live set + s.live[p.ID] = append(s.live[p.ID][:0], t.contents()...) + changed = true + } + } + + if !changed { + break + } + } + if s.f.pass.debug > stackDebug { + for _, b := range s.f.Blocks { + fmt.Printf("stacklive %s %v\n", b, s.live[b.ID]) + } + } +} + +func (f *Func) getHome(vid ID) Location { + if int(vid) >= len(f.RegAlloc) { + return nil + } + return f.RegAlloc[vid] +} + +func (f *Func) setHome(v *Value, loc Location) { + for v.ID >= ID(len(f.RegAlloc)) { + f.RegAlloc = append(f.RegAlloc, nil) + } + f.RegAlloc[v.ID] = loc +} + +func (s *stackAllocState) buildInterferenceGraph() { + f := s.f + if n := f.NumValues(); cap(s.interfere) >= n { + s.interfere = s.interfere[:n] + } else { + s.interfere = make([][]ID, n) + } + live := f.newSparseSet(f.NumValues()) + defer f.retSparseSet(live) + for _, b := range f.Blocks { + // Propagate liveness backwards to the start of the block. + // Two values interfere if one is defined while the other is live. + live.clear() + live.addAll(s.live[b.ID]) + for i := len(b.Values) - 1; i >= 0; i-- { + v := b.Values[i] + if s.values[v.ID].needSlot { + live.remove(v.ID) + for _, id := range live.contents() { + // Note: args can have different types and still interfere + // (with each other or with other values). See issue 23522. + if s.values[v.ID].typ.Compare(s.values[id].typ) == types.CMPeq || hasAnyArgOp(v) || s.values[id].isArg { + s.interfere[v.ID] = append(s.interfere[v.ID], id) + s.interfere[id] = append(s.interfere[id], v.ID) + } + } + } + for _, a := range v.Args { + if s.values[a.ID].needSlot { + live.add(a.ID) + } + } + if hasAnyArgOp(v) && s.values[v.ID].needSlot { + // OpArg is an input argument which is pre-spilled. + // We add back v.ID here because we want this value + // to appear live even before this point. Being live + // all the way to the start of the entry block prevents other + // values from being allocated to the same slot and clobbering + // the input value before we have a chance to load it. + + // TODO(register args) this is apparently not wrong for register args -- is it necessary? + live.add(v.ID) + } + } + } + if f.pass.debug > stackDebug { + for vid, i := range s.interfere { + if len(i) > 0 { + fmt.Printf("v%d interferes with", vid) + for _, x := range i { + fmt.Printf(" v%d", x) + } + fmt.Println() + } + } + } +} + +func hasAnyArgOp(v *Value) bool { + return v.Op == OpArg || v.Op == OpArgIntReg || v.Op == OpArgFloatReg +} |