diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:23:18 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:23:18 +0000 |
commit | 43a123c1ae6613b3efeed291fa552ecd909d3acf (patch) | |
tree | fd92518b7024bc74031f78a1cf9e454b65e73665 /src/cmd/compile/internal/ssagen/phi.go | |
parent | Initial commit. (diff) | |
download | golang-1.20-43a123c1ae6613b3efeed291fa552ecd909d3acf.tar.xz golang-1.20-43a123c1ae6613b3efeed291fa552ecd909d3acf.zip |
Adding upstream version 1.20.14.upstream/1.20.14upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/cmd/compile/internal/ssagen/phi.go')
-rw-r--r-- | src/cmd/compile/internal/ssagen/phi.go | 557 |
1 files changed, 557 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ssagen/phi.go b/src/cmd/compile/internal/ssagen/phi.go new file mode 100644 index 0000000..19b6920 --- /dev/null +++ b/src/cmd/compile/internal/ssagen/phi.go @@ -0,0 +1,557 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ssagen + +import ( + "container/heap" + "fmt" + + "cmd/compile/internal/ir" + "cmd/compile/internal/ssa" + "cmd/compile/internal/types" + "cmd/internal/src" +) + +// This file contains the algorithm to place phi nodes in a function. +// For small functions, we use Braun, Buchwald, Hack, Leißa, Mallon, and Zwinkau. +// https://pp.info.uni-karlsruhe.de/uploads/publikationen/braun13cc.pdf +// For large functions, we use Sreedhar & Gao: A Linear Time Algorithm for Placing Φ-Nodes. +// http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.8.1979&rep=rep1&type=pdf + +const smallBlocks = 500 + +const debugPhi = false + +// fwdRefAux wraps an arbitrary ir.Node as an ssa.Aux for use with OpFwdref. +type fwdRefAux struct { + _ [0]func() // ensure ir.Node isn't compared for equality + N ir.Node +} + +func (fwdRefAux) CanBeAnSSAAux() {} + +// insertPhis finds all the places in the function where a phi is +// necessary and inserts them. +// Uses FwdRef ops to find all uses of variables, and s.defvars to find +// all definitions. +// Phi values are inserted, and all FwdRefs are changed to a Copy +// of the appropriate phi or definition. +// TODO: make this part of cmd/compile/internal/ssa somehow? +func (s *state) insertPhis() { + if len(s.f.Blocks) <= smallBlocks { + sps := simplePhiState{s: s, f: s.f, defvars: s.defvars} + sps.insertPhis() + return + } + ps := phiState{s: s, f: s.f, defvars: s.defvars} + ps.insertPhis() +} + +type phiState struct { + s *state // SSA state + f *ssa.Func // function to work on + defvars []map[ir.Node]*ssa.Value // defined variables at end of each block + + varnum map[ir.Node]int32 // variable numbering + + // properties of the dominator tree + idom []*ssa.Block // dominator parents + tree []domBlock // dominator child+sibling + level []int32 // level in dominator tree (0 = root or unreachable, 1 = children of root, ...) + + // scratch locations + priq blockHeap // priority queue of blocks, higher level (toward leaves) = higher priority + q []*ssa.Block // inner loop queue + queued *sparseSet // has been put in q + hasPhi *sparseSet // has a phi + hasDef *sparseSet // has a write of the variable we're processing + + // miscellaneous + placeholder *ssa.Value // value to use as a "not set yet" placeholder. +} + +func (s *phiState) insertPhis() { + if debugPhi { + fmt.Println(s.f.String()) + } + + // Find all the variables for which we need to match up reads & writes. + // This step prunes any basic-block-only variables from consideration. + // Generate a numbering for these variables. + s.varnum = map[ir.Node]int32{} + var vars []ir.Node + var vartypes []*types.Type + for _, b := range s.f.Blocks { + for _, v := range b.Values { + if v.Op != ssa.OpFwdRef { + continue + } + var_ := v.Aux.(fwdRefAux).N + + // Optimization: look back 1 block for the definition. + if len(b.Preds) == 1 { + c := b.Preds[0].Block() + if w := s.defvars[c.ID][var_]; w != nil { + v.Op = ssa.OpCopy + v.Aux = nil + v.AddArg(w) + continue + } + } + + if _, ok := s.varnum[var_]; ok { + continue + } + s.varnum[var_] = int32(len(vartypes)) + if debugPhi { + fmt.Printf("var%d = %v\n", len(vartypes), var_) + } + vars = append(vars, var_) + vartypes = append(vartypes, v.Type) + } + } + + if len(vartypes) == 0 { + return + } + + // Find all definitions of the variables we need to process. + // defs[n] contains all the blocks in which variable number n is assigned. + defs := make([][]*ssa.Block, len(vartypes)) + for _, b := range s.f.Blocks { + for var_ := range s.defvars[b.ID] { // TODO: encode defvars some other way (explicit ops)? make defvars[n] a slice instead of a map. + if n, ok := s.varnum[var_]; ok { + defs[n] = append(defs[n], b) + } + } + } + + // Make dominator tree. + s.idom = s.f.Idom() + s.tree = make([]domBlock, s.f.NumBlocks()) + for _, b := range s.f.Blocks { + p := s.idom[b.ID] + if p != nil { + s.tree[b.ID].sibling = s.tree[p.ID].firstChild + s.tree[p.ID].firstChild = b + } + } + // Compute levels in dominator tree. + // With parent pointers we can do a depth-first walk without + // any auxiliary storage. + s.level = make([]int32, s.f.NumBlocks()) + b := s.f.Entry +levels: + for { + if p := s.idom[b.ID]; p != nil { + s.level[b.ID] = s.level[p.ID] + 1 + if debugPhi { + fmt.Printf("level %s = %d\n", b, s.level[b.ID]) + } + } + if c := s.tree[b.ID].firstChild; c != nil { + b = c + continue + } + for { + if c := s.tree[b.ID].sibling; c != nil { + b = c + continue levels + } + b = s.idom[b.ID] + if b == nil { + break levels + } + } + } + + // Allocate scratch locations. + s.priq.level = s.level + s.q = make([]*ssa.Block, 0, s.f.NumBlocks()) + s.queued = newSparseSet(s.f.NumBlocks()) + s.hasPhi = newSparseSet(s.f.NumBlocks()) + s.hasDef = newSparseSet(s.f.NumBlocks()) + s.placeholder = s.s.entryNewValue0(ssa.OpUnknown, types.TypeInvalid) + + // Generate phi ops for each variable. + for n := range vartypes { + s.insertVarPhis(n, vars[n], defs[n], vartypes[n]) + } + + // Resolve FwdRefs to the correct write or phi. + s.resolveFwdRefs() + + // Erase variable numbers stored in AuxInt fields of phi ops. They are no longer needed. + for _, b := range s.f.Blocks { + for _, v := range b.Values { + if v.Op == ssa.OpPhi { + v.AuxInt = 0 + } + // Any remaining FwdRefs are dead code. + if v.Op == ssa.OpFwdRef { + v.Op = ssa.OpUnknown + v.Aux = nil + } + } + } +} + +func (s *phiState) insertVarPhis(n int, var_ ir.Node, defs []*ssa.Block, typ *types.Type) { + priq := &s.priq + q := s.q + queued := s.queued + queued.clear() + hasPhi := s.hasPhi + hasPhi.clear() + hasDef := s.hasDef + hasDef.clear() + + // Add defining blocks to priority queue. + for _, b := range defs { + priq.a = append(priq.a, b) + hasDef.add(b.ID) + if debugPhi { + fmt.Printf("def of var%d in %s\n", n, b) + } + } + heap.Init(priq) + + // Visit blocks defining variable n, from deepest to shallowest. + for len(priq.a) > 0 { + currentRoot := heap.Pop(priq).(*ssa.Block) + if debugPhi { + fmt.Printf("currentRoot %s\n", currentRoot) + } + // Walk subtree below definition. + // Skip subtrees we've done in previous iterations. + // Find edges exiting tree dominated by definition (the dominance frontier). + // Insert phis at target blocks. + if queued.contains(currentRoot.ID) { + s.s.Fatalf("root already in queue") + } + q = append(q, currentRoot) + queued.add(currentRoot.ID) + for len(q) > 0 { + b := q[len(q)-1] + q = q[:len(q)-1] + if debugPhi { + fmt.Printf(" processing %s\n", b) + } + + currentRootLevel := s.level[currentRoot.ID] + for _, e := range b.Succs { + c := e.Block() + // TODO: if the variable is dead at c, skip it. + if s.level[c.ID] > currentRootLevel { + // a D-edge, or an edge whose target is in currentRoot's subtree. + continue + } + if hasPhi.contains(c.ID) { + continue + } + // Add a phi to block c for variable n. + hasPhi.add(c.ID) + v := c.NewValue0I(currentRoot.Pos, ssa.OpPhi, typ, int64(n)) // TODO: line number right? + // Note: we store the variable number in the phi's AuxInt field. Used temporarily by phi building. + if var_.Op() == ir.ONAME { + s.s.addNamedValue(var_.(*ir.Name), v) + } + for range c.Preds { + v.AddArg(s.placeholder) // Actual args will be filled in by resolveFwdRefs. + } + if debugPhi { + fmt.Printf("new phi for var%d in %s: %s\n", n, c, v) + } + if !hasDef.contains(c.ID) { + // There's now a new definition of this variable in block c. + // Add it to the priority queue to explore. + heap.Push(priq, c) + hasDef.add(c.ID) + } + } + + // Visit children if they have not been visited yet. + for c := s.tree[b.ID].firstChild; c != nil; c = s.tree[c.ID].sibling { + if !queued.contains(c.ID) { + q = append(q, c) + queued.add(c.ID) + } + } + } + } +} + +// resolveFwdRefs links all FwdRef uses up to their nearest dominating definition. +func (s *phiState) resolveFwdRefs() { + // Do a depth-first walk of the dominator tree, keeping track + // of the most-recently-seen value for each variable. + + // Map from variable ID to SSA value at the current point of the walk. + values := make([]*ssa.Value, len(s.varnum)) + for i := range values { + values[i] = s.placeholder + } + + // Stack of work to do. + type stackEntry struct { + b *ssa.Block // block to explore + + // variable/value pair to reinstate on exit + n int32 // variable ID + v *ssa.Value + + // Note: only one of b or n,v will be set. + } + var stk []stackEntry + + stk = append(stk, stackEntry{b: s.f.Entry}) + for len(stk) > 0 { + work := stk[len(stk)-1] + stk = stk[:len(stk)-1] + + b := work.b + if b == nil { + // On exit from a block, this case will undo any assignments done below. + values[work.n] = work.v + continue + } + + // Process phis as new defs. They come before FwdRefs in this block. + for _, v := range b.Values { + if v.Op != ssa.OpPhi { + continue + } + n := int32(v.AuxInt) + // Remember the old assignment so we can undo it when we exit b. + stk = append(stk, stackEntry{n: n, v: values[n]}) + // Record the new assignment. + values[n] = v + } + + // Replace a FwdRef op with the current incoming value for its variable. + for _, v := range b.Values { + if v.Op != ssa.OpFwdRef { + continue + } + n := s.varnum[v.Aux.(fwdRefAux).N] + v.Op = ssa.OpCopy + v.Aux = nil + v.AddArg(values[n]) + } + + // Establish values for variables defined in b. + for var_, v := range s.defvars[b.ID] { + n, ok := s.varnum[var_] + if !ok { + // some variable not live across a basic block boundary. + continue + } + // Remember the old assignment so we can undo it when we exit b. + stk = append(stk, stackEntry{n: n, v: values[n]}) + // Record the new assignment. + values[n] = v + } + + // Replace phi args in successors with the current incoming value. + for _, e := range b.Succs { + c, i := e.Block(), e.Index() + for j := len(c.Values) - 1; j >= 0; j-- { + v := c.Values[j] + if v.Op != ssa.OpPhi { + break // All phis will be at the end of the block during phi building. + } + // Only set arguments that have been resolved. + // For very wide CFGs, this significantly speeds up phi resolution. + // See golang.org/issue/8225. + if w := values[v.AuxInt]; w.Op != ssa.OpUnknown { + v.SetArg(i, w) + } + } + } + + // Walk children in dominator tree. + for c := s.tree[b.ID].firstChild; c != nil; c = s.tree[c.ID].sibling { + stk = append(stk, stackEntry{b: c}) + } + } +} + +// domBlock contains extra per-block information to record the dominator tree. +type domBlock struct { + firstChild *ssa.Block // first child of block in dominator tree + sibling *ssa.Block // next child of parent in dominator tree +} + +// A block heap is used as a priority queue to implement the PiggyBank +// from Sreedhar and Gao. That paper uses an array which is better +// asymptotically but worse in the common case when the PiggyBank +// holds a sparse set of blocks. +type blockHeap struct { + a []*ssa.Block // block IDs in heap + level []int32 // depth in dominator tree (static, used for determining priority) +} + +func (h *blockHeap) Len() int { return len(h.a) } +func (h *blockHeap) Swap(i, j int) { a := h.a; a[i], a[j] = a[j], a[i] } + +func (h *blockHeap) Push(x interface{}) { + v := x.(*ssa.Block) + h.a = append(h.a, v) +} +func (h *blockHeap) Pop() interface{} { + old := h.a + n := len(old) + x := old[n-1] + h.a = old[:n-1] + return x +} +func (h *blockHeap) Less(i, j int) bool { + return h.level[h.a[i].ID] > h.level[h.a[j].ID] +} + +// TODO: stop walking the iterated domininance frontier when +// the variable is dead. Maybe detect that by checking if the +// node we're on is reverse dominated by all the reads? +// Reverse dominated by the highest common successor of all the reads? + +// copy of ../ssa/sparseset.go +// TODO: move this file to ../ssa, then use sparseSet there. +type sparseSet struct { + dense []ssa.ID + sparse []int32 +} + +// newSparseSet returns a sparseSet that can represent +// integers between 0 and n-1. +func newSparseSet(n int) *sparseSet { + return &sparseSet{dense: nil, sparse: make([]int32, n)} +} + +func (s *sparseSet) contains(x ssa.ID) bool { + i := s.sparse[x] + return i < int32(len(s.dense)) && s.dense[i] == x +} + +func (s *sparseSet) add(x ssa.ID) { + i := s.sparse[x] + if i < int32(len(s.dense)) && s.dense[i] == x { + return + } + s.dense = append(s.dense, x) + s.sparse[x] = int32(len(s.dense)) - 1 +} + +func (s *sparseSet) clear() { + s.dense = s.dense[:0] +} + +// Variant to use for small functions. +type simplePhiState struct { + s *state // SSA state + f *ssa.Func // function to work on + fwdrefs []*ssa.Value // list of FwdRefs to be processed + defvars []map[ir.Node]*ssa.Value // defined variables at end of each block + reachable []bool // which blocks are reachable +} + +func (s *simplePhiState) insertPhis() { + s.reachable = ssa.ReachableBlocks(s.f) + + // Find FwdRef ops. + for _, b := range s.f.Blocks { + for _, v := range b.Values { + if v.Op != ssa.OpFwdRef { + continue + } + s.fwdrefs = append(s.fwdrefs, v) + var_ := v.Aux.(fwdRefAux).N + if _, ok := s.defvars[b.ID][var_]; !ok { + s.defvars[b.ID][var_] = v // treat FwdDefs as definitions. + } + } + } + + var args []*ssa.Value + +loop: + for len(s.fwdrefs) > 0 { + v := s.fwdrefs[len(s.fwdrefs)-1] + s.fwdrefs = s.fwdrefs[:len(s.fwdrefs)-1] + b := v.Block + var_ := v.Aux.(fwdRefAux).N + if b == s.f.Entry { + // No variable should be live at entry. + s.s.Fatalf("value %v (%v) incorrectly live at entry", var_, v) + } + if !s.reachable[b.ID] { + // This block is dead. + // It doesn't matter what we use here as long as it is well-formed. + v.Op = ssa.OpUnknown + v.Aux = nil + continue + } + // Find variable value on each predecessor. + args = args[:0] + for _, e := range b.Preds { + args = append(args, s.lookupVarOutgoing(e.Block(), v.Type, var_, v.Pos)) + } + + // Decide if we need a phi or not. We need a phi if there + // are two different args (which are both not v). + var w *ssa.Value + for _, a := range args { + if a == v { + continue // self-reference + } + if a == w { + continue // already have this witness + } + if w != nil { + // two witnesses, need a phi value + v.Op = ssa.OpPhi + v.AddArgs(args...) + v.Aux = nil + continue loop + } + w = a // save witness + } + if w == nil { + s.s.Fatalf("no witness for reachable phi %s", v) + } + // One witness. Make v a copy of w. + v.Op = ssa.OpCopy + v.Aux = nil + v.AddArg(w) + } +} + +// lookupVarOutgoing finds the variable's value at the end of block b. +func (s *simplePhiState) lookupVarOutgoing(b *ssa.Block, t *types.Type, var_ ir.Node, line src.XPos) *ssa.Value { + for { + if v := s.defvars[b.ID][var_]; v != nil { + return v + } + // The variable is not defined by b and we haven't looked it up yet. + // If b has exactly one predecessor, loop to look it up there. + // Otherwise, give up and insert a new FwdRef and resolve it later. + if len(b.Preds) != 1 { + break + } + b = b.Preds[0].Block() + if !s.reachable[b.ID] { + // This is rare; it happens with oddly interleaved infinite loops in dead code. + // See issue 19783. + break + } + } + // Generate a FwdRef for the variable and return that. + v := b.NewValue0A(line, ssa.OpFwdRef, t, fwdRefAux{N: var_}) + s.defvars[b.ID][var_] = v + if var_.Op() == ir.ONAME { + s.s.addNamedValue(var_.(*ir.Name), v) + } + s.fwdrefs = append(s.fwdrefs, v) + return v +} |