summaryrefslogtreecommitdiffstats
path: root/src/cmd/compile/internal/ssa/loopreschedchecks.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/cmd/compile/internal/ssa/loopreschedchecks.go')
-rw-r--r--src/cmd/compile/internal/ssa/loopreschedchecks.go512
1 files changed, 512 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ssa/loopreschedchecks.go b/src/cmd/compile/internal/ssa/loopreschedchecks.go
new file mode 100644
index 0000000..7c56523
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/loopreschedchecks.go
@@ -0,0 +1,512 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+import (
+ "cmd/compile/internal/types"
+ "fmt"
+)
+
+// an edgeMem records a backedge, together with the memory
+// phi functions at the target of the backedge that must
+// be updated when a rescheduling check replaces the backedge.
+type edgeMem struct {
+ e Edge
+ m *Value // phi for memory at dest of e
+}
+
+// a rewriteTarget is a value-argindex pair indicating
+// where a rewrite is applied. Note that this is for values,
+// not for block controls, because block controls are not targets
+// for the rewrites performed in inserting rescheduling checks.
+type rewriteTarget struct {
+ v *Value
+ i int
+}
+
+type rewrite struct {
+ before, after *Value // before is the expected value before rewrite, after is the new value installed.
+ rewrites []rewriteTarget // all the targets for this rewrite.
+}
+
+func (r *rewrite) String() string {
+ s := "\n\tbefore=" + r.before.String() + ", after=" + r.after.String()
+ for _, rw := range r.rewrites {
+ s += ", (i=" + fmt.Sprint(rw.i) + ", v=" + rw.v.LongString() + ")"
+ }
+ s += "\n"
+ return s
+}
+
+// insertLoopReschedChecks inserts rescheduling checks on loop backedges.
+func insertLoopReschedChecks(f *Func) {
+ // TODO: when split information is recorded in export data, insert checks only on backedges that can be reached on a split-call-free path.
+
+ // Loop reschedule checks compare the stack pointer with
+ // the per-g stack bound. If the pointer appears invalid,
+ // that means a reschedule check is needed.
+ //
+ // Steps:
+ // 1. locate backedges.
+ // 2. Record memory definitions at block end so that
+ // the SSA graph for mem can be properly modified.
+ // 3. Ensure that phi functions that will-be-needed for mem
+ // are present in the graph, initially with trivial inputs.
+ // 4. Record all to-be-modified uses of mem;
+ // apply modifications (split into two steps to simplify and
+ // avoided nagging order-dependencies).
+ // 5. Rewrite backedges to include reschedule check,
+ // and modify destination phi function appropriately with new
+ // definitions for mem.
+
+ if f.NoSplit { // nosplit functions don't reschedule.
+ return
+ }
+
+ backedges := backedges(f)
+ if len(backedges) == 0 { // no backedges means no rescheduling checks.
+ return
+ }
+
+ lastMems := findLastMems(f)
+
+ idom := f.Idom()
+ po := f.postorder()
+ // The ordering in the dominator tree matters; it's important that
+ // the walk of the dominator tree also be a preorder (i.e., a node is
+ // visited only after all its non-backedge predecessors have been visited).
+ sdom := newSparseOrderedTree(f, idom, po)
+
+ if f.pass.debug > 1 {
+ fmt.Printf("before %s = %s\n", f.Name, sdom.treestructure(f.Entry))
+ }
+
+ tofixBackedges := []edgeMem{}
+
+ for _, e := range backedges { // TODO: could filter here by calls in loops, if declared and inferred nosplit are recorded in export data.
+ tofixBackedges = append(tofixBackedges, edgeMem{e, nil})
+ }
+
+ // It's possible that there is no memory state (no global/pointer loads/stores or calls)
+ if lastMems[f.Entry.ID] == nil {
+ lastMems[f.Entry.ID] = f.Entry.NewValue0(f.Entry.Pos, OpInitMem, types.TypeMem)
+ }
+
+ memDefsAtBlockEnds := f.Cache.allocValueSlice(f.NumBlocks()) // For each block, the mem def seen at its bottom. Could be from earlier block.
+ defer f.Cache.freeValueSlice(memDefsAtBlockEnds)
+
+ // Propagate last mem definitions forward through successor blocks.
+ for i := len(po) - 1; i >= 0; i-- {
+ b := po[i]
+ mem := lastMems[b.ID]
+ for j := 0; mem == nil; j++ { // if there's no def, then there's no phi, so the visible mem is identical in all predecessors.
+ // loop because there might be backedges that haven't been visited yet.
+ mem = memDefsAtBlockEnds[b.Preds[j].b.ID]
+ }
+ memDefsAtBlockEnds[b.ID] = mem
+ if f.pass.debug > 2 {
+ fmt.Printf("memDefsAtBlockEnds[%s] = %s\n", b, mem)
+ }
+ }
+
+ // Maps from block to newly-inserted phi function in block.
+ newmemphis := make(map[*Block]rewrite)
+
+ // Insert phi functions as necessary for future changes to flow graph.
+ for i, emc := range tofixBackedges {
+ e := emc.e
+ h := e.b
+
+ // find the phi function for the memory input at "h", if there is one.
+ var headerMemPhi *Value // look for header mem phi
+
+ for _, v := range h.Values {
+ if v.Op == OpPhi && v.Type.IsMemory() {
+ headerMemPhi = v
+ }
+ }
+
+ if headerMemPhi == nil {
+ // if the header is nil, make a trivial phi from the dominator
+ mem0 := memDefsAtBlockEnds[idom[h.ID].ID]
+ headerMemPhi = newPhiFor(h, mem0)
+ newmemphis[h] = rewrite{before: mem0, after: headerMemPhi}
+ addDFphis(mem0, h, h, f, memDefsAtBlockEnds, newmemphis, sdom)
+
+ }
+ tofixBackedges[i].m = headerMemPhi
+
+ }
+ if f.pass.debug > 0 {
+ for b, r := range newmemphis {
+ fmt.Printf("before b=%s, rewrite=%s\n", b, r.String())
+ }
+ }
+
+ // dfPhiTargets notes inputs to phis in dominance frontiers that should not
+ // be rewritten as part of the dominated children of some outer rewrite.
+ dfPhiTargets := make(map[rewriteTarget]bool)
+
+ rewriteNewPhis(f.Entry, f.Entry, f, memDefsAtBlockEnds, newmemphis, dfPhiTargets, sdom)
+
+ if f.pass.debug > 0 {
+ for b, r := range newmemphis {
+ fmt.Printf("after b=%s, rewrite=%s\n", b, r.String())
+ }
+ }
+
+ // Apply collected rewrites.
+ for _, r := range newmemphis {
+ for _, rw := range r.rewrites {
+ rw.v.SetArg(rw.i, r.after)
+ }
+ }
+
+ // Rewrite backedges to include reschedule checks.
+ for _, emc := range tofixBackedges {
+ e := emc.e
+ headerMemPhi := emc.m
+ h := e.b
+ i := e.i
+ p := h.Preds[i]
+ bb := p.b
+ mem0 := headerMemPhi.Args[i]
+ // bb e->p h,
+ // Because we're going to insert a rare-call, make sure the
+ // looping edge still looks likely.
+ likely := BranchLikely
+ if p.i != 0 {
+ likely = BranchUnlikely
+ }
+ if bb.Kind != BlockPlain { // backedges can be unconditional. e.g., if x { something; continue }
+ bb.Likely = likely
+ }
+
+ // rewrite edge to include reschedule check
+ // existing edges:
+ //
+ // bb.Succs[p.i] == Edge{h, i}
+ // h.Preds[i] == p == Edge{bb,p.i}
+ //
+ // new block(s):
+ // test:
+ // if sp < g.limit { goto sched }
+ // goto join
+ // sched:
+ // mem1 := call resched (mem0)
+ // goto join
+ // join:
+ // mem2 := phi(mem0, mem1)
+ // goto h
+ //
+ // and correct arg i of headerMemPhi and headerCtrPhi
+ //
+ // EXCEPT: join block containing only phi functions is bad
+ // for the register allocator. Therefore, there is no
+ // join, and branches targeting join must instead target
+ // the header, and the other phi functions within header are
+ // adjusted for the additional input.
+
+ test := f.NewBlock(BlockIf)
+ sched := f.NewBlock(BlockPlain)
+
+ test.Pos = bb.Pos
+ sched.Pos = bb.Pos
+
+ // if sp < g.limit { goto sched }
+ // goto header
+
+ cfgtypes := &f.Config.Types
+ pt := cfgtypes.Uintptr
+ g := test.NewValue1(bb.Pos, OpGetG, pt, mem0)
+ sp := test.NewValue0(bb.Pos, OpSP, pt)
+ cmpOp := OpLess64U
+ if pt.Size() == 4 {
+ cmpOp = OpLess32U
+ }
+ limaddr := test.NewValue1I(bb.Pos, OpOffPtr, pt, 2*pt.Size(), g)
+ lim := test.NewValue2(bb.Pos, OpLoad, pt, limaddr, mem0)
+ cmp := test.NewValue2(bb.Pos, cmpOp, cfgtypes.Bool, sp, lim)
+ test.SetControl(cmp)
+
+ // if true, goto sched
+ test.AddEdgeTo(sched)
+
+ // if false, rewrite edge to header.
+ // do NOT remove+add, because that will perturb all the other phi functions
+ // as well as messing up other edges to the header.
+ test.Succs = append(test.Succs, Edge{h, i})
+ h.Preds[i] = Edge{test, 1}
+ headerMemPhi.SetArg(i, mem0)
+
+ test.Likely = BranchUnlikely
+
+ // sched:
+ // mem1 := call resched (mem0)
+ // goto header
+ resched := f.fe.Syslook("goschedguarded")
+ call := sched.NewValue1A(bb.Pos, OpStaticCall, types.TypeResultMem, StaticAuxCall(resched, bb.Func.ABIDefault.ABIAnalyzeTypes(nil, nil, nil)), mem0)
+ mem1 := sched.NewValue1I(bb.Pos, OpSelectN, types.TypeMem, 0, call)
+ sched.AddEdgeTo(h)
+ headerMemPhi.AddArg(mem1)
+
+ bb.Succs[p.i] = Edge{test, 0}
+ test.Preds = append(test.Preds, Edge{bb, p.i})
+
+ // Must correct all the other phi functions in the header for new incoming edge.
+ // Except for mem phis, it will be the same value seen on the original
+ // backedge at index i.
+ for _, v := range h.Values {
+ if v.Op == OpPhi && v != headerMemPhi {
+ v.AddArg(v.Args[i])
+ }
+ }
+ }
+
+ f.invalidateCFG()
+
+ if f.pass.debug > 1 {
+ sdom = newSparseTree(f, f.Idom())
+ fmt.Printf("after %s = %s\n", f.Name, sdom.treestructure(f.Entry))
+ }
+}
+
+// newPhiFor inserts a new Phi function into b,
+// with all inputs set to v.
+func newPhiFor(b *Block, v *Value) *Value {
+ phiV := b.NewValue0(b.Pos, OpPhi, v.Type)
+
+ for range b.Preds {
+ phiV.AddArg(v)
+ }
+ return phiV
+}
+
+// rewriteNewPhis updates newphis[h] to record all places where the new phi function inserted
+// in block h will replace a previous definition. Block b is the block currently being processed;
+// if b has its own phi definition then it takes the place of h.
+// defsForUses provides information about other definitions of the variable that are present
+// (and if nil, indicates that the variable is no longer live)
+// sdom must yield a preorder of the flow graph if recursively walked, root-to-children.
+// The result of newSparseOrderedTree with order supplied by a dfs-postorder satisfies this
+// requirement.
+func rewriteNewPhis(h, b *Block, f *Func, defsForUses []*Value, newphis map[*Block]rewrite, dfPhiTargets map[rewriteTarget]bool, sdom SparseTree) {
+ // If b is a block with a new phi, then a new rewrite applies below it in the dominator tree.
+ if _, ok := newphis[b]; ok {
+ h = b
+ }
+ change := newphis[h]
+ x := change.before
+ y := change.after
+
+ // Apply rewrites to this block
+ if x != nil { // don't waste time on the common case of no definition.
+ p := &change.rewrites
+ for _, v := range b.Values {
+ if v == y { // don't rewrite self -- phi inputs are handled below.
+ continue
+ }
+ for i, w := range v.Args {
+ if w != x {
+ continue
+ }
+ tgt := rewriteTarget{v, i}
+
+ // It's possible dominated control flow will rewrite this instead.
+ // Visiting in preorder (a property of how sdom was constructed)
+ // ensures that these are seen in the proper order.
+ if dfPhiTargets[tgt] {
+ continue
+ }
+ *p = append(*p, tgt)
+ if f.pass.debug > 1 {
+ fmt.Printf("added block target for h=%v, b=%v, x=%v, y=%v, tgt.v=%s, tgt.i=%d\n",
+ h, b, x, y, v, i)
+ }
+ }
+ }
+
+ // Rewrite appropriate inputs of phis reached in successors
+ // in dominance frontier, self, and dominated.
+ // If the variable def reaching uses in b is itself defined in b, then the new phi function
+ // does not reach the successors of b. (This assumes a bit about the structure of the
+ // phi use-def graph, but it's true for memory.)
+ if dfu := defsForUses[b.ID]; dfu != nil && dfu.Block != b {
+ for _, e := range b.Succs {
+ s := e.b
+
+ for _, v := range s.Values {
+ if v.Op == OpPhi && v.Args[e.i] == x {
+ tgt := rewriteTarget{v, e.i}
+ *p = append(*p, tgt)
+ dfPhiTargets[tgt] = true
+ if f.pass.debug > 1 {
+ fmt.Printf("added phi target for h=%v, b=%v, s=%v, x=%v, y=%v, tgt.v=%s, tgt.i=%d\n",
+ h, b, s, x, y, v.LongString(), e.i)
+ }
+ break
+ }
+ }
+ }
+ }
+ newphis[h] = change
+ }
+
+ for c := sdom[b.ID].child; c != nil; c = sdom[c.ID].sibling {
+ rewriteNewPhis(h, c, f, defsForUses, newphis, dfPhiTargets, sdom) // TODO: convert to explicit stack from recursion.
+ }
+}
+
+// addDFphis creates new trivial phis that are necessary to correctly reflect (within SSA)
+// a new definition for variable "x" inserted at h (usually but not necessarily a phi).
+// These new phis can only occur at the dominance frontier of h; block s is in the dominance
+// frontier of h if h does not strictly dominate s and if s is a successor of a block b where
+// either b = h or h strictly dominates b.
+// These newly created phis are themselves new definitions that may require addition of their
+// own trivial phi functions in their own dominance frontier, and this is handled recursively.
+func addDFphis(x *Value, h, b *Block, f *Func, defForUses []*Value, newphis map[*Block]rewrite, sdom SparseTree) {
+ oldv := defForUses[b.ID]
+ if oldv != x { // either a new definition replacing x, or nil if it is proven that there are no uses reachable from b
+ return
+ }
+ idom := f.Idom()
+outer:
+ for _, e := range b.Succs {
+ s := e.b
+ // check phi functions in the dominance frontier
+ if sdom.isAncestor(h, s) {
+ continue // h dominates s, successor of b, therefore s is not in the frontier.
+ }
+ if _, ok := newphis[s]; ok {
+ continue // successor s of b already has a new phi function, so there is no need to add another.
+ }
+ if x != nil {
+ for _, v := range s.Values {
+ if v.Op == OpPhi && v.Args[e.i] == x {
+ continue outer // successor s of b has an old phi function, so there is no need to add another.
+ }
+ }
+ }
+
+ old := defForUses[idom[s.ID].ID] // new phi function is correct-but-redundant, combining value "old" on all inputs.
+ headerPhi := newPhiFor(s, old)
+ // the new phi will replace "old" in block s and all blocks dominated by s.
+ newphis[s] = rewrite{before: old, after: headerPhi} // record new phi, to have inputs labeled "old" rewritten to "headerPhi"
+ addDFphis(old, s, s, f, defForUses, newphis, sdom) // the new definition may also create new phi functions.
+ }
+ for c := sdom[b.ID].child; c != nil; c = sdom[c.ID].sibling {
+ addDFphis(x, h, c, f, defForUses, newphis, sdom) // TODO: convert to explicit stack from recursion.
+ }
+}
+
+// findLastMems maps block ids to last memory-output op in a block, if any.
+func findLastMems(f *Func) []*Value {
+
+ var stores []*Value
+ lastMems := f.Cache.allocValueSlice(f.NumBlocks())
+ defer f.Cache.freeValueSlice(lastMems)
+ storeUse := f.newSparseSet(f.NumValues())
+ defer f.retSparseSet(storeUse)
+ for _, b := range f.Blocks {
+ // Find all the stores in this block. Categorize their uses:
+ // storeUse contains stores which are used by a subsequent store.
+ storeUse.clear()
+ stores = stores[:0]
+ var memPhi *Value
+ for _, v := range b.Values {
+ if v.Op == OpPhi {
+ if v.Type.IsMemory() {
+ memPhi = v
+ }
+ continue
+ }
+ if v.Type.IsMemory() {
+ stores = append(stores, v)
+ for _, a := range v.Args {
+ if a.Block == b && a.Type.IsMemory() {
+ storeUse.add(a.ID)
+ }
+ }
+ }
+ }
+ if len(stores) == 0 {
+ lastMems[b.ID] = memPhi
+ continue
+ }
+
+ // find last store in the block
+ var last *Value
+ for _, v := range stores {
+ if storeUse.contains(v.ID) {
+ continue
+ }
+ if last != nil {
+ b.Fatalf("two final stores - simultaneous live stores %s %s", last, v)
+ }
+ last = v
+ }
+ if last == nil {
+ b.Fatalf("no last store found - cycle?")
+ }
+
+ // If this is a tuple containing a mem, select just
+ // the mem. This will generate ops we don't need, but
+ // it's the easiest thing to do.
+ if last.Type.IsTuple() {
+ last = b.NewValue1(last.Pos, OpSelect1, types.TypeMem, last)
+ } else if last.Type.IsResults() {
+ last = b.NewValue1I(last.Pos, OpSelectN, types.TypeMem, int64(last.Type.NumFields()-1), last)
+ }
+
+ lastMems[b.ID] = last
+ }
+ return lastMems
+}
+
+// mark values
+type markKind uint8
+
+const (
+ notFound markKind = iota // block has not been discovered yet
+ notExplored // discovered and in queue, outedges not processed yet
+ explored // discovered and in queue, outedges processed
+ done // all done, in output ordering
+)
+
+type backedgesState struct {
+ b *Block
+ i int
+}
+
+// backedges returns a slice of successor edges that are back
+// edges. For reducible loops, edge.b is the header.
+func backedges(f *Func) []Edge {
+ edges := []Edge{}
+ mark := make([]markKind, f.NumBlocks())
+ stack := []backedgesState{}
+
+ mark[f.Entry.ID] = notExplored
+ stack = append(stack, backedgesState{f.Entry, 0})
+
+ for len(stack) > 0 {
+ l := len(stack)
+ x := stack[l-1]
+ if x.i < len(x.b.Succs) {
+ e := x.b.Succs[x.i]
+ stack[l-1].i++
+ s := e.b
+ if mark[s.ID] == notFound {
+ mark[s.ID] = notExplored
+ stack = append(stack, backedgesState{s, 0})
+ } else if mark[s.ID] == notExplored {
+ edges = append(edges, e)
+ }
+ } else {
+ mark[x.b.ID] = done
+ stack = stack[0 : l-1]
+ }
+ }
+ return edges
+}