Diffstat (limited to 'src/cmd/compile/internal/ssa/fuse.go')
-rw-r--r--  src/cmd/compile/internal/ssa/fuse.go  333
1 file changed, 333 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ssa/fuse.go b/src/cmd/compile/internal/ssa/fuse.go
new file mode 100644
index 0000000..6d3fb70
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/fuse.go
@@ -0,0 +1,333 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+import (
+ "cmd/internal/src"
+ "fmt"
+)
+
+// fuseEarly runs fuse(f, fuseTypePlain|fuseTypeIntInRange).
+func fuseEarly(f *Func) { fuse(f, fuseTypePlain|fuseTypeIntInRange) }
+
+// fuseLate runs fuse(f, fuseTypePlain|fuseTypeIf|fuseTypeBranchRedirect).
+func fuseLate(f *Func) { fuse(f, fuseTypePlain|fuseTypeIf|fuseTypeBranchRedirect) }
+
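+// fuseType is a bit set selecting which rewrites fuse applies.
+// fuseTypePlain, fuseTypeIf, fuseTypeIntInRange, fuseTypeBranchRedirect and
+// fuseTypeShortCircuit correspond to fuseBlockPlain, fuseBlockIf,
+// fuseIntegerComparisons, fuseBranchRedirect and shortcircuitBlock
+// respectively (see the dispatch in fuse below).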
+type fuseType uint8
+
+const (
+ fuseTypePlain fuseType = 1 << iota
+ fuseTypeIf
+ fuseTypeIntInRange
+ fuseTypeBranchRedirect
+ fuseTypeShortCircuit
+)
+
+// fuse simplifies control flow by joining basic blocks.
+func fuse(f *Func, typ fuseType) {
+ for changed := true; changed; {
+ changed = false
+ // Be sure to avoid quadratic behavior in fuseBlockPlain. See issue 13554.
+ // Previously this was dealt with using backwards iteration; now fuseBlockPlain
+ // handles large runs of blocks.
+ for i := len(f.Blocks) - 1; i >= 0; i-- {
+ b := f.Blocks[i]
+ if typ&fuseTypeIf != 0 {
+ changed = fuseBlockIf(b) || changed
+ }
+ if typ&fuseTypeIntInRange != 0 {
+ changed = fuseIntegerComparisons(b) || changed
+ }
+ if typ&fuseTypePlain != 0 {
+ changed = fuseBlockPlain(b) || changed
+ }
+ if typ&fuseTypeShortCircuit != 0 {
+ changed = shortcircuitBlock(b) || changed
+ }
+ }
+
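+ // fuseBranchRedirect works on the whole function rather than a single
+ // block, so it runs once per iteration of the outer loop instead of
+ // inside the per-block loop above.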
+ if typ&fuseTypeBranchRedirect != 0 {
+ changed = fuseBranchRedirect(f) || changed
+ }
+ if changed {
+ f.invalidateCFG()
+ }
+ }
+}
+
+// fuseBlockIf handles the following cases where s0 and s1 are empty blocks.
+//
+//   b        b        b      b
+//  \ / \ /  | \  /   \ / |  | |
+//   s0  s1  |  s1    s0 |  | |
+//    \ /    | /       \ |  | |
+//     ss    ss         ss  ss
+//
+// If all Phi ops in ss have identical variables for slots corresponding to
+// s0, s1 and b then the branch can be dropped.
+// This optimization often comes up in switch statements with multiple
+// expressions in a case clause:
+//
+// switch n {
+// case 1,2,3: return 4
+// }
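+//
+// Here each comparison of n against 1, 2 and 3 branches through an empty
+// block into the same "return 4" block, so the Phi slots for those empty
+// blocks are typically identical and the branches can be dropped.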
+//
+// TODO: If ss doesn't contain any OpPhis, aren't s0 and s1 dead code anyway?
+func fuseBlockIf(b *Block) bool {
+ if b.Kind != BlockIf {
+ return false
+ }
+ // It doesn't matter how many Preds s0 or s1 have.
+ var ss0, ss1 *Block
+ s0 := b.Succs[0].b
+ i0 := b.Succs[0].i
+ if s0.Kind != BlockPlain || !isEmpty(s0) {
+ s0, ss0 = b, s0
+ } else {
+ ss0 = s0.Succs[0].b
+ i0 = s0.Succs[0].i
+ }
+ s1 := b.Succs[1].b
+ i1 := b.Succs[1].i
+ if s1.Kind != BlockPlain || !isEmpty(s1) {
+ s1, ss1 = b, s1
+ } else {
+ ss1 = s1.Succs[0].b
+ i1 = s1.Succs[0].i
+ }
+ if ss0 != ss1 {
+ if s0.Kind == BlockPlain && isEmpty(s0) && s1.Kind == BlockPlain && isEmpty(s1) {
+ // Two special cases where s0 and s1 are both empty and one of them also serves as the join block ss.
+ if s0 == ss1 {
+ s0, ss0 = b, ss1
+ } else if ss0 == s1 {
+ s1, ss1 = b, ss0
+ } else {
+ return false
+ }
+ } else {
+ return false
+ }
+ }
+ ss := ss0
+
+ // s0 and s1 are set to b when the corresponding block is missing
+ // (2nd, 3rd and 4th case in the figure above).
+
+ for _, v := range ss.Values {
+ if v.Op == OpPhi && v.Uses > 0 && v.Args[i0] != v.Args[i1] {
+ return false
+ }
+ }
+
+ // We do not need to redirect the Preds of s0 and s1 to ss;
+ // the following optimization will do this.
+ b.removeEdge(0)
+ if s0 != b && len(s0.Preds) == 0 {
+ s0.removeEdge(0)
+ // Move any (dead) values in s0 to b,
+ // where they will be eliminated by the next deadcode pass.
+ for _, v := range s0.Values {
+ v.Block = b
+ }
+ b.Values = append(b.Values, s0.Values...)
+ // Clear s0.
+ s0.Kind = BlockInvalid
+ s0.Values = nil
+ s0.Succs = nil
+ s0.Preds = nil
+ }
+
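+ // After removeEdge(0), b has a single remaining successor edge
+ // (the old Succs[1] path), so it can become a plain block and its
+ // control value is no longer needed.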
+ b.Kind = BlockPlain
+ b.Likely = BranchUnknown
+ b.ResetControls()
+ // The values in b may now be dead code; clearing them promptly can
+ // expose new optimization opportunities.
+ // First collect the removable dead values into the slice walkValues.
+ // Then, before resetting each dead value, push its arguments onto
+ // walkValues, because those arguments may become dead as well.
+ walkValues := []*Value{}
+ for _, v := range b.Values {
+ if v.Uses == 0 && v.removeable() {
+ walkValues = append(walkValues, v)
+ }
+ }
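+ // Process the worklist: reset each value that is still dead, and push
+ // its arguments, since resetting may have made them dead too.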
+ for len(walkValues) != 0 {
+ v := walkValues[len(walkValues)-1]
+ walkValues = walkValues[:len(walkValues)-1]
+ if v.Uses == 0 && v.removeable() {
+ walkValues = append(walkValues, v.Args...)
+ v.reset(OpInvalid)
+ }
+ }
+ return true
+}
+
+// isEmpty reports whether b contains no live values.
+// The check is conservative: a value whose use count is positive is
+// treated as live even if all of its uses are themselves dead.
+func isEmpty(b *Block) bool {
+ for _, v := range b.Values {
+ if v.Uses > 0 || v.Op.IsCall() || v.Op.HasSideEffects() || v.Type.IsVoid() {
+ return false
+ }
+ }
+ return true
+}
+
+// fuseBlockPlain handles a run of blocks with length >= 2,
+// whose interior blocks each have a single predecessor and a single successor.
+// b must be BlockPlain, so it may be any block in the run except the
+// last (a block with multiple successors cannot be BlockPlain).
+// Simple cycles are detected and avoided; the whole run is merged into
+// the run's final block.
+func fuseBlockPlain(b *Block) bool {
+ if b.Kind != BlockPlain {
+ return false
+ }
+
+ c := b.Succs[0].b
+ if len(c.Preds) != 1 || c == b { // At least 2 distinct blocks.
+ return false
+ }
+
+ // find earliest block in run. Avoid simple cycles.
+ for len(b.Preds) == 1 && b.Preds[0].b != c && b.Preds[0].b.Kind == BlockPlain {
+ b = b.Preds[0].b
+ }
+
+ // find latest block in run. Still beware of simple cycles.
+ for {
+ if c.Kind != BlockPlain {
+ break
+ } // Has exactly 1 successor
+ cNext := c.Succs[0].b
+ if cNext == b {
+ break
+ } // not a cycle
+ if len(cNext.Preds) != 1 {
+ break
+ } // no other incoming edge
+ c = cNext
+ }
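+ // b is now the first block of the run and c the last; every block in
+ // the run except b has exactly one predecessor, and every block except
+ // c is BlockPlain with exactly one successor.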
+
+ // Try to preserve any statement marks on the ends of blocks; move values to c.
+ var b_next *Block
+ for bx := b; bx != c; bx = b_next {
+ // For each bx with an end-of-block statement marker,
+ // try to move it to a value in the next block,
+ // or to the next block's end, if possible.
+ b_next = bx.Succs[0].b
+ if bx.Pos.IsStmt() == src.PosIsStmt {
+ l := bx.Pos.Line() // looking for another place to mark for line l
+ outOfOrder := false
+ for _, v := range b_next.Values {
+ if v.Pos.IsStmt() == src.PosNotStmt {
+ continue
+ }
+ if l == v.Pos.Line() { // Found a Value with same line, therefore done.
+ v.Pos = v.Pos.WithIsStmt()
+ l = 0
+ break
+ }
+ if l < v.Pos.Line() {
+ // The order of values within a block is not specified, so out-of-order values here are not interesting,
+ // but they do all come before the end of the block, so this disqualifies attaching the mark to the end of b_next.
+ outOfOrder = true
+ }
+ }
+ if l != 0 && !outOfOrder && (b_next.Pos.Line() == l || b_next.Pos.IsStmt() != src.PosIsStmt) {
+ b_next.Pos = bx.Pos.WithIsStmt()
+ }
+ }
+ // move all of bx's values to c (note containing loop excludes c)
+ for _, v := range bx.Values {
+ v.Block = c
+ }
+ }
+
+ // Compute the total number of values and find the largest value slice in the run, to maximize chance of storage reuse.
+ total := 0
+ totalBeforeMax := 0 // number of elements preceding the maximum block (i.e. its position in the result).
+ max_b := b // block with maximum capacity
+
+ for bx := b; ; bx = bx.Succs[0].b {
+ if cap(bx.Values) > cap(max_b.Values) {
+ totalBeforeMax = total
+ max_b = bx
+ }
+ total += len(bx.Values)
+ if bx == c {
+ break
+ }
+ }
+
+ // Use c's storage if fused blocks will fit, else use the max if that will fit, else allocate new storage.
+
+ // Take care to avoid c.Values pointing to b.valstorage.
+ // See golang.org/issue/18602.
+
+ // It's important to keep the elements in the same order; maintenance of
+ // debugging information depends on the order of *Values in Blocks.
+ // This can also cause changes in the order (which may affect other
+ // optimizations and possibly compiler output) for 32-vs-64 bit compilation
+ // platforms (word size affects allocation bucket size affects slice capacity).
+
+ // figure out what slice will hold the values,
+ // pre-position the destination elements if not allocating new storage
+ var t []*Value
+ if total <= len(c.valstorage) {
+ t = c.valstorage[:total]
+ max_b = c
+ totalBeforeMax = total - len(c.Values)
+ copy(t[totalBeforeMax:], c.Values)
+ } else if total <= cap(max_b.Values) { // in place, somewhere
+ t = max_b.Values[0:total]
+ copy(t[totalBeforeMax:], max_b.Values)
+ } else {
+ t = make([]*Value, total)
+ max_b = nil
+ }
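+ // max_b == nil means no existing Values slice is being reused, so the
+ // copy loop below copies every block's values into the fresh slice t.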
+
+ // copy the values
+ copyTo := 0
+ for bx := b; ; bx = bx.Succs[0].b {
+ if bx != max_b {
+ copy(t[copyTo:], bx.Values)
+ } else if copyTo != totalBeforeMax { // trust but verify.
+ panic(fmt.Errorf("totalBeforeMax (%d) != copyTo (%d), max_b=%v, b=%v, c=%v", totalBeforeMax, copyTo, max_b, b, c))
+ }
+ if bx == c {
+ break
+ }
+ copyTo += len(bx.Values)
+ }
+ c.Values = t
+
+ // replace b->c edge with preds(b) -> c
+ c.predstorage[0] = Edge{}
+ if len(b.Preds) > len(b.predstorage) {
+ c.Preds = b.Preds
+ } else {
+ c.Preds = append(c.predstorage[:0], b.Preds...)
+ }
+ for i, e := range c.Preds {
+ p := e.b
+ p.Succs[e.i] = Edge{c, i}
+ }
+ f := b.Func
+ if f.Entry == b {
+ f.Entry = c
+ }
+
+ // trash b's fields, just in case
+ for bx := b; bx != c; bx = b_next {
+ b_next = bx.Succs[0].b
+
+ bx.Kind = BlockInvalid
+ bx.Values = nil
+ bx.Preds = nil
+ bx.Succs = nil
+ }
+ return true
+}