summaryrefslogtreecommitdiffstats
path: root/src/cmd/compile/internal/ssa/branchelim.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/cmd/compile/internal/ssa/branchelim.go')
-rw-r--r--src/cmd/compile/internal/ssa/branchelim.go470
1 files changed, 470 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ssa/branchelim.go b/src/cmd/compile/internal/ssa/branchelim.go
new file mode 100644
index 0000000..f16959d
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/branchelim.go
@@ -0,0 +1,470 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+import "cmd/internal/src"
+
+// branchelim tries to eliminate branches by
+// generating CondSelect instructions.
+//
+// Search for basic blocks that look like
+//
+// bb0 bb0
+// | \ / \
+// | bb1 or bb1 bb2 <- trivial if/else blocks
+// | / \ /
+// bb2 bb3
+//
+// where the intermediate blocks are mostly empty (with no side-effects);
+// rewrite Phis in the postdominator as CondSelects.
+func branchelim(f *Func) {
+ // FIXME: add support for lowering CondSelects on more architectures
+ switch f.Config.arch {
+ case "arm64", "ppc64le", "ppc64", "amd64", "wasm", "loong64":
+ // implemented
+ default:
+ return
+ }
+
+ // Find all the values used in computing the address of any load.
+ // Typically these values have operations like AddPtr, Lsh64x64, etc.
+ loadAddr := f.newSparseSet(f.NumValues())
+ defer f.retSparseSet(loadAddr)
+ for _, b := range f.Blocks {
+ for _, v := range b.Values {
+ switch v.Op {
+ case OpLoad, OpAtomicLoad8, OpAtomicLoad32, OpAtomicLoad64, OpAtomicLoadPtr, OpAtomicLoadAcq32, OpAtomicLoadAcq64:
+ loadAddr.add(v.Args[0].ID)
+ case OpMove:
+ loadAddr.add(v.Args[1].ID)
+ }
+ }
+ }
+ po := f.postorder()
+ for {
+ n := loadAddr.size()
+ for _, b := range po {
+ for i := len(b.Values) - 1; i >= 0; i-- {
+ v := b.Values[i]
+ if !loadAddr.contains(v.ID) {
+ continue
+ }
+ for _, a := range v.Args {
+ if a.Type.IsInteger() || a.Type.IsPtr() || a.Type.IsUnsafePtr() {
+ loadAddr.add(a.ID)
+ }
+ }
+ }
+ }
+ if loadAddr.size() == n {
+ break
+ }
+ }
+
+ change := true
+ for change {
+ change = false
+ for _, b := range f.Blocks {
+ change = elimIf(f, loadAddr, b) || elimIfElse(f, loadAddr, b) || change
+ }
+ }
+}
+
+func canCondSelect(v *Value, arch string, loadAddr *sparseSet) bool {
+ if loadAddr.contains(v.ID) {
+ // The result of the soon-to-be conditional move is used to compute a load address.
+ // We want to avoid generating a conditional move in this case
+ // because the load address would now be data-dependent on the condition.
+ // Previously it would only be control-dependent on the condition, which is faster
+ // if the branch predicts well (or possibly even if it doesn't, if the load will
+ // be an expensive cache miss).
+ // See issue #26306.
+ return false
+ }
+ if arch == "loong64" {
+ // We should not generate conditional moves if neither of the arguments is constant zero,
+ // because it requires three instructions (OR, MASKEQZ, MASKNEZ) and will increase the
+ // register pressure.
+ if !(v.Args[0].isGenericIntConst() && v.Args[0].AuxInt == 0) &&
+ !(v.Args[1].isGenericIntConst() && v.Args[1].AuxInt == 0) {
+ return false
+ }
+ }
+ // For now, stick to simple scalars that fit in registers
+ switch {
+ case v.Type.Size() > v.Block.Func.Config.RegSize:
+ return false
+ case v.Type.IsPtrShaped():
+ return true
+ case v.Type.IsInteger():
+ if arch == "amd64" && v.Type.Size() < 2 {
+ // amd64 doesn't support CMOV with byte registers
+ return false
+ }
+ return true
+ default:
+ return false
+ }
+}
+
+// elimIf converts the one-way branch starting at dom in f to a conditional move if possible.
+// loadAddr is a set of values which are used to compute the address of a load.
+// Those values are exempt from CMOV generation.
+func elimIf(f *Func, loadAddr *sparseSet, dom *Block) bool {
+ // See if dom is an If with one arm that
+ // is trivial and succeeded by the other
+ // successor of dom.
+ if dom.Kind != BlockIf || dom.Likely != BranchUnknown {
+ return false
+ }
+ var simple, post *Block
+ for i := range dom.Succs {
+ bb, other := dom.Succs[i].Block(), dom.Succs[i^1].Block()
+ if isLeafPlain(bb) && bb.Succs[0].Block() == other {
+ simple = bb
+ post = other
+ break
+ }
+ }
+ if simple == nil || len(post.Preds) != 2 || post == dom {
+ return false
+ }
+
+ // We've found our diamond CFG of blocks.
+ // Now decide if fusing 'simple' into dom+post
+ // looks profitable.
+
+ // Check that there are Phis, and that all of them
+ // can be safely rewritten to CondSelect.
+ hasphis := false
+ for _, v := range post.Values {
+ if v.Op == OpPhi {
+ hasphis = true
+ if !canCondSelect(v, f.Config.arch, loadAddr) {
+ return false
+ }
+ }
+ }
+ if !hasphis {
+ return false
+ }
+
+ // Pick some upper bound for the number of instructions
+ // we'd be willing to execute just to generate a dead
+ // argument to CondSelect. In the worst case, this is
+ // the number of useless instructions executed.
+ const maxfuseinsts = 2
+
+ if len(simple.Values) > maxfuseinsts || !canSpeculativelyExecute(simple) {
+ return false
+ }
+
+ // Replace Phi instructions in b with CondSelect instructions
+ swap := (post.Preds[0].Block() == dom) != (dom.Succs[0].Block() == post)
+ for _, v := range post.Values {
+ if v.Op != OpPhi {
+ continue
+ }
+ v.Op = OpCondSelect
+ if swap {
+ v.Args[0], v.Args[1] = v.Args[1], v.Args[0]
+ }
+ v.AddArg(dom.Controls[0])
+ }
+
+ // Put all of the instructions into 'dom'
+ // and update the CFG appropriately.
+ dom.Kind = post.Kind
+ dom.CopyControls(post)
+ dom.Aux = post.Aux
+ dom.Succs = append(dom.Succs[:0], post.Succs...)
+ for i := range dom.Succs {
+ e := dom.Succs[i]
+ e.b.Preds[e.i].b = dom
+ }
+
+ // Try really hard to preserve statement marks attached to blocks.
+ simplePos := simple.Pos
+ postPos := post.Pos
+ simpleStmt := simplePos.IsStmt() == src.PosIsStmt
+ postStmt := postPos.IsStmt() == src.PosIsStmt
+
+ for _, v := range simple.Values {
+ v.Block = dom
+ }
+ for _, v := range post.Values {
+ v.Block = dom
+ }
+
+ // findBlockPos determines if b contains a stmt-marked value
+ // that has the same line number as the Pos for b itself.
+ // (i.e. is the position on b actually redundant?)
+ findBlockPos := func(b *Block) bool {
+ pos := b.Pos
+ for _, v := range b.Values {
+ // See if there is a stmt-marked value already that matches simple.Pos (and perhaps post.Pos)
+ if pos.SameFileAndLine(v.Pos) && v.Pos.IsStmt() == src.PosIsStmt {
+ return true
+ }
+ }
+ return false
+ }
+ if simpleStmt {
+ simpleStmt = !findBlockPos(simple)
+ if !simpleStmt && simplePos.SameFileAndLine(postPos) {
+ postStmt = false
+ }
+
+ }
+ if postStmt {
+ postStmt = !findBlockPos(post)
+ }
+
+ // If simpleStmt and/or postStmt are still true, then try harder
+ // to find the corresponding statement marks new homes.
+
+ // setBlockPos determines if b contains a can-be-statement value
+ // that has the same line number as the Pos for b itself, and
+ // puts a statement mark on it, and returns whether it succeeded
+ // in this operation.
+ setBlockPos := func(b *Block) bool {
+ pos := b.Pos
+ for _, v := range b.Values {
+ if pos.SameFileAndLine(v.Pos) && !isPoorStatementOp(v.Op) {
+ v.Pos = v.Pos.WithIsStmt()
+ return true
+ }
+ }
+ return false
+ }
+ // If necessary and possible, add a mark to a value in simple
+ if simpleStmt {
+ if setBlockPos(simple) && simplePos.SameFileAndLine(postPos) {
+ postStmt = false
+ }
+ }
+ // If necessary and possible, add a mark to a value in post
+ if postStmt {
+ postStmt = !setBlockPos(post)
+ }
+
+ // Before giving up (this was added because it helps), try the end of "dom", and if that is not available,
+ // try the values in the successor block if it is uncomplicated.
+ if postStmt {
+ if dom.Pos.IsStmt() != src.PosIsStmt {
+ dom.Pos = postPos
+ } else {
+ // Try the successor block
+ if len(dom.Succs) == 1 && len(dom.Succs[0].Block().Preds) == 1 {
+ succ := dom.Succs[0].Block()
+ for _, v := range succ.Values {
+ if isPoorStatementOp(v.Op) {
+ continue
+ }
+ if postPos.SameFileAndLine(v.Pos) {
+ v.Pos = v.Pos.WithIsStmt()
+ }
+ postStmt = false
+ break
+ }
+ // If postStmt still true, tag the block itself if possible
+ if postStmt && succ.Pos.IsStmt() != src.PosIsStmt {
+ succ.Pos = postPos
+ }
+ }
+ }
+ }
+
+ dom.Values = append(dom.Values, simple.Values...)
+ dom.Values = append(dom.Values, post.Values...)
+
+ // Trash 'post' and 'simple'
+ clobberBlock(post)
+ clobberBlock(simple)
+
+ f.invalidateCFG()
+ return true
+}
+
+// is this a BlockPlain with one predecessor?
+func isLeafPlain(b *Block) bool {
+ return b.Kind == BlockPlain && len(b.Preds) == 1
+}
+
+func clobberBlock(b *Block) {
+ b.Values = nil
+ b.Preds = nil
+ b.Succs = nil
+ b.Aux = nil
+ b.ResetControls()
+ b.Likely = BranchUnknown
+ b.Kind = BlockInvalid
+}
+
+// elimIfElse converts the two-way branch starting at dom in f to a conditional move if possible.
+// loadAddr is a set of values which are used to compute the address of a load.
+// Those values are exempt from CMOV generation.
+func elimIfElse(f *Func, loadAddr *sparseSet, b *Block) bool {
+ // See if 'b' ends in an if/else: it should
+ // have two successors, both of which are BlockPlain
+ // and succeeded by the same block.
+ if b.Kind != BlockIf || b.Likely != BranchUnknown {
+ return false
+ }
+ yes, no := b.Succs[0].Block(), b.Succs[1].Block()
+ if !isLeafPlain(yes) || len(yes.Values) > 1 || !canSpeculativelyExecute(yes) {
+ return false
+ }
+ if !isLeafPlain(no) || len(no.Values) > 1 || !canSpeculativelyExecute(no) {
+ return false
+ }
+ if b.Succs[0].Block().Succs[0].Block() != b.Succs[1].Block().Succs[0].Block() {
+ return false
+ }
+ // block that postdominates the if/else
+ post := b.Succs[0].Block().Succs[0].Block()
+ if len(post.Preds) != 2 || post == b {
+ return false
+ }
+ hasphis := false
+ for _, v := range post.Values {
+ if v.Op == OpPhi {
+ hasphis = true
+ if !canCondSelect(v, f.Config.arch, loadAddr) {
+ return false
+ }
+ }
+ }
+ if !hasphis {
+ return false
+ }
+
+ // Don't generate CondSelects if branch is cheaper.
+ if !shouldElimIfElse(no, yes, post, f.Config.arch) {
+ return false
+ }
+
+ // now we're committed: rewrite each Phi as a CondSelect
+ swap := post.Preds[0].Block() != b.Succs[0].Block()
+ for _, v := range post.Values {
+ if v.Op != OpPhi {
+ continue
+ }
+ v.Op = OpCondSelect
+ if swap {
+ v.Args[0], v.Args[1] = v.Args[1], v.Args[0]
+ }
+ v.AddArg(b.Controls[0])
+ }
+
+ // Move the contents of all of these
+ // blocks into 'b' and update CFG edges accordingly
+ b.Kind = post.Kind
+ b.CopyControls(post)
+ b.Aux = post.Aux
+ b.Succs = append(b.Succs[:0], post.Succs...)
+ for i := range b.Succs {
+ e := b.Succs[i]
+ e.b.Preds[e.i].b = b
+ }
+ for i := range post.Values {
+ post.Values[i].Block = b
+ }
+ for i := range yes.Values {
+ yes.Values[i].Block = b
+ }
+ for i := range no.Values {
+ no.Values[i].Block = b
+ }
+ b.Values = append(b.Values, yes.Values...)
+ b.Values = append(b.Values, no.Values...)
+ b.Values = append(b.Values, post.Values...)
+
+ // trash post, yes, and no
+ clobberBlock(yes)
+ clobberBlock(no)
+ clobberBlock(post)
+
+ f.invalidateCFG()
+ return true
+}
+
+// shouldElimIfElse reports whether estimated cost of eliminating branch
+// is lower than threshold.
+func shouldElimIfElse(no, yes, post *Block, arch string) bool {
+ switch arch {
+ default:
+ return true
+ case "amd64":
+ const maxcost = 2
+ phi := 0
+ other := 0
+ for _, v := range post.Values {
+ if v.Op == OpPhi {
+ // Each phi results in CondSelect, which lowers into CMOV,
+ // CMOV has latency >1 on most CPUs.
+ phi++
+ }
+ for _, x := range v.Args {
+ if x.Block == no || x.Block == yes {
+ other++
+ }
+ }
+ }
+ cost := phi * 1
+ if phi > 1 {
+ // If we have more than 1 phi and some values in post have args
+ // in yes or no blocks, we may have to recalculate condition, because
+ // those args may clobber flags. For now assume that all operations clobber flags.
+ cost += other * 1
+ }
+ return cost < maxcost
+ }
+}
+
+// canSpeculativelyExecute reports whether every value in the block can
+// be evaluated without causing any observable side effects (memory
+// accesses, panics and so on) except for execution time changes. It
+// also ensures that the block does not contain any phis which we can't
+// speculatively execute.
+// Warning: this function cannot currently detect values that represent
+// instructions the execution of which need to be guarded with CPU
+// hardware feature checks. See issue #34950.
+func canSpeculativelyExecute(b *Block) bool {
+ // don't fuse memory ops, Phi ops, divides (can panic),
+ // or anything else with side-effects
+ for _, v := range b.Values {
+ if v.Op == OpPhi || isDivMod(v.Op) || isPtrArithmetic(v.Op) || v.Type.IsMemory() ||
+ v.MemoryArg() != nil || opcodeTable[v.Op].hasSideEffects {
+ return false
+ }
+ }
+ return true
+}
+
+func isDivMod(op Op) bool {
+ switch op {
+ case OpDiv8, OpDiv8u, OpDiv16, OpDiv16u,
+ OpDiv32, OpDiv32u, OpDiv64, OpDiv64u, OpDiv128u,
+ OpDiv32F, OpDiv64F,
+ OpMod8, OpMod8u, OpMod16, OpMod16u,
+ OpMod32, OpMod32u, OpMod64, OpMod64u:
+ return true
+ default:
+ return false
+ }
+}
+
+func isPtrArithmetic(op Op) bool {
+ // Pointer arithmetic can't be speculatively executed because the result
+ // may be an invalid pointer (if, for example, the condition is that the
+ // base pointer is not nil). See issue 56990.
+ switch op {
+ case OpOffPtr, OpAddPtr, OpSubPtr:
+ return true
+ default:
+ return false
+ }
+}