diff options
Diffstat (limited to 'src/cmd/compile/internal/ssa/branchelim.go')
-rw-r--r-- | src/cmd/compile/internal/ssa/branchelim.go | 470 |
1 files changed, 470 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ssa/branchelim.go b/src/cmd/compile/internal/ssa/branchelim.go new file mode 100644 index 0000000..f16959d --- /dev/null +++ b/src/cmd/compile/internal/ssa/branchelim.go @@ -0,0 +1,470 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ssa + +import "cmd/internal/src" + +// branchelim tries to eliminate branches by +// generating CondSelect instructions. +// +// Search for basic blocks that look like +// +// bb0 bb0 +// | \ / \ +// | bb1 or bb1 bb2 <- trivial if/else blocks +// | / \ / +// bb2 bb3 +// +// where the intermediate blocks are mostly empty (with no side-effects); +// rewrite Phis in the postdominator as CondSelects. +func branchelim(f *Func) { + // FIXME: add support for lowering CondSelects on more architectures + switch f.Config.arch { + case "arm64", "ppc64le", "ppc64", "amd64", "wasm", "loong64": + // implemented + default: + return + } + + // Find all the values used in computing the address of any load. + // Typically these values have operations like AddPtr, Lsh64x64, etc. + loadAddr := f.newSparseSet(f.NumValues()) + defer f.retSparseSet(loadAddr) + for _, b := range f.Blocks { + for _, v := range b.Values { + switch v.Op { + case OpLoad, OpAtomicLoad8, OpAtomicLoad32, OpAtomicLoad64, OpAtomicLoadPtr, OpAtomicLoadAcq32, OpAtomicLoadAcq64: + loadAddr.add(v.Args[0].ID) + case OpMove: + loadAddr.add(v.Args[1].ID) + } + } + } + po := f.postorder() + for { + n := loadAddr.size() + for _, b := range po { + for i := len(b.Values) - 1; i >= 0; i-- { + v := b.Values[i] + if !loadAddr.contains(v.ID) { + continue + } + for _, a := range v.Args { + if a.Type.IsInteger() || a.Type.IsPtr() || a.Type.IsUnsafePtr() { + loadAddr.add(a.ID) + } + } + } + } + if loadAddr.size() == n { + break + } + } + + change := true + for change { + change = false + for _, b := range f.Blocks { + change = elimIf(f, loadAddr, b) || elimIfElse(f, loadAddr, b) || change + } + } +} + +func canCondSelect(v *Value, arch string, loadAddr *sparseSet) bool { + if loadAddr.contains(v.ID) { + // The result of the soon-to-be conditional move is used to compute a load address. + // We want to avoid generating a conditional move in this case + // because the load address would now be data-dependent on the condition. + // Previously it would only be control-dependent on the condition, which is faster + // if the branch predicts well (or possibly even if it doesn't, if the load will + // be an expensive cache miss). + // See issue #26306. + return false + } + if arch == "loong64" { + // We should not generate conditional moves if neither of the arguments is constant zero, + // because it requires three instructions (OR, MASKEQZ, MASKNEZ) and will increase the + // register pressure. + if !(v.Args[0].isGenericIntConst() && v.Args[0].AuxInt == 0) && + !(v.Args[1].isGenericIntConst() && v.Args[1].AuxInt == 0) { + return false + } + } + // For now, stick to simple scalars that fit in registers + switch { + case v.Type.Size() > v.Block.Func.Config.RegSize: + return false + case v.Type.IsPtrShaped(): + return true + case v.Type.IsInteger(): + if arch == "amd64" && v.Type.Size() < 2 { + // amd64 doesn't support CMOV with byte registers + return false + } + return true + default: + return false + } +} + +// elimIf converts the one-way branch starting at dom in f to a conditional move if possible. +// loadAddr is a set of values which are used to compute the address of a load. +// Those values are exempt from CMOV generation. +func elimIf(f *Func, loadAddr *sparseSet, dom *Block) bool { + // See if dom is an If with one arm that + // is trivial and succeeded by the other + // successor of dom. + if dom.Kind != BlockIf || dom.Likely != BranchUnknown { + return false + } + var simple, post *Block + for i := range dom.Succs { + bb, other := dom.Succs[i].Block(), dom.Succs[i^1].Block() + if isLeafPlain(bb) && bb.Succs[0].Block() == other { + simple = bb + post = other + break + } + } + if simple == nil || len(post.Preds) != 2 || post == dom { + return false + } + + // We've found our diamond CFG of blocks. + // Now decide if fusing 'simple' into dom+post + // looks profitable. + + // Check that there are Phis, and that all of them + // can be safely rewritten to CondSelect. + hasphis := false + for _, v := range post.Values { + if v.Op == OpPhi { + hasphis = true + if !canCondSelect(v, f.Config.arch, loadAddr) { + return false + } + } + } + if !hasphis { + return false + } + + // Pick some upper bound for the number of instructions + // we'd be willing to execute just to generate a dead + // argument to CondSelect. In the worst case, this is + // the number of useless instructions executed. + const maxfuseinsts = 2 + + if len(simple.Values) > maxfuseinsts || !canSpeculativelyExecute(simple) { + return false + } + + // Replace Phi instructions in b with CondSelect instructions + swap := (post.Preds[0].Block() == dom) != (dom.Succs[0].Block() == post) + for _, v := range post.Values { + if v.Op != OpPhi { + continue + } + v.Op = OpCondSelect + if swap { + v.Args[0], v.Args[1] = v.Args[1], v.Args[0] + } + v.AddArg(dom.Controls[0]) + } + + // Put all of the instructions into 'dom' + // and update the CFG appropriately. + dom.Kind = post.Kind + dom.CopyControls(post) + dom.Aux = post.Aux + dom.Succs = append(dom.Succs[:0], post.Succs...) + for i := range dom.Succs { + e := dom.Succs[i] + e.b.Preds[e.i].b = dom + } + + // Try really hard to preserve statement marks attached to blocks. + simplePos := simple.Pos + postPos := post.Pos + simpleStmt := simplePos.IsStmt() == src.PosIsStmt + postStmt := postPos.IsStmt() == src.PosIsStmt + + for _, v := range simple.Values { + v.Block = dom + } + for _, v := range post.Values { + v.Block = dom + } + + // findBlockPos determines if b contains a stmt-marked value + // that has the same line number as the Pos for b itself. + // (i.e. is the position on b actually redundant?) + findBlockPos := func(b *Block) bool { + pos := b.Pos + for _, v := range b.Values { + // See if there is a stmt-marked value already that matches simple.Pos (and perhaps post.Pos) + if pos.SameFileAndLine(v.Pos) && v.Pos.IsStmt() == src.PosIsStmt { + return true + } + } + return false + } + if simpleStmt { + simpleStmt = !findBlockPos(simple) + if !simpleStmt && simplePos.SameFileAndLine(postPos) { + postStmt = false + } + + } + if postStmt { + postStmt = !findBlockPos(post) + } + + // If simpleStmt and/or postStmt are still true, then try harder + // to find the corresponding statement marks new homes. + + // setBlockPos determines if b contains a can-be-statement value + // that has the same line number as the Pos for b itself, and + // puts a statement mark on it, and returns whether it succeeded + // in this operation. + setBlockPos := func(b *Block) bool { + pos := b.Pos + for _, v := range b.Values { + if pos.SameFileAndLine(v.Pos) && !isPoorStatementOp(v.Op) { + v.Pos = v.Pos.WithIsStmt() + return true + } + } + return false + } + // If necessary and possible, add a mark to a value in simple + if simpleStmt { + if setBlockPos(simple) && simplePos.SameFileAndLine(postPos) { + postStmt = false + } + } + // If necessary and possible, add a mark to a value in post + if postStmt { + postStmt = !setBlockPos(post) + } + + // Before giving up (this was added because it helps), try the end of "dom", and if that is not available, + // try the values in the successor block if it is uncomplicated. + if postStmt { + if dom.Pos.IsStmt() != src.PosIsStmt { + dom.Pos = postPos + } else { + // Try the successor block + if len(dom.Succs) == 1 && len(dom.Succs[0].Block().Preds) == 1 { + succ := dom.Succs[0].Block() + for _, v := range succ.Values { + if isPoorStatementOp(v.Op) { + continue + } + if postPos.SameFileAndLine(v.Pos) { + v.Pos = v.Pos.WithIsStmt() + } + postStmt = false + break + } + // If postStmt still true, tag the block itself if possible + if postStmt && succ.Pos.IsStmt() != src.PosIsStmt { + succ.Pos = postPos + } + } + } + } + + dom.Values = append(dom.Values, simple.Values...) + dom.Values = append(dom.Values, post.Values...) + + // Trash 'post' and 'simple' + clobberBlock(post) + clobberBlock(simple) + + f.invalidateCFG() + return true +} + +// is this a BlockPlain with one predecessor? +func isLeafPlain(b *Block) bool { + return b.Kind == BlockPlain && len(b.Preds) == 1 +} + +func clobberBlock(b *Block) { + b.Values = nil + b.Preds = nil + b.Succs = nil + b.Aux = nil + b.ResetControls() + b.Likely = BranchUnknown + b.Kind = BlockInvalid +} + +// elimIfElse converts the two-way branch starting at dom in f to a conditional move if possible. +// loadAddr is a set of values which are used to compute the address of a load. +// Those values are exempt from CMOV generation. +func elimIfElse(f *Func, loadAddr *sparseSet, b *Block) bool { + // See if 'b' ends in an if/else: it should + // have two successors, both of which are BlockPlain + // and succeeded by the same block. + if b.Kind != BlockIf || b.Likely != BranchUnknown { + return false + } + yes, no := b.Succs[0].Block(), b.Succs[1].Block() + if !isLeafPlain(yes) || len(yes.Values) > 1 || !canSpeculativelyExecute(yes) { + return false + } + if !isLeafPlain(no) || len(no.Values) > 1 || !canSpeculativelyExecute(no) { + return false + } + if b.Succs[0].Block().Succs[0].Block() != b.Succs[1].Block().Succs[0].Block() { + return false + } + // block that postdominates the if/else + post := b.Succs[0].Block().Succs[0].Block() + if len(post.Preds) != 2 || post == b { + return false + } + hasphis := false + for _, v := range post.Values { + if v.Op == OpPhi { + hasphis = true + if !canCondSelect(v, f.Config.arch, loadAddr) { + return false + } + } + } + if !hasphis { + return false + } + + // Don't generate CondSelects if branch is cheaper. + if !shouldElimIfElse(no, yes, post, f.Config.arch) { + return false + } + + // now we're committed: rewrite each Phi as a CondSelect + swap := post.Preds[0].Block() != b.Succs[0].Block() + for _, v := range post.Values { + if v.Op != OpPhi { + continue + } + v.Op = OpCondSelect + if swap { + v.Args[0], v.Args[1] = v.Args[1], v.Args[0] + } + v.AddArg(b.Controls[0]) + } + + // Move the contents of all of these + // blocks into 'b' and update CFG edges accordingly + b.Kind = post.Kind + b.CopyControls(post) + b.Aux = post.Aux + b.Succs = append(b.Succs[:0], post.Succs...) + for i := range b.Succs { + e := b.Succs[i] + e.b.Preds[e.i].b = b + } + for i := range post.Values { + post.Values[i].Block = b + } + for i := range yes.Values { + yes.Values[i].Block = b + } + for i := range no.Values { + no.Values[i].Block = b + } + b.Values = append(b.Values, yes.Values...) + b.Values = append(b.Values, no.Values...) + b.Values = append(b.Values, post.Values...) + + // trash post, yes, and no + clobberBlock(yes) + clobberBlock(no) + clobberBlock(post) + + f.invalidateCFG() + return true +} + +// shouldElimIfElse reports whether estimated cost of eliminating branch +// is lower than threshold. +func shouldElimIfElse(no, yes, post *Block, arch string) bool { + switch arch { + default: + return true + case "amd64": + const maxcost = 2 + phi := 0 + other := 0 + for _, v := range post.Values { + if v.Op == OpPhi { + // Each phi results in CondSelect, which lowers into CMOV, + // CMOV has latency >1 on most CPUs. + phi++ + } + for _, x := range v.Args { + if x.Block == no || x.Block == yes { + other++ + } + } + } + cost := phi * 1 + if phi > 1 { + // If we have more than 1 phi and some values in post have args + // in yes or no blocks, we may have to recalculate condition, because + // those args may clobber flags. For now assume that all operations clobber flags. + cost += other * 1 + } + return cost < maxcost + } +} + +// canSpeculativelyExecute reports whether every value in the block can +// be evaluated without causing any observable side effects (memory +// accesses, panics and so on) except for execution time changes. It +// also ensures that the block does not contain any phis which we can't +// speculatively execute. +// Warning: this function cannot currently detect values that represent +// instructions the execution of which need to be guarded with CPU +// hardware feature checks. See issue #34950. +func canSpeculativelyExecute(b *Block) bool { + // don't fuse memory ops, Phi ops, divides (can panic), + // or anything else with side-effects + for _, v := range b.Values { + if v.Op == OpPhi || isDivMod(v.Op) || isPtrArithmetic(v.Op) || v.Type.IsMemory() || + v.MemoryArg() != nil || opcodeTable[v.Op].hasSideEffects { + return false + } + } + return true +} + +func isDivMod(op Op) bool { + switch op { + case OpDiv8, OpDiv8u, OpDiv16, OpDiv16u, + OpDiv32, OpDiv32u, OpDiv64, OpDiv64u, OpDiv128u, + OpDiv32F, OpDiv64F, + OpMod8, OpMod8u, OpMod16, OpMod16u, + OpMod32, OpMod32u, OpMod64, OpMod64u: + return true + default: + return false + } +} + +func isPtrArithmetic(op Op) bool { + // Pointer arithmetic can't be speculatively executed because the result + // may be an invalid pointer (if, for example, the condition is that the + // base pointer is not nil). See issue 56990. + switch op { + case OpOffPtr, OpAddPtr, OpSubPtr: + return true + default: + return false + } +} |