diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:23:18 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:23:18 +0000 |
commit | 43a123c1ae6613b3efeed291fa552ecd909d3acf (patch) | |
tree | fd92518b7024bc74031f78a1cf9e454b65e73665 /src/cmd/compile/internal/ssa/rewrite.go | |
parent | Initial commit. (diff) | |
download | golang-1.20-43a123c1ae6613b3efeed291fa552ecd909d3acf.tar.xz golang-1.20-43a123c1ae6613b3efeed291fa552ecd909d3acf.zip |
Adding upstream version 1.20.14.upstream/1.20.14upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/cmd/compile/internal/ssa/rewrite.go')
-rw-r--r-- | src/cmd/compile/internal/ssa/rewrite.go | 2045 |
1 files changed, 2045 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go new file mode 100644 index 0000000..f4ac97c --- /dev/null +++ b/src/cmd/compile/internal/ssa/rewrite.go @@ -0,0 +1,2045 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ssa + +import ( + "cmd/compile/internal/base" + "cmd/compile/internal/logopt" + "cmd/compile/internal/types" + "cmd/internal/obj" + "cmd/internal/obj/s390x" + "cmd/internal/objabi" + "cmd/internal/src" + "encoding/binary" + "fmt" + "io" + "math" + "math/bits" + "os" + "path/filepath" +) + +type deadValueChoice bool + +const ( + leaveDeadValues deadValueChoice = false + removeDeadValues = true +) + +// deadcode indicates whether rewrite should try to remove any values that become dead. +func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) { + // repeat rewrites until we find no more rewrites + pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block + pendingLines.clear() + debug := f.pass.debug + if debug > 1 { + fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name) + } + var iters int + var states map[string]bool + for { + change := false + deadChange := false + for _, b := range f.Blocks { + var b0 *Block + if debug > 1 { + b0 = new(Block) + *b0 = *b + b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing + } + for i, c := range b.ControlValues() { + for c.Op == OpCopy { + c = c.Args[0] + b.ReplaceControl(i, c) + } + } + if rb(b) { + change = true + if debug > 1 { + fmt.Printf("rewriting %s -> %s\n", b0.LongString(), b.LongString()) + } + } + for j, v := range b.Values { + var v0 *Value + if debug > 1 { + v0 = new(Value) + *v0 = *v + v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing + } + if v.Uses == 0 && v.removeable() { + if v.Op != OpInvalid && deadcode == removeDeadValues { + // Reset any values that are now unused, so that we decrement + // the use count of all of its arguments. + // Not quite a deadcode pass, because it does not handle cycles. + // But it should help Uses==1 rules to fire. + v.reset(OpInvalid) + deadChange = true + } + // No point rewriting values which aren't used. + continue + } + + vchange := phielimValue(v) + if vchange && debug > 1 { + fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString()) + } + + // Eliminate copy inputs. + // If any copy input becomes unused, mark it + // as invalid and discard its argument. Repeat + // recursively on the discarded argument. + // This phase helps remove phantom "dead copy" uses + // of a value so that a x.Uses==1 rule condition + // fires reliably. + for i, a := range v.Args { + if a.Op != OpCopy { + continue + } + aa := copySource(a) + v.SetArg(i, aa) + // If a, a copy, has a line boundary indicator, attempt to find a new value + // to hold it. The first candidate is the value that will replace a (aa), + // if it shares the same block and line and is eligible. + // The second option is v, which has a as an input. Because aa is earlier in + // the data flow, it is the better choice. + if a.Pos.IsStmt() == src.PosIsStmt { + if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt { + aa.Pos = aa.Pos.WithIsStmt() + } else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt { + v.Pos = v.Pos.WithIsStmt() + } else { + // Record the lost line and look for a new home after all rewrites are complete. + // TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same + // line to appear in more than one block, but only one block is stored, so if both end + // up here, then one will be lost. + pendingLines.set(a.Pos, int32(a.Block.ID)) + } + a.Pos = a.Pos.WithNotStmt() + } + vchange = true + for a.Uses == 0 { + b := a.Args[0] + a.reset(OpInvalid) + a = b + } + } + if vchange && debug > 1 { + fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString()) + } + + // apply rewrite function + if rv(v) { + vchange = true + // If value changed to a poor choice for a statement boundary, move the boundary + if v.Pos.IsStmt() == src.PosIsStmt { + if k := nextGoodStatementIndex(v, j, b); k != j { + v.Pos = v.Pos.WithNotStmt() + b.Values[k].Pos = b.Values[k].Pos.WithIsStmt() + } + } + } + + change = change || vchange + if vchange && debug > 1 { + fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString()) + } + } + } + if !change && !deadChange { + break + } + iters++ + if (iters > 1000 || debug >= 2) && change { + // We've done a suspiciously large number of rewrites (or we're in debug mode). + // As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer + // and the maximum value encountered during make.bash is 12. + // Start checking for cycles. (This is too expensive to do routinely.) + // Note: we avoid this path for deadChange-only iterations, to fix #51639. + if states == nil { + states = make(map[string]bool) + } + h := f.rewriteHash() + if _, ok := states[h]; ok { + // We've found a cycle. + // To diagnose it, set debug to 2 and start again, + // so that we'll print all rules applied until we complete another cycle. + // If debug is already >= 2, we've already done that, so it's time to crash. + if debug < 2 { + debug = 2 + states = make(map[string]bool) + } else { + f.Fatalf("rewrite cycle detected") + } + } + states[h] = true + } + } + // remove clobbered values + for _, b := range f.Blocks { + j := 0 + for i, v := range b.Values { + vl := v.Pos + if v.Op == OpInvalid { + if v.Pos.IsStmt() == src.PosIsStmt { + pendingLines.set(vl, int32(b.ID)) + } + f.freeValue(v) + continue + } + if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) { + pendingLines.remove(vl) + v.Pos = v.Pos.WithIsStmt() + } + if i != j { + b.Values[j] = v + } + j++ + } + if pendingLines.get(b.Pos) == int32(b.ID) { + b.Pos = b.Pos.WithIsStmt() + pendingLines.remove(b.Pos) + } + b.truncateValues(j) + } +} + +// Common functions called from rewriting rules + +func is64BitFloat(t *types.Type) bool { + return t.Size() == 8 && t.IsFloat() +} + +func is32BitFloat(t *types.Type) bool { + return t.Size() == 4 && t.IsFloat() +} + +func is64BitInt(t *types.Type) bool { + return t.Size() == 8 && t.IsInteger() +} + +func is32BitInt(t *types.Type) bool { + return t.Size() == 4 && t.IsInteger() +} + +func is16BitInt(t *types.Type) bool { + return t.Size() == 2 && t.IsInteger() +} + +func is8BitInt(t *types.Type) bool { + return t.Size() == 1 && t.IsInteger() +} + +func isPtr(t *types.Type) bool { + return t.IsPtrShaped() +} + +func isSigned(t *types.Type) bool { + return t.IsSigned() +} + +// mergeSym merges two symbolic offsets. There is no real merging of +// offsets, we just pick the non-nil one. +func mergeSym(x, y Sym) Sym { + if x == nil { + return y + } + if y == nil { + return x + } + panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y)) +} + +func canMergeSym(x, y Sym) bool { + return x == nil || y == nil +} + +// canMergeLoadClobber reports whether the load can be merged into target without +// invalidating the schedule. +// It also checks that the other non-load argument x is something we +// are ok with clobbering. +func canMergeLoadClobber(target, load, x *Value) bool { + // The register containing x is going to get clobbered. + // Don't merge if we still need the value of x. + // We don't have liveness information here, but we can + // approximate x dying with: + // 1) target is x's only use. + // 2) target is not in a deeper loop than x. + if x.Uses != 1 { + return false + } + loopnest := x.Block.Func.loopnest() + loopnest.calculateDepths() + if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) { + return false + } + return canMergeLoad(target, load) +} + +// canMergeLoad reports whether the load can be merged into target without +// invalidating the schedule. +func canMergeLoad(target, load *Value) bool { + if target.Block.ID != load.Block.ID { + // If the load is in a different block do not merge it. + return false + } + + // We can't merge the load into the target if the load + // has more than one use. + if load.Uses != 1 { + return false + } + + mem := load.MemoryArg() + + // We need the load's memory arg to still be alive at target. That + // can't be the case if one of target's args depends on a memory + // state that is a successor of load's memory arg. + // + // For example, it would be invalid to merge load into target in + // the following situation because newmem has killed oldmem + // before target is reached: + // load = read ... oldmem + // newmem = write ... oldmem + // arg0 = read ... newmem + // target = add arg0 load + // + // If the argument comes from a different block then we can exclude + // it immediately because it must dominate load (which is in the + // same block as target). + var args []*Value + for _, a := range target.Args { + if a != load && a.Block.ID == target.Block.ID { + args = append(args, a) + } + } + + // memPreds contains memory states known to be predecessors of load's + // memory state. It is lazily initialized. + var memPreds map[*Value]bool + for i := 0; len(args) > 0; i++ { + const limit = 100 + if i >= limit { + // Give up if we have done a lot of iterations. + return false + } + v := args[len(args)-1] + args = args[:len(args)-1] + if target.Block.ID != v.Block.ID { + // Since target and load are in the same block + // we can stop searching when we leave the block. + continue + } + if v.Op == OpPhi { + // A Phi implies we have reached the top of the block. + // The memory phi, if it exists, is always + // the first logical store in the block. + continue + } + if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() { + // We could handle this situation however it is likely + // to be very rare. + return false + } + if v.Op.SymEffect()&SymAddr != 0 { + // This case prevents an operation that calculates the + // address of a local variable from being forced to schedule + // before its corresponding VarDef. + // See issue 28445. + // v1 = LOAD ... + // v2 = VARDEF + // v3 = LEAQ + // v4 = CMPQ v1 v3 + // We don't want to combine the CMPQ with the load, because + // that would force the CMPQ to schedule before the VARDEF, which + // in turn requires the LEAQ to schedule before the VARDEF. + return false + } + if v.Type.IsMemory() { + if memPreds == nil { + // Initialise a map containing memory states + // known to be predecessors of load's memory + // state. + memPreds = make(map[*Value]bool) + m := mem + const limit = 50 + for i := 0; i < limit; i++ { + if m.Op == OpPhi { + // The memory phi, if it exists, is always + // the first logical store in the block. + break + } + if m.Block.ID != target.Block.ID { + break + } + if !m.Type.IsMemory() { + break + } + memPreds[m] = true + if len(m.Args) == 0 { + break + } + m = m.MemoryArg() + } + } + + // We can merge if v is a predecessor of mem. + // + // For example, we can merge load into target in the + // following scenario: + // x = read ... v + // mem = write ... v + // load = read ... mem + // target = add x load + if memPreds[v] { + continue + } + return false + } + if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem { + // If v takes mem as an input then we know mem + // is valid at this point. + continue + } + for _, a := range v.Args { + if target.Block.ID == a.Block.ID { + args = append(args, a) + } + } + } + + return true +} + +// isSameCall reports whether sym is the same as the given named symbol. +func isSameCall(sym interface{}, name string) bool { + fn := sym.(*AuxCall).Fn + return fn != nil && fn.String() == name +} + +// canLoadUnaligned reports if the architecture supports unaligned load operations. +func canLoadUnaligned(c *Config) bool { + return c.ctxt.Arch.Alignment == 1 +} + +// nlzX returns the number of leading zeros. +func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) } +func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) } +func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) } +func nlz8(x int8) int { return bits.LeadingZeros8(uint8(x)) } + +// ntzX returns the number of trailing zeros. +func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) } +func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) } +func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) } +func ntz8(x int8) int { return bits.TrailingZeros8(uint8(x)) } + +func oneBit(x int64) bool { return x&(x-1) == 0 && x != 0 } +func oneBit8(x int8) bool { return x&(x-1) == 0 && x != 0 } +func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 } +func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 } +func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 } + +// nto returns the number of trailing ones. +func nto(x int64) int64 { + return int64(ntz64(^x)) +} + +// logX returns logarithm of n base 2. +// n must be a positive power of 2 (isPowerOfTwoX returns true). +func log8(n int8) int64 { + return int64(bits.Len8(uint8(n))) - 1 +} +func log16(n int16) int64 { + return int64(bits.Len16(uint16(n))) - 1 +} +func log32(n int32) int64 { + return int64(bits.Len32(uint32(n))) - 1 +} +func log64(n int64) int64 { + return int64(bits.Len64(uint64(n))) - 1 +} + +// log2uint32 returns logarithm in base 2 of uint32(n), with log2(0) = -1. +// Rounds down. +func log2uint32(n int64) int64 { + return int64(bits.Len32(uint32(n))) - 1 +} + +// isPowerOfTwoX functions report whether n is a power of 2. +func isPowerOfTwo8(n int8) bool { + return n > 0 && n&(n-1) == 0 +} +func isPowerOfTwo16(n int16) bool { + return n > 0 && n&(n-1) == 0 +} +func isPowerOfTwo32(n int32) bool { + return n > 0 && n&(n-1) == 0 +} +func isPowerOfTwo64(n int64) bool { + return n > 0 && n&(n-1) == 0 +} + +// isUint64PowerOfTwo reports whether uint64(n) is a power of 2. +func isUint64PowerOfTwo(in int64) bool { + n := uint64(in) + return n > 0 && n&(n-1) == 0 +} + +// isUint32PowerOfTwo reports whether uint32(n) is a power of 2. +func isUint32PowerOfTwo(in int64) bool { + n := uint64(uint32(in)) + return n > 0 && n&(n-1) == 0 +} + +// is32Bit reports whether n can be represented as a signed 32 bit integer. +func is32Bit(n int64) bool { + return n == int64(int32(n)) +} + +// is16Bit reports whether n can be represented as a signed 16 bit integer. +func is16Bit(n int64) bool { + return n == int64(int16(n)) +} + +// is8Bit reports whether n can be represented as a signed 8 bit integer. +func is8Bit(n int64) bool { + return n == int64(int8(n)) +} + +// isU8Bit reports whether n can be represented as an unsigned 8 bit integer. +func isU8Bit(n int64) bool { + return n == int64(uint8(n)) +} + +// isU12Bit reports whether n can be represented as an unsigned 12 bit integer. +func isU12Bit(n int64) bool { + return 0 <= n && n < (1<<12) +} + +// isU16Bit reports whether n can be represented as an unsigned 16 bit integer. +func isU16Bit(n int64) bool { + return n == int64(uint16(n)) +} + +// isU32Bit reports whether n can be represented as an unsigned 32 bit integer. +func isU32Bit(n int64) bool { + return n == int64(uint32(n)) +} + +// is20Bit reports whether n can be represented as a signed 20 bit integer. +func is20Bit(n int64) bool { + return -(1<<19) <= n && n < (1<<19) +} + +// b2i translates a boolean value to 0 or 1 for assigning to auxInt. +func b2i(b bool) int64 { + if b { + return 1 + } + return 0 +} + +// b2i32 translates a boolean value to 0 or 1. +func b2i32(b bool) int32 { + if b { + return 1 + } + return 0 +} + +// shiftIsBounded reports whether (left/right) shift Value v is known to be bounded. +// A shift is bounded if it is shifting by less than the width of the shifted value. +func shiftIsBounded(v *Value) bool { + return v.AuxInt != 0 +} + +// canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing +// generated code as much as possible. +func canonLessThan(x, y *Value) bool { + if x.Op != y.Op { + return x.Op < y.Op + } + if !x.Pos.SameFileAndLine(y.Pos) { + return x.Pos.Before(y.Pos) + } + return x.ID < y.ID +} + +// truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern +// of the mantissa. It will panic if the truncation results in lost information. +func truncate64Fto32F(f float64) float32 { + if !isExactFloat32(f) { + panic("truncate64Fto32F: truncation is not exact") + } + if !math.IsNaN(f) { + return float32(f) + } + // NaN bit patterns aren't necessarily preserved across conversion + // instructions so we need to do the conversion manually. + b := math.Float64bits(f) + m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand) + // | sign | exponent | mantissa | + r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23))) + return math.Float32frombits(r) +} + +// extend32Fto64F converts a float32 value to a float64 value preserving the bit +// pattern of the mantissa. +func extend32Fto64F(f float32) float64 { + if !math.IsNaN(float64(f)) { + return float64(f) + } + // NaN bit patterns aren't necessarily preserved across conversion + // instructions so we need to do the conversion manually. + b := uint64(math.Float32bits(f)) + // | sign | exponent | mantissa | + r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23)) + return math.Float64frombits(r) +} + +// DivisionNeedsFixUp reports whether the division needs fix-up code. +func DivisionNeedsFixUp(v *Value) bool { + return v.AuxInt == 0 +} + +// auxFrom64F encodes a float64 value so it can be stored in an AuxInt. +func auxFrom64F(f float64) int64 { + if f != f { + panic("can't encode a NaN in AuxInt field") + } + return int64(math.Float64bits(f)) +} + +// auxFrom32F encodes a float32 value so it can be stored in an AuxInt. +func auxFrom32F(f float32) int64 { + if f != f { + panic("can't encode a NaN in AuxInt field") + } + return int64(math.Float64bits(extend32Fto64F(f))) +} + +// auxTo32F decodes a float32 from the AuxInt value provided. +func auxTo32F(i int64) float32 { + return truncate64Fto32F(math.Float64frombits(uint64(i))) +} + +// auxTo64F decodes a float64 from the AuxInt value provided. +func auxTo64F(i int64) float64 { + return math.Float64frombits(uint64(i)) +} + +func auxIntToBool(i int64) bool { + if i == 0 { + return false + } + return true +} +func auxIntToInt8(i int64) int8 { + return int8(i) +} +func auxIntToInt16(i int64) int16 { + return int16(i) +} +func auxIntToInt32(i int64) int32 { + return int32(i) +} +func auxIntToInt64(i int64) int64 { + return i +} +func auxIntToUint8(i int64) uint8 { + return uint8(i) +} +func auxIntToFloat32(i int64) float32 { + return float32(math.Float64frombits(uint64(i))) +} +func auxIntToFloat64(i int64) float64 { + return math.Float64frombits(uint64(i)) +} +func auxIntToValAndOff(i int64) ValAndOff { + return ValAndOff(i) +} +func auxIntToArm64BitField(i int64) arm64BitField { + return arm64BitField(i) +} +func auxIntToInt128(x int64) int128 { + if x != 0 { + panic("nonzero int128 not allowed") + } + return 0 +} +func auxIntToFlagConstant(x int64) flagConstant { + return flagConstant(x) +} + +func auxIntToOp(cc int64) Op { + return Op(cc) +} + +func boolToAuxInt(b bool) int64 { + if b { + return 1 + } + return 0 +} +func int8ToAuxInt(i int8) int64 { + return int64(i) +} +func int16ToAuxInt(i int16) int64 { + return int64(i) +} +func int32ToAuxInt(i int32) int64 { + return int64(i) +} +func int64ToAuxInt(i int64) int64 { + return int64(i) +} +func uint8ToAuxInt(i uint8) int64 { + return int64(int8(i)) +} +func float32ToAuxInt(f float32) int64 { + return int64(math.Float64bits(float64(f))) +} +func float64ToAuxInt(f float64) int64 { + return int64(math.Float64bits(f)) +} +func valAndOffToAuxInt(v ValAndOff) int64 { + return int64(v) +} +func arm64BitFieldToAuxInt(v arm64BitField) int64 { + return int64(v) +} +func int128ToAuxInt(x int128) int64 { + if x != 0 { + panic("nonzero int128 not allowed") + } + return 0 +} +func flagConstantToAuxInt(x flagConstant) int64 { + return int64(x) +} + +func opToAuxInt(o Op) int64 { + return int64(o) +} + +// Aux is an interface to hold miscellaneous data in Blocks and Values. +type Aux interface { + CanBeAnSSAAux() +} + +// stringAux wraps string values for use in Aux. +type stringAux string + +func (stringAux) CanBeAnSSAAux() {} + +func auxToString(i Aux) string { + return string(i.(stringAux)) +} +func auxToSym(i Aux) Sym { + // TODO: kind of a hack - allows nil interface through + s, _ := i.(Sym) + return s +} +func auxToType(i Aux) *types.Type { + return i.(*types.Type) +} +func auxToCall(i Aux) *AuxCall { + return i.(*AuxCall) +} +func auxToS390xCCMask(i Aux) s390x.CCMask { + return i.(s390x.CCMask) +} +func auxToS390xRotateParams(i Aux) s390x.RotateParams { + return i.(s390x.RotateParams) +} + +func StringToAux(s string) Aux { + return stringAux(s) +} +func symToAux(s Sym) Aux { + return s +} +func callToAux(s *AuxCall) Aux { + return s +} +func typeToAux(t *types.Type) Aux { + return t +} +func s390xCCMaskToAux(c s390x.CCMask) Aux { + return c +} +func s390xRotateParamsToAux(r s390x.RotateParams) Aux { + return r +} + +// uaddOvf reports whether unsigned a+b would overflow. +func uaddOvf(a, b int64) bool { + return uint64(a)+uint64(b) < uint64(a) +} + +// loadLSymOffset simulates reading a word at an offset into a +// read-only symbol's runtime memory. If it would read a pointer to +// another symbol, that symbol is returned. Otherwise, it returns nil. +func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym { + if lsym.Type != objabi.SRODATA { + return nil + } + + for _, r := range lsym.R { + if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 { + return r.Sym + } + } + + return nil +} + +// de-virtualize an InterLECall +// 'sym' is the symbol for the itab. +func devirtLESym(v *Value, aux Aux, sym Sym, offset int64) *obj.LSym { + n, ok := sym.(*obj.LSym) + if !ok { + return nil + } + + lsym := loadLSymOffset(n, offset) + if f := v.Block.Func; f.pass.debug > 0 { + if lsym != nil { + f.Warnl(v.Pos, "de-virtualizing call") + } else { + f.Warnl(v.Pos, "couldn't de-virtualize call") + } + } + return lsym +} + +func devirtLECall(v *Value, sym *obj.LSym) *Value { + v.Op = OpStaticLECall + auxcall := v.Aux.(*AuxCall) + auxcall.Fn = sym + // Remove first arg + v.Args[0].Uses-- + copy(v.Args[0:], v.Args[1:]) + v.Args[len(v.Args)-1] = nil // aid GC + v.Args = v.Args[:len(v.Args)-1] + return v +} + +// isSamePtr reports whether p1 and p2 point to the same address. +func isSamePtr(p1, p2 *Value) bool { + if p1 == p2 { + return true + } + if p1.Op != p2.Op { + return false + } + switch p1.Op { + case OpOffPtr: + return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0]) + case OpAddr, OpLocalAddr: + // OpAddr's 0th arg is either OpSP or OpSB, which means that it is uniquely identified by its Op. + // Checking for value equality only works after [z]cse has run. + return p1.Aux == p2.Aux && p1.Args[0].Op == p2.Args[0].Op + case OpAddPtr: + return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0]) + } + return false +} + +func isStackPtr(v *Value) bool { + for v.Op == OpOffPtr || v.Op == OpAddPtr { + v = v.Args[0] + } + return v.Op == OpSP || v.Op == OpLocalAddr +} + +// disjoint reports whether the memory region specified by [p1:p1+n1) +// does not overlap with [p2:p2+n2). +// A return value of false does not imply the regions overlap. +func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool { + if n1 == 0 || n2 == 0 { + return true + } + if p1 == p2 { + return false + } + baseAndOffset := func(ptr *Value) (base *Value, offset int64) { + base, offset = ptr, 0 + for base.Op == OpOffPtr { + offset += base.AuxInt + base = base.Args[0] + } + return base, offset + } + p1, off1 := baseAndOffset(p1) + p2, off2 := baseAndOffset(p2) + if isSamePtr(p1, p2) { + return !overlap(off1, n1, off2, n2) + } + // p1 and p2 are not the same, so if they are both OpAddrs then + // they point to different variables. + // If one pointer is on the stack and the other is an argument + // then they can't overlap. + switch p1.Op { + case OpAddr, OpLocalAddr: + if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP { + return true + } + return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP + case OpArg, OpArgIntReg: + if p2.Op == OpSP || p2.Op == OpLocalAddr { + return true + } + case OpSP: + return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP + } + return false +} + +// moveSize returns the number of bytes an aligned MOV instruction moves. +func moveSize(align int64, c *Config) int64 { + switch { + case align%8 == 0 && c.PtrSize == 8: + return 8 + case align%4 == 0: + return 4 + case align%2 == 0: + return 2 + } + return 1 +} + +// mergePoint finds a block among a's blocks which dominates b and is itself +// dominated by all of a's blocks. Returns nil if it can't find one. +// Might return nil even if one does exist. +func mergePoint(b *Block, a ...*Value) *Block { + // Walk backward from b looking for one of the a's blocks. + + // Max distance + d := 100 + + for d > 0 { + for _, x := range a { + if b == x.Block { + goto found + } + } + if len(b.Preds) > 1 { + // Don't know which way to go back. Abort. + return nil + } + b = b.Preds[0].b + d-- + } + return nil // too far away +found: + // At this point, r is the first value in a that we find by walking backwards. + // if we return anything, r will be it. + r := b + + // Keep going, counting the other a's that we find. They must all dominate r. + na := 0 + for d > 0 { + for _, x := range a { + if b == x.Block { + na++ + } + } + if na == len(a) { + // Found all of a in a backwards walk. We can return r. + return r + } + if len(b.Preds) > 1 { + return nil + } + b = b.Preds[0].b + d-- + + } + return nil // too far away +} + +// clobber invalidates values. Returns true. +// clobber is used by rewrite rules to: +// +// A) make sure the values are really dead and never used again. +// B) decrement use counts of the values' args. +func clobber(vv ...*Value) bool { + for _, v := range vv { + v.reset(OpInvalid) + // Note: leave v.Block intact. The Block field is used after clobber. + } + return true +} + +// clobberIfDead resets v when use count is 1. Returns true. +// clobberIfDead is used by rewrite rules to decrement +// use counts of v's args when v is dead and never used. +func clobberIfDead(v *Value) bool { + if v.Uses == 1 { + v.reset(OpInvalid) + } + // Note: leave v.Block intact. The Block field is used after clobberIfDead. + return true +} + +// noteRule is an easy way to track if a rule is matched when writing +// new ones. Make the rule of interest also conditional on +// +// noteRule("note to self: rule of interest matched") +// +// and that message will print when the rule matches. +func noteRule(s string) bool { + fmt.Println(s) + return true +} + +// countRule increments Func.ruleMatches[key]. +// If Func.ruleMatches is non-nil at the end +// of compilation, it will be printed to stdout. +// This is intended to make it easier to find which functions +// which contain lots of rules matches when developing new rules. +func countRule(v *Value, key string) bool { + f := v.Block.Func + if f.ruleMatches == nil { + f.ruleMatches = make(map[string]int) + } + f.ruleMatches[key]++ + return true +} + +// warnRule generates compiler debug output with string s when +// v is not in autogenerated code, cond is true and the rule has fired. +func warnRule(cond bool, v *Value, s string) bool { + if pos := v.Pos; pos.Line() > 1 && cond { + v.Block.Func.Warnl(pos, s) + } + return true +} + +// for a pseudo-op like (LessThan x), extract x. +func flagArg(v *Value) *Value { + if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() { + return nil + } + return v.Args[0] +} + +// arm64Negate finds the complement to an ARM64 condition code, +// for example !Equal -> NotEqual or !LessThan -> GreaterEqual +// +// For floating point, it's more subtle because NaN is unordered. We do +// !LessThanF -> NotLessThanF, the latter takes care of NaNs. +func arm64Negate(op Op) Op { + switch op { + case OpARM64LessThan: + return OpARM64GreaterEqual + case OpARM64LessThanU: + return OpARM64GreaterEqualU + case OpARM64GreaterThan: + return OpARM64LessEqual + case OpARM64GreaterThanU: + return OpARM64LessEqualU + case OpARM64LessEqual: + return OpARM64GreaterThan + case OpARM64LessEqualU: + return OpARM64GreaterThanU + case OpARM64GreaterEqual: + return OpARM64LessThan + case OpARM64GreaterEqualU: + return OpARM64LessThanU + case OpARM64Equal: + return OpARM64NotEqual + case OpARM64NotEqual: + return OpARM64Equal + case OpARM64LessThanF: + return OpARM64NotLessThanF + case OpARM64NotLessThanF: + return OpARM64LessThanF + case OpARM64LessEqualF: + return OpARM64NotLessEqualF + case OpARM64NotLessEqualF: + return OpARM64LessEqualF + case OpARM64GreaterThanF: + return OpARM64NotGreaterThanF + case OpARM64NotGreaterThanF: + return OpARM64GreaterThanF + case OpARM64GreaterEqualF: + return OpARM64NotGreaterEqualF + case OpARM64NotGreaterEqualF: + return OpARM64GreaterEqualF + default: + panic("unreachable") + } +} + +// arm64Invert evaluates (InvertFlags op), which +// is the same as altering the condition codes such +// that the same result would be produced if the arguments +// to the flag-generating instruction were reversed, e.g. +// (InvertFlags (CMP x y)) -> (CMP y x) +func arm64Invert(op Op) Op { + switch op { + case OpARM64LessThan: + return OpARM64GreaterThan + case OpARM64LessThanU: + return OpARM64GreaterThanU + case OpARM64GreaterThan: + return OpARM64LessThan + case OpARM64GreaterThanU: + return OpARM64LessThanU + case OpARM64LessEqual: + return OpARM64GreaterEqual + case OpARM64LessEqualU: + return OpARM64GreaterEqualU + case OpARM64GreaterEqual: + return OpARM64LessEqual + case OpARM64GreaterEqualU: + return OpARM64LessEqualU + case OpARM64Equal, OpARM64NotEqual: + return op + case OpARM64LessThanF: + return OpARM64GreaterThanF + case OpARM64GreaterThanF: + return OpARM64LessThanF + case OpARM64LessEqualF: + return OpARM64GreaterEqualF + case OpARM64GreaterEqualF: + return OpARM64LessEqualF + case OpARM64NotLessThanF: + return OpARM64NotGreaterThanF + case OpARM64NotGreaterThanF: + return OpARM64NotLessThanF + case OpARM64NotLessEqualF: + return OpARM64NotGreaterEqualF + case OpARM64NotGreaterEqualF: + return OpARM64NotLessEqualF + default: + panic("unreachable") + } +} + +// evaluate an ARM64 op against a flags value +// that is potentially constant; return 1 for true, +// -1 for false, and 0 for not constant. +func ccARM64Eval(op Op, flags *Value) int { + fop := flags.Op + if fop == OpARM64InvertFlags { + return -ccARM64Eval(op, flags.Args[0]) + } + if fop != OpARM64FlagConstant { + return 0 + } + fc := flagConstant(flags.AuxInt) + b2i := func(b bool) int { + if b { + return 1 + } + return -1 + } + switch op { + case OpARM64Equal: + return b2i(fc.eq()) + case OpARM64NotEqual: + return b2i(fc.ne()) + case OpARM64LessThan: + return b2i(fc.lt()) + case OpARM64LessThanU: + return b2i(fc.ult()) + case OpARM64GreaterThan: + return b2i(fc.gt()) + case OpARM64GreaterThanU: + return b2i(fc.ugt()) + case OpARM64LessEqual: + return b2i(fc.le()) + case OpARM64LessEqualU: + return b2i(fc.ule()) + case OpARM64GreaterEqual: + return b2i(fc.ge()) + case OpARM64GreaterEqualU: + return b2i(fc.uge()) + } + return 0 +} + +// logRule logs the use of the rule s. This will only be enabled if +// rewrite rules were generated with the -log option, see _gen/rulegen.go. +func logRule(s string) { + if ruleFile == nil { + // Open a log file to write log to. We open in append + // mode because all.bash runs the compiler lots of times, + // and we want the concatenation of all of those logs. + // This means, of course, that users need to rm the old log + // to get fresh data. + // TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow? + w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"), + os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666) + if err != nil { + panic(err) + } + ruleFile = w + } + _, err := fmt.Fprintln(ruleFile, s) + if err != nil { + panic(err) + } +} + +var ruleFile io.Writer + +func min(x, y int64) int64 { + if x < y { + return x + } + return y +} + +func isConstZero(v *Value) bool { + switch v.Op { + case OpConstNil: + return true + case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F: + return v.AuxInt == 0 + } + return false +} + +// reciprocalExact64 reports whether 1/c is exactly representable. +func reciprocalExact64(c float64) bool { + b := math.Float64bits(c) + man := b & (1<<52 - 1) + if man != 0 { + return false // not a power of 2, denormal, or NaN + } + exp := b >> 52 & (1<<11 - 1) + // exponent bias is 0x3ff. So taking the reciprocal of a number + // changes the exponent to 0x7fe-exp. + switch exp { + case 0: + return false // ±0 + case 0x7ff: + return false // ±inf + case 0x7fe: + return false // exponent is not representable + default: + return true + } +} + +// reciprocalExact32 reports whether 1/c is exactly representable. +func reciprocalExact32(c float32) bool { + b := math.Float32bits(c) + man := b & (1<<23 - 1) + if man != 0 { + return false // not a power of 2, denormal, or NaN + } + exp := b >> 23 & (1<<8 - 1) + // exponent bias is 0x7f. So taking the reciprocal of a number + // changes the exponent to 0xfe-exp. + switch exp { + case 0: + return false // ±0 + case 0xff: + return false // ±inf + case 0xfe: + return false // exponent is not representable + default: + return true + } +} + +// check if an immediate can be directly encoded into an ARM's instruction. +func isARMImmRot(v uint32) bool { + for i := 0; i < 16; i++ { + if v&^0xff == 0 { + return true + } + v = v<<2 | v>>30 + } + + return false +} + +// overlap reports whether the ranges given by the given offset and +// size pairs overlap. +func overlap(offset1, size1, offset2, size2 int64) bool { + if offset1 >= offset2 && offset2+size2 > offset1 { + return true + } + if offset2 >= offset1 && offset1+size1 > offset2 { + return true + } + return false +} + +func areAdjacentOffsets(off1, off2, size int64) bool { + return off1+size == off2 || off1 == off2+size +} + +// check if value zeroes out upper 32-bit of 64-bit register. +// depth limits recursion depth. In AMD64.rules 3 is used as limit, +// because it catches same amount of cases as 4. +func zeroUpper32Bits(x *Value, depth int) bool { + switch x.Op { + case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1, + OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1, + OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload, + OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL, + OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst, + OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst, + OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL, + OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst, + OpAMD64SHLL, OpAMD64SHLLconst: + return true + case OpArg: + return x.Type.Size() == 4 + case OpPhi, OpSelect0, OpSelect1: + // Phis can use each-other as an arguments, instead of tracking visited values, + // just limit recursion depth. + if depth <= 0 { + return false + } + for i := range x.Args { + if !zeroUpper32Bits(x.Args[i], depth-1) { + return false + } + } + return true + + } + return false +} + +// zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits. +func zeroUpper48Bits(x *Value, depth int) bool { + switch x.Op { + case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2: + return true + case OpArg: + return x.Type.Size() == 2 + case OpPhi, OpSelect0, OpSelect1: + // Phis can use each-other as an arguments, instead of tracking visited values, + // just limit recursion depth. + if depth <= 0 { + return false + } + for i := range x.Args { + if !zeroUpper48Bits(x.Args[i], depth-1) { + return false + } + } + return true + + } + return false +} + +// zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits. +func zeroUpper56Bits(x *Value, depth int) bool { + switch x.Op { + case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1: + return true + case OpArg: + return x.Type.Size() == 1 + case OpPhi, OpSelect0, OpSelect1: + // Phis can use each-other as an arguments, instead of tracking visited values, + // just limit recursion depth. + if depth <= 0 { + return false + } + for i := range x.Args { + if !zeroUpper56Bits(x.Args[i], depth-1) { + return false + } + } + return true + + } + return false +} + +// isInlinableMemmove reports whether the given arch performs a Move of the given size +// faster than memmove. It will only return true if replacing the memmove with a Move is +// safe, either because Move will do all of its loads before any of its stores, or +// because the arguments are known to be disjoint. +// This is used as a check for replacing memmove with Move ops. +func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool { + // It is always safe to convert memmove into Move when its arguments are disjoint. + // Move ops may or may not be faster for large sizes depending on how the platform + // lowers them, so we only perform this optimization on platforms that we know to + // have fast Move ops. + switch c.arch { + case "amd64": + return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz)) + case "386", "arm64": + return sz <= 8 + case "s390x", "ppc64", "ppc64le": + return sz <= 8 || disjoint(dst, sz, src, sz) + case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le": + return sz <= 4 + } + return false +} +func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool { + return isInlinableMemmove(dst, src, sz, c) +} + +// logLargeCopy logs the occurrence of a large copy. +// The best place to do this is in the rewrite rules where the size of the move is easy to find. +// "Large" is arbitrarily chosen to be 128 bytes; this may change. +func logLargeCopy(v *Value, s int64) bool { + if s < 128 { + return true + } + if logopt.Enabled() { + logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s)) + } + return true +} +func LogLargeCopy(funcName string, pos src.XPos, s int64) { + if s < 128 { + return + } + if logopt.Enabled() { + logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s)) + } +} + +// hasSmallRotate reports whether the architecture has rotate instructions +// for sizes < 32-bit. This is used to decide whether to promote some rotations. +func hasSmallRotate(c *Config) bool { + switch c.arch { + case "amd64", "386": + return true + default: + return false + } +} + +func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 { + if sh < 0 || sh >= sz { + panic("PPC64 shift arg sh out of range") + } + if mb < 0 || mb >= sz { + panic("PPC64 shift arg mb out of range") + } + if me < 0 || me >= sz { + panic("PPC64 shift arg me out of range") + } + return int32(sh<<16 | mb<<8 | me) +} + +func GetPPC64Shiftsh(auxint int64) int64 { + return int64(int8(auxint >> 16)) +} + +func GetPPC64Shiftmb(auxint int64) int64 { + return int64(int8(auxint >> 8)) +} + +func GetPPC64Shiftme(auxint int64) int64 { + return int64(int8(auxint)) +} + +// Test if this value can encoded as a mask for a rlwinm like +// operation. Masks can also extend from the msb and wrap to +// the lsb too. That is, the valid masks are 32 bit strings +// of the form: 0..01..10..0 or 1..10..01..1 or 1...1 +func isPPC64WordRotateMask(v64 int64) bool { + // Isolate rightmost 1 (if none 0) and add. + v := uint32(v64) + vp := (v & -v) + v + // Likewise, for the wrapping case. + vn := ^v + vpn := (vn & -vn) + vn + return (v&vp == 0 || vn&vpn == 0) && v != 0 +} + +// Compress mask and shift into single value of the form +// me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can +// be used to regenerate the input mask. +func encodePPC64RotateMask(rotate, mask, nbits int64) int64 { + var mb, me, mbn, men int + + // Determine boundaries and then decode them + if mask == 0 || ^mask == 0 || rotate >= nbits { + panic("Invalid PPC64 rotate mask") + } else if nbits == 32 { + mb = bits.LeadingZeros32(uint32(mask)) + me = 32 - bits.TrailingZeros32(uint32(mask)) + mbn = bits.LeadingZeros32(^uint32(mask)) + men = 32 - bits.TrailingZeros32(^uint32(mask)) + } else { + mb = bits.LeadingZeros64(uint64(mask)) + me = 64 - bits.TrailingZeros64(uint64(mask)) + mbn = bits.LeadingZeros64(^uint64(mask)) + men = 64 - bits.TrailingZeros64(^uint64(mask)) + } + // Check for a wrapping mask (e.g bits at 0 and 63) + if mb == 0 && me == int(nbits) { + // swap the inverted values + mb, me = men, mbn + } + + return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24) +} + +// DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask. The values returned as +// mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask. +func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) { + auxint := uint64(sauxint) + rotate = int64((auxint >> 16) & 0xFF) + mb = int64((auxint >> 8) & 0xFF) + me = int64((auxint >> 0) & 0xFF) + nbits := int64((auxint >> 24) & 0xFF) + mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1) + if mb > me { + mask = ^mask + } + if nbits == 32 { + mask = uint64(uint32(mask)) + } + + // Fixup ME to match ISA definition. The second argument to MASK(..,me) + // is inclusive. + me = (me - 1) & (nbits - 1) + return +} + +// This verifies that the mask is a set of +// consecutive bits including the least +// significant bit. +func isPPC64ValidShiftMask(v int64) bool { + if (v != 0) && ((v+1)&v) == 0 { + return true + } + return false +} + +func getPPC64ShiftMaskLength(v int64) int64 { + return int64(bits.Len64(uint64(v))) +} + +// Decompose a shift right into an equivalent rotate/mask, +// and return mask & m. +func mergePPC64RShiftMask(m, s, nbits int64) int64 { + smask := uint64((1<<uint(nbits))-1) >> uint(s) + return m & int64(smask) +} + +// Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0 +func mergePPC64AndSrwi(m, s int64) int64 { + mask := mergePPC64RShiftMask(m, s, 32) + if !isPPC64WordRotateMask(mask) { + return 0 + } + return encodePPC64RotateMask((32-s)&31, mask, 32) +} + +// Test if a shift right feeding into a CLRLSLDI can be merged into RLWINM. +// Return the encoded RLWINM constant, or 0 if they cannot be merged. +func mergePPC64ClrlsldiSrw(sld, srw int64) int64 { + mask_1 := uint64(0xFFFFFFFF >> uint(srw)) + // for CLRLSLDI, it's more convient to think of it as a mask left bits then rotate left. + mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld))) + + // Rewrite mask to apply after the final left shift. + mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld)) + + r_1 := 32 - srw + r_2 := GetPPC64Shiftsh(sld) + r_3 := (r_1 + r_2) & 31 // This can wrap. + + if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 { + return 0 + } + return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32) +} + +// Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM. Return +// the encoded RLWINM constant, or 0 if they cannot be merged. +func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 { + r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw) + // for CLRLSLDI, it's more convient to think of it as a mask left bits then rotate left. + mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld))) + + // combine the masks, and adjust for the final left shift. + mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld))) + r_2 := GetPPC64Shiftsh(int64(sld)) + r_3 := (r_1 + r_2) & 31 // This can wrap. + + // Verify the result is still a valid bitmask of <= 32 bits. + if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 { + return 0 + } + return encodePPC64RotateMask(r_3, int64(mask_3), 32) +} + +// Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)), +// or return 0 if they cannot be combined. +func mergePPC64SldiSrw(sld, srw int64) int64 { + if sld > srw || srw >= 32 { + return 0 + } + mask_r := uint32(0xFFFFFFFF) >> uint(srw) + mask_l := uint32(0xFFFFFFFF) >> uint(sld) + mask := (mask_r & mask_l) << uint(sld) + return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32) +} + +// Convenience function to rotate a 32 bit constant value by another constant. +func rotateLeft32(v, rotate int64) int64 { + return int64(bits.RotateLeft32(uint32(v), int(rotate))) +} + +func rotateRight64(v, rotate int64) int64 { + return int64(bits.RotateLeft64(uint64(v), int(-rotate))) +} + +// encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format. +func armBFAuxInt(lsb, width int64) arm64BitField { + if lsb < 0 || lsb > 63 { + panic("ARM(64) bit field lsb constant out of range") + } + if width < 1 || lsb+width > 64 { + panic("ARM(64) bit field width constant out of range") + } + return arm64BitField(width | lsb<<8) +} + +// returns the lsb part of the auxInt field of arm64 bitfield ops. +func (bfc arm64BitField) getARM64BFlsb() int64 { + return int64(uint64(bfc) >> 8) +} + +// returns the width part of the auxInt field of arm64 bitfield ops. +func (bfc arm64BitField) getARM64BFwidth() int64 { + return int64(bfc) & 0xff +} + +// checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask. +func isARM64BFMask(lsb, mask, rshift int64) bool { + shiftedMask := int64(uint64(mask) >> uint64(rshift)) + return shiftedMask != 0 && isPowerOfTwo64(shiftedMask+1) && nto(shiftedMask)+lsb < 64 +} + +// returns the bitfield width of mask >> rshift for arm64 bitfield ops. +func arm64BFWidth(mask, rshift int64) int64 { + shiftedMask := int64(uint64(mask) >> uint64(rshift)) + if shiftedMask == 0 { + panic("ARM64 BF mask is zero") + } + return nto(shiftedMask) +} + +// sizeof returns the size of t in bytes. +// It will panic if t is not a *types.Type. +func sizeof(t interface{}) int64 { + return t.(*types.Type).Size() +} + +// registerizable reports whether t is a primitive type that fits in +// a register. It assumes float64 values will always fit into registers +// even if that isn't strictly true. +func registerizable(b *Block, typ *types.Type) bool { + if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() { + return true + } + if typ.IsInteger() { + return typ.Size() <= b.Func.Config.RegSize + } + return false +} + +// needRaceCleanup reports whether this call to racefuncenter/exit isn't needed. +func needRaceCleanup(sym *AuxCall, v *Value) bool { + f := v.Block.Func + if !f.Config.Race { + return false + } + if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") { + return false + } + for _, b := range f.Blocks { + for _, v := range b.Values { + switch v.Op { + case OpStaticCall, OpStaticLECall: + // Check for racefuncenter will encounter racefuncexit and vice versa. + // Allow calls to panic* + s := v.Aux.(*AuxCall).Fn.String() + switch s { + case "runtime.racefuncenter", "runtime.racefuncexit", + "runtime.panicdivide", "runtime.panicwrap", + "runtime.panicshift": + continue + } + // If we encountered any call, we need to keep racefunc*, + // for accurate stacktraces. + return false + case OpPanicBounds, OpPanicExtend: + // Note: these are panic generators that are ok (like the static calls above). + case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall: + // We must keep the race functions if there are any other call types. + return false + } + } + } + if isSameCall(sym, "runtime.racefuncenter") { + // TODO REGISTER ABI this needs to be cleaned up. + // If we're removing racefuncenter, remove its argument as well. + if v.Args[0].Op != OpStore { + if v.Op == OpStaticLECall { + // there is no store, yet. + return true + } + return false + } + mem := v.Args[0].Args[2] + v.Args[0].reset(OpCopy) + v.Args[0].AddArg(mem) + } + return true +} + +// symIsRO reports whether sym is a read-only global. +func symIsRO(sym interface{}) bool { + lsym := sym.(*obj.LSym) + return lsym.Type == objabi.SRODATA && len(lsym.R) == 0 +} + +// symIsROZero reports whether sym is a read-only global whose data contains all zeros. +func symIsROZero(sym Sym) bool { + lsym := sym.(*obj.LSym) + if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 { + return false + } + for _, b := range lsym.P { + if b != 0 { + return false + } + } + return true +} + +// read8 reads one byte from the read-only global sym at offset off. +func read8(sym interface{}, off int64) uint8 { + lsym := sym.(*obj.LSym) + if off >= int64(len(lsym.P)) || off < 0 { + // Invalid index into the global sym. + // This can happen in dead code, so we don't want to panic. + // Just return any value, it will eventually get ignored. + // See issue 29215. + return 0 + } + return lsym.P[off] +} + +// read16 reads two bytes from the read-only global sym at offset off. +func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 { + lsym := sym.(*obj.LSym) + // lsym.P is written lazily. + // Bytes requested after the end of lsym.P are 0. + var src []byte + if 0 <= off && off < int64(len(lsym.P)) { + src = lsym.P[off:] + } + buf := make([]byte, 2) + copy(buf, src) + return byteorder.Uint16(buf) +} + +// read32 reads four bytes from the read-only global sym at offset off. +func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 { + lsym := sym.(*obj.LSym) + var src []byte + if 0 <= off && off < int64(len(lsym.P)) { + src = lsym.P[off:] + } + buf := make([]byte, 4) + copy(buf, src) + return byteorder.Uint32(buf) +} + +// read64 reads eight bytes from the read-only global sym at offset off. +func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 { + lsym := sym.(*obj.LSym) + var src []byte + if 0 <= off && off < int64(len(lsym.P)) { + src = lsym.P[off:] + } + buf := make([]byte, 8) + copy(buf, src) + return byteorder.Uint64(buf) +} + +// sequentialAddresses reports true if it can prove that x + n == y +func sequentialAddresses(x, y *Value, n int64) bool { + if x == y && n == 0 { + return true + } + if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil && + (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] || + x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) { + return true + } + if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux && + (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] || + x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) { + return true + } + if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil && + (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] || + x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) { + return true + } + if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux && + (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] || + x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) { + return true + } + return false +} + +// flagConstant represents the result of a compile-time comparison. +// The sense of these flags does not necessarily represent the hardware's notion +// of a flags register - these are just a compile-time construct. +// We happen to match the semantics to those of arm/arm64. +// Note that these semantics differ from x86: the carry flag has the opposite +// sense on a subtraction! +// +// On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C. +// On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C. +// (because it does x + ^y + C). +// +// See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag +type flagConstant uint8 + +// N reports whether the result of an operation is negative (high bit set). +func (fc flagConstant) N() bool { + return fc&1 != 0 +} + +// Z reports whether the result of an operation is 0. +func (fc flagConstant) Z() bool { + return fc&2 != 0 +} + +// C reports whether an unsigned add overflowed (carry), or an +// unsigned subtract did not underflow (borrow). +func (fc flagConstant) C() bool { + return fc&4 != 0 +} + +// V reports whether a signed operation overflowed or underflowed. +func (fc flagConstant) V() bool { + return fc&8 != 0 +} + +func (fc flagConstant) eq() bool { + return fc.Z() +} +func (fc flagConstant) ne() bool { + return !fc.Z() +} +func (fc flagConstant) lt() bool { + return fc.N() != fc.V() +} +func (fc flagConstant) le() bool { + return fc.Z() || fc.lt() +} +func (fc flagConstant) gt() bool { + return !fc.Z() && fc.ge() +} +func (fc flagConstant) ge() bool { + return fc.N() == fc.V() +} +func (fc flagConstant) ult() bool { + return !fc.C() +} +func (fc flagConstant) ule() bool { + return fc.Z() || fc.ult() +} +func (fc flagConstant) ugt() bool { + return !fc.Z() && fc.uge() +} +func (fc flagConstant) uge() bool { + return fc.C() +} + +func (fc flagConstant) ltNoov() bool { + return fc.lt() && !fc.V() +} +func (fc flagConstant) leNoov() bool { + return fc.le() && !fc.V() +} +func (fc flagConstant) gtNoov() bool { + return fc.gt() && !fc.V() +} +func (fc flagConstant) geNoov() bool { + return fc.ge() && !fc.V() +} + +func (fc flagConstant) String() string { + return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V()) +} + +type flagConstantBuilder struct { + N bool + Z bool + C bool + V bool +} + +func (fcs flagConstantBuilder) encode() flagConstant { + var fc flagConstant + if fcs.N { + fc |= 1 + } + if fcs.Z { + fc |= 2 + } + if fcs.C { + fc |= 4 + } + if fcs.V { + fc |= 8 + } + return fc +} + +// Note: addFlags(x,y) != subFlags(x,-y) in some situations: +// - the results of the C flag are different +// - the results of the V flag when y==minint are different + +// addFlags64 returns the flags that would be set from computing x+y. +func addFlags64(x, y int64) flagConstant { + var fcb flagConstantBuilder + fcb.Z = x+y == 0 + fcb.N = x+y < 0 + fcb.C = uint64(x+y) < uint64(x) + fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0 + return fcb.encode() +} + +// subFlags64 returns the flags that would be set from computing x-y. +func subFlags64(x, y int64) flagConstant { + var fcb flagConstantBuilder + fcb.Z = x-y == 0 + fcb.N = x-y < 0 + fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model. + fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0 + return fcb.encode() +} + +// addFlags32 returns the flags that would be set from computing x+y. +func addFlags32(x, y int32) flagConstant { + var fcb flagConstantBuilder + fcb.Z = x+y == 0 + fcb.N = x+y < 0 + fcb.C = uint32(x+y) < uint32(x) + fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0 + return fcb.encode() +} + +// subFlags32 returns the flags that would be set from computing x-y. +func subFlags32(x, y int32) flagConstant { + var fcb flagConstantBuilder + fcb.Z = x-y == 0 + fcb.N = x-y < 0 + fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model. + fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0 + return fcb.encode() +} + +// logicFlags64 returns flags set to the sign/zeroness of x. +// C and V are set to false. +func logicFlags64(x int64) flagConstant { + var fcb flagConstantBuilder + fcb.Z = x == 0 + fcb.N = x < 0 + return fcb.encode() +} + +// logicFlags32 returns flags set to the sign/zeroness of x. +// C and V are set to false. +func logicFlags32(x int32) flagConstant { + var fcb flagConstantBuilder + fcb.Z = x == 0 + fcb.N = x < 0 + return fcb.encode() +} + +func makeJumpTableSym(b *Block) *obj.LSym { + s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.LSym(), b.ID)) + s.Set(obj.AttrDuplicateOK, true) + s.Set(obj.AttrLocal, true) + return s +} + +// canRotate reports whether the architecture supports +// rotates of integer registers with the given number of bits. +func canRotate(c *Config, bits int64) bool { + if bits > c.PtrSize*8 { + // Don't rewrite to rotates bigger than the machine word. + return false + } + switch c.arch { + case "386", "amd64", "arm64": + return true + case "arm", "s390x", "ppc64", "ppc64le", "wasm", "loong64": + return bits >= 32 + default: + return false + } +} + +// isARM64bitcon reports whether a constant can be encoded into a logical instruction. +func isARM64bitcon(x uint64) bool { + if x == 1<<64-1 || x == 0 { + return false + } + // determine the period and sign-extend a unit to 64 bits + switch { + case x != x>>32|x<<32: + // period is 64 + // nothing to do + case x != x>>16|x<<48: + // period is 32 + x = uint64(int64(int32(x))) + case x != x>>8|x<<56: + // period is 16 + x = uint64(int64(int16(x))) + case x != x>>4|x<<60: + // period is 8 + x = uint64(int64(int8(x))) + default: + // period is 4 or 2, always true + // 0001, 0010, 0100, 1000 -- 0001 rotate + // 0011, 0110, 1100, 1001 -- 0011 rotate + // 0111, 1011, 1101, 1110 -- 0111 rotate + // 0101, 1010 -- 01 rotate, repeat + return true + } + return sequenceOfOnes(x) || sequenceOfOnes(^x) +} + +// sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros. +func sequenceOfOnes(x uint64) bool { + y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2 + y += x + return (y-1)&y == 0 +} + +// isARM64addcon reports whether x can be encoded as the immediate value in an ADD or SUB instruction. +func isARM64addcon(v int64) bool { + /* uimm12 or uimm24? */ + if v < 0 { + return false + } + if (v & 0xFFF) == 0 { + v >>= 12 + } + return v <= 0xFFF +} |