Diffstat (limited to 'src/cmd/compile/internal/ppc64/ssa.go')
-rw-r--r-- | src/cmd/compile/internal/ppc64/ssa.go | 1967 |
1 file changed, 1967 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ppc64/ssa.go b/src/cmd/compile/internal/ppc64/ssa.go new file mode 100644 index 0000000..3e20c44 --- /dev/null +++ b/src/cmd/compile/internal/ppc64/ssa.go @@ -0,0 +1,1967 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ppc64 + +import ( + "cmd/compile/internal/gc" + "cmd/compile/internal/logopt" + "cmd/compile/internal/ssa" + "cmd/compile/internal/types" + "cmd/internal/obj" + "cmd/internal/obj/ppc64" + "cmd/internal/objabi" + "math" + "strings" +) + +// markMoves marks any MOVXconst ops that need to avoid clobbering flags. +func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) { + // flive := b.FlagsLiveAtEnd + // if b.Control != nil && b.Control.Type.IsFlags() { + // flive = true + // } + // for i := len(b.Values) - 1; i >= 0; i-- { + // v := b.Values[i] + // if flive && (v.Op == v.Op == ssa.OpPPC64MOVDconst) { + // // The "mark" is any non-nil Aux value. + // v.Aux = v + // } + // if v.Type.IsFlags() { + // flive = false + // } + // for _, a := range v.Args { + // if a.Type.IsFlags() { + // flive = true + // } + // } + // } +} + +// loadByType returns the load instruction of the given type. +func loadByType(t *types.Type) obj.As { + if t.IsFloat() { + switch t.Size() { + case 4: + return ppc64.AFMOVS + case 8: + return ppc64.AFMOVD + } + } else { + switch t.Size() { + case 1: + if t.IsSigned() { + return ppc64.AMOVB + } else { + return ppc64.AMOVBZ + } + case 2: + if t.IsSigned() { + return ppc64.AMOVH + } else { + return ppc64.AMOVHZ + } + case 4: + if t.IsSigned() { + return ppc64.AMOVW + } else { + return ppc64.AMOVWZ + } + case 8: + return ppc64.AMOVD + } + } + panic("bad load type") +} + +// storeByType returns the store instruction of the given type. 
+func storeByType(t *types.Type) obj.As { + if t.IsFloat() { + switch t.Size() { + case 4: + return ppc64.AFMOVS + case 8: + return ppc64.AFMOVD + } + } else { + switch t.Size() { + case 1: + return ppc64.AMOVB + case 2: + return ppc64.AMOVH + case 4: + return ppc64.AMOVW + case 8: + return ppc64.AMOVD + } + } + panic("bad store type") +} + +func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { + switch v.Op { + case ssa.OpCopy: + t := v.Type + if t.IsMemory() { + return + } + x := v.Args[0].Reg() + y := v.Reg() + if x != y { + rt := obj.TYPE_REG + op := ppc64.AMOVD + + if t.IsFloat() { + op = ppc64.AFMOVD + } + p := s.Prog(op) + p.From.Type = rt + p.From.Reg = x + p.To.Type = rt + p.To.Reg = y + } + + case ssa.OpPPC64LoweredMuluhilo: + // MULHDU Rarg1, Rarg0, Reg0 + // MULLD Rarg1, Rarg0, Reg1 + r0 := v.Args[0].Reg() + r1 := v.Args[1].Reg() + p := s.Prog(ppc64.AMULHDU) + p.From.Type = obj.TYPE_REG + p.From.Reg = r1 + p.Reg = r0 + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Reg0() + p1 := s.Prog(ppc64.AMULLD) + p1.From.Type = obj.TYPE_REG + p1.From.Reg = r1 + p1.Reg = r0 + p1.To.Type = obj.TYPE_REG + p1.To.Reg = v.Reg1() + + case ssa.OpPPC64LoweredAdd64Carry: + // ADDC Rarg2, -1, Rtmp + // ADDE Rarg1, Rarg0, Reg0 + // ADDZE Rzero, Reg1 + r0 := v.Args[0].Reg() + r1 := v.Args[1].Reg() + r2 := v.Args[2].Reg() + p := s.Prog(ppc64.AADDC) + p.From.Type = obj.TYPE_CONST + p.From.Offset = -1 + p.Reg = r2 + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REGTMP + p1 := s.Prog(ppc64.AADDE) + p1.From.Type = obj.TYPE_REG + p1.From.Reg = r1 + p1.Reg = r0 + p1.To.Type = obj.TYPE_REG + p1.To.Reg = v.Reg0() + p2 := s.Prog(ppc64.AADDZE) + p2.From.Type = obj.TYPE_REG + p2.From.Reg = ppc64.REGZERO + p2.To.Type = obj.TYPE_REG + p2.To.Reg = v.Reg1() + + case ssa.OpPPC64LoweredAtomicAnd8, + ssa.OpPPC64LoweredAtomicAnd32, + ssa.OpPPC64LoweredAtomicOr8, + ssa.OpPPC64LoweredAtomicOr32: + // LWSYNC + // LBAR/LWAR (Rarg0), Rtmp + // AND/OR Rarg1, Rtmp + // STBCCC/STWCCC Rtmp, (Rarg0) + // BNE -3(PC) + ld := ppc64.ALBAR + st := ppc64.ASTBCCC + if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 { + ld = ppc64.ALWAR + st = ppc64.ASTWCCC + } + r0 := v.Args[0].Reg() + r1 := v.Args[1].Reg() + // LWSYNC - Assuming shared data not write-through-required nor + // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b. + plwsync := s.Prog(ppc64.ALWSYNC) + plwsync.To.Type = obj.TYPE_NONE + // LBAR or LWAR + p := s.Prog(ld) + p.From.Type = obj.TYPE_MEM + p.From.Reg = r0 + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REGTMP + // AND/OR reg1,out + p1 := s.Prog(v.Op.Asm()) + p1.From.Type = obj.TYPE_REG + p1.From.Reg = r1 + p1.To.Type = obj.TYPE_REG + p1.To.Reg = ppc64.REGTMP + // STBCCC or STWCCC + p2 := s.Prog(st) + p2.From.Type = obj.TYPE_REG + p2.From.Reg = ppc64.REGTMP + p2.To.Type = obj.TYPE_MEM + p2.To.Reg = r0 + p2.RegTo2 = ppc64.REGTMP + // BNE retry + p3 := s.Prog(ppc64.ABNE) + p3.To.Type = obj.TYPE_BRANCH + gc.Patch(p3, p) + + case ssa.OpPPC64LoweredAtomicAdd32, + ssa.OpPPC64LoweredAtomicAdd64: + // LWSYNC + // LDAR/LWAR (Rarg0), Rout + // ADD Rarg1, Rout + // STDCCC/STWCCC Rout, (Rarg0) + // BNE -3(PC) + // MOVW Rout,Rout (if Add32) + ld := ppc64.ALDAR + st := ppc64.ASTDCCC + if v.Op == ssa.OpPPC64LoweredAtomicAdd32 { + ld = ppc64.ALWAR + st = ppc64.ASTWCCC + } + r0 := v.Args[0].Reg() + r1 := v.Args[1].Reg() + out := v.Reg0() + // LWSYNC - Assuming shared data not write-through-required nor + // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b. 
+ plwsync := s.Prog(ppc64.ALWSYNC) + plwsync.To.Type = obj.TYPE_NONE + // LDAR or LWAR + p := s.Prog(ld) + p.From.Type = obj.TYPE_MEM + p.From.Reg = r0 + p.To.Type = obj.TYPE_REG + p.To.Reg = out + // ADD reg1,out + p1 := s.Prog(ppc64.AADD) + p1.From.Type = obj.TYPE_REG + p1.From.Reg = r1 + p1.To.Reg = out + p1.To.Type = obj.TYPE_REG + // STDCCC or STWCCC + p3 := s.Prog(st) + p3.From.Type = obj.TYPE_REG + p3.From.Reg = out + p3.To.Type = obj.TYPE_MEM + p3.To.Reg = r0 + // BNE retry + p4 := s.Prog(ppc64.ABNE) + p4.To.Type = obj.TYPE_BRANCH + gc.Patch(p4, p) + + // Ensure a 32 bit result + if v.Op == ssa.OpPPC64LoweredAtomicAdd32 { + p5 := s.Prog(ppc64.AMOVWZ) + p5.To.Type = obj.TYPE_REG + p5.To.Reg = out + p5.From.Type = obj.TYPE_REG + p5.From.Reg = out + } + + case ssa.OpPPC64LoweredAtomicExchange32, + ssa.OpPPC64LoweredAtomicExchange64: + // LWSYNC + // LDAR/LWAR (Rarg0), Rout + // STDCCC/STWCCC Rout, (Rarg0) + // BNE -2(PC) + // ISYNC + ld := ppc64.ALDAR + st := ppc64.ASTDCCC + if v.Op == ssa.OpPPC64LoweredAtomicExchange32 { + ld = ppc64.ALWAR + st = ppc64.ASTWCCC + } + r0 := v.Args[0].Reg() + r1 := v.Args[1].Reg() + out := v.Reg0() + // LWSYNC - Assuming shared data not write-through-required nor + // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b. + plwsync := s.Prog(ppc64.ALWSYNC) + plwsync.To.Type = obj.TYPE_NONE + // LDAR or LWAR + p := s.Prog(ld) + p.From.Type = obj.TYPE_MEM + p.From.Reg = r0 + p.To.Type = obj.TYPE_REG + p.To.Reg = out + // STDCCC or STWCCC + p1 := s.Prog(st) + p1.From.Type = obj.TYPE_REG + p1.From.Reg = r1 + p1.To.Type = obj.TYPE_MEM + p1.To.Reg = r0 + // BNE retry + p2 := s.Prog(ppc64.ABNE) + p2.To.Type = obj.TYPE_BRANCH + gc.Patch(p2, p) + // ISYNC + pisync := s.Prog(ppc64.AISYNC) + pisync.To.Type = obj.TYPE_NONE + + case ssa.OpPPC64LoweredAtomicLoad8, + ssa.OpPPC64LoweredAtomicLoad32, + ssa.OpPPC64LoweredAtomicLoad64, + ssa.OpPPC64LoweredAtomicLoadPtr: + // SYNC + // MOVB/MOVD/MOVW (Rarg0), Rout + // CMP Rout,Rout + // BNE 1(PC) + // ISYNC + ld := ppc64.AMOVD + cmp := ppc64.ACMP + switch v.Op { + case ssa.OpPPC64LoweredAtomicLoad8: + ld = ppc64.AMOVBZ + case ssa.OpPPC64LoweredAtomicLoad32: + ld = ppc64.AMOVWZ + cmp = ppc64.ACMPW + } + arg0 := v.Args[0].Reg() + out := v.Reg0() + // SYNC when AuxInt == 1; otherwise, load-acquire + if v.AuxInt == 1 { + psync := s.Prog(ppc64.ASYNC) + psync.To.Type = obj.TYPE_NONE + } + // Load + p := s.Prog(ld) + p.From.Type = obj.TYPE_MEM + p.From.Reg = arg0 + p.To.Type = obj.TYPE_REG + p.To.Reg = out + // CMP + p1 := s.Prog(cmp) + p1.From.Type = obj.TYPE_REG + p1.From.Reg = out + p1.To.Type = obj.TYPE_REG + p1.To.Reg = out + // BNE + p2 := s.Prog(ppc64.ABNE) + p2.To.Type = obj.TYPE_BRANCH + // ISYNC + pisync := s.Prog(ppc64.AISYNC) + pisync.To.Type = obj.TYPE_NONE + gc.Patch(p2, pisync) + + case ssa.OpPPC64LoweredAtomicStore8, + ssa.OpPPC64LoweredAtomicStore32, + ssa.OpPPC64LoweredAtomicStore64: + // SYNC or LWSYNC + // MOVB/MOVW/MOVD arg1,(arg0) + st := ppc64.AMOVD + switch v.Op { + case ssa.OpPPC64LoweredAtomicStore8: + st = ppc64.AMOVB + case ssa.OpPPC64LoweredAtomicStore32: + st = ppc64.AMOVW + } + arg0 := v.Args[0].Reg() + arg1 := v.Args[1].Reg() + // If AuxInt == 0, LWSYNC (Store-Release), else SYNC + // SYNC + syncOp := ppc64.ASYNC + if v.AuxInt == 0 { + syncOp = ppc64.ALWSYNC + } + psync := s.Prog(syncOp) + psync.To.Type = obj.TYPE_NONE + // Store + p := s.Prog(st) + p.To.Type = obj.TYPE_MEM + p.To.Reg = arg0 + p.From.Type = obj.TYPE_REG + p.From.Reg = arg1 + + case ssa.OpPPC64LoweredAtomicCas64, + 
ssa.OpPPC64LoweredAtomicCas32: + // LWSYNC + // loop: + // LDAR (Rarg0), MutexHint, Rtmp + // CMP Rarg1, Rtmp + // BNE fail + // STDCCC Rarg2, (Rarg0) + // BNE loop + // LWSYNC // Only for sequential consistency; not required in CasRel. + // MOVD $1, Rout + // BR end + // fail: + // MOVD $0, Rout + // end: + ld := ppc64.ALDAR + st := ppc64.ASTDCCC + cmp := ppc64.ACMP + if v.Op == ssa.OpPPC64LoweredAtomicCas32 { + ld = ppc64.ALWAR + st = ppc64.ASTWCCC + cmp = ppc64.ACMPW + } + r0 := v.Args[0].Reg() + r1 := v.Args[1].Reg() + r2 := v.Args[2].Reg() + out := v.Reg0() + // LWSYNC - Assuming shared data not write-through-required nor + // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b. + plwsync1 := s.Prog(ppc64.ALWSYNC) + plwsync1.To.Type = obj.TYPE_NONE + // LDAR or LWAR + p := s.Prog(ld) + p.From.Type = obj.TYPE_MEM + p.From.Reg = r0 + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REGTMP + // If it is a Compare-and-Swap-Release operation, set the EH field with + // the release hint. + if v.AuxInt == 0 { + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 0}) + } + // CMP reg1,reg2 + p1 := s.Prog(cmp) + p1.From.Type = obj.TYPE_REG + p1.From.Reg = r1 + p1.To.Reg = ppc64.REGTMP + p1.To.Type = obj.TYPE_REG + // BNE cas_fail + p2 := s.Prog(ppc64.ABNE) + p2.To.Type = obj.TYPE_BRANCH + // STDCCC or STWCCC + p3 := s.Prog(st) + p3.From.Type = obj.TYPE_REG + p3.From.Reg = r2 + p3.To.Type = obj.TYPE_MEM + p3.To.Reg = r0 + // BNE retry + p4 := s.Prog(ppc64.ABNE) + p4.To.Type = obj.TYPE_BRANCH + gc.Patch(p4, p) + // LWSYNC - Assuming shared data not write-through-required nor + // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b. + // If the operation is a CAS-Release, then synchronization is not necessary. + if v.AuxInt != 0 { + plwsync2 := s.Prog(ppc64.ALWSYNC) + plwsync2.To.Type = obj.TYPE_NONE + } + // return true + p5 := s.Prog(ppc64.AMOVD) + p5.From.Type = obj.TYPE_CONST + p5.From.Offset = 1 + p5.To.Type = obj.TYPE_REG + p5.To.Reg = out + // BR done + p6 := s.Prog(obj.AJMP) + p6.To.Type = obj.TYPE_BRANCH + // return false + p7 := s.Prog(ppc64.AMOVD) + p7.From.Type = obj.TYPE_CONST + p7.From.Offset = 0 + p7.To.Type = obj.TYPE_REG + p7.To.Reg = out + gc.Patch(p2, p7) + // done (label) + p8 := s.Prog(obj.ANOP) + gc.Patch(p6, p8) + + case ssa.OpPPC64LoweredGetClosurePtr: + // Closure pointer is R11 (already) + gc.CheckLoweredGetClosurePtr(v) + + case ssa.OpPPC64LoweredGetCallerSP: + // caller's SP is FixedFrameSize below the address of the first arg + p := s.Prog(ppc64.AMOVD) + p.From.Type = obj.TYPE_ADDR + p.From.Offset = -gc.Ctxt.FixedFrameSize() + p.From.Name = obj.NAME_PARAM + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Reg() + + case ssa.OpPPC64LoweredGetCallerPC: + p := s.Prog(obj.AGETCALLERPC) + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Reg() + + case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F: + // input is already rounded + + case ssa.OpLoadReg: + loadOp := loadByType(v.Type) + p := s.Prog(loadOp) + gc.AddrAuto(&p.From, v.Args[0]) + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Reg() + + case ssa.OpStoreReg: + storeOp := storeByType(v.Type) + p := s.Prog(storeOp) + p.From.Type = obj.TYPE_REG + p.From.Reg = v.Args[0].Reg() + gc.AddrAuto(&p.To, v) + + case ssa.OpPPC64DIVD: + // For now, + // + // cmp arg1, -1 + // be ahead + // v = arg0 / arg1 + // b over + // ahead: v = - arg0 + // over: nop + r := v.Reg() + r0 := v.Args[0].Reg() + r1 := v.Args[1].Reg() + + p := s.Prog(ppc64.ACMP) + p.From.Type = obj.TYPE_REG + p.From.Reg = r1 + p.To.Type = obj.TYPE_CONST + p.To.Offset = -1 + + 
pbahead := s.Prog(ppc64.ABEQ) + pbahead.To.Type = obj.TYPE_BRANCH + + p = s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_REG + p.From.Reg = r1 + p.Reg = r0 + p.To.Type = obj.TYPE_REG + p.To.Reg = r + + pbover := s.Prog(obj.AJMP) + pbover.To.Type = obj.TYPE_BRANCH + + p = s.Prog(ppc64.ANEG) + p.To.Type = obj.TYPE_REG + p.To.Reg = r + p.From.Type = obj.TYPE_REG + p.From.Reg = r0 + gc.Patch(pbahead, p) + + p = s.Prog(obj.ANOP) + gc.Patch(pbover, p) + + case ssa.OpPPC64DIVW: + // word-width version of above + r := v.Reg() + r0 := v.Args[0].Reg() + r1 := v.Args[1].Reg() + + p := s.Prog(ppc64.ACMPW) + p.From.Type = obj.TYPE_REG + p.From.Reg = r1 + p.To.Type = obj.TYPE_CONST + p.To.Offset = -1 + + pbahead := s.Prog(ppc64.ABEQ) + pbahead.To.Type = obj.TYPE_BRANCH + + p = s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_REG + p.From.Reg = r1 + p.Reg = r0 + p.To.Type = obj.TYPE_REG + p.To.Reg = r + + pbover := s.Prog(obj.AJMP) + pbover.To.Type = obj.TYPE_BRANCH + + p = s.Prog(ppc64.ANEG) + p.To.Type = obj.TYPE_REG + p.To.Reg = r + p.From.Type = obj.TYPE_REG + p.From.Reg = r0 + gc.Patch(pbahead, p) + + p = s.Prog(obj.ANOP) + gc.Patch(pbover, p) + + case ssa.OpPPC64CLRLSLWI: + r := v.Reg() + r1 := v.Args[0].Reg() + shifts := v.AuxInt + p := s.Prog(v.Op.Asm()) + // clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)}) + p.Reg = r1 + p.To.Type = obj.TYPE_REG + p.To.Reg = r + + case ssa.OpPPC64CLRLSLDI: + r := v.Reg() + r1 := v.Args[0].Reg() + shifts := v.AuxInt + p := s.Prog(v.Op.Asm()) + // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)}) + p.Reg = r1 + p.To.Type = obj.TYPE_REG + p.To.Reg = r + + // Mask has been set as sh + case ssa.OpPPC64RLDICL: + r := v.Reg() + r1 := v.Args[0].Reg() + shifts := v.AuxInt + p := s.Prog(v.Op.Asm()) + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}) + p.Reg = r1 + p.To.Type = obj.TYPE_REG + p.To.Reg = r + + case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS, + ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU, + ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW, + ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW, + ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU, + ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN, + ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV, + ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW: + r := v.Reg() + r1 := v.Args[0].Reg() + r2 := v.Args[1].Reg() + p := s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_REG + p.From.Reg = r2 + p.Reg = r1 + p.To.Type = obj.TYPE_REG + p.To.Reg = r + + case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC: + r1 := v.Args[0].Reg() + r2 := v.Args[1].Reg() + p := s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_REG + p.From.Reg = r2 + p.Reg = r1 + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REGTMP // result is not needed + + case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst: + p := s.Prog(v.Op.Asm()) + p.From.Type = 
obj.TYPE_CONST + p.From.Offset = v.AuxInt + p.Reg = v.Args[0].Reg() + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Reg() + + // Auxint holds encoded rotate + mask + case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI: + rot, _, _, mask := ssa.DecodePPC64RotateMask(v.AuxInt) + p := s.Prog(v.Op.Asm()) + p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()} + p.Reg = v.Args[0].Reg() + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(rot)} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: int64(mask)}) + + // Auxint holds mask + case ssa.OpPPC64RLWNM: + _, _, _, mask := ssa.DecodePPC64RotateMask(v.AuxInt) + p := s.Prog(v.Op.Asm()) + p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()} + p.Reg = v.Args[0].Reg() + p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: int64(mask)}) + + case ssa.OpPPC64MADDLD: + r := v.Reg() + r1 := v.Args[0].Reg() + r2 := v.Args[1].Reg() + r3 := v.Args[2].Reg() + // r = r1*r2 ± r3 + p := s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_REG + p.From.Reg = r1 + p.Reg = r2 + p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r3}) + p.To.Type = obj.TYPE_REG + p.To.Reg = r + + case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS: + r := v.Reg() + r1 := v.Args[0].Reg() + r2 := v.Args[1].Reg() + r3 := v.Args[2].Reg() + // r = r1*r2 ± r3 + p := s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_REG + p.From.Reg = r1 + p.Reg = r3 + p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2}) + p.To.Type = obj.TYPE_REG + p.To.Reg = r + + case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL, + ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW, + ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS, + ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD: + r := v.Reg() + p := s.Prog(v.Op.Asm()) + p.To.Type = obj.TYPE_REG + p.To.Reg = r + p.From.Type = obj.TYPE_REG + p.From.Reg = v.Args[0].Reg() + + case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst, + ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, + ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst: + p := s.Prog(v.Op.Asm()) + p.Reg = v.Args[0].Reg() + p.From.Type = obj.TYPE_CONST + p.From.Offset = v.AuxInt + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Reg() + + case ssa.OpPPC64SUBFCconst: + p := s.Prog(v.Op.Asm()) + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: v.AuxInt}) + p.From.Type = obj.TYPE_REG + p.From.Reg = v.Args[0].Reg() + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Reg() + + case ssa.OpPPC64ANDCCconst: + p := s.Prog(v.Op.Asm()) + p.Reg = v.Args[0].Reg() + p.From.Type = obj.TYPE_CONST + p.From.Offset = v.AuxInt + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REGTMP // discard result + + case ssa.OpPPC64MOVDaddr: + switch v.Aux.(type) { + default: + v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux) + case nil: + // If aux offset and aux int are both 0, and the same + // input and output regs are used, no instruction + // needs to be generated, since it would just be + // addi rx, rx, 0. 
+ if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() { + p := s.Prog(ppc64.AMOVD) + p.From.Type = obj.TYPE_ADDR + p.From.Reg = v.Args[0].Reg() + p.From.Offset = v.AuxInt + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Reg() + } + + case *obj.LSym, *gc.Node: + p := s.Prog(ppc64.AMOVD) + p.From.Type = obj.TYPE_ADDR + p.From.Reg = v.Args[0].Reg() + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Reg() + gc.AddAux(&p.From, v) + + } + + case ssa.OpPPC64MOVDconst: + p := s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_CONST + p.From.Offset = v.AuxInt + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Reg() + + case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst: + p := s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_FCONST + p.From.Val = math.Float64frombits(uint64(v.AuxInt)) + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Reg() + + case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU: + p := s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_REG + p.From.Reg = v.Args[0].Reg() + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Args[1].Reg() + + case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst: + p := s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_REG + p.From.Reg = v.Args[0].Reg() + p.To.Type = obj.TYPE_CONST + p.To.Offset = v.AuxInt + + case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg: + // Shift in register to required size + p := s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_REG + p.From.Reg = v.Args[0].Reg() + p.To.Reg = v.Reg() + p.To.Type = obj.TYPE_REG + + case ssa.OpPPC64MOVDload: + + // MOVDload uses a DS instruction which requires the offset value of the data to be a multiple of 4. + // For offsets known at compile time, a MOVDload won't be selected, but in the case of a go.string, + // the offset is not known until link time. If the load of a go.string uses relocation for the + // offset field of the instruction, and if the offset is not aligned to 4, then a link error will occur. + // To avoid this problem, the full address of the go.string is computed and loaded into the base register, + // and that base register is used for the MOVDload using a 0 offset. This problem can only occur with + // go.string types because other types will have proper alignment. 
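+ // (MOVD from memory assembles to a DS-form load, whose displacement field is stored divided by 4, so only offsets that are a multiple of 4 can be encoded.)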
+ + gostring := false + switch n := v.Aux.(type) { + case *obj.LSym: + gostring = strings.HasPrefix(n.Name, "go.string.") + } + if gostring { + // Generate full addr of the go.string const + // including AuxInt + p := s.Prog(ppc64.AMOVD) + p.From.Type = obj.TYPE_ADDR + p.From.Reg = v.Args[0].Reg() + gc.AddAux(&p.From, v) + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Reg() + // Load go.string using 0 offset + p = s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_MEM + p.From.Reg = v.Reg() + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Reg() + break + } + // Not a go.string, generate a normal load + fallthrough + + case ssa.OpPPC64MOVWload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload: + p := s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_MEM + p.From.Reg = v.Args[0].Reg() + gc.AddAux(&p.From, v) + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Reg() + + case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload: + p := s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_MEM + p.From.Reg = v.Args[0].Reg() + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Reg() + + case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore: + p := s.Prog(v.Op.Asm()) + p.To.Type = obj.TYPE_MEM + p.To.Reg = v.Args[0].Reg() + p.From.Type = obj.TYPE_REG + p.From.Reg = v.Args[1].Reg() + + case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx, + ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx, + ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx: + p := s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_MEM + p.From.Reg = v.Args[0].Reg() + p.From.Index = v.Args[1].Reg() + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Reg() + + case ssa.OpPPC64MOVDstorezero, ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero: + p := s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REGZERO + p.To.Type = obj.TYPE_MEM + p.To.Reg = v.Args[0].Reg() + gc.AddAux(&p.To, v) + + case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore: + p := s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_REG + p.From.Reg = v.Args[1].Reg() + p.To.Type = obj.TYPE_MEM + p.To.Reg = v.Args[0].Reg() + gc.AddAux(&p.To, v) + + case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx, + ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx, + ssa.OpPPC64MOVHBRstoreidx: + p := s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_REG + p.From.Reg = v.Args[2].Reg() + p.To.Index = v.Args[1].Reg() + p.To.Type = obj.TYPE_MEM + p.To.Reg = v.Args[0].Reg() + + case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB: + // ISEL, ISELB + // AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE + // ISEL only accepts 0, 1, 2 condition values but the others can be + // achieved by swapping operand order. + // arg0 ? arg1 : arg2 with conditions LT, GT, EQ + // arg0 ? arg2 : arg1 for conditions GE, LE, NE + // ISELB is used when a boolean result is needed, returning 0 or 1 + p := s.Prog(ppc64.AISEL) + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Reg() + // For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load. 
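+ // For example, AuxInt=4 (GE) is emitted as ISEL with condition LT (4&3 == 0) and the two operands swapped, which selects the same value as a GE test on the original operand order.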
+ r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0} + if v.Op == ssa.OpPPC64ISEL { + r.Reg = v.Args[1].Reg() + } + // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2 + if v.AuxInt > 3 { + p.Reg = r.Reg + p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()}) + } else { + p.Reg = v.Args[0].Reg() + p.SetFrom3(r) + } + p.From.Type = obj.TYPE_CONST + p.From.Offset = v.AuxInt & 3 + + case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort: + // The LoweredQuad code generation + // generates STXV instructions on + // power9. The Short variation is used + // if no loop is generated. + + // sizes >= 64 generate a loop as follows: + + // Set up loop counter in CTR, used by BC + // XXLXOR clears VS32 + // XXLXOR VS32,VS32,VS32 + // MOVD len/64,REG_TMP + // MOVD REG_TMP,CTR + // loop: + // STXV VS32,0(R20) + // STXV VS32,16(R20) + // STXV VS32,32(R20) + // STXV VS32,48(R20) + // ADD $64,R20 + // BC 16, 0, loop + + // Bytes per iteration + ctr := v.AuxInt / 64 + + // Remainder bytes + rem := v.AuxInt % 64 + + // Only generate a loop if there is more + // than 1 iteration. + if ctr > 1 { + // Set up VS32 (V0) to hold 0s + p := s.Prog(ppc64.AXXLXOR) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REG_VS32 + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REG_VS32 + p.Reg = ppc64.REG_VS32 + + // Set up CTR loop counter + p = s.Prog(ppc64.AMOVD) + p.From.Type = obj.TYPE_CONST + p.From.Offset = ctr + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REGTMP + + p = s.Prog(ppc64.AMOVD) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REGTMP + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REG_CTR + + // Don't generate padding for + // loops with few iterations. + if ctr > 3 { + p = s.Prog(obj.APCALIGN) + p.From.Type = obj.TYPE_CONST + p.From.Offset = 16 + } + + // generate 4 STXVs to zero 64 bytes + var top *obj.Prog + + p = s.Prog(ppc64.ASTXV) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REG_VS32 + p.To.Type = obj.TYPE_MEM + p.To.Reg = v.Args[0].Reg() + + // Save the top of loop + if top == nil { + top = p + } + p = s.Prog(ppc64.ASTXV) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REG_VS32 + p.To.Type = obj.TYPE_MEM + p.To.Reg = v.Args[0].Reg() + p.To.Offset = 16 + + p = s.Prog(ppc64.ASTXV) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REG_VS32 + p.To.Type = obj.TYPE_MEM + p.To.Reg = v.Args[0].Reg() + p.To.Offset = 32 + + p = s.Prog(ppc64.ASTXV) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REG_VS32 + p.To.Type = obj.TYPE_MEM + p.To.Reg = v.Args[0].Reg() + p.To.Offset = 48 + + // Increment address for the + // 64 bytes just zeroed. + p = s.Prog(ppc64.AADD) + p.Reg = v.Args[0].Reg() + p.From.Type = obj.TYPE_CONST + p.From.Offset = 64 + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Args[0].Reg() + + // Branch back to top of loop + // based on CTR + // BC with BO_BCTR generates bdnz + p = s.Prog(ppc64.ABC) + p.From.Type = obj.TYPE_CONST + p.From.Offset = ppc64.BO_BCTR + p.Reg = ppc64.REG_R0 + p.To.Type = obj.TYPE_BRANCH + gc.Patch(p, top) + } + // When ctr == 1 the loop was not generated but + // there are at least 64 bytes to clear, so add + // that to the remainder to generate the code + // to clear those doublewords + if ctr == 1 { + rem += 64 + } + + // Clear the remainder starting at offset zero + offset := int64(0) + + if rem >= 16 && ctr <= 1 { + // If the XXLXOR hasn't already been + // generated, do it here to initialize + // VS32 (V0) to 0. 
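+ // (XXLXOR of a register with itself always produces zero, so no constant load is needed.)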
+ p := s.Prog(ppc64.AXXLXOR) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REG_VS32 + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REG_VS32 + p.Reg = ppc64.REG_VS32 + } + // Generate STXV for 32 or 64 + // bytes. + for rem >= 32 { + p := s.Prog(ppc64.ASTXV) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REG_VS32 + p.To.Type = obj.TYPE_MEM + p.To.Reg = v.Args[0].Reg() + p.To.Offset = offset + + p = s.Prog(ppc64.ASTXV) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REG_VS32 + p.To.Type = obj.TYPE_MEM + p.To.Reg = v.Args[0].Reg() + p.To.Offset = offset + 16 + offset += 32 + rem -= 32 + } + // Generate 16 bytes + if rem >= 16 { + p := s.Prog(ppc64.ASTXV) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REG_VS32 + p.To.Type = obj.TYPE_MEM + p.To.Reg = v.Args[0].Reg() + p.To.Offset = offset + offset += 16 + rem -= 16 + } + + // first clear as many doublewords as possible + // then clear remaining sizes as available + for rem > 0 { + op, size := ppc64.AMOVB, int64(1) + switch { + case rem >= 8: + op, size = ppc64.AMOVD, 8 + case rem >= 4: + op, size = ppc64.AMOVW, 4 + case rem >= 2: + op, size = ppc64.AMOVH, 2 + } + p := s.Prog(op) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REG_R0 + p.To.Type = obj.TYPE_MEM + p.To.Reg = v.Args[0].Reg() + p.To.Offset = offset + rem -= size + offset += size + } + + case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort: + + // Unaligned data doesn't hurt performance + // for these instructions on power8. + + // For sizes >= 64 generate a loop as follows: + + // Set up loop counter in CTR, used by BC + // XXLXOR VS32,VS32,VS32 + // MOVD len/32,REG_TMP + // MOVD REG_TMP,CTR + // MOVD $16,REG_TMP + // loop: + // STXVD2X VS32,(R0)(R20) + // STXVD2X VS32,(R31)(R20) + // ADD $32,R20 + // BC 16, 0, loop + // + // any remainder is done as described below + + // for sizes < 64 bytes, first clear as many doublewords as possible, + // then handle the remainder + // MOVD R0,(R20) + // MOVD R0,8(R20) + // .... etc. + // + // the remainder bytes are cleared using one or more + // of the following instructions with the appropriate + // offsets depending which instructions are needed + // + // MOVW R0,n1(R20) 4 bytes + // MOVH R0,n2(R20) 2 bytes + // MOVB R0,n3(R20) 1 byte + // + // 7 bytes: MOVW, MOVH, MOVB + // 6 bytes: MOVW, MOVH + // 5 bytes: MOVW, MOVB + // 3 bytes: MOVH, MOVB + + // each loop iteration does 32 bytes + ctr := v.AuxInt / 32 + + // remainder bytes + rem := v.AuxInt % 32 + + // only generate a loop if there is more + // than 1 iteration. + if ctr > 1 { + // Set up VS32 (V0) to hold 0s + p := s.Prog(ppc64.AXXLXOR) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REG_VS32 + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REG_VS32 + p.Reg = ppc64.REG_VS32 + + // Set up CTR loop counter + p = s.Prog(ppc64.AMOVD) + p.From.Type = obj.TYPE_CONST + p.From.Offset = ctr + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REGTMP + + p = s.Prog(ppc64.AMOVD) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REGTMP + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REG_CTR + + // Set up R31 to hold index value 16 + p = s.Prog(ppc64.AMOVD) + p.From.Type = obj.TYPE_CONST + p.From.Offset = 16 + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REGTMP + + // Don't add padding for alignment + // with few loop iterations. 
+ if ctr > 3 { + p = s.Prog(obj.APCALIGN) + p.From.Type = obj.TYPE_CONST + p.From.Offset = 16 + } + + // generate 2 STXVD2Xs to store 16 bytes + // when this is a loop then the top must be saved + var top *obj.Prog + // This is the top of loop + + p = s.Prog(ppc64.ASTXVD2X) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REG_VS32 + p.To.Type = obj.TYPE_MEM + p.To.Reg = v.Args[0].Reg() + p.To.Index = ppc64.REGZERO + // Save the top of loop + if top == nil { + top = p + } + p = s.Prog(ppc64.ASTXVD2X) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REG_VS32 + p.To.Type = obj.TYPE_MEM + p.To.Reg = v.Args[0].Reg() + p.To.Index = ppc64.REGTMP + + // Increment address for the + // 4 doublewords just zeroed. + p = s.Prog(ppc64.AADD) + p.Reg = v.Args[0].Reg() + p.From.Type = obj.TYPE_CONST + p.From.Offset = 32 + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Args[0].Reg() + + // Branch back to top of loop + // based on CTR + // BC with BO_BCTR generates bdnz + p = s.Prog(ppc64.ABC) + p.From.Type = obj.TYPE_CONST + p.From.Offset = ppc64.BO_BCTR + p.Reg = ppc64.REG_R0 + p.To.Type = obj.TYPE_BRANCH + gc.Patch(p, top) + } + + // when ctr == 1 the loop was not generated but + // there are at least 32 bytes to clear, so add + // that to the remainder to generate the code + // to clear those doublewords + if ctr == 1 { + rem += 32 + } + + // clear the remainder starting at offset zero + offset := int64(0) + + // first clear as many doublewords as possible + // then clear remaining sizes as available + for rem > 0 { + op, size := ppc64.AMOVB, int64(1) + switch { + case rem >= 8: + op, size = ppc64.AMOVD, 8 + case rem >= 4: + op, size = ppc64.AMOVW, 4 + case rem >= 2: + op, size = ppc64.AMOVH, 2 + } + p := s.Prog(op) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REG_R0 + p.To.Type = obj.TYPE_MEM + p.To.Reg = v.Args[0].Reg() + p.To.Offset = offset + rem -= size + offset += size + } + + case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort: + + bytesPerLoop := int64(32) + // This will be used when moving more + // than 8 bytes. Moves start with + // as many 8 byte moves as possible, then + // 4, 2, or 1 byte(s) as remaining. This will + // work and be efficient for power8 or later. + // If there are 64 or more bytes, then a + // loop is generated to move 32 bytes and + // update the src and dst addresses on each + // iteration. When < 64 bytes, the appropriate + // number of moves are generated based on the + // size. + // When moving >= 64 bytes a loop is used + // MOVD len/32,REG_TMP + // MOVD REG_TMP,CTR + // MOVD $16,REG_TMP + // top: + // LXVD2X (R0)(R21),VS32 + // LXVD2X (R31)(R21),VS33 + // ADD $32,R21 + // STXVD2X VS32,(R0)(R20) + // STXVD2X VS33,(R31)(R20) + // ADD $32,R20 + // BC 16,0,top + // Bytes not moved by this loop are moved + // with a combination of the following instructions, + // starting with the largest sizes and generating as + // many as needed, using the appropriate offset value. + // MOVD n(R21),R31 + // MOVD R31,n(R20) + // MOVW n1(R21),R31 + // MOVW R31,n1(R20) + // MOVH n2(R21),R31 + // MOVH R31,n2(R20) + // MOVB n3(R21),R31 + // MOVB R31,n3(R20) + + // Each loop iteration moves 32 bytes + ctr := v.AuxInt / bytesPerLoop + + // Remainder after the loop + rem := v.AuxInt % bytesPerLoop + + dstReg := v.Args[0].Reg() + srcReg := v.Args[1].Reg() + + // The set of registers used here, must match the clobbered reg list + // in PPC64Ops.go. 
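+ // (As in the sketch above: loads use R21 as the source, stores use R20 as the destination, and REGTMP (R31) holds the index value 16.)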
+ offset := int64(0) + + // top of the loop + var top *obj.Prog + // Only generate looping code when loop counter is > 1 for >= 64 bytes + if ctr > 1 { + // Set up the CTR + p := s.Prog(ppc64.AMOVD) + p.From.Type = obj.TYPE_CONST + p.From.Offset = ctr + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REGTMP + + p = s.Prog(ppc64.AMOVD) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REGTMP + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REG_CTR + + // Use REGTMP as index reg + p = s.Prog(ppc64.AMOVD) + p.From.Type = obj.TYPE_CONST + p.From.Offset = 16 + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REGTMP + + // Don't adding padding for + // alignment with small iteration + // counts. + if ctr > 3 { + p = s.Prog(obj.APCALIGN) + p.From.Type = obj.TYPE_CONST + p.From.Offset = 16 + } + + // Generate 16 byte loads and stores. + // Use temp register for index (16) + // on the second one. + + p = s.Prog(ppc64.ALXVD2X) + p.From.Type = obj.TYPE_MEM + p.From.Reg = srcReg + p.From.Index = ppc64.REGZERO + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REG_VS32 + if top == nil { + top = p + } + p = s.Prog(ppc64.ALXVD2X) + p.From.Type = obj.TYPE_MEM + p.From.Reg = srcReg + p.From.Index = ppc64.REGTMP + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REG_VS33 + + // increment the src reg for next iteration + p = s.Prog(ppc64.AADD) + p.Reg = srcReg + p.From.Type = obj.TYPE_CONST + p.From.Offset = bytesPerLoop + p.To.Type = obj.TYPE_REG + p.To.Reg = srcReg + + // generate 16 byte stores + p = s.Prog(ppc64.ASTXVD2X) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REG_VS32 + p.To.Type = obj.TYPE_MEM + p.To.Reg = dstReg + p.To.Index = ppc64.REGZERO + + p = s.Prog(ppc64.ASTXVD2X) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REG_VS33 + p.To.Type = obj.TYPE_MEM + p.To.Reg = dstReg + p.To.Index = ppc64.REGTMP + + // increment the dst reg for next iteration + p = s.Prog(ppc64.AADD) + p.Reg = dstReg + p.From.Type = obj.TYPE_CONST + p.From.Offset = bytesPerLoop + p.To.Type = obj.TYPE_REG + p.To.Reg = dstReg + + // BC with BO_BCTR generates bdnz to branch on nonzero CTR + // to loop top. + p = s.Prog(ppc64.ABC) + p.From.Type = obj.TYPE_CONST + p.From.Offset = ppc64.BO_BCTR + p.Reg = ppc64.REG_R0 + p.To.Type = obj.TYPE_BRANCH + gc.Patch(p, top) + + // srcReg and dstReg were incremented in the loop, so + // later instructions start with offset 0. + offset = int64(0) + } + + // No loop was generated for one iteration, so + // add 32 bytes to the remainder to move those bytes. + if ctr == 1 { + rem += bytesPerLoop + } + + if rem >= 16 { + // Generate 16 byte loads and stores. + // Use temp register for index (value 16) + // on the second one. 
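+ // (LXVD2X/STXVD2X are X-form, indexed-only instructions, so the constant 16 must be placed in a register rather than used as an immediate offset.)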
+ p := s.Prog(ppc64.ALXVD2X) + p.From.Type = obj.TYPE_MEM + p.From.Reg = srcReg + p.From.Index = ppc64.REGZERO + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REG_VS32 + + p = s.Prog(ppc64.ASTXVD2X) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REG_VS32 + p.To.Type = obj.TYPE_MEM + p.To.Reg = dstReg + p.To.Index = ppc64.REGZERO + + offset = 16 + rem -= 16 + + if rem >= 16 { + // Use REGTMP as index reg + p := s.Prog(ppc64.AMOVD) + p.From.Type = obj.TYPE_CONST + p.From.Offset = 16 + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REGTMP + + p = s.Prog(ppc64.ALXVD2X) + p.From.Type = obj.TYPE_MEM + p.From.Reg = srcReg + p.From.Index = ppc64.REGTMP + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REG_VS32 + + p = s.Prog(ppc64.ASTXVD2X) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REG_VS32 + p.To.Type = obj.TYPE_MEM + p.To.Reg = dstReg + p.To.Index = ppc64.REGTMP + + offset = 32 + rem -= 16 + } + } + + // Generate all the remaining load and store pairs, starting with + // as many 8 byte moves as possible, then 4, 2, 1. + for rem > 0 { + op, size := ppc64.AMOVB, int64(1) + switch { + case rem >= 8: + op, size = ppc64.AMOVD, 8 + case rem >= 4: + op, size = ppc64.AMOVW, 4 + case rem >= 2: + op, size = ppc64.AMOVH, 2 + } + // Load + p := s.Prog(op) + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REGTMP + p.From.Type = obj.TYPE_MEM + p.From.Reg = srcReg + p.From.Offset = offset + + // Store + p = s.Prog(op) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REGTMP + p.To.Type = obj.TYPE_MEM + p.To.Reg = dstReg + p.To.Offset = offset + rem -= size + offset += size + } + + case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort: + bytesPerLoop := int64(64) + // This is used when moving more + // than 8 bytes on power9. Moves start with + // as many 8 byte moves as possible, then + // 4, 2, or 1 byte(s) as remaining. This will + // work and be efficient for power8 or later. + // If there are 64 or more bytes, then a + // loop is generated to move 32 bytes and + // update the src and dst addresses on each + // iteration. When < 64 bytes, the appropriate + // number of moves are generated based on the + // size. + // When moving >= 64 bytes a loop is used + // MOVD len/32,REG_TMP + // MOVD REG_TMP,CTR + // top: + // LXV 0(R21),VS32 + // LXV 16(R21),VS33 + // ADD $32,R21 + // STXV VS32,0(R20) + // STXV VS33,16(R20) + // ADD $32,R20 + // BC 16,0,top + // Bytes not moved by this loop are moved + // with a combination of the following instructions, + // starting with the largest sizes and generating as + // many as needed, using the appropriate offset value. + // MOVD n(R21),R31 + // MOVD R31,n(R20) + // MOVW n1(R21),R31 + // MOVW R31,n1(R20) + // MOVH n2(R21),R31 + // MOVH R31,n2(R20) + // MOVB n3(R21),R31 + // MOVB R31,n3(R20) + + // Each loop iteration moves 32 bytes + ctr := v.AuxInt / bytesPerLoop + + // Remainder after the loop + rem := v.AuxInt % bytesPerLoop + + dstReg := v.Args[0].Reg() + srcReg := v.Args[1].Reg() + + offset := int64(0) + + // top of the loop + var top *obj.Prog + + // Only generate looping code when loop counter is > 1 for >= 64 bytes + if ctr > 1 { + // Set up the CTR + p := s.Prog(ppc64.AMOVD) + p.From.Type = obj.TYPE_CONST + p.From.Offset = ctr + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REGTMP + + p = s.Prog(ppc64.AMOVD) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REGTMP + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REG_CTR + + p = s.Prog(obj.APCALIGN) + p.From.Type = obj.TYPE_CONST + p.From.Offset = 16 + + // Generate 16 byte loads and stores. 
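+ // (On power9, LXV/STXV take an immediate DQ-form offset that must be a multiple of 16, so no index register is needed here.)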
+ p = s.Prog(ppc64.ALXV) + p.From.Type = obj.TYPE_MEM + p.From.Reg = srcReg + p.From.Offset = offset + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REG_VS32 + if top == nil { + top = p + } + p = s.Prog(ppc64.ALXV) + p.From.Type = obj.TYPE_MEM + p.From.Reg = srcReg + p.From.Offset = offset + 16 + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REG_VS33 + + // generate 16 byte stores + p = s.Prog(ppc64.ASTXV) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REG_VS32 + p.To.Type = obj.TYPE_MEM + p.To.Reg = dstReg + p.To.Offset = offset + + p = s.Prog(ppc64.ASTXV) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REG_VS33 + p.To.Type = obj.TYPE_MEM + p.To.Reg = dstReg + p.To.Offset = offset + 16 + + // Generate 16 byte loads and stores. + p = s.Prog(ppc64.ALXV) + p.From.Type = obj.TYPE_MEM + p.From.Reg = srcReg + p.From.Offset = offset + 32 + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REG_VS32 + + p = s.Prog(ppc64.ALXV) + p.From.Type = obj.TYPE_MEM + p.From.Reg = srcReg + p.From.Offset = offset + 48 + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REG_VS33 + + // generate 16 byte stores + p = s.Prog(ppc64.ASTXV) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REG_VS32 + p.To.Type = obj.TYPE_MEM + p.To.Reg = dstReg + p.To.Offset = offset + 32 + + p = s.Prog(ppc64.ASTXV) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REG_VS33 + p.To.Type = obj.TYPE_MEM + p.To.Reg = dstReg + p.To.Offset = offset + 48 + + // increment the src reg for next iteration + p = s.Prog(ppc64.AADD) + p.Reg = srcReg + p.From.Type = obj.TYPE_CONST + p.From.Offset = bytesPerLoop + p.To.Type = obj.TYPE_REG + p.To.Reg = srcReg + + // increment the dst reg for next iteration + p = s.Prog(ppc64.AADD) + p.Reg = dstReg + p.From.Type = obj.TYPE_CONST + p.From.Offset = bytesPerLoop + p.To.Type = obj.TYPE_REG + p.To.Reg = dstReg + + // BC with BO_BCTR generates bdnz to branch on nonzero CTR + // to loop top. + p = s.Prog(ppc64.ABC) + p.From.Type = obj.TYPE_CONST + p.From.Offset = ppc64.BO_BCTR + p.Reg = ppc64.REG_R0 + p.To.Type = obj.TYPE_BRANCH + gc.Patch(p, top) + + // srcReg and dstReg were incremented in the loop, so + // later instructions start with offset 0. + offset = int64(0) + } + + // No loop was generated for one iteration, so + // add 32 bytes to the remainder to move those bytes. + if ctr == 1 { + rem += bytesPerLoop + } + if rem >= 32 { + p := s.Prog(ppc64.ALXV) + p.From.Type = obj.TYPE_MEM + p.From.Reg = srcReg + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REG_VS32 + + p = s.Prog(ppc64.ALXV) + p.From.Type = obj.TYPE_MEM + p.From.Reg = srcReg + p.From.Offset = 16 + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REG_VS33 + + p = s.Prog(ppc64.ASTXV) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REG_VS32 + p.To.Type = obj.TYPE_MEM + p.To.Reg = dstReg + + p = s.Prog(ppc64.ASTXV) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REG_VS33 + p.To.Type = obj.TYPE_MEM + p.To.Reg = dstReg + p.To.Offset = 16 + + offset = 32 + rem -= 32 + } + + if rem >= 16 { + // Generate 16 byte loads and stores. 
+ p := s.Prog(ppc64.ALXV) + p.From.Type = obj.TYPE_MEM + p.From.Reg = srcReg + p.From.Offset = offset + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REG_VS32 + + p = s.Prog(ppc64.ASTXV) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REG_VS32 + p.To.Type = obj.TYPE_MEM + p.To.Reg = dstReg + p.To.Offset = offset + + offset += 16 + rem -= 16 + + if rem >= 16 { + p := s.Prog(ppc64.ALXV) + p.From.Type = obj.TYPE_MEM + p.From.Reg = srcReg + p.From.Offset = offset + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REG_VS32 + + p = s.Prog(ppc64.ASTXV) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REG_VS32 + p.To.Type = obj.TYPE_MEM + p.To.Reg = dstReg + p.To.Offset = offset + + offset += 16 + rem -= 16 + } + } + // Generate all the remaining load and store pairs, starting with + // as many 8 byte moves as possible, then 4, 2, 1. + for rem > 0 { + op, size := ppc64.AMOVB, int64(1) + switch { + case rem >= 8: + op, size = ppc64.AMOVD, 8 + case rem >= 4: + op, size = ppc64.AMOVW, 4 + case rem >= 2: + op, size = ppc64.AMOVH, 2 + } + // Load + p := s.Prog(op) + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REGTMP + p.From.Type = obj.TYPE_MEM + p.From.Reg = srcReg + p.From.Offset = offset + + // Store + p = s.Prog(op) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REGTMP + p.To.Type = obj.TYPE_MEM + p.To.Reg = dstReg + p.To.Offset = offset + rem -= size + offset += size + } + + case ssa.OpPPC64CALLstatic: + s.Call(v) + + case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter: + p := s.Prog(ppc64.AMOVD) + p.From.Type = obj.TYPE_REG + p.From.Reg = v.Args[0].Reg() + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REG_LR + + if v.Args[0].Reg() != ppc64.REG_R12 { + v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg) + } + + pp := s.Call(v) + pp.To.Reg = ppc64.REG_LR + + // Insert a hint this is not a subroutine return. + pp.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 1}) + + if gc.Ctxt.Flag_shared { + // When compiling Go into PIC, the function we just + // called via pointer might have been implemented in + // a separate module and so overwritten the TOC + // pointer in R2; reload it. + q := s.Prog(ppc64.AMOVD) + q.From.Type = obj.TYPE_MEM + q.From.Offset = 24 + q.From.Reg = ppc64.REGSP + q.To.Type = obj.TYPE_REG + q.To.Reg = ppc64.REG_R2 + } + + case ssa.OpPPC64LoweredWB: + p := s.Prog(obj.ACALL) + p.To.Type = obj.TYPE_MEM + p.To.Name = obj.NAME_EXTERN + p.To.Sym = v.Aux.(*obj.LSym) + + case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC: + p := s.Prog(obj.ACALL) + p.To.Type = obj.TYPE_MEM + p.To.Name = obj.NAME_EXTERN + p.To.Sym = gc.BoundsCheckFunc[v.AuxInt] + s.UseArgs(16) // space used in callee args area by assembly stubs + + case ssa.OpPPC64LoweredNilCheck: + if objabi.GOOS == "aix" { + // CMP Rarg0, R0 + // BNE 2(PC) + // STW R0, 0(R0) + // NOP (so the BNE has somewhere to land) + + // CMP Rarg0, R0 + p := s.Prog(ppc64.ACMP) + p.From.Type = obj.TYPE_REG + p.From.Reg = v.Args[0].Reg() + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REG_R0 + + // BNE 2(PC) + p2 := s.Prog(ppc64.ABNE) + p2.To.Type = obj.TYPE_BRANCH + + // STW R0, 0(R0) + // Write at 0 is forbidden and will trigger a SIGSEGV + p = s.Prog(ppc64.AMOVW) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REG_R0 + p.To.Type = obj.TYPE_MEM + p.To.Reg = ppc64.REG_R0 + + // NOP (so the BNE has somewhere to land) + nop := s.Prog(obj.ANOP) + gc.Patch(p2, nop) + + } else { + // Issue a load which will fault if arg is nil. 
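+ // (The loaded byte is simply discarded into REGTMP; only the fault on a nil address matters.)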
+ p := s.Prog(ppc64.AMOVBZ) + p.From.Type = obj.TYPE_MEM + p.From.Reg = v.Args[0].Reg() + gc.AddAux(&p.From, v) + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REGTMP + } + if logopt.Enabled() { + logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name) + } + if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers + gc.Warnl(v.Pos, "generated nil check") + } + + // These should be resolved by rules and not make it here. + case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan, + ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual, + ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual: + v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString()) + case ssa.OpPPC64InvertFlags: + v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString()) + case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT: + v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString()) + case ssa.OpClobber: + // TODO: implement for clobberdead experiment. Nop is ok for now. + default: + v.Fatalf("genValue not implemented: %s", v.LongString()) + } +} + +var blockJump = [...]struct { + asm, invasm obj.As + asmeq, invasmun bool +}{ + ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false}, + ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false}, + + ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false}, + ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false}, + ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false}, + ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false}, + + // TODO: need to work FP comparisons into block jumps + ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false}, + ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN + ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN + ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false}, +} + +func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) { + switch b.Kind { + case ssa.BlockDefer: + // defer returns in R3: + // 0 if we should continue executing + // 1 if we should jump to deferreturn call + p := s.Prog(ppc64.ACMP) + p.From.Type = obj.TYPE_REG + p.From.Reg = ppc64.REG_R3 + p.To.Type = obj.TYPE_REG + p.To.Reg = ppc64.REG_R0 + + p = s.Prog(ppc64.ABNE) + p.To.Type = obj.TYPE_BRANCH + s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()}) + if b.Succs[0].Block() != next { + p := s.Prog(obj.AJMP) + p.To.Type = obj.TYPE_BRANCH + s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()}) + } + + case ssa.BlockPlain: + if b.Succs[0].Block() != next { + p := s.Prog(obj.AJMP) + p.To.Type = obj.TYPE_BRANCH + s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()}) + } + case ssa.BlockExit: + case ssa.BlockRet: + s.Prog(obj.ARET) + case ssa.BlockRetJmp: + p := s.Prog(obj.AJMP) + p.To.Type = obj.TYPE_MEM + p.To.Name = obj.NAME_EXTERN + p.To.Sym = b.Aux.(*obj.LSym) + + case ssa.BlockPPC64EQ, ssa.BlockPPC64NE, + ssa.BlockPPC64LT, ssa.BlockPPC64GE, + ssa.BlockPPC64LE, ssa.BlockPPC64GT, + ssa.BlockPPC64FLT, ssa.BlockPPC64FGE, + ssa.BlockPPC64FLE, ssa.BlockPPC64FGT: + jmp := blockJump[b.Kind] + switch next { + case b.Succs[0].Block(): + s.Br(jmp.invasm, b.Succs[1].Block()) + if jmp.invasmun { + // TODO: The second branch is probably predict-not-taken since it is for FP unordered + s.Br(ppc64.ABVS, b.Succs[1].Block()) + } + case b.Succs[1].Block(): + s.Br(jmp.asm, 
b.Succs[0].Block()) + if jmp.asmeq { + s.Br(ppc64.ABEQ, b.Succs[0].Block()) + } + default: + if b.Likely != ssa.BranchUnlikely { + s.Br(jmp.asm, b.Succs[0].Block()) + if jmp.asmeq { + s.Br(ppc64.ABEQ, b.Succs[0].Block()) + } + s.Br(obj.AJMP, b.Succs[1].Block()) + } else { + s.Br(jmp.invasm, b.Succs[1].Block()) + if jmp.invasmun { + // TODO: The second branch is probably predict-not-taken since it is for FP unordered + s.Br(ppc64.ABVS, b.Succs[1].Block()) + } + s.Br(obj.AJMP, b.Succs[0].Block()) + } + } + default: + b.Fatalf("branch not implemented: %s", b.LongString()) + } +}
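The remainder handling in the LoweredZero, LoweredMove and LoweredQuadMove cases above always emits the widest store that still fits, trying 8, 4, 2 and then 1 bytes. A minimal standalone sketch of that width selection (illustrative only, not part of this patch):

package main

import "fmt"

// storeWidths mirrors the greedy width selection used for the remainder
// bytes above: emit the widest store (8, 4, 2, then 1 bytes) that fits.
func storeWidths(rem int64) []int64 {
	var widths []int64
	for rem > 0 {
		size := int64(1)
		switch {
		case rem >= 8:
			size = 8
		case rem >= 4:
			size = 4
		case rem >= 2:
			size = 2
		}
		widths = append(widths, size)
		rem -= size
	}
	return widths
}

func main() {
	// A 23-byte remainder becomes 8+8+4+2+1,
	// i.e. MOVD, MOVD, MOVW, MOVH, MOVB.
	fmt.Println(storeWidths(23)) // [8 8 4 2 1]
}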