diff options
Diffstat (limited to 'src/cmd/internal/obj')
91 files changed, 73051 insertions, 0 deletions
diff --git a/src/cmd/internal/obj/abi_string.go b/src/cmd/internal/obj/abi_string.go new file mode 100644 index 0000000..a439da3 --- /dev/null +++ b/src/cmd/internal/obj/abi_string.go @@ -0,0 +1,16 @@ +// Code generated by "stringer -type ABI"; DO NOT EDIT. + +package obj + +import "strconv" + +const _ABI_name = "ABI0ABIInternalABICount" + +var _ABI_index = [...]uint8{0, 4, 15, 23} + +func (i ABI) String() string { + if i >= ABI(len(_ABI_index)-1) { + return "ABI(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _ABI_name[_ABI_index[i]:_ABI_index[i+1]] +} diff --git a/src/cmd/internal/obj/addrtype_string.go b/src/cmd/internal/obj/addrtype_string.go new file mode 100644 index 0000000..e6277d3 --- /dev/null +++ b/src/cmd/internal/obj/addrtype_string.go @@ -0,0 +1,37 @@ +// Code generated by "stringer -type AddrType"; DO NOT EDIT. + +package obj + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. 
+ var x [1]struct{} + _ = x[TYPE_NONE-0] + _ = x[TYPE_BRANCH-1] + _ = x[TYPE_TEXTSIZE-2] + _ = x[TYPE_MEM-3] + _ = x[TYPE_CONST-4] + _ = x[TYPE_FCONST-5] + _ = x[TYPE_SCONST-6] + _ = x[TYPE_REG-7] + _ = x[TYPE_ADDR-8] + _ = x[TYPE_SHIFT-9] + _ = x[TYPE_REGREG-10] + _ = x[TYPE_REGREG2-11] + _ = x[TYPE_INDIR-12] + _ = x[TYPE_REGLIST-13] + _ = x[TYPE_SPECIAL-14] +} + +const _AddrType_name = "TYPE_NONETYPE_BRANCHTYPE_TEXTSIZETYPE_MEMTYPE_CONSTTYPE_FCONSTTYPE_SCONSTTYPE_REGTYPE_ADDRTYPE_SHIFTTYPE_REGREGTYPE_REGREG2TYPE_INDIRTYPE_REGLISTTYPE_SPECIAL" + +var _AddrType_index = [...]uint8{0, 9, 20, 33, 41, 51, 62, 73, 81, 90, 100, 111, 123, 133, 145, 157} + +func (i AddrType) String() string { + if i >= AddrType(len(_AddrType_index)-1) { + return "AddrType(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _AddrType_name[_AddrType_index[i]:_AddrType_index[i+1]] +} diff --git a/src/cmd/internal/obj/arm/a.out.go b/src/cmd/internal/obj/arm/a.out.go new file mode 100644 index 0000000..fd695ad --- /dev/null +++ b/src/cmd/internal/obj/arm/a.out.go @@ -0,0 +1,410 @@ +// Inferno utils/5c/5.out.h +// https://bitbucket.org/inferno-os/inferno-os/src/master/utils/5c/5.out.h +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +package arm + +import "cmd/internal/obj" + +//go:generate go run ../stringer.go -i $GOFILE -o anames.go -p arm + +const ( + NSNAME = 8 + NSYM = 50 + NREG = 16 +) + +/* -1 disables use of REGARG */ +const ( + REGARG = -1 +) + +const ( + REG_R0 = obj.RBaseARM + iota // must be 16-aligned + REG_R1 + REG_R2 + REG_R3 + REG_R4 + REG_R5 + REG_R6 + REG_R7 + REG_R8 + REG_R9 + REG_R10 + REG_R11 + REG_R12 + REG_R13 + REG_R14 + REG_R15 + + REG_F0 // must be 16-aligned + REG_F1 + REG_F2 + REG_F3 + REG_F4 + REG_F5 + REG_F6 + REG_F7 + REG_F8 + REG_F9 + REG_F10 + REG_F11 + REG_F12 + REG_F13 + REG_F14 + REG_F15 + + REG_FPSR // must be 2-aligned + REG_FPCR + + REG_CPSR // must be 2-aligned + REG_SPSR + + REGRET = REG_R0 + /* compiler allocates R1 up as temps */ + /* compiler allocates register variables R3 up */ + /* compiler allocates external registers R10 down */ + REGEXT = REG_R10 + /* these two registers are declared in runtime.h */ + REGG = REGEXT - 0 + REGM = REGEXT - 1 + + REGCTXT = REG_R7 + REGTMP = REG_R11 + REGSP = REG_R13 + REGLINK = REG_R14 + REGPC = REG_R15 + + NFREG = 16 + /* compiler allocates register variables F0 up */ + /* compiler allocates external registers F7 down */ + FREGRET = REG_F0 + FREGEXT = REG_F7 + FREGTMP = REG_F15 +) + +// http://infocenter.arm.com/help/topic/com.arm.doc.ihi0040b/IHI0040B_aadwarf.pdf +var ARMDWARFRegisters = map[int16]int16{} + +func init() { + // f assigns dwarfregisters[from:to] = (base):(step*(to-from)+base) + f := func(from, to, base, step int16) { + for r := int16(from); r <= to; r++ { + ARMDWARFRegisters[r] = step*(r-from) + base + } + } + f(REG_R0, REG_R15, 0, 1) + f(REG_F0, REG_F15, 64, 2) // Use d0 through D15, aka S0, S2, ..., S30 +} + +// Special registers, after subtracting obj.RBaseARM, bit 9 indicates +// a special register and the low bits select the register. 
+const ( + REG_SPECIAL = obj.RBaseARM + 1<<9 + iota + REG_MB_SY + REG_MB_ST + REG_MB_ISH + REG_MB_ISHST + REG_MB_NSH + REG_MB_NSHST + REG_MB_OSH + REG_MB_OSHST + + MAXREG +) + +const ( + C_NONE = iota + C_REG + C_REGREG + C_REGREG2 + C_REGLIST + C_SHIFT /* register shift R>>x */ + C_SHIFTADDR /* memory address with shifted offset R>>x(R) */ + C_FREG + C_PSR + C_FCR + C_SPR /* REG_MB_SY */ + + C_RCON /* 0xff rotated */ + C_NCON /* ~RCON */ + C_RCON2A /* OR of two disjoint C_RCON constants */ + C_RCON2S /* subtraction of two disjoint C_RCON constants */ + C_SCON /* 0xffff */ + C_LCON + C_LCONADDR + C_ZFCON + C_SFCON + C_LFCON + + C_RACON /* <=0xff rotated constant offset from auto */ + C_LACON /* Large Auto CONstant, i.e. large offset from SP */ + + C_SBRA + C_LBRA + + C_HAUTO /* halfword insn offset (-0xff to 0xff) */ + C_FAUTO /* float insn offset (0 to 0x3fc, word aligned) */ + C_HFAUTO /* both H and F */ + C_SAUTO /* -0xfff to 0xfff */ + C_LAUTO + + C_HOREG + C_FOREG + C_HFOREG + C_SOREG + C_ROREG + C_SROREG /* both nil and R */ + C_LOREG + + C_PC + C_SP + C_HREG + + C_ADDR /* reference to relocatable address */ + + // TLS "var" in local exec mode: will become a constant offset from + // thread local base that is ultimately chosen by the program linker. + C_TLS_LE + + // TLS "var" in initial exec mode: will become a memory address (chosen + // by the program linker) that the dynamic linker will fill with the + // offset from the thread local base. 
+ C_TLS_IE + + C_TEXTSIZE + + C_GOK + + C_NCLASS /* must be the last */ +) + +const ( + AAND = obj.ABaseARM + obj.A_ARCHSPECIFIC + iota + AEOR + ASUB + ARSB + AADD + AADC + ASBC + ARSC + ATST + ATEQ + ACMP + ACMN + AORR + ABIC + + AMVN + + /* + * Do not reorder or fragment the conditional branch + * opcodes, or the predication code will break + */ + ABEQ + ABNE + ABCS + ABHS + ABCC + ABLO + ABMI + ABPL + ABVS + ABVC + ABHI + ABLS + ABGE + ABLT + ABGT + ABLE + + AMOVWD + AMOVWF + AMOVDW + AMOVFW + AMOVFD + AMOVDF + AMOVF + AMOVD + + ACMPF + ACMPD + AADDF + AADDD + ASUBF + ASUBD + AMULF + AMULD + ANMULF + ANMULD + AMULAF + AMULAD + ANMULAF + ANMULAD + AMULSF + AMULSD + ANMULSF + ANMULSD + AFMULAF + AFMULAD + AFNMULAF + AFNMULAD + AFMULSF + AFMULSD + AFNMULSF + AFNMULSD + ADIVF + ADIVD + ASQRTF + ASQRTD + AABSF + AABSD + ANEGF + ANEGD + + ASRL + ASRA + ASLL + AMULU + ADIVU + AMUL + AMMUL + ADIV + AMOD + AMODU + ADIVHW + ADIVUHW + + AMOVB + AMOVBS + AMOVBU + AMOVH + AMOVHS + AMOVHU + AMOVW + AMOVM + ASWPBU + ASWPW + + ARFE + ASWI + AMULA + AMULS + AMMULA + AMMULS + + AWORD + + AMULL + AMULAL + AMULLU + AMULALU + + ABX + ABXRET + ADWORD + + ALDREX + ASTREX + ALDREXD + ASTREXD + + ADMB + + APLD + + ACLZ + AREV + AREV16 + AREVSH + ARBIT + + AXTAB + AXTAH + AXTABU + AXTAHU + + ABFX + ABFXU + ABFC + ABFI + + AMULWT + AMULWB + AMULBB + AMULAWT + AMULAWB + AMULABB + + AMRC // MRC/MCR + + ALAST + + // aliases + AB = obj.AJMP + ABL = obj.ACALL +) + +/* scond byte */ +const ( + C_SCOND = (1 << 4) - 1 + C_SBIT = 1 << 4 + C_PBIT = 1 << 5 + C_WBIT = 1 << 6 + C_FBIT = 1 << 7 /* psr flags-only */ + C_UBIT = 1 << 7 /* up bit, unsigned bit */ + + // These constants are the ARM condition codes encodings, + // XORed with 14 so that C_SCOND_NONE has value 0, + // so that a zeroed Prog.scond means "always execute". 
+ C_SCOND_XOR = 14 + + C_SCOND_EQ = 0 ^ C_SCOND_XOR + C_SCOND_NE = 1 ^ C_SCOND_XOR + C_SCOND_HS = 2 ^ C_SCOND_XOR + C_SCOND_LO = 3 ^ C_SCOND_XOR + C_SCOND_MI = 4 ^ C_SCOND_XOR + C_SCOND_PL = 5 ^ C_SCOND_XOR + C_SCOND_VS = 6 ^ C_SCOND_XOR + C_SCOND_VC = 7 ^ C_SCOND_XOR + C_SCOND_HI = 8 ^ C_SCOND_XOR + C_SCOND_LS = 9 ^ C_SCOND_XOR + C_SCOND_GE = 10 ^ C_SCOND_XOR + C_SCOND_LT = 11 ^ C_SCOND_XOR + C_SCOND_GT = 12 ^ C_SCOND_XOR + C_SCOND_LE = 13 ^ C_SCOND_XOR + C_SCOND_NONE = 14 ^ C_SCOND_XOR + C_SCOND_NV = 15 ^ C_SCOND_XOR + + /* D_SHIFT type */ + SHIFT_LL = 0 << 5 + SHIFT_LR = 1 << 5 + SHIFT_AR = 2 << 5 + SHIFT_RR = 3 << 5 +) diff --git a/src/cmd/internal/obj/arm/anames.go b/src/cmd/internal/obj/arm/anames.go new file mode 100644 index 0000000..f5e92de --- /dev/null +++ b/src/cmd/internal/obj/arm/anames.go @@ -0,0 +1,144 @@ +// Code generated by stringer -i a.out.go -o anames.go -p arm; DO NOT EDIT. + +package arm + +import "cmd/internal/obj" + +var Anames = []string{ + obj.A_ARCHSPECIFIC: "AND", + "EOR", + "SUB", + "RSB", + "ADD", + "ADC", + "SBC", + "RSC", + "TST", + "TEQ", + "CMP", + "CMN", + "ORR", + "BIC", + "MVN", + "BEQ", + "BNE", + "BCS", + "BHS", + "BCC", + "BLO", + "BMI", + "BPL", + "BVS", + "BVC", + "BHI", + "BLS", + "BGE", + "BLT", + "BGT", + "BLE", + "MOVWD", + "MOVWF", + "MOVDW", + "MOVFW", + "MOVFD", + "MOVDF", + "MOVF", + "MOVD", + "CMPF", + "CMPD", + "ADDF", + "ADDD", + "SUBF", + "SUBD", + "MULF", + "MULD", + "NMULF", + "NMULD", + "MULAF", + "MULAD", + "NMULAF", + "NMULAD", + "MULSF", + "MULSD", + "NMULSF", + "NMULSD", + "FMULAF", + "FMULAD", + "FNMULAF", + "FNMULAD", + "FMULSF", + "FMULSD", + "FNMULSF", + "FNMULSD", + "DIVF", + "DIVD", + "SQRTF", + "SQRTD", + "ABSF", + "ABSD", + "NEGF", + "NEGD", + "SRL", + "SRA", + "SLL", + "MULU", + "DIVU", + "MUL", + "MMUL", + "DIV", + "MOD", + "MODU", + "DIVHW", + "DIVUHW", + "MOVB", + "MOVBS", + "MOVBU", + "MOVH", + "MOVHS", + "MOVHU", + "MOVW", + "MOVM", + "SWPBU", + "SWPW", + "RFE", + "SWI", + "MULA", + 
"MULS", + "MMULA", + "MMULS", + "WORD", + "MULL", + "MULAL", + "MULLU", + "MULALU", + "BX", + "BXRET", + "DWORD", + "LDREX", + "STREX", + "LDREXD", + "STREXD", + "DMB", + "PLD", + "CLZ", + "REV", + "REV16", + "REVSH", + "RBIT", + "XTAB", + "XTAH", + "XTABU", + "XTAHU", + "BFX", + "BFXU", + "BFC", + "BFI", + "MULWT", + "MULWB", + "MULBB", + "MULAWT", + "MULAWB", + "MULABB", + "MRC", + "LAST", +} diff --git a/src/cmd/internal/obj/arm/anames5.go b/src/cmd/internal/obj/arm/anames5.go new file mode 100644 index 0000000..78fcd55 --- /dev/null +++ b/src/cmd/internal/obj/arm/anames5.go @@ -0,0 +1,77 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package arm + +var cnames5 = []string{ + "NONE", + "REG", + "REGREG", + "REGREG2", + "REGLIST", + "SHIFT", + "SHIFTADDR", + "FREG", + "PSR", + "FCR", + "SPR", + "RCON", + "NCON", + "RCON2A", + "RCON2S", + "SCON", + "LCON", + "LCONADDR", + "ZFCON", + "SFCON", + "LFCON", + "RACON", + "LACON", + "SBRA", + "LBRA", + "HAUTO", + "FAUTO", + "HFAUTO", + "SAUTO", + "LAUTO", + "HOREG", + "FOREG", + "HFOREG", + "SOREG", + "ROREG", + "SROREG", + "LOREG", + "PC", + "SP", + "HREG", + "ADDR", + "C_TLS_LE", + "C_TLS_IE", + "TEXTSIZE", + "GOK", + "NCLASS", + "SCOND = (1<<4)-1", + "SBIT = 1<<4", + "PBIT = 1<<5", + "WBIT = 1<<6", + "FBIT = 1<<7", + "UBIT = 1<<7", + "SCOND_XOR = 14", + "SCOND_EQ = 0 ^ C_SCOND_XOR", + "SCOND_NE = 1 ^ C_SCOND_XOR", + "SCOND_HS = 2 ^ C_SCOND_XOR", + "SCOND_LO = 3 ^ C_SCOND_XOR", + "SCOND_MI = 4 ^ C_SCOND_XOR", + "SCOND_PL = 5 ^ C_SCOND_XOR", + "SCOND_VS = 6 ^ C_SCOND_XOR", + "SCOND_VC = 7 ^ C_SCOND_XOR", + "SCOND_HI = 8 ^ C_SCOND_XOR", + "SCOND_LS = 9 ^ C_SCOND_XOR", + "SCOND_GE = 10 ^ C_SCOND_XOR", + "SCOND_LT = 11 ^ C_SCOND_XOR", + "SCOND_GT = 12 ^ C_SCOND_XOR", + "SCOND_LE = 13 ^ C_SCOND_XOR", + "SCOND_NONE = 14 ^ C_SCOND_XOR", + "SCOND_NV = 15 ^ C_SCOND_XOR", +} diff --git 
a/src/cmd/internal/obj/arm/asm5.go b/src/cmd/internal/obj/arm/asm5.go new file mode 100644 index 0000000..7b16827 --- /dev/null +++ b/src/cmd/internal/obj/arm/asm5.go @@ -0,0 +1,3094 @@ +// Inferno utils/5l/span.c +// https://bitbucket.org/inferno-os/inferno-os/src/master/utils/5l/span.c +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +package arm + +import ( + "cmd/internal/obj" + "cmd/internal/objabi" + "fmt" + "internal/buildcfg" + "log" + "math" + "sort" +) + +// ctxt5 holds state while assembling a single function. +// Each function gets a fresh ctxt5. +// This allows for multiple functions to be safely concurrently assembled. +type ctxt5 struct { + ctxt *obj.Link + newprog obj.ProgAlloc + cursym *obj.LSym + printp *obj.Prog + blitrl *obj.Prog + elitrl *obj.Prog + autosize int64 + instoffset int64 + pc int64 + pool struct { + start uint32 + size uint32 + extra uint32 + } +} + +type Optab struct { + as obj.As + a1 uint8 + a2 int8 + a3 uint8 + type_ uint8 + size int8 + param int16 + flag int8 + pcrelsiz uint8 + scond uint8 // optional flags accepted by the instruction +} + +type Opcross [32][2][32]uint8 + +const ( + LFROM = 1 << 0 + LTO = 1 << 1 + LPOOL = 1 << 2 + LPCREL = 1 << 3 +) + +var optab = []Optab{ + /* struct Optab: + OPCODE, from, prog->reg, to, type, size, param, flag, extra data size, optional suffix */ + {obj.ATEXT, C_ADDR, C_NONE, C_TEXTSIZE, 0, 0, 0, 0, 0, 0}, + {AADD, C_REG, C_REG, C_REG, 1, 4, 0, 0, 0, C_SBIT}, + {AADD, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0, C_SBIT}, + {AAND, C_REG, C_REG, C_REG, 1, 4, 0, 0, 0, C_SBIT}, + {AAND, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0, C_SBIT}, + {AORR, C_REG, C_REG, C_REG, 1, 4, 0, 0, 0, C_SBIT}, + {AORR, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0, C_SBIT}, + {AMOVW, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0, C_SBIT}, + {AMVN, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0, C_SBIT}, + {ACMP, C_REG, C_REG, C_NONE, 1, 4, 0, 0, 0, 0}, + {AADD, C_RCON, C_REG, C_REG, 2, 4, 0, 0, 0, C_SBIT}, + {AADD, C_RCON, C_NONE, C_REG, 2, 4, 0, 0, 0, C_SBIT}, + {AAND, C_RCON, C_REG, C_REG, 2, 4, 0, 0, 0, C_SBIT}, + {AAND, C_RCON, C_NONE, C_REG, 2, 4, 0, 0, 0, C_SBIT}, + {AORR, C_RCON, C_REG, C_REG, 2, 4, 0, 0, 0, C_SBIT}, + {AORR, C_RCON, C_NONE, C_REG, 2, 4, 0, 0, 0, C_SBIT}, + {AMOVW, C_RCON, C_NONE, C_REG, 2, 4, 0, 0, 0, 0}, + {AMVN, C_RCON, C_NONE, C_REG, 2, 4, 0, 0, 0, 0}, + {ACMP, 
C_RCON, C_REG, C_NONE, 2, 4, 0, 0, 0, 0}, + {AADD, C_SHIFT, C_REG, C_REG, 3, 4, 0, 0, 0, C_SBIT}, + {AADD, C_SHIFT, C_NONE, C_REG, 3, 4, 0, 0, 0, C_SBIT}, + {AAND, C_SHIFT, C_REG, C_REG, 3, 4, 0, 0, 0, C_SBIT}, + {AAND, C_SHIFT, C_NONE, C_REG, 3, 4, 0, 0, 0, C_SBIT}, + {AORR, C_SHIFT, C_REG, C_REG, 3, 4, 0, 0, 0, C_SBIT}, + {AORR, C_SHIFT, C_NONE, C_REG, 3, 4, 0, 0, 0, C_SBIT}, + {AMVN, C_SHIFT, C_NONE, C_REG, 3, 4, 0, 0, 0, C_SBIT}, + {ACMP, C_SHIFT, C_REG, C_NONE, 3, 4, 0, 0, 0, 0}, + {AMOVW, C_RACON, C_NONE, C_REG, 4, 4, REGSP, 0, 0, C_SBIT}, + {AB, C_NONE, C_NONE, C_SBRA, 5, 4, 0, LPOOL, 0, 0}, + {ABL, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0, 0}, + {ABX, C_NONE, C_NONE, C_SBRA, 74, 20, 0, 0, 0, 0}, + {ABEQ, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0, 0}, + {ABEQ, C_RCON, C_NONE, C_SBRA, 5, 4, 0, 0, 0, 0}, // prediction hinted form, hint ignored + {AB, C_NONE, C_NONE, C_ROREG, 6, 4, 0, LPOOL, 0, 0}, + {ABL, C_NONE, C_NONE, C_ROREG, 7, 4, 0, 0, 0, 0}, + {ABL, C_REG, C_NONE, C_ROREG, 7, 4, 0, 0, 0, 0}, + {ABX, C_NONE, C_NONE, C_ROREG, 75, 12, 0, 0, 0, 0}, + {ABXRET, C_NONE, C_NONE, C_ROREG, 76, 4, 0, 0, 0, 0}, + {ASLL, C_RCON, C_REG, C_REG, 8, 4, 0, 0, 0, C_SBIT}, + {ASLL, C_RCON, C_NONE, C_REG, 8, 4, 0, 0, 0, C_SBIT}, + {ASLL, C_REG, C_NONE, C_REG, 9, 4, 0, 0, 0, C_SBIT}, + {ASLL, C_REG, C_REG, C_REG, 9, 4, 0, 0, 0, C_SBIT}, + {ASWI, C_NONE, C_NONE, C_NONE, 10, 4, 0, 0, 0, 0}, + {ASWI, C_NONE, C_NONE, C_LCON, 10, 4, 0, 0, 0, 0}, + {AWORD, C_NONE, C_NONE, C_LCON, 11, 4, 0, 0, 0, 0}, + {AWORD, C_NONE, C_NONE, C_LCONADDR, 11, 4, 0, 0, 0, 0}, + {AWORD, C_NONE, C_NONE, C_ADDR, 11, 4, 0, 0, 0, 0}, + {AWORD, C_NONE, C_NONE, C_TLS_LE, 103, 4, 0, 0, 0, 0}, + {AWORD, C_NONE, C_NONE, C_TLS_IE, 104, 4, 0, 0, 0, 0}, + {AMOVW, C_NCON, C_NONE, C_REG, 12, 4, 0, 0, 0, 0}, + {AMOVW, C_SCON, C_NONE, C_REG, 12, 4, 0, 0, 0, 0}, + {AMOVW, C_LCON, C_NONE, C_REG, 12, 4, 0, LFROM, 0, 0}, + {AMOVW, C_LCONADDR, C_NONE, C_REG, 12, 4, 0, LFROM | LPCREL, 4, 0}, + {AMVN, C_NCON, C_NONE, C_REG, 12, 4, 
0, 0, 0, 0}, + {AADD, C_NCON, C_REG, C_REG, 13, 8, 0, 0, 0, C_SBIT}, + {AADD, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0, C_SBIT}, + {AAND, C_NCON, C_REG, C_REG, 13, 8, 0, 0, 0, C_SBIT}, + {AAND, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0, C_SBIT}, + {AORR, C_NCON, C_REG, C_REG, 13, 8, 0, 0, 0, C_SBIT}, + {AORR, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0, C_SBIT}, + {ACMP, C_NCON, C_REG, C_NONE, 13, 8, 0, 0, 0, 0}, + {AADD, C_SCON, C_REG, C_REG, 13, 8, 0, 0, 0, C_SBIT}, + {AADD, C_SCON, C_NONE, C_REG, 13, 8, 0, 0, 0, C_SBIT}, + {AAND, C_SCON, C_REG, C_REG, 13, 8, 0, 0, 0, C_SBIT}, + {AAND, C_SCON, C_NONE, C_REG, 13, 8, 0, 0, 0, C_SBIT}, + {AORR, C_SCON, C_REG, C_REG, 13, 8, 0, 0, 0, C_SBIT}, + {AORR, C_SCON, C_NONE, C_REG, 13, 8, 0, 0, 0, C_SBIT}, + {AMVN, C_SCON, C_NONE, C_REG, 13, 8, 0, 0, 0, 0}, + {ACMP, C_SCON, C_REG, C_NONE, 13, 8, 0, 0, 0, 0}, + {AADD, C_RCON2A, C_REG, C_REG, 106, 8, 0, 0, 0, 0}, + {AADD, C_RCON2A, C_NONE, C_REG, 106, 8, 0, 0, 0, 0}, + {AORR, C_RCON2A, C_REG, C_REG, 106, 8, 0, 0, 0, 0}, + {AORR, C_RCON2A, C_NONE, C_REG, 106, 8, 0, 0, 0, 0}, + {AADD, C_RCON2S, C_REG, C_REG, 107, 8, 0, 0, 0, 0}, + {AADD, C_RCON2S, C_NONE, C_REG, 107, 8, 0, 0, 0, 0}, + {AADD, C_LCON, C_REG, C_REG, 13, 8, 0, LFROM, 0, C_SBIT}, + {AADD, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM, 0, C_SBIT}, + {AAND, C_LCON, C_REG, C_REG, 13, 8, 0, LFROM, 0, C_SBIT}, + {AAND, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM, 0, C_SBIT}, + {AORR, C_LCON, C_REG, C_REG, 13, 8, 0, LFROM, 0, C_SBIT}, + {AORR, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM, 0, C_SBIT}, + {AMVN, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM, 0, 0}, + {ACMP, C_LCON, C_REG, C_NONE, 13, 8, 0, LFROM, 0, 0}, + {AMOVB, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0, 0}, + {AMOVBS, C_REG, C_NONE, C_REG, 14, 8, 0, 0, 0, 0}, + {AMOVBU, C_REG, C_NONE, C_REG, 58, 4, 0, 0, 0, 0}, + {AMOVH, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0, 0}, + {AMOVHS, C_REG, C_NONE, C_REG, 14, 8, 0, 0, 0, 0}, + {AMOVHU, C_REG, C_NONE, C_REG, 14, 8, 0, 0, 0, 0}, + {AMUL, C_REG, C_REG, C_REG, 15, 4, 
0, 0, 0, C_SBIT}, + {AMUL, C_REG, C_NONE, C_REG, 15, 4, 0, 0, 0, C_SBIT}, + {ADIV, C_REG, C_REG, C_REG, 16, 4, 0, 0, 0, 0}, + {ADIV, C_REG, C_NONE, C_REG, 16, 4, 0, 0, 0, 0}, + {ADIVHW, C_REG, C_REG, C_REG, 105, 4, 0, 0, 0, 0}, + {ADIVHW, C_REG, C_NONE, C_REG, 105, 4, 0, 0, 0, 0}, + {AMULL, C_REG, C_REG, C_REGREG, 17, 4, 0, 0, 0, C_SBIT}, + {ABFX, C_LCON, C_REG, C_REG, 18, 4, 0, 0, 0, 0}, // width in From, LSB in From3 + {ABFX, C_LCON, C_NONE, C_REG, 18, 4, 0, 0, 0, 0}, // width in From, LSB in From3 + {AMOVW, C_REG, C_NONE, C_SAUTO, 20, 4, REGSP, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVW, C_REG, C_NONE, C_SOREG, 20, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVB, C_REG, C_NONE, C_SAUTO, 20, 4, REGSP, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVB, C_REG, C_NONE, C_SOREG, 20, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBS, C_REG, C_NONE, C_SAUTO, 20, 4, REGSP, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBS, C_REG, C_NONE, C_SOREG, 20, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBU, C_REG, C_NONE, C_SAUTO, 20, 4, REGSP, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBU, C_REG, C_NONE, C_SOREG, 20, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVW, C_SAUTO, C_NONE, C_REG, 21, 4, REGSP, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVW, C_SOREG, C_NONE, C_REG, 21, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBU, C_SAUTO, C_NONE, C_REG, 21, 4, REGSP, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBU, C_SOREG, C_NONE, C_REG, 21, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AXTAB, C_SHIFT, C_REG, C_REG, 22, 4, 0, 0, 0, 0}, + {AXTAB, C_SHIFT, C_NONE, C_REG, 22, 4, 0, 0, 0, 0}, + {AMOVW, C_SHIFT, C_NONE, C_REG, 23, 4, 0, 0, 0, C_SBIT}, + {AMOVB, C_SHIFT, C_NONE, C_REG, 23, 4, 0, 0, 0, 0}, + {AMOVBS, C_SHIFT, C_NONE, C_REG, 23, 4, 0, 0, 0, 0}, + {AMOVBU, C_SHIFT, C_NONE, C_REG, 23, 4, 0, 0, 0, 0}, + {AMOVH, C_SHIFT, C_NONE, C_REG, 23, 4, 0, 0, 0, 0}, + {AMOVHS, C_SHIFT, C_NONE, C_REG, 23, 4, 0, 0, 0, 0}, + {AMOVHU, C_SHIFT, C_NONE, C_REG, 23, 4, 0, 0, 0, 0}, + {AMOVW, C_REG, C_NONE, C_LAUTO, 30, 8, 
REGSP, LTO, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVW, C_REG, C_NONE, C_LOREG, 30, 8, 0, LTO, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVW, C_REG, C_NONE, C_ADDR, 64, 8, 0, LTO | LPCREL, 4, C_PBIT | C_WBIT | C_UBIT}, + {AMOVB, C_REG, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVB, C_REG, C_NONE, C_LOREG, 30, 8, 0, LTO, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVB, C_REG, C_NONE, C_ADDR, 64, 8, 0, LTO | LPCREL, 4, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBS, C_REG, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBS, C_REG, C_NONE, C_LOREG, 30, 8, 0, LTO, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBS, C_REG, C_NONE, C_ADDR, 64, 8, 0, LTO | LPCREL, 4, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBU, C_REG, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBU, C_REG, C_NONE, C_LOREG, 30, 8, 0, LTO, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBU, C_REG, C_NONE, C_ADDR, 64, 8, 0, LTO | LPCREL, 4, C_PBIT | C_WBIT | C_UBIT}, + {AMOVW, C_TLS_LE, C_NONE, C_REG, 101, 4, 0, LFROM, 0, 0}, + {AMOVW, C_TLS_IE, C_NONE, C_REG, 102, 8, 0, LFROM, 0, 0}, + {AMOVW, C_LAUTO, C_NONE, C_REG, 31, 8, REGSP, LFROM, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVW, C_LOREG, C_NONE, C_REG, 31, 8, 0, LFROM, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVW, C_ADDR, C_NONE, C_REG, 65, 8, 0, LFROM | LPCREL, 4, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBU, C_LAUTO, C_NONE, C_REG, 31, 8, REGSP, LFROM, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBU, C_LOREG, C_NONE, C_REG, 31, 8, 0, LFROM, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBU, C_ADDR, C_NONE, C_REG, 65, 8, 0, LFROM | LPCREL, 4, C_PBIT | C_WBIT | C_UBIT}, + {AMOVW, C_LACON, C_NONE, C_REG, 34, 8, REGSP, LFROM, 0, C_SBIT}, + {AMOVW, C_PSR, C_NONE, C_REG, 35, 4, 0, 0, 0, 0}, + {AMOVW, C_REG, C_NONE, C_PSR, 36, 4, 0, 0, 0, 0}, + {AMOVW, C_RCON, C_NONE, C_PSR, 37, 4, 0, 0, 0, 0}, + {AMOVM, C_REGLIST, C_NONE, C_SOREG, 38, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVM, C_SOREG, C_NONE, C_REGLIST, 39, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {ASWPW, C_SOREG, 
C_REG, C_REG, 40, 4, 0, 0, 0, 0}, + {ARFE, C_NONE, C_NONE, C_NONE, 41, 4, 0, 0, 0, 0}, + {AMOVF, C_FREG, C_NONE, C_FAUTO, 50, 4, REGSP, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVF, C_FREG, C_NONE, C_FOREG, 50, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVF, C_FAUTO, C_NONE, C_FREG, 51, 4, REGSP, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVF, C_FOREG, C_NONE, C_FREG, 51, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVF, C_FREG, C_NONE, C_LAUTO, 52, 12, REGSP, LTO, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVF, C_FREG, C_NONE, C_LOREG, 52, 12, 0, LTO, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVF, C_LAUTO, C_NONE, C_FREG, 53, 12, REGSP, LFROM, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVF, C_LOREG, C_NONE, C_FREG, 53, 12, 0, LFROM, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVF, C_FREG, C_NONE, C_ADDR, 68, 8, 0, LTO | LPCREL, 4, C_PBIT | C_WBIT | C_UBIT}, + {AMOVF, C_ADDR, C_NONE, C_FREG, 69, 8, 0, LFROM | LPCREL, 4, C_PBIT | C_WBIT | C_UBIT}, + {AADDF, C_FREG, C_NONE, C_FREG, 54, 4, 0, 0, 0, 0}, + {AADDF, C_FREG, C_FREG, C_FREG, 54, 4, 0, 0, 0, 0}, + {AMOVF, C_FREG, C_NONE, C_FREG, 55, 4, 0, 0, 0, 0}, + {ANEGF, C_FREG, C_NONE, C_FREG, 55, 4, 0, 0, 0, 0}, + {AMOVW, C_REG, C_NONE, C_FCR, 56, 4, 0, 0, 0, 0}, + {AMOVW, C_FCR, C_NONE, C_REG, 57, 4, 0, 0, 0, 0}, + {AMOVW, C_SHIFTADDR, C_NONE, C_REG, 59, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBU, C_SHIFTADDR, C_NONE, C_REG, 59, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVB, C_SHIFTADDR, C_NONE, C_REG, 60, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBS, C_SHIFTADDR, C_NONE, C_REG, 60, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVH, C_SHIFTADDR, C_NONE, C_REG, 60, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHS, C_SHIFTADDR, C_NONE, C_REG, 60, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHU, C_SHIFTADDR, C_NONE, C_REG, 60, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVW, C_REG, C_NONE, C_SHIFTADDR, 61, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVB, C_REG, C_NONE, C_SHIFTADDR, 61, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBS, C_REG, C_NONE, 
C_SHIFTADDR, 61, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBU, C_REG, C_NONE, C_SHIFTADDR, 61, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVH, C_REG, C_NONE, C_SHIFTADDR, 62, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHS, C_REG, C_NONE, C_SHIFTADDR, 62, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHU, C_REG, C_NONE, C_SHIFTADDR, 62, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVH, C_REG, C_NONE, C_HAUTO, 70, 4, REGSP, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVH, C_REG, C_NONE, C_HOREG, 70, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHS, C_REG, C_NONE, C_HAUTO, 70, 4, REGSP, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHS, C_REG, C_NONE, C_HOREG, 70, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHU, C_REG, C_NONE, C_HAUTO, 70, 4, REGSP, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHU, C_REG, C_NONE, C_HOREG, 70, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVB, C_HAUTO, C_NONE, C_REG, 71, 4, REGSP, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVB, C_HOREG, C_NONE, C_REG, 71, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBS, C_HAUTO, C_NONE, C_REG, 71, 4, REGSP, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBS, C_HOREG, C_NONE, C_REG, 71, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVH, C_HAUTO, C_NONE, C_REG, 71, 4, REGSP, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVH, C_HOREG, C_NONE, C_REG, 71, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHS, C_HAUTO, C_NONE, C_REG, 71, 4, REGSP, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHS, C_HOREG, C_NONE, C_REG, 71, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHU, C_HAUTO, C_NONE, C_REG, 71, 4, REGSP, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHU, C_HOREG, C_NONE, C_REG, 71, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVH, C_REG, C_NONE, C_LAUTO, 72, 8, REGSP, LTO, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVH, C_REG, C_NONE, C_LOREG, 72, 8, 0, LTO, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVH, C_REG, C_NONE, C_ADDR, 94, 8, 0, LTO | LPCREL, 4, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHS, C_REG, C_NONE, C_LAUTO, 72, 8, REGSP, LTO, 0, C_PBIT | C_WBIT | C_UBIT}, + 
{AMOVHS, C_REG, C_NONE, C_LOREG, 72, 8, 0, LTO, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHS, C_REG, C_NONE, C_ADDR, 94, 8, 0, LTO | LPCREL, 4, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHU, C_REG, C_NONE, C_LAUTO, 72, 8, REGSP, LTO, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHU, C_REG, C_NONE, C_LOREG, 72, 8, 0, LTO, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHU, C_REG, C_NONE, C_ADDR, 94, 8, 0, LTO | LPCREL, 4, C_PBIT | C_WBIT | C_UBIT}, + {AMOVB, C_LAUTO, C_NONE, C_REG, 73, 8, REGSP, LFROM, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVB, C_LOREG, C_NONE, C_REG, 73, 8, 0, LFROM, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVB, C_ADDR, C_NONE, C_REG, 93, 8, 0, LFROM | LPCREL, 4, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBS, C_LAUTO, C_NONE, C_REG, 73, 8, REGSP, LFROM, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBS, C_LOREG, C_NONE, C_REG, 73, 8, 0, LFROM, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBS, C_ADDR, C_NONE, C_REG, 93, 8, 0, LFROM | LPCREL, 4, C_PBIT | C_WBIT | C_UBIT}, + {AMOVH, C_LAUTO, C_NONE, C_REG, 73, 8, REGSP, LFROM, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVH, C_LOREG, C_NONE, C_REG, 73, 8, 0, LFROM, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVH, C_ADDR, C_NONE, C_REG, 93, 8, 0, LFROM | LPCREL, 4, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHS, C_LAUTO, C_NONE, C_REG, 73, 8, REGSP, LFROM, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHS, C_LOREG, C_NONE, C_REG, 73, 8, 0, LFROM, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHS, C_ADDR, C_NONE, C_REG, 93, 8, 0, LFROM | LPCREL, 4, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHU, C_LAUTO, C_NONE, C_REG, 73, 8, REGSP, LFROM, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHU, C_LOREG, C_NONE, C_REG, 73, 8, 0, LFROM, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHU, C_ADDR, C_NONE, C_REG, 93, 8, 0, LFROM | LPCREL, 4, C_PBIT | C_WBIT | C_UBIT}, + {ALDREX, C_SOREG, C_NONE, C_REG, 77, 4, 0, 0, 0, 0}, + {ASTREX, C_SOREG, C_REG, C_REG, 78, 4, 0, 0, 0, 0}, + {ADMB, C_NONE, C_NONE, C_NONE, 110, 4, 0, 0, 0, 0}, + {ADMB, C_LCON, C_NONE, C_NONE, 110, 4, 0, 0, 0, 0}, + {ADMB, C_SPR, C_NONE, C_NONE, 110, 4, 0, 0, 0, 0}, + {AMOVF, C_ZFCON, C_NONE, 
C_FREG, 80, 8, 0, 0, 0, 0}, + {AMOVF, C_SFCON, C_NONE, C_FREG, 81, 4, 0, 0, 0, 0}, + {ACMPF, C_FREG, C_FREG, C_NONE, 82, 8, 0, 0, 0, 0}, + {ACMPF, C_FREG, C_NONE, C_NONE, 83, 8, 0, 0, 0, 0}, + {AMOVFW, C_FREG, C_NONE, C_FREG, 84, 4, 0, 0, 0, C_UBIT}, + {AMOVWF, C_FREG, C_NONE, C_FREG, 85, 4, 0, 0, 0, C_UBIT}, + {AMOVFW, C_FREG, C_NONE, C_REG, 86, 8, 0, 0, 0, C_UBIT}, + {AMOVWF, C_REG, C_NONE, C_FREG, 87, 8, 0, 0, 0, C_UBIT}, + {AMOVW, C_REG, C_NONE, C_FREG, 88, 4, 0, 0, 0, 0}, + {AMOVW, C_FREG, C_NONE, C_REG, 89, 4, 0, 0, 0, 0}, + {ALDREXD, C_SOREG, C_NONE, C_REG, 91, 4, 0, 0, 0, 0}, + {ASTREXD, C_SOREG, C_REG, C_REG, 92, 4, 0, 0, 0, 0}, + {APLD, C_SOREG, C_NONE, C_NONE, 95, 4, 0, 0, 0, 0}, + {obj.AUNDEF, C_NONE, C_NONE, C_NONE, 96, 4, 0, 0, 0, 0}, + {ACLZ, C_REG, C_NONE, C_REG, 97, 4, 0, 0, 0, 0}, + {AMULWT, C_REG, C_REG, C_REG, 98, 4, 0, 0, 0, 0}, + {AMULA, C_REG, C_REG, C_REGREG2, 99, 4, 0, 0, 0, C_SBIT}, + {AMULAWT, C_REG, C_REG, C_REGREG2, 99, 4, 0, 0, 0, 0}, + {obj.APCDATA, C_LCON, C_NONE, C_LCON, 0, 0, 0, 0, 0, 0}, + {obj.AFUNCDATA, C_LCON, C_NONE, C_ADDR, 0, 0, 0, 0, 0, 0}, + {obj.ANOP, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0, 0}, + {obj.ANOP, C_LCON, C_NONE, C_NONE, 0, 0, 0, 0, 0, 0}, // nop variants, see #40689 + {obj.ANOP, C_REG, C_NONE, C_NONE, 0, 0, 0, 0, 0, 0}, + {obj.ANOP, C_FREG, C_NONE, C_NONE, 0, 0, 0, 0, 0, 0}, + {obj.ADUFFZERO, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0, 0}, // same as ABL + {obj.ADUFFCOPY, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0, 0}, // same as ABL + {obj.AXXX, C_NONE, C_NONE, C_NONE, 0, 4, 0, 0, 0, 0}, +} + +var mbOp = []struct { + reg int16 + enc uint32 +}{ + {REG_MB_SY, 15}, + {REG_MB_ST, 14}, + {REG_MB_ISH, 11}, + {REG_MB_ISHST, 10}, + {REG_MB_NSH, 7}, + {REG_MB_NSHST, 6}, + {REG_MB_OSH, 3}, + {REG_MB_OSHST, 2}, +} + +var oprange [ALAST & obj.AMask][]Optab + +var xcmp [C_GOK + 1][C_GOK + 1]bool + +var ( + symdiv *obj.LSym + symdivu *obj.LSym + symmod *obj.LSym + symmodu *obj.LSym +) + +// Note about encoding: Prog.scond holds the 
// condition encoding,
// but XOR'ed with C_SCOND_XOR, so that C_SCOND_NONE == 0.
// The code that shifts the value << 28 has the responsibility
// for XORing with C_SCOND_XOR too.

// checkSuffix reports a diagnostic for every .S, .P, .W or .U suffix bit
// that is set in p.Scond but not permitted by the matched optab entry o
// (i.e. not set in o.scond). It only diagnoses; p is not modified.
func checkSuffix(c *ctxt5, p *obj.Prog, o *Optab) {
	if p.Scond&C_SBIT != 0 && o.scond&C_SBIT == 0 {
		c.ctxt.Diag("invalid .S suffix: %v", p)
	}
	if p.Scond&C_PBIT != 0 && o.scond&C_PBIT == 0 {
		c.ctxt.Diag("invalid .P suffix: %v", p)
	}
	if p.Scond&C_WBIT != 0 && o.scond&C_WBIT == 0 {
		c.ctxt.Diag("invalid .W suffix: %v", p)
	}
	if p.Scond&C_UBIT != 0 && o.scond&C_UBIT == 0 {
		c.ctxt.Diag("invalid .U suffix: %v", p)
	}
}

// span5 assigns program counters to the instructions of cursym and
// assembles them into cursym.P.
//
// It walks the Prog list that starts at cursym.Func().Text three times:
//
//  1. Assign a tentative PC to every instruction, queue literal-pool
//     entries via addpool, and splice the pool into the list whenever
//     checkpool or flushpool decides it must be emitted.
//  2. Re-walk the list, raising any p.Pc that is behind the running pc,
//     and repeat while a pass sets bflag.
//  3. Grow cursym.P to the final size and store the words produced by
//     asmout in little-endian byte order, filling PC gaps with 0xe1a00000.
//
// Problems are reported through ctxt.Diag; nothing is returned.
func span5(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
	if ctxt.Retpoline {
		ctxt.Diag("-spectre=ret not supported on arm")
		ctxt.Retpoline = false // don't keep printing
	}

	var p *obj.Prog
	var op *obj.Prog

	p = cursym.Func().Text
	if p == nil || p.Link == nil { // handle external functions and ELF section symbols
		return
	}

	if oprange[AAND&obj.AMask] == nil {
		ctxt.Diag("arm ops not initialized, call arm.buildop first")
	}

	c := ctxt5{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: p.To.Offset + 4}
	pc := int32(0)

	// Pass 1: tentative layout. op trails p by one instruction so that a
	// pool can be inserted after op (i.e. just before p).
	op = p
	p = p.Link
	var m int
	var o *Optab
	for ; p != nil || c.blitrl != nil; op, p = p, p.Link {
		if p == nil {
			// End of the list with pool entries still pending: checkpool
			// flushes them after op, and the loop then continues over
			// the freshly linked pool entries.
			if c.checkpool(op, pc) {
				p = op
				continue
			}

			// can't happen: blitrl is not nil, but checkpool didn't flushpool
			ctxt.Diag("internal inconsistency")

			break
		}

		p.Pc = int64(pc)
		o = c.oplook(p)
		m = int(o.size)

		if m%4 != 0 || p.Pc%4 != 0 {
			ctxt.Diag("!pc invalid: %v size=%d", p, m)
		}

		// must check literal pool here in case p generates many instructions
		if c.blitrl != nil {
			// Emit the constant pool just before p if p
			// would push us over the immediate size limit.
			if c.checkpool(op, pc+int32(m)) {
				// Back up to the instruction just
				// before the pool and continue with
				// the first instruction of the pool.
				p = op
				continue
			}
		}

		if m == 0 && (p.As != obj.AFUNCDATA && p.As != obj.APCDATA && p.As != obj.ANOP) {
			ctxt.Diag("zero-width instruction\n%v", p)
			continue
		}

		// Queue a literal for the operand the optab entry marks as
		// pool-resident, or flush the pool after an unconditional LPOOL
		// instruction.
		switch o.flag & (LFROM | LTO | LPOOL) {
		case LFROM:
			c.addpool(p, &p.From)

		case LTO:
			c.addpool(p, &p.To)

		case LPOOL:
			if p.Scond&C_SCOND == C_SCOND_NONE {
				c.flushpool(p, 0, 0)
			}
		}

		// An unconditional MOVW into REGPC is also followed by a pool flush.
		if p.As == AMOVW && p.To.Type == obj.TYPE_REG && p.To.Reg == REGPC && p.Scond&C_SCOND == C_SCOND_NONE {
			c.flushpool(p, 0, 0)
		}

		pc += int32(m)
	}

	c.cursym.Size = int64(pc)

	/*
	 * if any procedure is large enough to
	 * generate a large SBRA branch, then
	 * generate extra passes putting branches
	 * around jmps to fix. this is rare.
	 */
	times := 0

	var bflag int
	var opc int32
	var out [6 + 3]uint32
	for {
		bflag = 0
		pc = 0
		times++
		c.cursym.Func().Text.Pc = 0 // force re-layout the code.
		for p = c.cursym.Func().Text; p != nil; p = p.Link {
			o = c.oplook(p)
			if int64(pc) > p.Pc {
				p.Pc = int64(pc)
			}

			/* very large branches
			if(o->type == 6 && p->pcond) {
				otxt = p->pcond->pc - c;
				if(otxt < 0)
					otxt = -otxt;
				if(otxt >= (1L<<17) - 10) {
					q = emallocz(sizeof(Prog));
					q->link = p->link;
					p->link = q;
					q->as = AB;
					q->to.type = TYPE_BRANCH;
					q->pcond = p->pcond;
					p->pcond = q;
					q = emallocz(sizeof(Prog));
					q->link = p->link;
					p->link = q;
					q->as = AB;
					q->to.type = TYPE_BRANCH;
					q->pcond = q->link->link;
					bflag = 1;
				}
			}
			*/
			// NOTE(review): opc is read from p.Pc immediately before the
			// comparison below, so as written bflag is only set when p.Pc
			// does not round-trip through int32 — confirm this is intended.
			opc = int32(p.Pc)
			m = int(o.size)
			if p.Pc != int64(opc) {
				bflag = 1
			}

			//print("%v pc changed %d to %d in iter. %d\n", p, opc, (int32)p->pc, times);
			pc = int32(p.Pc + int64(m))

			if m%4 != 0 || p.Pc%4 != 0 {
				ctxt.Diag("pc invalid: %v size=%d", p, m)
			}

			// out holds the words of one instruction; asmout fills it in
			// the final pass below.
			if m/4 > len(out) {
				ctxt.Diag("instruction size too large: %d > %d", m/4, len(out))
			}
			if m == 0 && (p.As != obj.AFUNCDATA && p.As != obj.APCDATA && p.As != obj.ANOP) {
				if p.As == obj.ATEXT {
					c.autosize = p.To.Offset + 4
					continue
				}

				ctxt.Diag("zero-width instruction\n%v", p)
				continue
			}
		}

		c.cursym.Size = int64(pc)
		if bflag == 0 {
			break
		}
	}

	if pc%4 != 0 {
		ctxt.Diag("sym->size=%d, invalid", pc)
	}

	/*
	 * lay out the code.  all the pc-relative code references,
	 * even cross-function, are resolved now;
	 * only data references need to be relocated.
	 * with more work we could leave cross-function
	 * code references to be relocated too, and then
	 * perhaps we'd be able to parallelize the span loop above.
	 */

	p = c.cursym.Func().Text
	c.autosize = p.To.Offset + 4
	c.cursym.Grow(c.cursym.Size)

	// bp is the not-yet-written tail of the symbol's data; it is advanced
	// one byte at a time as output is stored.
	bp := c.cursym.P
	pc = int32(p.Pc) // even p->link might need extra padding
	var v int
	for p = p.Link; p != nil; p = p.Link {
		c.pc = p.Pc
		o = c.oplook(p)
		opc = int32(p.Pc)
		c.asmout(p, o, out[:])
		m = int(o.size)

		if m%4 != 0 || p.Pc%4 != 0 {
			ctxt.Diag("final stage: pc invalid: %v size=%d", p, m)
		}

		if int64(pc) > p.Pc {
			ctxt.Diag("PC padding invalid: want %#d, has %#d: %v", p.Pc, pc, p)
		}
		// Fill any gap between the bytes written so far and p.Pc.
		for int64(pc) != p.Pc {
			// emit 0xe1a00000 (MOVW R0, R0)
			bp[0] = 0x00
			bp = bp[1:]

			bp[0] = 0x00
			bp = bp[1:]
			bp[0] = 0xa0
			bp = bp[1:]
			bp[0] = 0xe1
			bp = bp[1:]
			pc += 4
		}

		// Store the m/4 words of this instruction, least significant
		// byte first.
		for i := 0; i < m/4; i++ {
			v = int(out[i])
			bp[0] = byte(v)
			bp = bp[1:]
			bp[0] = byte(v >> 8)
			bp = bp[1:]
			bp[0] = byte(v >> 16)
			bp = bp[1:]
			bp[0] = byte(v >> 24)
			bp = bp[1:]
		}

		pc += int32(m)
	}
}

// checkpool flushes the literal pool when the first reference to
// it threatens to go out of range of a 12-bit PC-relative offset.
+// +// nextpc is the tentative next PC at which the pool could be emitted. +// checkpool should be called *before* emitting the instruction that +// would cause the PC to reach nextpc. +// If nextpc is too far from the first pool reference, checkpool will +// flush the pool immediately after p. +// The caller should resume processing a p.Link. +func (c *ctxt5) checkpool(p *obj.Prog, nextpc int32) bool { + poolLast := nextpc + poolLast += 4 // the AB instruction to jump around the pool + poolLast += int32(c.pool.size) - 4 // the offset of the last pool entry + + refPC := int32(c.pool.start) // PC of the first pool reference + + v := poolLast - refPC - 8 // 12-bit PC-relative offset (see omvl) + + if c.pool.size >= 0xff0 || immaddr(v) == 0 { + return c.flushpool(p, 1, 0) + } else if p.Link == nil { + return c.flushpool(p, 2, 0) + } + return false +} + +func (c *ctxt5) flushpool(p *obj.Prog, skip int, force int) bool { + if c.blitrl != nil { + if skip != 0 { + if false && skip == 1 { + fmt.Printf("note: flush literal pool at %x: len=%d ref=%x\n", uint64(p.Pc+4), c.pool.size, c.pool.start) + } + q := c.newprog() + q.As = AB + q.To.Type = obj.TYPE_BRANCH + q.To.SetTarget(p.Link) + q.Link = c.blitrl + q.Pos = p.Pos + c.blitrl = q + } else if force == 0 && (p.Pc+int64(c.pool.size)-int64(c.pool.start) < 2048) { + return false + } + + // The line number for constant pool entries doesn't really matter. + // We set it to the line number of the preceding instruction so that + // there are no deltas to encode in the pc-line tables. 
// immrot returns the encoding of v as an 8-bit value rotated by an even
// number of bits, or 0 if v cannot be expressed that way.
//
// v is rotated left two bits at a time. If after i steps (0 <= i < 16) the
// rotated value fits in the low 8 bits, the result is
// i<<8 | rotated value | 1<<25. Bit 25 is always set on success, so a
// zero result unambiguously means "not encodable".
func immrot(v uint32) int32 {
	for i := 0; i < 16; i++ {
		if v&^0xff == 0 {
			return int32(uint32(i)<<8 | v | 1<<25)
		}
		v = v<<2 | v>>30
	}

	return 0
}

// immrot2a returns bits encoding the immediate constant fields of two instructions,
// such that the encoded constants x, y satisfy x|y==v, x&y==0.
// Returns 0,0 if no such decomposition of v exists.
//
// Only splits of v into "the low i bits" and "everything above them" are
// tried, for i from 1 to 31; the first split whose halves are both
// accepted by immrot wins.
func immrot2a(v uint32) (uint32, uint32) {
	for i := uint(1); i < 32; i++ {
		m := uint32(1<<i - 1)
		if x, y := immrot(v&m), immrot(v&^m); x != 0 && y != 0 {
			return uint32(x), uint32(y)
		}
	}
	// TODO: handle some more cases, like where
	// the wraparound from the rotate could help.
	return 0, 0
}

// immrot2s returns bits encoding the immediate constant fields of two instructions,
// such that the encoded constants y, x satisfy y-x==v, y&x==0.
// Returns 0,0 if no such decomposition of v exists.
//
// If v is already encodable by immrot, the function returns (v, 0): the
// raw value and a zero second result. Callers that require both results
// to be non-zero therefore treat that case as "no decomposition".
func immrot2s(v uint32) (uint32, uint32) {
	if immrot(v) != 0 {
		return v, 0
	}
	// suppose v in the form of {leading 00, upper effective bits, lower 8 effective bits, trailing 00}
	// omit trailing 00
	var i uint32
	for i = 2; i < 32; i += 2 {
		if v&(1<<i-1) != 0 {
			break
		}
	}
	// i must be <= 24, then adjust i just above lower 8 effective bits of v
	i += 6
	// let x = {the complement of lower 8 effective bits, trailing 00}, y = x + v
	x := 1<<i - v&(1<<i-1)
	y := v + x
	if y, x = uint32(immrot(y)), uint32(immrot(x)); y != 0 && x != 0 {
		return y, x
	}
	return 0, 0
}

// immaddr returns the offset field bits for a 12-bit load/store offset v,
// or 0 if v is outside [-0xfff, 0xfff].
//
// The magnitude of v goes in the low 12 bits. Bit 24 is always set
// (pre indexing); bit 23 is additionally set for non-negative v (up).
// Because bit 24 is always set on success, 0 unambiguously means failure.
func immaddr(v int32) int32 {
	if v >= 0 && v <= 0xfff {
		return v&0xfff | 1<<24 | 1<<23 /* pre indexing, up */
	}
	if v >= -0xfff && v < 0 {
		return -v&0xfff | 1<<24 /* pre indexing */
	}
	return 0
}

// immfloat reports whether none of the bits in mask 0xC03 are set in v,
// i.e. whether the offset will fit in a floating-point load/store.
func immfloat(v int32) bool {
	return v&0xC03 == 0 /* offset will fit in floating-point load/store */
}

// immhalf reports whether v lies in [-0xff, 0xff].
//
// The previous implementation returned `v|1<<24|1<<23 != 0` and
// `-v&0xff|1<<24 != 0` inside the two range checks; both expressions are
// always true because a non-zero constant is OR'ed in, so the function
// has only ever been this range test.
func immhalf(v int32) bool {
	return v >= -0xff && v <= 0xff
}
obj.TYPE_SHIFT: + if a.Reg == 0 { + // register shift R>>i + return C_SHIFT + } else { + // memory address with shifted offset R>>i(R) + return C_SHIFTADDR + } + + case obj.TYPE_MEM: + switch a.Name { + case obj.NAME_EXTERN, + obj.NAME_GOTREF, + obj.NAME_STATIC: + if a.Sym == nil || a.Sym.Name == "" { + fmt.Printf("null sym external\n") + return C_GOK + } + + c.instoffset = 0 // s.b. unused but just in case + if a.Sym.Type == objabi.STLSBSS { + if c.ctxt.Flag_shared { + return C_TLS_IE + } else { + return C_TLS_LE + } + } + + return C_ADDR + + case obj.NAME_AUTO: + if a.Reg == REGSP { + // unset base register for better printing, since + // a.Offset is still relative to pseudo-SP. + a.Reg = obj.REG_NONE + } + c.instoffset = c.autosize + a.Offset + if t := immaddr(int32(c.instoffset)); t != 0 { + if immhalf(int32(c.instoffset)) { + if immfloat(t) { + return C_HFAUTO + } + return C_HAUTO + } + + if immfloat(t) { + return C_FAUTO + } + return C_SAUTO + } + + return C_LAUTO + + case obj.NAME_PARAM: + if a.Reg == REGSP { + // unset base register for better printing, since + // a.Offset is still relative to pseudo-FP. + a.Reg = obj.REG_NONE + } + c.instoffset = c.autosize + a.Offset + 4 + if t := immaddr(int32(c.instoffset)); t != 0 { + if immhalf(int32(c.instoffset)) { + if immfloat(t) { + return C_HFAUTO + } + return C_HAUTO + } + + if immfloat(t) { + return C_FAUTO + } + return C_SAUTO + } + + return C_LAUTO + + case obj.NAME_NONE: + c.instoffset = a.Offset + if t := immaddr(int32(c.instoffset)); t != 0 { + if immhalf(int32(c.instoffset)) { /* n.b. that it will also satisfy immrot */ + if immfloat(t) { + return C_HFOREG + } + return C_HOREG + } + + if immfloat(t) { + return C_FOREG /* n.b. 
that it will also satisfy immrot */ + } + if immrot(uint32(c.instoffset)) != 0 { + return C_SROREG + } + if immhalf(int32(c.instoffset)) { + return C_HOREG + } + return C_SOREG + } + + if immrot(uint32(c.instoffset)) != 0 { + return C_ROREG + } + return C_LOREG + } + + return C_GOK + + case obj.TYPE_FCONST: + if c.chipzero5(a.Val.(float64)) >= 0 { + return C_ZFCON + } + if c.chipfloat5(a.Val.(float64)) >= 0 { + return C_SFCON + } + return C_LFCON + + case obj.TYPE_TEXTSIZE: + return C_TEXTSIZE + + case obj.TYPE_CONST, + obj.TYPE_ADDR: + switch a.Name { + case obj.NAME_NONE: + c.instoffset = a.Offset + if a.Reg != 0 { + return c.aconsize() + } + + if immrot(uint32(c.instoffset)) != 0 { + return C_RCON + } + if immrot(^uint32(c.instoffset)) != 0 { + return C_NCON + } + if uint32(c.instoffset) <= 0xffff && buildcfg.GOARM == 7 { + return C_SCON + } + if x, y := immrot2a(uint32(c.instoffset)); x != 0 && y != 0 { + return C_RCON2A + } + if y, x := immrot2s(uint32(c.instoffset)); x != 0 && y != 0 { + return C_RCON2S + } + return C_LCON + + case obj.NAME_EXTERN, + obj.NAME_GOTREF, + obj.NAME_STATIC: + s := a.Sym + if s == nil { + break + } + c.instoffset = 0 // s.b. unused but just in case + return C_LCONADDR + + case obj.NAME_AUTO: + if a.Reg == REGSP { + // unset base register for better printing, since + // a.Offset is still relative to pseudo-SP. + a.Reg = obj.REG_NONE + } + c.instoffset = c.autosize + a.Offset + return c.aconsize() + + case obj.NAME_PARAM: + if a.Reg == REGSP { + // unset base register for better printing, since + // a.Offset is still relative to pseudo-FP. 
+ a.Reg = obj.REG_NONE + } + c.instoffset = c.autosize + a.Offset + 4 + return c.aconsize() + } + + return C_GOK + + case obj.TYPE_BRANCH: + return C_SBRA + } + + return C_GOK +} + +func (c *ctxt5) aconsize() int { + if immrot(uint32(c.instoffset)) != 0 { + return C_RACON + } + if immrot(uint32(-c.instoffset)) != 0 { + return C_RACON + } + return C_LACON +} + +func (c *ctxt5) oplook(p *obj.Prog) *Optab { + a1 := int(p.Optab) + if a1 != 0 { + return &optab[a1-1] + } + a1 = int(p.From.Class) + if a1 == 0 { + a1 = c.aclass(&p.From) + 1 + p.From.Class = int8(a1) + } + + a1-- + a3 := int(p.To.Class) + if a3 == 0 { + a3 = c.aclass(&p.To) + 1 + p.To.Class = int8(a3) + } + + a3-- + a2 := C_NONE + if p.Reg != 0 { + switch { + case REG_F0 <= p.Reg && p.Reg <= REG_F15: + a2 = C_FREG + case REG_R0 <= p.Reg && p.Reg <= REG_R15: + a2 = C_REG + default: + c.ctxt.Diag("invalid register in %v", p) + } + } + + // check illegal base register + switch a1 { + case C_SOREG, C_LOREG, C_HOREG, C_FOREG, C_ROREG, C_HFOREG, C_SROREG, C_SHIFTADDR: + if p.From.Reg < REG_R0 || REG_R15 < p.From.Reg { + c.ctxt.Diag("illegal base register: %v", p) + } + default: + } + switch a3 { + case C_SOREG, C_LOREG, C_HOREG, C_FOREG, C_ROREG, C_HFOREG, C_SROREG, C_SHIFTADDR: + if p.To.Reg < REG_R0 || REG_R15 < p.To.Reg { + c.ctxt.Diag("illegal base register: %v", p) + } + default: + } + + // If current instruction has a .S suffix (flags update), + // we must use the constant pool instead of splitting it. 
+ if (a1 == C_RCON2A || a1 == C_RCON2S) && p.Scond&C_SBIT != 0 { + a1 = C_LCON + } + if (a3 == C_RCON2A || a3 == C_RCON2S) && p.Scond&C_SBIT != 0 { + a3 = C_LCON + } + + if false { /*debug['O']*/ + fmt.Printf("oplook %v %v %v %v\n", p.As, DRconv(a1), DRconv(a2), DRconv(a3)) + fmt.Printf("\t\t%d %d\n", p.From.Type, p.To.Type) + } + + ops := oprange[p.As&obj.AMask] + c1 := &xcmp[a1] + c3 := &xcmp[a3] + for i := range ops { + op := &ops[i] + if int(op.a2) == a2 && c1[op.a1] && c3[op.a3] { + p.Optab = uint16(cap(optab) - cap(ops) + i + 1) + checkSuffix(c, p, op) + return op + } + } + + c.ctxt.Diag("illegal combination %v; %v %v %v; from %d %d; to %d %d", p, DRconv(a1), DRconv(a2), DRconv(a3), p.From.Type, p.From.Name, p.To.Type, p.To.Name) + if ops == nil { + ops = optab + } + return &ops[0] +} + +func cmp(a int, b int) bool { + if a == b { + return true + } + switch a { + case C_LCON: + if b == C_RCON || b == C_NCON || b == C_SCON || b == C_RCON2A || b == C_RCON2S { + return true + } + + case C_LACON: + if b == C_RACON { + return true + } + + case C_LFCON: + if b == C_ZFCON || b == C_SFCON { + return true + } + + case C_HFAUTO: + return b == C_HAUTO || b == C_FAUTO + + case C_FAUTO, C_HAUTO: + return b == C_HFAUTO + + case C_SAUTO: + return cmp(C_HFAUTO, b) + + case C_LAUTO: + return cmp(C_SAUTO, b) + + case C_HFOREG: + return b == C_HOREG || b == C_FOREG + + case C_FOREG, C_HOREG: + return b == C_HFOREG + + case C_SROREG: + return cmp(C_SOREG, b) || cmp(C_ROREG, b) + + case C_SOREG, C_ROREG: + return b == C_SROREG || cmp(C_HFOREG, b) + + case C_LOREG: + return cmp(C_SROREG, b) + + case C_LBRA: + if b == C_SBRA { + return true + } + + case C_HREG: + return cmp(C_SP, b) || cmp(C_PC, b) + } + + return false +} + +type ocmp []Optab + +func (x ocmp) Len() int { + return len(x) +} + +func (x ocmp) Swap(i, j int) { + x[i], x[j] = x[j], x[i] +} + +func (x ocmp) Less(i, j int) bool { + p1 := &x[i] + p2 := &x[j] + n := int(p1.as) - int(p2.as) + if n != 0 { + return n < 0 + } + 
n = int(p1.a1) - int(p2.a1) + if n != 0 { + return n < 0 + } + n = int(p1.a2) - int(p2.a2) + if n != 0 { + return n < 0 + } + n = int(p1.a3) - int(p2.a3) + if n != 0 { + return n < 0 + } + return false +} + +func opset(a, b0 obj.As) { + oprange[a&obj.AMask] = oprange[b0] +} + +func buildop(ctxt *obj.Link) { + if oprange[AAND&obj.AMask] != nil { + // Already initialized; stop now. + // This happens in the cmd/asm tests, + // each of which re-initializes the arch. + return + } + + symdiv = ctxt.Lookup("runtime._div") + symdivu = ctxt.Lookup("runtime._divu") + symmod = ctxt.Lookup("runtime._mod") + symmodu = ctxt.Lookup("runtime._modu") + + var n int + + for i := 0; i < C_GOK; i++ { + for n = 0; n < C_GOK; n++ { + if cmp(n, i) { + xcmp[i][n] = true + } + } + } + for n = 0; optab[n].as != obj.AXXX; n++ { + if optab[n].flag&LPCREL != 0 { + if ctxt.Flag_shared { + optab[n].size += int8(optab[n].pcrelsiz) + } else { + optab[n].flag &^= LPCREL + } + } + } + + sort.Sort(ocmp(optab[:n])) + for i := 0; i < n; i++ { + r := optab[i].as + r0 := r & obj.AMask + start := i + for optab[i].as == r { + i++ + } + oprange[r0] = optab[start:i] + i-- + + switch r { + default: + ctxt.Diag("unknown op in build: %v", r) + ctxt.DiagFlush() + log.Fatalf("bad code") + + case AADD: + opset(ASUB, r0) + opset(ARSB, r0) + opset(AADC, r0) + opset(ASBC, r0) + opset(ARSC, r0) + + case AORR: + opset(AEOR, r0) + opset(ABIC, r0) + + case ACMP: + opset(ATEQ, r0) + opset(ACMN, r0) + opset(ATST, r0) + + case AMVN: + break + + case ABEQ: + opset(ABNE, r0) + opset(ABCS, r0) + opset(ABHS, r0) + opset(ABCC, r0) + opset(ABLO, r0) + opset(ABMI, r0) + opset(ABPL, r0) + opset(ABVS, r0) + opset(ABVC, r0) + opset(ABHI, r0) + opset(ABLS, r0) + opset(ABGE, r0) + opset(ABLT, r0) + opset(ABGT, r0) + opset(ABLE, r0) + + case ASLL: + opset(ASRL, r0) + opset(ASRA, r0) + + case AMUL: + opset(AMULU, r0) + + case ADIV: + opset(AMOD, r0) + opset(AMODU, r0) + opset(ADIVU, r0) + + case ADIVHW: + opset(ADIVUHW, r0) + + case 
AMOVW, + AMOVB, + AMOVBS, + AMOVBU, + AMOVH, + AMOVHS, + AMOVHU: + break + + case ASWPW: + opset(ASWPBU, r0) + + case AB, + ABL, + ABX, + ABXRET, + obj.ADUFFZERO, + obj.ADUFFCOPY, + ASWI, + AWORD, + AMOVM, + ARFE, + obj.ATEXT: + break + + case AADDF: + opset(AADDD, r0) + opset(ASUBF, r0) + opset(ASUBD, r0) + opset(AMULF, r0) + opset(AMULD, r0) + opset(ANMULF, r0) + opset(ANMULD, r0) + opset(AMULAF, r0) + opset(AMULAD, r0) + opset(AMULSF, r0) + opset(AMULSD, r0) + opset(ANMULAF, r0) + opset(ANMULAD, r0) + opset(ANMULSF, r0) + opset(ANMULSD, r0) + opset(AFMULAF, r0) + opset(AFMULAD, r0) + opset(AFMULSF, r0) + opset(AFMULSD, r0) + opset(AFNMULAF, r0) + opset(AFNMULAD, r0) + opset(AFNMULSF, r0) + opset(AFNMULSD, r0) + opset(ADIVF, r0) + opset(ADIVD, r0) + + case ANEGF: + opset(ANEGD, r0) + opset(ASQRTF, r0) + opset(ASQRTD, r0) + opset(AMOVFD, r0) + opset(AMOVDF, r0) + opset(AABSF, r0) + opset(AABSD, r0) + + case ACMPF: + opset(ACMPD, r0) + + case AMOVF: + opset(AMOVD, r0) + + case AMOVFW: + opset(AMOVDW, r0) + + case AMOVWF: + opset(AMOVWD, r0) + + case AMULL: + opset(AMULAL, r0) + opset(AMULLU, r0) + opset(AMULALU, r0) + + case AMULWT: + opset(AMULWB, r0) + opset(AMULBB, r0) + opset(AMMUL, r0) + + case AMULAWT: + opset(AMULAWB, r0) + opset(AMULABB, r0) + opset(AMULS, r0) + opset(AMMULA, r0) + opset(AMMULS, r0) + + case ABFX: + opset(ABFXU, r0) + opset(ABFC, r0) + opset(ABFI, r0) + + case ACLZ: + opset(AREV, r0) + opset(AREV16, r0) + opset(AREVSH, r0) + opset(ARBIT, r0) + + case AXTAB: + opset(AXTAH, r0) + opset(AXTABU, r0) + opset(AXTAHU, r0) + + case ALDREX, + ASTREX, + ALDREXD, + ASTREXD, + ADMB, + APLD, + AAND, + AMULA, + obj.AUNDEF, + obj.AFUNCDATA, + obj.APCDATA, + obj.ANOP: + break + } + } +} + +func (c *ctxt5) asmout(p *obj.Prog, o *Optab, out []uint32) { + c.printp = p + o1 := uint32(0) + o2 := uint32(0) + o3 := uint32(0) + o4 := uint32(0) + o5 := uint32(0) + o6 := uint32(0) + if false { /*debug['P']*/ + fmt.Printf("%x: %v\ttype %d\n", uint32(p.Pc), p, 
o.type_) + } + switch o.type_ { + default: + c.ctxt.Diag("%v: unknown asm %d", p, o.type_) + + case 0: /* pseudo ops */ + if false { /*debug['G']*/ + fmt.Printf("%x: %s: arm\n", uint32(p.Pc), p.From.Sym.Name) + } + + case 1: /* op R,[R],R */ + o1 = c.oprrr(p, p.As, int(p.Scond)) + + rf := int(p.From.Reg) + rt := int(p.To.Reg) + r := int(p.Reg) + if p.To.Type == obj.TYPE_NONE { + rt = 0 + } + if p.As == AMOVB || p.As == AMOVH || p.As == AMOVW || p.As == AMVN { + r = 0 + } else if r == 0 { + r = rt + } + o1 |= (uint32(rf)&15)<<0 | (uint32(r)&15)<<16 | (uint32(rt)&15)<<12 + + case 2: /* movbu $I,[R],R */ + c.aclass(&p.From) + + o1 = c.oprrr(p, p.As, int(p.Scond)) + o1 |= uint32(immrot(uint32(c.instoffset))) + rt := int(p.To.Reg) + r := int(p.Reg) + if p.To.Type == obj.TYPE_NONE { + rt = 0 + } + if p.As == AMOVW || p.As == AMVN { + r = 0 + } else if r == 0 { + r = rt + } + o1 |= (uint32(r)&15)<<16 | (uint32(rt)&15)<<12 + + case 106: /* op $I,R,R where I can be decomposed into 2 immediates */ + c.aclass(&p.From) + r := int(p.Reg) + rt := int(p.To.Reg) + if r == 0 { + r = rt + } + x, y := immrot2a(uint32(c.instoffset)) + var as2 obj.As + switch p.As { + case AADD, ASUB, AORR, AEOR, ABIC: + as2 = p.As // ADD, SUB, ORR, EOR, BIC + case ARSB: + as2 = AADD // RSB -> RSB/ADD pair + case AADC: + as2 = AADD // ADC -> ADC/ADD pair + case ASBC: + as2 = ASUB // SBC -> SBC/SUB pair + case ARSC: + as2 = AADD // RSC -> RSC/ADD pair + default: + c.ctxt.Diag("unknown second op for %v", p) + } + o1 = c.oprrr(p, p.As, int(p.Scond)) + o2 = c.oprrr(p, as2, int(p.Scond)) + o1 |= (uint32(r)&15)<<16 | (uint32(rt)&15)<<12 + o2 |= (uint32(rt)&15)<<16 | (uint32(rt)&15)<<12 + o1 |= x + o2 |= y + + case 107: /* op $I,R,R where I can be decomposed into 2 immediates */ + c.aclass(&p.From) + r := int(p.Reg) + rt := int(p.To.Reg) + if r == 0 { + r = rt + } + y, x := immrot2s(uint32(c.instoffset)) + var as2 obj.As + switch p.As { + case AADD: + as2 = ASUB // ADD -> ADD/SUB pair + case ASUB: + as2 = 
AADD // SUB -> SUB/ADD pair + case ARSB: + as2 = ASUB // RSB -> RSB/SUB pair + case AADC: + as2 = ASUB // ADC -> ADC/SUB pair + case ASBC: + as2 = AADD // SBC -> SBC/ADD pair + case ARSC: + as2 = ASUB // RSC -> RSC/SUB pair + default: + c.ctxt.Diag("unknown second op for %v", p) + } + o1 = c.oprrr(p, p.As, int(p.Scond)) + o2 = c.oprrr(p, as2, int(p.Scond)) + o1 |= (uint32(r)&15)<<16 | (uint32(rt)&15)<<12 + o2 |= (uint32(rt)&15)<<16 | (uint32(rt)&15)<<12 + o1 |= y + o2 |= x + + case 3: /* add R<<[IR],[R],R */ + o1 = c.mov(p) + + case 4: /* MOVW $off(R), R -> add $off,[R],R */ + c.aclass(&p.From) + if c.instoffset < 0 { + o1 = c.oprrr(p, ASUB, int(p.Scond)) + o1 |= uint32(immrot(uint32(-c.instoffset))) + } else { + o1 = c.oprrr(p, AADD, int(p.Scond)) + o1 |= uint32(immrot(uint32(c.instoffset))) + } + r := int(p.From.Reg) + if r == 0 { + r = int(o.param) + } + o1 |= (uint32(r) & 15) << 16 + o1 |= (uint32(p.To.Reg) & 15) << 12 + + case 5: /* bra s */ + o1 = c.opbra(p, p.As, int(p.Scond)) + + v := int32(-8) + if p.To.Sym != nil { + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 4 + rel.Sym = p.To.Sym + v += int32(p.To.Offset) + rel.Add = int64(o1) | (int64(v)>>2)&0xffffff + rel.Type = objabi.R_CALLARM + break + } + + if p.To.Target() != nil { + v = int32((p.To.Target().Pc - c.pc) - 8) + } + o1 |= (uint32(v) >> 2) & 0xffffff + + case 6: /* b ,O(R) -> add $O,R,PC */ + c.aclass(&p.To) + + o1 = c.oprrr(p, AADD, int(p.Scond)) + o1 |= uint32(immrot(uint32(c.instoffset))) + o1 |= (uint32(p.To.Reg) & 15) << 16 + o1 |= (REGPC & 15) << 12 + + case 7: /* bl (R) -> blx R */ + c.aclass(&p.To) + + if c.instoffset != 0 { + c.ctxt.Diag("%v: doesn't support BL offset(REG) with non-zero offset %d", p, c.instoffset) + } + o1 = c.oprrr(p, ABL, int(p.Scond)) + o1 |= (uint32(p.To.Reg) & 15) << 0 + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 0 + rel.Type = objabi.R_CALLIND + + case 8: /* sll $c,[R],R -> mov (R<<$c),R */ + c.aclass(&p.From) + + o1 = 
c.oprrr(p, p.As, int(p.Scond)) + r := int(p.Reg) + if r == 0 { + r = int(p.To.Reg) + } + o1 |= (uint32(r) & 15) << 0 + o1 |= uint32((c.instoffset & 31) << 7) + o1 |= (uint32(p.To.Reg) & 15) << 12 + + case 9: /* sll R,[R],R -> mov (R<<R),R */ + o1 = c.oprrr(p, p.As, int(p.Scond)) + + r := int(p.Reg) + if r == 0 { + r = int(p.To.Reg) + } + o1 |= (uint32(r) & 15) << 0 + o1 |= (uint32(p.From.Reg)&15)<<8 | 1<<4 + o1 |= (uint32(p.To.Reg) & 15) << 12 + + case 10: /* swi [$con] */ + o1 = c.oprrr(p, p.As, int(p.Scond)) + + if p.To.Type != obj.TYPE_NONE { + c.aclass(&p.To) + o1 |= uint32(c.instoffset & 0xffffff) + } + + case 11: /* word */ + c.aclass(&p.To) + + o1 = uint32(c.instoffset) + if p.To.Sym != nil { + // This case happens with words generated + // in the PC stream as part of the literal pool (c.pool). + rel := obj.Addrel(c.cursym) + + rel.Off = int32(c.pc) + rel.Siz = 4 + rel.Sym = p.To.Sym + rel.Add = p.To.Offset + + if c.ctxt.Flag_shared { + if p.To.Name == obj.NAME_GOTREF { + rel.Type = objabi.R_GOTPCREL + } else { + rel.Type = objabi.R_PCREL + } + rel.Add += c.pc - p.Rel.Pc - 8 + } else { + rel.Type = objabi.R_ADDR + } + o1 = 0 + } + + case 12: /* movw $lcon, reg */ + if o.a1 == C_SCON { + o1 = c.omvs(p, &p.From, int(p.To.Reg)) + } else if p.As == AMVN { + o1 = c.omvr(p, &p.From, int(p.To.Reg)) + } else { + o1 = c.omvl(p, &p.From, int(p.To.Reg)) + } + + if o.flag&LPCREL != 0 { + o2 = c.oprrr(p, AADD, int(p.Scond)) | (uint32(p.To.Reg)&15)<<0 | (REGPC&15)<<16 | (uint32(p.To.Reg)&15)<<12 + } + + case 13: /* op $lcon, [R], R */ + if o.a1 == C_SCON { + o1 = c.omvs(p, &p.From, REGTMP) + } else { + o1 = c.omvl(p, &p.From, REGTMP) + } + + if o1 == 0 { + break + } + o2 = c.oprrr(p, p.As, int(p.Scond)) + o2 |= REGTMP & 15 + r := int(p.Reg) + if p.As == AMVN { + r = 0 + } else if r == 0 { + r = int(p.To.Reg) + } + o2 |= (uint32(r) & 15) << 16 + if p.To.Type != obj.TYPE_NONE { + o2 |= (uint32(p.To.Reg) & 15) << 12 + } + + case 14: /* movb/movbu/movh/movhu R,R */ + o1 = 
c.oprrr(p, ASLL, int(p.Scond)) + + if p.As == AMOVBU || p.As == AMOVHU { + o2 = c.oprrr(p, ASRL, int(p.Scond)) + } else { + o2 = c.oprrr(p, ASRA, int(p.Scond)) + } + + r := int(p.To.Reg) + o1 |= (uint32(p.From.Reg)&15)<<0 | (uint32(r)&15)<<12 + o2 |= uint32(r)&15 | (uint32(r)&15)<<12 + if p.As == AMOVB || p.As == AMOVBS || p.As == AMOVBU { + o1 |= 24 << 7 + o2 |= 24 << 7 + } else { + o1 |= 16 << 7 + o2 |= 16 << 7 + } + + case 15: /* mul r,[r,]r */ + o1 = c.oprrr(p, p.As, int(p.Scond)) + + rf := int(p.From.Reg) + rt := int(p.To.Reg) + r := int(p.Reg) + if r == 0 { + r = rt + } + + o1 |= (uint32(rf)&15)<<8 | (uint32(r)&15)<<0 | (uint32(rt)&15)<<16 + + case 16: /* div r,[r,]r */ + o1 = 0xf << 28 + + o2 = 0 + + case 17: + o1 = c.oprrr(p, p.As, int(p.Scond)) + rf := int(p.From.Reg) + rt := int(p.To.Reg) + rt2 := int(p.To.Offset) + r := int(p.Reg) + o1 |= (uint32(rf)&15)<<8 | (uint32(r)&15)<<0 | (uint32(rt)&15)<<16 | (uint32(rt2)&15)<<12 + + case 18: /* BFX/BFXU/BFC/BFI */ + o1 = c.oprrr(p, p.As, int(p.Scond)) + rt := int(p.To.Reg) + r := int(p.Reg) + if r == 0 { + r = rt + } else if p.As == ABFC { // only "BFC $width, $lsb, Reg" is accepted, p.Reg must be 0 + c.ctxt.Diag("illegal combination: %v", p) + } + if p.GetFrom3() == nil || p.GetFrom3().Type != obj.TYPE_CONST { + c.ctxt.Diag("%v: missing or wrong LSB", p) + break + } + lsb := p.GetFrom3().Offset + width := p.From.Offset + if lsb < 0 || lsb > 31 || width <= 0 || (lsb+width) > 32 { + c.ctxt.Diag("%v: wrong width or LSB", p) + } + switch p.As { + case ABFX, ABFXU: // (width-1) is encoded + o1 |= (uint32(r)&15)<<0 | (uint32(rt)&15)<<12 | uint32(lsb)<<7 | uint32(width-1)<<16 + case ABFC, ABFI: // MSB is encoded + o1 |= (uint32(r)&15)<<0 | (uint32(rt)&15)<<12 | uint32(lsb)<<7 | uint32(lsb+width-1)<<16 + default: + c.ctxt.Diag("illegal combination: %v", p) + } + + case 20: /* mov/movb/movbu R,O(R) */ + c.aclass(&p.To) + + r := int(p.To.Reg) + if r == 0 { + r = int(o.param) + } + o1 = c.osr(p.As, int(p.From.Reg), 
int32(c.instoffset), r, int(p.Scond)) + + case 21: /* mov/movbu O(R),R -> lr */ + c.aclass(&p.From) + + r := int(p.From.Reg) + if r == 0 { + r = int(o.param) + } + o1 = c.olr(int32(c.instoffset), r, int(p.To.Reg), int(p.Scond)) + if p.As != AMOVW { + o1 |= 1 << 22 + } + + case 22: /* XTAB R@>i, [R], R */ + o1 = c.oprrr(p, p.As, int(p.Scond)) + switch p.From.Offset &^ 0xf { + // only 0/8/16/24 bits rotation is accepted + case SHIFT_RR, SHIFT_RR | 8<<7, SHIFT_RR | 16<<7, SHIFT_RR | 24<<7: + o1 |= uint32(p.From.Offset) & 0xc0f + default: + c.ctxt.Diag("illegal shift: %v", p) + } + rt := p.To.Reg + r := p.Reg + if r == 0 { + r = rt + } + o1 |= (uint32(rt)&15)<<12 | (uint32(r)&15)<<16 + + case 23: /* MOVW/MOVB/MOVH R@>i, R */ + switch p.As { + case AMOVW: + o1 = c.mov(p) + case AMOVBU, AMOVBS, AMOVB, AMOVHU, AMOVHS, AMOVH: + o1 = c.movxt(p) + default: + c.ctxt.Diag("illegal combination: %v", p) + } + + case 30: /* mov/movb/movbu R,L(R) */ + o1 = c.omvl(p, &p.To, REGTMP) + + if o1 == 0 { + break + } + r := int(p.To.Reg) + if r == 0 { + r = int(o.param) + } + o2 = c.osrr(int(p.From.Reg), REGTMP&15, r, int(p.Scond)) + if p.As != AMOVW { + o2 |= 1 << 22 + } + + case 31: /* mov/movbu L(R),R -> lr[b] */ + o1 = c.omvl(p, &p.From, REGTMP) + + if o1 == 0 { + break + } + r := int(p.From.Reg) + if r == 0 { + r = int(o.param) + } + o2 = c.olrr(REGTMP&15, r, int(p.To.Reg), int(p.Scond)) + if p.As == AMOVBU || p.As == AMOVBS || p.As == AMOVB { + o2 |= 1 << 22 + } + + case 34: /* mov $lacon,R */ + o1 = c.omvl(p, &p.From, REGTMP) + + if o1 == 0 { + break + } + + o2 = c.oprrr(p, AADD, int(p.Scond)) + o2 |= REGTMP & 15 + r := int(p.From.Reg) + if r == 0 { + r = int(o.param) + } + o2 |= (uint32(r) & 15) << 16 + if p.To.Type != obj.TYPE_NONE { + o2 |= (uint32(p.To.Reg) & 15) << 12 + } + + case 35: /* mov PSR,R */ + o1 = 2<<23 | 0xf<<16 | 0<<0 + + o1 |= ((uint32(p.Scond) & C_SCOND) ^ C_SCOND_XOR) << 28 + o1 |= (uint32(p.From.Reg) & 1) << 22 + o1 |= (uint32(p.To.Reg) & 15) << 12 + + case 36: 
/* mov R,PSR */ + o1 = 2<<23 | 0x2cf<<12 | 0<<4 + + if p.Scond&C_FBIT != 0 { + o1 ^= 0x010 << 12 + } + o1 |= ((uint32(p.Scond) & C_SCOND) ^ C_SCOND_XOR) << 28 + o1 |= (uint32(p.To.Reg) & 1) << 22 + o1 |= (uint32(p.From.Reg) & 15) << 0 + + case 37: /* mov $con,PSR */ + c.aclass(&p.From) + + o1 = 2<<23 | 0x2cf<<12 | 0<<4 + if p.Scond&C_FBIT != 0 { + o1 ^= 0x010 << 12 + } + o1 |= ((uint32(p.Scond) & C_SCOND) ^ C_SCOND_XOR) << 28 + o1 |= uint32(immrot(uint32(c.instoffset))) + o1 |= (uint32(p.To.Reg) & 1) << 22 + o1 |= (uint32(p.From.Reg) & 15) << 0 + + case 38, 39: + switch o.type_ { + case 38: /* movm $con,oreg -> stm */ + o1 = 0x4 << 25 + + o1 |= uint32(p.From.Offset & 0xffff) + o1 |= (uint32(p.To.Reg) & 15) << 16 + c.aclass(&p.To) + + case 39: /* movm oreg,$con -> ldm */ + o1 = 0x4<<25 | 1<<20 + + o1 |= uint32(p.To.Offset & 0xffff) + o1 |= (uint32(p.From.Reg) & 15) << 16 + c.aclass(&p.From) + } + + if c.instoffset != 0 { + c.ctxt.Diag("offset must be zero in MOVM; %v", p) + } + o1 |= ((uint32(p.Scond) & C_SCOND) ^ C_SCOND_XOR) << 28 + if p.Scond&C_PBIT != 0 { + o1 |= 1 << 24 + } + if p.Scond&C_UBIT != 0 { + o1 |= 1 << 23 + } + if p.Scond&C_WBIT != 0 { + o1 |= 1 << 21 + } + + case 40: /* swp oreg,reg,reg */ + c.aclass(&p.From) + + if c.instoffset != 0 { + c.ctxt.Diag("offset must be zero in SWP") + } + o1 = 0x2<<23 | 0x9<<4 + if p.As != ASWPW { + o1 |= 1 << 22 + } + o1 |= (uint32(p.From.Reg) & 15) << 16 + o1 |= (uint32(p.Reg) & 15) << 0 + o1 |= (uint32(p.To.Reg) & 15) << 12 + o1 |= ((uint32(p.Scond) & C_SCOND) ^ C_SCOND_XOR) << 28 + + case 41: /* rfe -> movm.s.w.u 0(r13),[r15] */ + o1 = 0xe8fd8000 + + case 50: /* floating point store */ + v := c.regoff(&p.To) + + r := int(p.To.Reg) + if r == 0 { + r = int(o.param) + } + o1 = c.ofsr(p.As, int(p.From.Reg), v, r, int(p.Scond), p) + + case 51: /* floating point load */ + v := c.regoff(&p.From) + + r := int(p.From.Reg) + if r == 0 { + r = int(o.param) + } + o1 = c.ofsr(p.As, int(p.To.Reg), v, r, int(p.Scond), p) | 1<<20 + 
+ case 52: /* floating point store, int32 offset UGLY */ + o1 = c.omvl(p, &p.To, REGTMP) + + if o1 == 0 { + break + } + r := int(p.To.Reg) + if r == 0 { + r = int(o.param) + } + o2 = c.oprrr(p, AADD, int(p.Scond)) | (REGTMP&15)<<12 | (REGTMP&15)<<16 | (uint32(r)&15)<<0 + o3 = c.ofsr(p.As, int(p.From.Reg), 0, REGTMP, int(p.Scond), p) + + case 53: /* floating point load, int32 offset UGLY */ + o1 = c.omvl(p, &p.From, REGTMP) + + if o1 == 0 { + break + } + r := int(p.From.Reg) + if r == 0 { + r = int(o.param) + } + o2 = c.oprrr(p, AADD, int(p.Scond)) | (REGTMP&15)<<12 | (REGTMP&15)<<16 | (uint32(r)&15)<<0 + o3 = c.ofsr(p.As, int(p.To.Reg), 0, (REGTMP&15), int(p.Scond), p) | 1<<20 + + case 54: /* floating point arith */ + o1 = c.oprrr(p, p.As, int(p.Scond)) + + rf := int(p.From.Reg) + rt := int(p.To.Reg) + r := int(p.Reg) + if r == 0 { + switch p.As { + case AMULAD, AMULAF, AMULSF, AMULSD, ANMULAF, ANMULAD, ANMULSF, ANMULSD, + AFMULAD, AFMULAF, AFMULSF, AFMULSD, AFNMULAF, AFNMULAD, AFNMULSF, AFNMULSD: + c.ctxt.Diag("illegal combination: %v", p) + default: + r = rt + } + } + + o1 |= (uint32(rf)&15)<<0 | (uint32(r)&15)<<16 | (uint32(rt)&15)<<12 + + case 55: /* negf freg, freg */ + o1 = c.oprrr(p, p.As, int(p.Scond)) + + rf := int(p.From.Reg) + rt := int(p.To.Reg) + + o1 |= (uint32(rf)&15)<<0 | (uint32(rt)&15)<<12 + + case 56: /* move to FP[CS]R */ + o1 = ((uint32(p.Scond)&C_SCOND)^C_SCOND_XOR)<<28 | 0xee1<<16 | 0xa1<<4 + + o1 |= (uint32(p.From.Reg) & 15) << 12 + + case 57: /* move from FP[CS]R */ + o1 = ((uint32(p.Scond)&C_SCOND)^C_SCOND_XOR)<<28 | 0xef1<<16 | 0xa1<<4 + + o1 |= (uint32(p.To.Reg) & 15) << 12 + + case 58: /* movbu R,R */ + o1 = c.oprrr(p, AAND, int(p.Scond)) + + o1 |= uint32(immrot(0xff)) + rt := int(p.To.Reg) + r := int(p.From.Reg) + if p.To.Type == obj.TYPE_NONE { + rt = 0 + } + if r == 0 { + r = rt + } + o1 |= (uint32(r)&15)<<16 | (uint32(rt)&15)<<12 + + case 59: /* movw/bu R<<I(R),R -> ldr indexed */ + if p.From.Reg == 0 { + c.ctxt.Diag("source operand 
is not a memory address: %v", p) + break + } + if p.From.Offset&(1<<4) != 0 { + c.ctxt.Diag("bad shift in LDR") + break + } + o1 = c.olrr(int(p.From.Offset), int(p.From.Reg), int(p.To.Reg), int(p.Scond)) + if p.As == AMOVBU { + o1 |= 1 << 22 + } + + case 60: /* movb R(R),R -> ldrsb indexed */ + if p.From.Reg == 0 { + c.ctxt.Diag("source operand is not a memory address: %v", p) + break + } + if p.From.Offset&(^0xf) != 0 { + c.ctxt.Diag("bad shift: %v", p) + break + } + o1 = c.olhrr(int(p.From.Offset), int(p.From.Reg), int(p.To.Reg), int(p.Scond)) + switch p.As { + case AMOVB, AMOVBS: + o1 ^= 1<<5 | 1<<6 + case AMOVH, AMOVHS: + o1 ^= 1 << 6 + default: + } + if p.Scond&C_UBIT != 0 { + o1 &^= 1 << 23 + } + + case 61: /* movw/b/bu R,R<<[IR](R) -> str indexed */ + if p.To.Reg == 0 { + c.ctxt.Diag("MOV to shifter operand") + } + o1 = c.osrr(int(p.From.Reg), int(p.To.Offset), int(p.To.Reg), int(p.Scond)) + if p.As == AMOVB || p.As == AMOVBS || p.As == AMOVBU { + o1 |= 1 << 22 + } + + case 62: /* MOVH/MOVHS/MOVHU Reg, Reg<<0(Reg) -> strh */ + if p.To.Reg == 0 { + c.ctxt.Diag("MOV to shifter operand") + } + if p.To.Offset&(^0xf) != 0 { + c.ctxt.Diag("bad shift: %v", p) + } + o1 = c.olhrr(int(p.To.Offset), int(p.To.Reg), int(p.From.Reg), int(p.Scond)) + o1 ^= 1 << 20 + if p.Scond&C_UBIT != 0 { + o1 &^= 1 << 23 + } + + /* reloc ops */ + case 64: /* mov/movb/movbu R,addr */ + o1 = c.omvl(p, &p.To, REGTMP) + + if o1 == 0 { + break + } + o2 = c.osr(p.As, int(p.From.Reg), 0, REGTMP, int(p.Scond)) + if o.flag&LPCREL != 0 { + o3 = o2 + o2 = c.oprrr(p, AADD, int(p.Scond)) | REGTMP&15 | (REGPC&15)<<16 | (REGTMP&15)<<12 + } + + case 65: /* mov/movbu addr,R */ + o1 = c.omvl(p, &p.From, REGTMP) + + if o1 == 0 { + break + } + o2 = c.olr(0, REGTMP, int(p.To.Reg), int(p.Scond)) + if p.As == AMOVBU || p.As == AMOVBS || p.As == AMOVB { + o2 |= 1 << 22 + } + if o.flag&LPCREL != 0 { + o3 = o2 + o2 = c.oprrr(p, AADD, int(p.Scond)) | REGTMP&15 | (REGPC&15)<<16 | (REGTMP&15)<<12 + } + + case 101: 
/* movw tlsvar,R, local exec*/ + o1 = c.omvl(p, &p.From, int(p.To.Reg)) + + case 102: /* movw tlsvar,R, initial exec*/ + o1 = c.omvl(p, &p.From, int(p.To.Reg)) + o2 = c.olrr(int(p.To.Reg)&15, (REGPC & 15), int(p.To.Reg), int(p.Scond)) + + case 103: /* word tlsvar, local exec */ + if p.To.Sym == nil { + c.ctxt.Diag("nil sym in tls %v", p) + } + if p.To.Offset != 0 { + c.ctxt.Diag("offset against tls var in %v", p) + } + // This case happens with words generated in the PC stream as part of + // the literal c.pool. + rel := obj.Addrel(c.cursym) + + rel.Off = int32(c.pc) + rel.Siz = 4 + rel.Sym = p.To.Sym + rel.Type = objabi.R_TLS_LE + o1 = 0 + + case 104: /* word tlsvar, initial exec */ + if p.To.Sym == nil { + c.ctxt.Diag("nil sym in tls %v", p) + } + if p.To.Offset != 0 { + c.ctxt.Diag("offset against tls var in %v", p) + } + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 4 + rel.Sym = p.To.Sym + rel.Type = objabi.R_TLS_IE + rel.Add = c.pc - p.Rel.Pc - 8 - int64(rel.Siz) + + case 68: /* floating point store -> ADDR */ + o1 = c.omvl(p, &p.To, REGTMP) + + if o1 == 0 { + break + } + o2 = c.ofsr(p.As, int(p.From.Reg), 0, REGTMP, int(p.Scond), p) + if o.flag&LPCREL != 0 { + o3 = o2 + o2 = c.oprrr(p, AADD, int(p.Scond)) | REGTMP&15 | (REGPC&15)<<16 | (REGTMP&15)<<12 + } + + case 69: /* floating point load <- ADDR */ + o1 = c.omvl(p, &p.From, REGTMP) + + if o1 == 0 { + break + } + o2 = c.ofsr(p.As, int(p.To.Reg), 0, (REGTMP&15), int(p.Scond), p) | 1<<20 + if o.flag&LPCREL != 0 { + o3 = o2 + o2 = c.oprrr(p, AADD, int(p.Scond)) | REGTMP&15 | (REGPC&15)<<16 | (REGTMP&15)<<12 + } + + /* ArmV4 ops: */ + case 70: /* movh/movhu R,O(R) -> strh */ + c.aclass(&p.To) + + r := int(p.To.Reg) + if r == 0 { + r = int(o.param) + } + o1 = c.oshr(int(p.From.Reg), int32(c.instoffset), r, int(p.Scond)) + + case 71: /* movb/movh/movhu O(R),R -> ldrsb/ldrsh/ldrh */ + c.aclass(&p.From) + + r := int(p.From.Reg) + if r == 0 { + r = int(o.param) + } + o1 = 
c.olhr(int32(c.instoffset), r, int(p.To.Reg), int(p.Scond)) + if p.As == AMOVB || p.As == AMOVBS { + o1 ^= 1<<5 | 1<<6 + } else if p.As == AMOVH || p.As == AMOVHS { + o1 ^= (1 << 6) + } + + case 72: /* movh/movhu R,L(R) -> strh */ + o1 = c.omvl(p, &p.To, REGTMP) + + if o1 == 0 { + break + } + r := int(p.To.Reg) + if r == 0 { + r = int(o.param) + } + o2 = c.oshrr(int(p.From.Reg), REGTMP&15, r, int(p.Scond)) + + case 73: /* movb/movh/movhu L(R),R -> ldrsb/ldrsh/ldrh */ + o1 = c.omvl(p, &p.From, REGTMP) + + if o1 == 0 { + break + } + r := int(p.From.Reg) + if r == 0 { + r = int(o.param) + } + o2 = c.olhrr(REGTMP&15, r, int(p.To.Reg), int(p.Scond)) + if p.As == AMOVB || p.As == AMOVBS { + o2 ^= 1<<5 | 1<<6 + } else if p.As == AMOVH || p.As == AMOVHS { + o2 ^= (1 << 6) + } + + case 74: /* bx $I */ + c.ctxt.Diag("ABX $I") + + case 75: /* bx O(R) */ + c.aclass(&p.To) + + if c.instoffset != 0 { + c.ctxt.Diag("non-zero offset in ABX") + } + + /* + o1 = c.oprrr(p, AADD, p->scond) | immrot(0) | ((REGPC&15)<<16) | ((REGLINK&15)<<12); // mov PC, LR + o2 = (((p->scond&C_SCOND) ^ C_SCOND_XOR)<<28) | (0x12fff<<8) | (1<<4) | ((p->to.reg&15) << 0); // BX R + */ + // p->to.reg may be REGLINK + o1 = c.oprrr(p, AADD, int(p.Scond)) + + o1 |= uint32(immrot(uint32(c.instoffset))) + o1 |= (uint32(p.To.Reg) & 15) << 16 + o1 |= (REGTMP & 15) << 12 + o2 = c.oprrr(p, AADD, int(p.Scond)) | uint32(immrot(0)) | (REGPC&15)<<16 | (REGLINK&15)<<12 // mov PC, LR + o3 = ((uint32(p.Scond)&C_SCOND)^C_SCOND_XOR)<<28 | 0x12fff<<8 | 1<<4 | REGTMP&15 // BX Rtmp + + case 76: /* bx O(R) when returning from fn*/ + c.ctxt.Diag("ABXRET") + + case 77: /* ldrex oreg,reg */ + c.aclass(&p.From) + + if c.instoffset != 0 { + c.ctxt.Diag("offset must be zero in LDREX") + } + o1 = 0x19<<20 | 0xf9f + o1 |= (uint32(p.From.Reg) & 15) << 16 + o1 |= (uint32(p.To.Reg) & 15) << 12 + o1 |= ((uint32(p.Scond) & C_SCOND) ^ C_SCOND_XOR) << 28 + + case 78: /* strex reg,oreg,reg */ + c.aclass(&p.From) + + if c.instoffset != 0 { + 
c.ctxt.Diag("offset must be zero in STREX") + } + if p.To.Reg == p.From.Reg || p.To.Reg == p.Reg { + c.ctxt.Diag("cannot use same register as both source and destination: %v", p) + } + o1 = 0x18<<20 | 0xf90 + o1 |= (uint32(p.From.Reg) & 15) << 16 + o1 |= (uint32(p.Reg) & 15) << 0 + o1 |= (uint32(p.To.Reg) & 15) << 12 + o1 |= ((uint32(p.Scond) & C_SCOND) ^ C_SCOND_XOR) << 28 + + case 80: /* fmov zfcon,freg */ + if p.As == AMOVD { + o1 = 0xeeb00b00 // VMOV imm 64 + o2 = c.oprrr(p, ASUBD, int(p.Scond)) + } else { + o1 = 0x0eb00a00 // VMOV imm 32 + o2 = c.oprrr(p, ASUBF, int(p.Scond)) + } + + v := int32(0x70) // 1.0 + r := (int(p.To.Reg) & 15) << 0 + + // movf $1.0, r + o1 |= ((uint32(p.Scond) & C_SCOND) ^ C_SCOND_XOR) << 28 + + o1 |= (uint32(r) & 15) << 12 + o1 |= (uint32(v) & 0xf) << 0 + o1 |= (uint32(v) & 0xf0) << 12 + + // subf r,r,r + o2 |= (uint32(r)&15)<<0 | (uint32(r)&15)<<16 | (uint32(r)&15)<<12 + + case 81: /* fmov sfcon,freg */ + o1 = 0x0eb00a00 // VMOV imm 32 + if p.As == AMOVD { + o1 = 0xeeb00b00 // VMOV imm 64 + } + o1 |= ((uint32(p.Scond) & C_SCOND) ^ C_SCOND_XOR) << 28 + o1 |= (uint32(p.To.Reg) & 15) << 12 + v := int32(c.chipfloat5(p.From.Val.(float64))) + o1 |= (uint32(v) & 0xf) << 0 + o1 |= (uint32(v) & 0xf0) << 12 + + case 82: /* fcmp freg,freg, */ + o1 = c.oprrr(p, p.As, int(p.Scond)) + + o1 |= (uint32(p.Reg)&15)<<12 | (uint32(p.From.Reg)&15)<<0 + o2 = 0x0ef1fa10 // VMRS R15 + o2 |= ((uint32(p.Scond) & C_SCOND) ^ C_SCOND_XOR) << 28 + + case 83: /* fcmp freg,, */ + o1 = c.oprrr(p, p.As, int(p.Scond)) + + o1 |= (uint32(p.From.Reg)&15)<<12 | 1<<16 + o2 = 0x0ef1fa10 // VMRS R15 + o2 |= ((uint32(p.Scond) & C_SCOND) ^ C_SCOND_XOR) << 28 + + case 84: /* movfw freg,freg - truncate float-to-fix */ + o1 = c.oprrr(p, p.As, int(p.Scond)) + + o1 |= (uint32(p.From.Reg) & 15) << 0 + o1 |= (uint32(p.To.Reg) & 15) << 12 + + case 85: /* movwf freg,freg - fix-to-float */ + o1 = c.oprrr(p, p.As, int(p.Scond)) + + o1 |= (uint32(p.From.Reg) & 15) << 0 + o1 |= 
(uint32(p.To.Reg) & 15) << 12 + + // macro for movfw freg,FTMP; movw FTMP,reg + case 86: /* movfw freg,reg - truncate float-to-fix */ + o1 = c.oprrr(p, p.As, int(p.Scond)) + + o1 |= (uint32(p.From.Reg) & 15) << 0 + o1 |= (FREGTMP & 15) << 12 + o2 = c.oprrr(p, -AMOVFW, int(p.Scond)) + o2 |= (FREGTMP & 15) << 16 + o2 |= (uint32(p.To.Reg) & 15) << 12 + + // macro for movw reg,FTMP; movwf FTMP,freg + case 87: /* movwf reg,freg - fix-to-float */ + o1 = c.oprrr(p, -AMOVWF, int(p.Scond)) + + o1 |= (uint32(p.From.Reg) & 15) << 12 + o1 |= (FREGTMP & 15) << 16 + o2 = c.oprrr(p, p.As, int(p.Scond)) + o2 |= (FREGTMP & 15) << 0 + o2 |= (uint32(p.To.Reg) & 15) << 12 + + case 88: /* movw reg,freg */ + o1 = c.oprrr(p, -AMOVWF, int(p.Scond)) + + o1 |= (uint32(p.From.Reg) & 15) << 12 + o1 |= (uint32(p.To.Reg) & 15) << 16 + + case 89: /* movw freg,reg */ + o1 = c.oprrr(p, -AMOVFW, int(p.Scond)) + + o1 |= (uint32(p.From.Reg) & 15) << 16 + o1 |= (uint32(p.To.Reg) & 15) << 12 + + case 91: /* ldrexd oreg,reg */ + c.aclass(&p.From) + + if c.instoffset != 0 { + c.ctxt.Diag("offset must be zero in LDREX") + } + o1 = 0x1b<<20 | 0xf9f + o1 |= (uint32(p.From.Reg) & 15) << 16 + o1 |= (uint32(p.To.Reg) & 15) << 12 + o1 |= ((uint32(p.Scond) & C_SCOND) ^ C_SCOND_XOR) << 28 + + case 92: /* strexd reg,oreg,reg */ + c.aclass(&p.From) + + if c.instoffset != 0 { + c.ctxt.Diag("offset must be zero in STREX") + } + if p.Reg&1 != 0 { + c.ctxt.Diag("source register must be even in STREXD: %v", p) + } + if p.To.Reg == p.From.Reg || p.To.Reg == p.Reg || p.To.Reg == p.Reg+1 { + c.ctxt.Diag("cannot use same register as both source and destination: %v", p) + } + o1 = 0x1a<<20 | 0xf90 + o1 |= (uint32(p.From.Reg) & 15) << 16 + o1 |= (uint32(p.Reg) & 15) << 0 + o1 |= (uint32(p.To.Reg) & 15) << 12 + o1 |= ((uint32(p.Scond) & C_SCOND) ^ C_SCOND_XOR) << 28 + + case 93: /* movb/movh/movhu addr,R -> ldrsb/ldrsh/ldrh */ + o1 = c.omvl(p, &p.From, REGTMP) + + if o1 == 0 { + break + } + o2 = c.olhr(0, REGTMP, 
int(p.To.Reg), int(p.Scond)) + if p.As == AMOVB || p.As == AMOVBS { + o2 ^= 1<<5 | 1<<6 + } else if p.As == AMOVH || p.As == AMOVHS { + o2 ^= (1 << 6) + } + if o.flag&LPCREL != 0 { + o3 = o2 + o2 = c.oprrr(p, AADD, int(p.Scond)) | REGTMP&15 | (REGPC&15)<<16 | (REGTMP&15)<<12 + } + + case 94: /* movh/movhu R,addr -> strh */ + o1 = c.omvl(p, &p.To, REGTMP) + + if o1 == 0 { + break + } + o2 = c.oshr(int(p.From.Reg), 0, REGTMP, int(p.Scond)) + if o.flag&LPCREL != 0 { + o3 = o2 + o2 = c.oprrr(p, AADD, int(p.Scond)) | REGTMP&15 | (REGPC&15)<<16 | (REGTMP&15)<<12 + } + + case 95: /* PLD off(reg) */ + o1 = 0xf5d0f000 + + o1 |= (uint32(p.From.Reg) & 15) << 16 + if p.From.Offset < 0 { + o1 &^= (1 << 23) + o1 |= uint32((-p.From.Offset) & 0xfff) + } else { + o1 |= uint32(p.From.Offset & 0xfff) + } + + // This is supposed to be something that stops execution. + // It's not supposed to be reached, ever, but if it is, we'd + // like to be able to tell how we got there. Assemble as + // 0xf7fabcfd which is guaranteed to raise undefined instruction + // exception. 
+ case 96: /* UNDEF */ + o1 = 0xf7fabcfd + + case 97: /* CLZ Rm, Rd */ + o1 = c.oprrr(p, p.As, int(p.Scond)) + + o1 |= (uint32(p.To.Reg) & 15) << 12 + o1 |= (uint32(p.From.Reg) & 15) << 0 + + case 98: /* MULW{T,B} Rs, Rm, Rd */ + o1 = c.oprrr(p, p.As, int(p.Scond)) + + o1 |= (uint32(p.To.Reg) & 15) << 16 + o1 |= (uint32(p.From.Reg) & 15) << 8 + o1 |= (uint32(p.Reg) & 15) << 0 + + case 99: /* MULAW{T,B} Rs, Rm, Rn, Rd */ + o1 = c.oprrr(p, p.As, int(p.Scond)) + + o1 |= (uint32(p.To.Reg) & 15) << 16 + o1 |= (uint32(p.From.Reg) & 15) << 8 + o1 |= (uint32(p.Reg) & 15) << 0 + o1 |= uint32((p.To.Offset & 15) << 12) + + case 105: /* divhw r,[r,]r */ + o1 = c.oprrr(p, p.As, int(p.Scond)) + rf := int(p.From.Reg) + rt := int(p.To.Reg) + r := int(p.Reg) + if r == 0 { + r = rt + } + o1 |= (uint32(rf)&15)<<8 | (uint32(r)&15)<<0 | (uint32(rt)&15)<<16 + + case 110: /* dmb [mbop | $con] */ + o1 = 0xf57ff050 + mbop := uint32(0) + + switch c.aclass(&p.From) { + case C_SPR: + for _, f := range mbOp { + if f.reg == p.From.Reg { + mbop = f.enc + break + } + } + case C_RCON: + for _, f := range mbOp { + enc := uint32(c.instoffset) + if f.enc == enc { + mbop = enc + break + } + } + case C_NONE: + mbop = 0xf + } + + if mbop == 0 { + c.ctxt.Diag("illegal mb option:\n%v", p) + } + o1 |= mbop + } + + out[0] = o1 + out[1] = o2 + out[2] = o3 + out[3] = o4 + out[4] = o5 + out[5] = o6 +} + +func (c *ctxt5) movxt(p *obj.Prog) uint32 { + o1 := ((uint32(p.Scond) & C_SCOND) ^ C_SCOND_XOR) << 28 + switch p.As { + case AMOVB, AMOVBS: + o1 |= 0x6af<<16 | 0x7<<4 + case AMOVH, AMOVHS: + o1 |= 0x6bf<<16 | 0x7<<4 + case AMOVBU: + o1 |= 0x6ef<<16 | 0x7<<4 + case AMOVHU: + o1 |= 0x6ff<<16 | 0x7<<4 + default: + c.ctxt.Diag("illegal combination: %v", p) + } + switch p.From.Offset &^ 0xf { + // only 0/8/16/24 bits rotation is accepted + case SHIFT_RR, SHIFT_RR | 8<<7, SHIFT_RR | 16<<7, SHIFT_RR | 24<<7: + o1 |= uint32(p.From.Offset) & 0xc0f + default: + c.ctxt.Diag("illegal shift: %v", p) + } + o1 |= 
(uint32(p.To.Reg) & 15) << 12 + return o1 +} + +func (c *ctxt5) mov(p *obj.Prog) uint32 { + c.aclass(&p.From) + o1 := c.oprrr(p, p.As, int(p.Scond)) + o1 |= uint32(p.From.Offset) + rt := int(p.To.Reg) + if p.To.Type == obj.TYPE_NONE { + rt = 0 + } + r := int(p.Reg) + if p.As == AMOVW || p.As == AMVN { + r = 0 + } else if r == 0 { + r = rt + } + o1 |= (uint32(r)&15)<<16 | (uint32(rt)&15)<<12 + return o1 +} + +func (c *ctxt5) oprrr(p *obj.Prog, a obj.As, sc int) uint32 { + o := ((uint32(sc) & C_SCOND) ^ C_SCOND_XOR) << 28 + if sc&C_SBIT != 0 { + o |= 1 << 20 + } + switch a { + case ADIVHW: + return o | 0x71<<20 | 0xf<<12 | 0x1<<4 + case ADIVUHW: + return o | 0x73<<20 | 0xf<<12 | 0x1<<4 + case AMMUL: + return o | 0x75<<20 | 0xf<<12 | 0x1<<4 + case AMULS: + return o | 0x6<<20 | 0x9<<4 + case AMMULA: + return o | 0x75<<20 | 0x1<<4 + case AMMULS: + return o | 0x75<<20 | 0xd<<4 + case AMULU, AMUL: + return o | 0x0<<21 | 0x9<<4 + case AMULA: + return o | 0x1<<21 | 0x9<<4 + case AMULLU: + return o | 0x4<<21 | 0x9<<4 + case AMULL: + return o | 0x6<<21 | 0x9<<4 + case AMULALU: + return o | 0x5<<21 | 0x9<<4 + case AMULAL: + return o | 0x7<<21 | 0x9<<4 + case AAND: + return o | 0x0<<21 + case AEOR: + return o | 0x1<<21 + case ASUB: + return o | 0x2<<21 + case ARSB: + return o | 0x3<<21 + case AADD: + return o | 0x4<<21 + case AADC: + return o | 0x5<<21 + case ASBC: + return o | 0x6<<21 + case ARSC: + return o | 0x7<<21 + case ATST: + return o | 0x8<<21 | 1<<20 + case ATEQ: + return o | 0x9<<21 | 1<<20 + case ACMP: + return o | 0xa<<21 | 1<<20 + case ACMN: + return o | 0xb<<21 | 1<<20 + case AORR: + return o | 0xc<<21 + + case AMOVB, AMOVH, AMOVW: + if sc&(C_PBIT|C_WBIT) != 0 { + c.ctxt.Diag("invalid .P/.W suffix: %v", p) + } + return o | 0xd<<21 + case ABIC: + return o | 0xe<<21 + case AMVN: + return o | 0xf<<21 + case ASLL: + return o | 0xd<<21 | 0<<5 + case ASRL: + return o | 0xd<<21 | 1<<5 + case ASRA: + return o | 0xd<<21 | 2<<5 + case ASWI: + return o | 0xf<<24 + + case 
AADDD: + return o | 0xe<<24 | 0x3<<20 | 0xb<<8 | 0<<4 + case AADDF: + return o | 0xe<<24 | 0x3<<20 | 0xa<<8 | 0<<4 + case ASUBD: + return o | 0xe<<24 | 0x3<<20 | 0xb<<8 | 4<<4 + case ASUBF: + return o | 0xe<<24 | 0x3<<20 | 0xa<<8 | 4<<4 + case AMULD: + return o | 0xe<<24 | 0x2<<20 | 0xb<<8 | 0<<4 + case AMULF: + return o | 0xe<<24 | 0x2<<20 | 0xa<<8 | 0<<4 + case ANMULD: + return o | 0xe<<24 | 0x2<<20 | 0xb<<8 | 0x4<<4 + case ANMULF: + return o | 0xe<<24 | 0x2<<20 | 0xa<<8 | 0x4<<4 + case AMULAD: + return o | 0xe<<24 | 0xb<<8 + case AMULAF: + return o | 0xe<<24 | 0xa<<8 + case AMULSD: + return o | 0xe<<24 | 0xb<<8 | 0x4<<4 + case AMULSF: + return o | 0xe<<24 | 0xa<<8 | 0x4<<4 + case ANMULAD: + return o | 0xe<<24 | 0x1<<20 | 0xb<<8 | 0x4<<4 + case ANMULAF: + return o | 0xe<<24 | 0x1<<20 | 0xa<<8 | 0x4<<4 + case ANMULSD: + return o | 0xe<<24 | 0x1<<20 | 0xb<<8 + case ANMULSF: + return o | 0xe<<24 | 0x1<<20 | 0xa<<8 + case AFMULAD: + return o | 0xe<<24 | 0xa<<20 | 0xb<<8 + case AFMULAF: + return o | 0xe<<24 | 0xa<<20 | 0xa<<8 + case AFMULSD: + return o | 0xe<<24 | 0xa<<20 | 0xb<<8 | 0x4<<4 + case AFMULSF: + return o | 0xe<<24 | 0xa<<20 | 0xa<<8 | 0x4<<4 + case AFNMULAD: + return o | 0xe<<24 | 0x9<<20 | 0xb<<8 | 0x4<<4 + case AFNMULAF: + return o | 0xe<<24 | 0x9<<20 | 0xa<<8 | 0x4<<4 + case AFNMULSD: + return o | 0xe<<24 | 0x9<<20 | 0xb<<8 + case AFNMULSF: + return o | 0xe<<24 | 0x9<<20 | 0xa<<8 + case ADIVD: + return o | 0xe<<24 | 0x8<<20 | 0xb<<8 | 0<<4 + case ADIVF: + return o | 0xe<<24 | 0x8<<20 | 0xa<<8 | 0<<4 + case ASQRTD: + return o | 0xe<<24 | 0xb<<20 | 1<<16 | 0xb<<8 | 0xc<<4 + case ASQRTF: + return o | 0xe<<24 | 0xb<<20 | 1<<16 | 0xa<<8 | 0xc<<4 + case AABSD: + return o | 0xe<<24 | 0xb<<20 | 0<<16 | 0xb<<8 | 0xc<<4 + case AABSF: + return o | 0xe<<24 | 0xb<<20 | 0<<16 | 0xa<<8 | 0xc<<4 + case ANEGD: + return o | 0xe<<24 | 0xb<<20 | 1<<16 | 0xb<<8 | 0x4<<4 + case ANEGF: + return o | 0xe<<24 | 0xb<<20 | 1<<16 | 0xa<<8 | 0x4<<4 + case ACMPD: + return o | 0xe<<24 
| 0xb<<20 | 4<<16 | 0xb<<8 | 0xc<<4 + case ACMPF: + return o | 0xe<<24 | 0xb<<20 | 4<<16 | 0xa<<8 | 0xc<<4 + + case AMOVF: + return o | 0xe<<24 | 0xb<<20 | 0<<16 | 0xa<<8 | 4<<4 + case AMOVD: + return o | 0xe<<24 | 0xb<<20 | 0<<16 | 0xb<<8 | 4<<4 + + case AMOVDF: + return o | 0xe<<24 | 0xb<<20 | 7<<16 | 0xa<<8 | 0xc<<4 | 1<<8 // dtof + case AMOVFD: + return o | 0xe<<24 | 0xb<<20 | 7<<16 | 0xa<<8 | 0xc<<4 | 0<<8 // dtof + + case AMOVWF: + if sc&C_UBIT == 0 { + o |= 1 << 7 /* signed */ + } + return o | 0xe<<24 | 0xb<<20 | 8<<16 | 0xa<<8 | 4<<4 | 0<<18 | 0<<8 // toint, double + + case AMOVWD: + if sc&C_UBIT == 0 { + o |= 1 << 7 /* signed */ + } + return o | 0xe<<24 | 0xb<<20 | 8<<16 | 0xa<<8 | 4<<4 | 0<<18 | 1<<8 // toint, double + + case AMOVFW: + if sc&C_UBIT == 0 { + o |= 1 << 16 /* signed */ + } + return o | 0xe<<24 | 0xb<<20 | 8<<16 | 0xa<<8 | 4<<4 | 1<<18 | 0<<8 | 1<<7 // toint, double, trunc + + case AMOVDW: + if sc&C_UBIT == 0 { + o |= 1 << 16 /* signed */ + } + return o | 0xe<<24 | 0xb<<20 | 8<<16 | 0xa<<8 | 4<<4 | 1<<18 | 1<<8 | 1<<7 // toint, double, trunc + + case -AMOVWF: // copy WtoF + return o | 0xe<<24 | 0x0<<20 | 0xb<<8 | 1<<4 + + case -AMOVFW: // copy FtoW + return o | 0xe<<24 | 0x1<<20 | 0xb<<8 | 1<<4 + + case -ACMP: // cmp imm + return o | 0x3<<24 | 0x5<<20 + + case ABFX: + return o | 0x3d<<21 | 0x5<<4 + + case ABFXU: + return o | 0x3f<<21 | 0x5<<4 + + case ABFC: + return o | 0x3e<<21 | 0x1f + + case ABFI: + return o | 0x3e<<21 | 0x1<<4 + + case AXTAB: + return o | 0x6a<<20 | 0x7<<4 + + case AXTAH: + return o | 0x6b<<20 | 0x7<<4 + + case AXTABU: + return o | 0x6e<<20 | 0x7<<4 + + case AXTAHU: + return o | 0x6f<<20 | 0x7<<4 + + // CLZ doesn't support .nil + case ACLZ: + return o&(0xf<<28) | 0x16f<<16 | 0xf1<<4 + + case AREV: + return o&(0xf<<28) | 0x6bf<<16 | 0xf3<<4 + + case AREV16: + return o&(0xf<<28) | 0x6bf<<16 | 0xfb<<4 + + case AREVSH: + return o&(0xf<<28) | 0x6ff<<16 | 0xfb<<4 + + case ARBIT: + return o&(0xf<<28) | 0x6ff<<16 | 0xf3<<4 + + 
case AMULWT: + return o&(0xf<<28) | 0x12<<20 | 0xe<<4 + + case AMULWB: + return o&(0xf<<28) | 0x12<<20 | 0xa<<4 + + case AMULBB: + return o&(0xf<<28) | 0x16<<20 | 0x8<<4 + + case AMULAWT: + return o&(0xf<<28) | 0x12<<20 | 0xc<<4 + + case AMULAWB: + return o&(0xf<<28) | 0x12<<20 | 0x8<<4 + + case AMULABB: + return o&(0xf<<28) | 0x10<<20 | 0x8<<4 + + case ABL: // BLX REG + return o&(0xf<<28) | 0x12fff3<<4 + } + + c.ctxt.Diag("%v: bad rrr %d", p, a) + return 0 +} + +func (c *ctxt5) opbra(p *obj.Prog, a obj.As, sc int) uint32 { + sc &= C_SCOND + sc ^= C_SCOND_XOR + if a == ABL || a == obj.ADUFFZERO || a == obj.ADUFFCOPY { + return uint32(sc)<<28 | 0x5<<25 | 0x1<<24 + } + if sc != 0xe { + c.ctxt.Diag("%v: .COND on bcond instruction", p) + } + switch a { + case ABEQ: + return 0x0<<28 | 0x5<<25 + case ABNE: + return 0x1<<28 | 0x5<<25 + case ABCS: + return 0x2<<28 | 0x5<<25 + case ABHS: + return 0x2<<28 | 0x5<<25 + case ABCC: + return 0x3<<28 | 0x5<<25 + case ABLO: + return 0x3<<28 | 0x5<<25 + case ABMI: + return 0x4<<28 | 0x5<<25 + case ABPL: + return 0x5<<28 | 0x5<<25 + case ABVS: + return 0x6<<28 | 0x5<<25 + case ABVC: + return 0x7<<28 | 0x5<<25 + case ABHI: + return 0x8<<28 | 0x5<<25 + case ABLS: + return 0x9<<28 | 0x5<<25 + case ABGE: + return 0xa<<28 | 0x5<<25 + case ABLT: + return 0xb<<28 | 0x5<<25 + case ABGT: + return 0xc<<28 | 0x5<<25 + case ABLE: + return 0xd<<28 | 0x5<<25 + case AB: + return 0xe<<28 | 0x5<<25 + } + + c.ctxt.Diag("%v: bad bra %v", p, a) + return 0 +} + +func (c *ctxt5) olr(v int32, b int, r int, sc int) uint32 { + o := ((uint32(sc) & C_SCOND) ^ C_SCOND_XOR) << 28 + if sc&C_PBIT == 0 { + o |= 1 << 24 + } + if sc&C_UBIT == 0 { + o |= 1 << 23 + } + if sc&C_WBIT != 0 { + o |= 1 << 21 + } + o |= 1<<26 | 1<<20 + if v < 0 { + if sc&C_UBIT != 0 { + c.ctxt.Diag(".U on neg offset") + } + v = -v + o ^= 1 << 23 + } + + if v >= 1<<12 || v < 0 { + c.ctxt.Diag("literal span too large: %d (R%d)\n%v", v, b, c.printp) + } + o |= uint32(v) + o |= (uint32(b) & 15) 
<< 16 + o |= (uint32(r) & 15) << 12 + return o +} + +func (c *ctxt5) olhr(v int32, b int, r int, sc int) uint32 { + o := ((uint32(sc) & C_SCOND) ^ C_SCOND_XOR) << 28 + if sc&C_PBIT == 0 { + o |= 1 << 24 + } + if sc&C_WBIT != 0 { + o |= 1 << 21 + } + o |= 1<<23 | 1<<20 | 0xb<<4 + if v < 0 { + v = -v + o ^= 1 << 23 + } + + if v >= 1<<8 || v < 0 { + c.ctxt.Diag("literal span too large: %d (R%d)\n%v", v, b, c.printp) + } + o |= uint32(v)&0xf | (uint32(v)>>4)<<8 | 1<<22 + o |= (uint32(b) & 15) << 16 + o |= (uint32(r) & 15) << 12 + return o +} + +func (c *ctxt5) osr(a obj.As, r int, v int32, b int, sc int) uint32 { + o := c.olr(v, b, r, sc) ^ (1 << 20) + if a != AMOVW { + o |= 1 << 22 + } + return o +} + +func (c *ctxt5) oshr(r int, v int32, b int, sc int) uint32 { + o := c.olhr(v, b, r, sc) ^ (1 << 20) + return o +} + +func (c *ctxt5) osrr(r int, i int, b int, sc int) uint32 { + return c.olr(int32(i), b, r, sc) ^ (1<<25 | 1<<20) +} + +func (c *ctxt5) oshrr(r int, i int, b int, sc int) uint32 { + return c.olhr(int32(i), b, r, sc) ^ (1<<22 | 1<<20) +} + +func (c *ctxt5) olrr(i int, b int, r int, sc int) uint32 { + return c.olr(int32(i), b, r, sc) ^ (1 << 25) +} + +func (c *ctxt5) olhrr(i int, b int, r int, sc int) uint32 { + return c.olhr(int32(i), b, r, sc) ^ (1 << 22) +} + +func (c *ctxt5) ofsr(a obj.As, r int, v int32, b int, sc int, p *obj.Prog) uint32 { + o := ((uint32(sc) & C_SCOND) ^ C_SCOND_XOR) << 28 + if sc&C_PBIT == 0 { + o |= 1 << 24 + } + if sc&C_WBIT != 0 { + o |= 1 << 21 + } + o |= 6<<25 | 1<<24 | 1<<23 | 10<<8 + if v < 0 { + v = -v + o ^= 1 << 23 + } + + if v&3 != 0 { + c.ctxt.Diag("odd offset for floating point op: %d\n%v", v, p) + } else if v >= 1<<10 || v < 0 { + c.ctxt.Diag("literal span too large: %d\n%v", v, p) + } + o |= (uint32(v) >> 2) & 0xFF + o |= (uint32(b) & 15) << 16 + o |= (uint32(r) & 15) << 12 + + switch a { + default: + c.ctxt.Diag("bad fst %v", a) + fallthrough + + case AMOVD: + o |= 1 << 8 + fallthrough + + case AMOVF: + break + } + + 
return o +} + +// MOVW $"lower 16-bit", Reg +func (c *ctxt5) omvs(p *obj.Prog, a *obj.Addr, dr int) uint32 { + o1 := ((uint32(p.Scond) & C_SCOND) ^ C_SCOND_XOR) << 28 + o1 |= 0x30 << 20 + o1 |= (uint32(dr) & 15) << 12 + o1 |= uint32(a.Offset) & 0x0fff + o1 |= (uint32(a.Offset) & 0xf000) << 4 + return o1 +} + +// MVN $C_NCON, Reg -> MOVW $C_RCON, Reg +func (c *ctxt5) omvr(p *obj.Prog, a *obj.Addr, dr int) uint32 { + o1 := c.oprrr(p, AMOVW, int(p.Scond)) + o1 |= (uint32(dr) & 15) << 12 + v := immrot(^uint32(a.Offset)) + if v == 0 { + c.ctxt.Diag("%v: missing literal", p) + return 0 + } + o1 |= uint32(v) + return o1 +} + +func (c *ctxt5) omvl(p *obj.Prog, a *obj.Addr, dr int) uint32 { + var o1 uint32 + if p.Pool == nil { + c.aclass(a) + v := immrot(^uint32(c.instoffset)) + if v == 0 { + c.ctxt.Diag("%v: missing literal", p) + return 0 + } + + o1 = c.oprrr(p, AMVN, int(p.Scond)&C_SCOND) + o1 |= uint32(v) + o1 |= (uint32(dr) & 15) << 12 + } else { + v := int32(p.Pool.Pc - p.Pc - 8) + o1 = c.olr(v, REGPC, dr, int(p.Scond)&C_SCOND) + } + + return o1 +} + +func (c *ctxt5) chipzero5(e float64) int { + // We use GOARM=7 to gate the use of VFPv3 vmov (imm) instructions. + if buildcfg.GOARM < 7 || math.Float64bits(e) != 0 { + return -1 + } + return 0 +} + +func (c *ctxt5) chipfloat5(e float64) int { + // We use GOARM=7 to gate the use of VFPv3 vmov (imm) instructions. 
+ if buildcfg.GOARM < 7 { + return -1 + } + + ei := math.Float64bits(e) + l := uint32(ei) + h := uint32(ei >> 32) + + if l != 0 || h&0xffff != 0 { + return -1 + } + h1 := h & 0x7fc00000 + if h1 != 0x40000000 && h1 != 0x3fc00000 { + return -1 + } + n := 0 + + // sign bit (a) + if h&0x80000000 != 0 { + n |= 1 << 7 + } + + // exp sign bit (b) + if h1 == 0x3fc00000 { + n |= 1 << 6 + } + + // rest of exp and mantissa (cd-efgh) + n |= int((h >> 16) & 0x3f) + + //print("match %.8lux %.8lux %d\n", l, h, n); + return n +} + +func nocache(p *obj.Prog) { + p.Optab = 0 + p.From.Class = 0 + if p.GetFrom3() != nil { + p.GetFrom3().Class = 0 + } + p.To.Class = 0 +} diff --git a/src/cmd/internal/obj/arm/list5.go b/src/cmd/internal/obj/arm/list5.go new file mode 100644 index 0000000..6d630f6 --- /dev/null +++ b/src/cmd/internal/obj/arm/list5.go @@ -0,0 +1,124 @@ +// Inferno utils/5c/list.c +// https://bitbucket.org/inferno-os/inferno-os/src/master/utils/5c/list.c +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. All rights reserved. 
// init hooks this package's ARM tables and formatters into package obj:
// rconv for the register range that starts at obj.RBaseARM (bounded by
// MAXREG), Anames for the opcodes based at obj.ABaseARM, rlconv for the
// register-list range obj.RegListARMLo..obj.RegListARMHi, and obj.CConvARM
// as the opcode-suffix formatter registered under the name "arm".
func init() {
	obj.RegisterRegister(obj.RBaseARM, MAXREG, rconv)
	obj.RegisterOpcode(obj.ABaseARM, Anames)
	obj.RegisterRegisterList(obj.RegListARMLo, obj.RegListARMHi, rlconv)
	obj.RegisterOpSuffix("arm", obj.CConvARM)
}
+ return "g" + } + if REG_R0 <= r && r <= REG_R15 { + return fmt.Sprintf("R%d", r-REG_R0) + } + if REG_F0 <= r && r <= REG_F15 { + return fmt.Sprintf("F%d", r-REG_F0) + } + + switch r { + case REG_FPSR: + return "FPSR" + + case REG_FPCR: + return "FPCR" + + case REG_CPSR: + return "CPSR" + + case REG_SPSR: + return "SPSR" + + case REG_MB_SY: + return "MB_SY" + case REG_MB_ST: + return "MB_ST" + case REG_MB_ISH: + return "MB_ISH" + case REG_MB_ISHST: + return "MB_ISHST" + case REG_MB_NSH: + return "MB_NSH" + case REG_MB_NSHST: + return "MB_NSHST" + case REG_MB_OSH: + return "MB_OSH" + case REG_MB_OSHST: + return "MB_OSHST" + } + + return fmt.Sprintf("Rgok(%d)", r-obj.RBaseARM) +} + +func DRconv(a int) string { + s := "C_??" + if a >= C_NONE && a <= C_NCLASS { + s = cnames5[a] + } + var fp string + fp += s + return fp +} + +func rlconv(list int64) string { + str := "" + for i := 0; i < 16; i++ { + if list&(1<<uint(i)) != 0 { + if str == "" { + str += "[" + } else { + str += "," + } + // This is ARM-specific; R10 is g. + if i == REGG-REG_R0 { + str += "g" + } else { + str += fmt.Sprintf("R%d", i) + } + } + } + + str += "]" + return str +} diff --git a/src/cmd/internal/obj/arm/obj5.go b/src/cmd/internal/obj/arm/obj5.go new file mode 100644 index 0000000..38aa11c --- /dev/null +++ b/src/cmd/internal/obj/arm/obj5.go @@ -0,0 +1,838 @@ +// Derived from Inferno utils/5c/swt.c +// https://bitbucket.org/inferno-os/inferno-os/src/master/utils/5c/swt.c +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. 
// progedit rewrites a single Prog in place before assembly.
//
// It clears the cached operand classes of p, then in order:
//   - turns B/BL/DUFFZERO/DUFFCOPY targets that name an external or static
//     symbol into TYPE_BRANCH operands;
//   - handles AMRC: a TLS fetch is expanded into a call to
//     runtime.read_tls_fallback when GOARM < 7, and every other AMRC is
//     turned into AWORD;
//   - moves AMOVF/AMOVD floating-point constants that chipfloat5 rejects
//     (and that chipzero5 rejects or that carry a condition) into memory
//     symbols;
//   - when ctxt.Flag_dynlink is set, hands the Prog to rewriteToUseGot.
//
// Note that the TLS expansion reassigns p to the last appended Prog, so the
// later steps operate on that Prog rather than on the one passed in.
func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
	p.From.Class = 0
	p.To.Class = 0

	c := ctxt5{ctxt: ctxt, newprog: newprog}

	// Rewrite B/BL to symbol as TYPE_BRANCH.
	switch p.As {
	case AB, ABL, obj.ADUFFZERO, obj.ADUFFCOPY:
		if p.To.Type == obj.TYPE_MEM && (p.To.Name == obj.NAME_EXTERN || p.To.Name == obj.NAME_STATIC) && p.To.Sym != nil {
			p.To.Type = obj.TYPE_BRANCH
		}
	}

	// Replace TLS register fetches on older ARM processors.
	switch p.As {
	// Treat MRC 15, 0, <reg>, C13, C0, 3 specially.
	case AMRC:
		// The mask ignores bits 12-15, which are checked separately below.
		if p.To.Offset&0xffff0fff == 0xee1d0f70 {
			// Because the instruction might be rewritten to a BL which returns in R0
			// the register must be zero.
			if p.To.Offset&0xf000 != 0 {
				ctxt.Diag("%v: TLS MRC instruction must write to R0 as it might get translated into a BL instruction", p.Line())
			}

			if buildcfg.GOARM < 7 {
				// Replace it with BL runtime.read_tls_fallback(SB) for ARM CPUs that lack the tls extension.
				// The fallback symbol is looked up once and cached in a package-level variable.
				if progedit_tlsfallback == nil {
					progedit_tlsfallback = ctxt.Lookup("runtime.read_tls_fallback")
				}

				// MOVW LR, R11
				p.As = AMOVW

				p.From.Type = obj.TYPE_REG
				p.From.Reg = REGLINK
				p.To.Type = obj.TYPE_REG
				p.To.Reg = REGTMP

				// BL runtime.read_tls_fallback(SB)
				p = obj.Appendp(p, newprog)

				p.As = ABL
				p.To.Type = obj.TYPE_BRANCH
				p.To.Sym = progedit_tlsfallback
				p.To.Offset = 0

				// MOVW R11, LR
				p = obj.Appendp(p, newprog)

				p.As = AMOVW
				p.From.Type = obj.TYPE_REG
				p.From.Reg = REGTMP
				p.To.Type = obj.TYPE_REG
				p.To.Reg = REGLINK
				// Leave the switch so the AWORD rewrite below is skipped.
				break
			}
		}

		// Otherwise, MRC/MCR instructions need no further treatment.
		p.As = AWORD
	}

	// Rewrite float constants to values stored in memory.
	switch p.As {
	case AMOVF:
		if p.From.Type == obj.TYPE_FCONST && c.chipfloat5(p.From.Val.(float64)) < 0 && (c.chipzero5(p.From.Val.(float64)) < 0 || p.Scond&C_SCOND != C_SCOND_NONE) {
			// The constant is narrowed to float32 before a symbol is chosen for it.
			f32 := float32(p.From.Val.(float64))
			p.From.Type = obj.TYPE_MEM
			p.From.Sym = ctxt.Float32Sym(f32)
			p.From.Name = obj.NAME_EXTERN
			p.From.Offset = 0
		}

	case AMOVD:
		if p.From.Type == obj.TYPE_FCONST && c.chipfloat5(p.From.Val.(float64)) < 0 && (c.chipzero5(p.From.Val.(float64)) < 0 || p.Scond&C_SCOND != C_SCOND_NONE) {
			p.From.Type = obj.TYPE_MEM
			p.From.Sym = ctxt.Float64Sym(p.From.Val.(float64))
			p.From.Name = obj.NAME_EXTERN
			p.From.Offset = 0
		}
	}

	if ctxt.Flag_dynlink {
		c.rewriteToUseGot(p)
	}
}
+func (c *ctxt5) rewriteToUseGot(p *obj.Prog) { + if p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO { + // ADUFFxxx $offset + // becomes + // MOVW runtime.duffxxx@GOT, R9 + // ADD $offset, R9 + // CALL (R9) + var sym *obj.LSym + if p.As == obj.ADUFFZERO { + sym = c.ctxt.Lookup("runtime.duffzero") + } else { + sym = c.ctxt.Lookup("runtime.duffcopy") + } + offset := p.To.Offset + p.As = AMOVW + p.From.Type = obj.TYPE_MEM + p.From.Name = obj.NAME_GOTREF + p.From.Sym = sym + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_R9 + p.To.Name = obj.NAME_NONE + p.To.Offset = 0 + p.To.Sym = nil + p1 := obj.Appendp(p, c.newprog) + p1.As = AADD + p1.From.Type = obj.TYPE_CONST + p1.From.Offset = offset + p1.To.Type = obj.TYPE_REG + p1.To.Reg = REG_R9 + p2 := obj.Appendp(p1, c.newprog) + p2.As = obj.ACALL + p2.To.Type = obj.TYPE_MEM + p2.To.Reg = REG_R9 + return + } + + // We only care about global data: NAME_EXTERN means a global + // symbol in the Go sense, and p.Sym.Local is true for a few + // internally defined symbols. 
+ if p.From.Type == obj.TYPE_ADDR && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() { + // MOVW $sym, Rx becomes MOVW sym@GOT, Rx + // MOVW $sym+<off>, Rx becomes MOVW sym@GOT, Rx; ADD <off>, Rx + if p.As != AMOVW { + c.ctxt.Diag("do not know how to handle TYPE_ADDR in %v with -dynlink", p) + } + if p.To.Type != obj.TYPE_REG { + c.ctxt.Diag("do not know how to handle LEAQ-type insn to non-register in %v with -dynlink", p) + } + p.From.Type = obj.TYPE_MEM + p.From.Name = obj.NAME_GOTREF + if p.From.Offset != 0 { + q := obj.Appendp(p, c.newprog) + q.As = AADD + q.From.Type = obj.TYPE_CONST + q.From.Offset = p.From.Offset + q.To = p.To + p.From.Offset = 0 + } + } + if p.GetFrom3() != nil && p.GetFrom3().Name == obj.NAME_EXTERN { + c.ctxt.Diag("don't know how to handle %v with -dynlink", p) + } + var source *obj.Addr + // MOVx sym, Ry becomes MOVW sym@GOT, R9; MOVx (R9), Ry + // MOVx Ry, sym becomes MOVW sym@GOT, R9; MOVx Ry, (R9) + // An addition may be inserted between the two MOVs if there is an offset. 
+ if p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() { + if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() { + c.ctxt.Diag("cannot handle NAME_EXTERN on both sides in %v with -dynlink", p) + } + source = &p.From + } else if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() { + source = &p.To + } else { + return + } + if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ACALL || p.As == obj.ARET || p.As == obj.AJMP { + return + } + if source.Sym.Type == objabi.STLSBSS { + return + } + if source.Type != obj.TYPE_MEM { + c.ctxt.Diag("don't know how to handle %v with -dynlink", p) + } + p1 := obj.Appendp(p, c.newprog) + p2 := obj.Appendp(p1, c.newprog) + + p1.As = AMOVW + p1.From.Type = obj.TYPE_MEM + p1.From.Sym = source.Sym + p1.From.Name = obj.NAME_GOTREF + p1.To.Type = obj.TYPE_REG + p1.To.Reg = REG_R9 + + p2.As = p.As + p2.From = p.From + p2.To = p.To + if p.From.Name == obj.NAME_EXTERN { + p2.From.Reg = REG_R9 + p2.From.Name = obj.NAME_NONE + p2.From.Sym = nil + } else if p.To.Name == obj.NAME_EXTERN { + p2.To.Reg = REG_R9 + p2.To.Name = obj.NAME_NONE + p2.To.Sym = nil + } else { + return + } + obj.Nopout(p) +} + +// Prog.mark +const ( + FOLL = 1 << 0 + LABEL = 1 << 1 + LEAF = 1 << 2 +) + +func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { + autosize := int32(0) + + if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { + return + } + + c := ctxt5{ctxt: ctxt, cursym: cursym, newprog: newprog} + + p := c.cursym.Func().Text + autoffset := int32(p.To.Offset) + if autoffset == -4 { + // Historical way to mark NOFRAME. 
+ p.From.Sym.Set(obj.AttrNoFrame, true) + autoffset = 0 + } + if autoffset < 0 || autoffset%4 != 0 { + c.ctxt.Diag("frame size %d not 0 or a positive multiple of 4", autoffset) + } + if p.From.Sym.NoFrame() { + if autoffset != 0 { + c.ctxt.Diag("NOFRAME functions must have a frame size of 0, not %d", autoffset) + } + } + + cursym.Func().Locals = autoffset + cursym.Func().Args = p.To.Val.(int32) + + /* + * find leaf subroutines + */ + for p := cursym.Func().Text; p != nil; p = p.Link { + switch p.As { + case obj.ATEXT: + p.Mark |= LEAF + + case ADIV, ADIVU, AMOD, AMODU: + cursym.Func().Text.Mark &^= LEAF + + case ABL, + ABX, + obj.ADUFFZERO, + obj.ADUFFCOPY: + cursym.Func().Text.Mark &^= LEAF + } + } + + var q2 *obj.Prog + for p := cursym.Func().Text; p != nil; p = p.Link { + o := p.As + switch o { + case obj.ATEXT: + autosize = autoffset + + if p.Mark&LEAF != 0 && autosize == 0 { + // A leaf function with no locals has no frame. + p.From.Sym.Set(obj.AttrNoFrame, true) + } + + if !p.From.Sym.NoFrame() { + // If there is a stack frame at all, it includes + // space to save the LR. + autosize += 4 + } + + if autosize == 0 && cursym.Func().Text.Mark&LEAF == 0 { + // A very few functions that do not return to their caller + // are not identified as leaves but still have no frame. + if ctxt.Debugvlog { + ctxt.Logf("save suppressed in: %s\n", cursym.Name) + } + + cursym.Func().Text.Mark |= LEAF + } + + // FP offsets need an updated p.To.Offset. 
+ p.To.Offset = int64(autosize) - 4 + + if cursym.Func().Text.Mark&LEAF != 0 { + cursym.Set(obj.AttrLeaf, true) + if p.From.Sym.NoFrame() { + break + } + } + + if !p.From.Sym.NoSplit() { + p = c.stacksplit(p, autosize) // emit split check + } + + // MOVW.W R14,$-autosize(SP) + p = obj.Appendp(p, c.newprog) + + p.As = AMOVW + p.Scond |= C_WBIT + p.From.Type = obj.TYPE_REG + p.From.Reg = REGLINK + p.To.Type = obj.TYPE_MEM + p.To.Offset = int64(-autosize) + p.To.Reg = REGSP + p.Spadj = autosize + + if cursym.Func().Text.From.Sym.Wrapper() { + // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame + // + // MOVW g_panic(g), R1 + // CMP $0, R1 + // B.NE checkargp + // end: + // NOP + // ... function ... + // checkargp: + // MOVW panic_argp(R1), R2 + // ADD $(autosize+4), R13, R3 + // CMP R2, R3 + // B.NE end + // ADD $4, R13, R4 + // MOVW R4, panic_argp(R1) + // B end + // + // The NOP is needed to give the jumps somewhere to land. + // It is a liblink NOP, not an ARM NOP: it encodes to 0 instruction bytes. 
+ + p = obj.Appendp(p, newprog) + p.As = AMOVW + p.From.Type = obj.TYPE_MEM + p.From.Reg = REGG + p.From.Offset = 4 * int64(ctxt.Arch.PtrSize) // G.panic + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_R1 + + p = obj.Appendp(p, newprog) + p.As = ACMP + p.From.Type = obj.TYPE_CONST + p.From.Offset = 0 + p.Reg = REG_R1 + + // B.NE checkargp + bne := obj.Appendp(p, newprog) + bne.As = ABNE + bne.To.Type = obj.TYPE_BRANCH + + // end: NOP + end := obj.Appendp(bne, newprog) + end.As = obj.ANOP + + // find end of function + var last *obj.Prog + for last = end; last.Link != nil; last = last.Link { + } + + // MOVW panic_argp(R1), R2 + mov := obj.Appendp(last, newprog) + mov.As = AMOVW + mov.From.Type = obj.TYPE_MEM + mov.From.Reg = REG_R1 + mov.From.Offset = 0 // Panic.argp + mov.To.Type = obj.TYPE_REG + mov.To.Reg = REG_R2 + + // B.NE branch target is MOVW above + bne.To.SetTarget(mov) + + // ADD $(autosize+4), R13, R3 + p = obj.Appendp(mov, newprog) + p.As = AADD + p.From.Type = obj.TYPE_CONST + p.From.Offset = int64(autosize) + 4 + p.Reg = REG_R13 + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_R3 + + // CMP R2, R3 + p = obj.Appendp(p, newprog) + p.As = ACMP + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_R2 + p.Reg = REG_R3 + + // B.NE end + p = obj.Appendp(p, newprog) + p.As = ABNE + p.To.Type = obj.TYPE_BRANCH + p.To.SetTarget(end) + + // ADD $4, R13, R4 + p = obj.Appendp(p, newprog) + p.As = AADD + p.From.Type = obj.TYPE_CONST + p.From.Offset = 4 + p.Reg = REG_R13 + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_R4 + + // MOVW R4, panic_argp(R1) + p = obj.Appendp(p, newprog) + p.As = AMOVW + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_R4 + p.To.Type = obj.TYPE_MEM + p.To.Reg = REG_R1 + p.To.Offset = 0 // Panic.argp + + // B end + p = obj.Appendp(p, newprog) + p.As = AB + p.To.Type = obj.TYPE_BRANCH + p.To.SetTarget(end) + + // reset for subsequent passes + p = end + } + + case obj.ARET: + nocache(p) + if cursym.Func().Text.Mark&LEAF != 0 { + if autosize == 0 { + p.As = AB + 
p.From = obj.Addr{} + if p.To.Sym != nil { // retjmp + p.To.Type = obj.TYPE_BRANCH + } else { + p.To.Type = obj.TYPE_MEM + p.To.Offset = 0 + p.To.Reg = REGLINK + } + + break + } + } + + p.As = AMOVW + p.Scond |= C_PBIT + p.From.Type = obj.TYPE_MEM + p.From.Offset = int64(autosize) + p.From.Reg = REGSP + p.To.Type = obj.TYPE_REG + p.To.Reg = REGPC + + // If there are instructions following + // this ARET, they come from a branch + // with the same stackframe, so no spadj. + + if p.To.Sym != nil { // retjmp + p.To.Reg = REGLINK + q2 = obj.Appendp(p, newprog) + q2.As = AB + q2.To.Type = obj.TYPE_BRANCH + q2.To.Sym = p.To.Sym + p.To.Sym = nil + p.To.Name = obj.NAME_NONE + p = q2 + } + + case AADD: + if p.From.Type == obj.TYPE_CONST && p.From.Reg == 0 && p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP { + p.Spadj = int32(-p.From.Offset) + } + + case ASUB: + if p.From.Type == obj.TYPE_CONST && p.From.Reg == 0 && p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP { + p.Spadj = int32(p.From.Offset) + } + + case ADIV, ADIVU, AMOD, AMODU: + if cursym.Func().Text.From.Sym.NoSplit() { + ctxt.Diag("cannot divide in NOSPLIT function") + } + const debugdivmod = false + if debugdivmod { + break + } + if p.From.Type != obj.TYPE_REG { + break + } + if p.To.Type != obj.TYPE_REG { + break + } + + // Make copy because we overwrite p below. 
+ q1 := *p + if q1.Reg == REGTMP || q1.Reg == 0 && q1.To.Reg == REGTMP { + ctxt.Diag("div already using REGTMP: %v", p) + } + + /* MOV m(g),REGTMP */ + p.As = AMOVW + p.Pos = q1.Pos + p.From.Type = obj.TYPE_MEM + p.From.Reg = REGG + p.From.Offset = 6 * 4 // offset of g.m + p.Reg = 0 + p.To.Type = obj.TYPE_REG + p.To.Reg = REGTMP + + /* MOV a,m_divmod(REGTMP) */ + p = obj.Appendp(p, newprog) + p.As = AMOVW + p.Pos = q1.Pos + p.From.Type = obj.TYPE_REG + p.From.Reg = q1.From.Reg + p.To.Type = obj.TYPE_MEM + p.To.Reg = REGTMP + p.To.Offset = 8 * 4 // offset of m.divmod + + /* MOV b, R8 */ + p = obj.Appendp(p, newprog) + p.As = AMOVW + p.Pos = q1.Pos + p.From.Type = obj.TYPE_REG + p.From.Reg = q1.Reg + if q1.Reg == 0 { + p.From.Reg = q1.To.Reg + } + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_R8 + p.To.Offset = 0 + + /* CALL appropriate */ + p = obj.Appendp(p, newprog) + p.As = ABL + p.Pos = q1.Pos + p.To.Type = obj.TYPE_BRANCH + switch o { + case ADIV: + p.To.Sym = symdiv + case ADIVU: + p.To.Sym = symdivu + case AMOD: + p.To.Sym = symmod + case AMODU: + p.To.Sym = symmodu + } + + /* MOV REGTMP, b */ + p = obj.Appendp(p, newprog) + p.As = AMOVW + p.Pos = q1.Pos + p.From.Type = obj.TYPE_REG + p.From.Reg = REGTMP + p.From.Offset = 0 + p.To.Type = obj.TYPE_REG + p.To.Reg = q1.To.Reg + + case AMOVW: + if (p.Scond&C_WBIT != 0) && p.To.Type == obj.TYPE_MEM && p.To.Reg == REGSP { + p.Spadj = int32(-p.To.Offset) + } + if (p.Scond&C_PBIT != 0) && p.From.Type == obj.TYPE_MEM && p.From.Reg == REGSP && p.To.Reg != REGPC { + p.Spadj = int32(-p.From.Offset) + } + if p.From.Type == obj.TYPE_ADDR && p.From.Reg == REGSP && p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP { + p.Spadj = int32(-p.From.Offset) + } + + case obj.AGETCALLERPC: + if cursym.Leaf() { + /* MOVW LR, Rd */ + p.As = AMOVW + p.From.Type = obj.TYPE_REG + p.From.Reg = REGLINK + } else { + /* MOVW (RSP), Rd */ + p.As = AMOVW + p.From.Type = obj.TYPE_MEM + p.From.Reg = REGSP + } + } + + if p.To.Type == obj.TYPE_REG && 
p.To.Reg == REGSP && p.Spadj == 0 { + f := c.cursym.Func() + if f.FuncFlag&objabi.FuncFlag_SPWRITE == 0 { + c.cursym.Func().FuncFlag |= objabi.FuncFlag_SPWRITE + if ctxt.Debugvlog || !ctxt.IsAsm { + ctxt.Logf("auto-SPWRITE: %s %v\n", c.cursym.Name, p) + if !ctxt.IsAsm { + ctxt.Diag("invalid auto-SPWRITE in non-assembly") + ctxt.DiagFlush() + log.Fatalf("bad SPWRITE") + } + } + } + } + } +} + +func (c *ctxt5) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { + if c.ctxt.Flag_maymorestack != "" { + // Save LR and make room for REGCTXT. + const frameSize = 8 + // MOVW.W R14,$-8(SP) + p = obj.Appendp(p, c.newprog) + p.As = AMOVW + p.Scond |= C_WBIT + p.From.Type = obj.TYPE_REG + p.From.Reg = REGLINK + p.To.Type = obj.TYPE_MEM + p.To.Offset = -frameSize + p.To.Reg = REGSP + p.Spadj = frameSize + + // MOVW REGCTXT, 4(SP) + p = obj.Appendp(p, c.newprog) + p.As = AMOVW + p.From.Type = obj.TYPE_REG + p.From.Reg = REGCTXT + p.To.Type = obj.TYPE_MEM + p.To.Offset = 4 + p.To.Reg = REGSP + + // CALL maymorestack + p = obj.Appendp(p, c.newprog) + p.As = obj.ACALL + p.To.Type = obj.TYPE_BRANCH + // See ../x86/obj6.go + p.To.Sym = c.ctxt.LookupABI(c.ctxt.Flag_maymorestack, c.cursym.ABI()) + + // Restore REGCTXT and LR. + + // MOVW 4(SP), REGCTXT + p = obj.Appendp(p, c.newprog) + p.As = AMOVW + p.From.Type = obj.TYPE_MEM + p.From.Offset = 4 + p.From.Reg = REGSP + p.To.Type = obj.TYPE_REG + p.To.Reg = REGCTXT + + // MOVW.P 8(SP), R14 + p.As = AMOVW + p.Scond |= C_PBIT + p.From.Type = obj.TYPE_MEM + p.From.Offset = frameSize + p.From.Reg = REGSP + p.To.Type = obj.TYPE_REG + p.To.Reg = REGLINK + p.Spadj = -frameSize + } + + // Jump back to here after morestack returns. 
+ startPred := p + + // MOVW g_stackguard(g), R1 + p = obj.Appendp(p, c.newprog) + + p.As = AMOVW + p.From.Type = obj.TYPE_MEM + p.From.Reg = REGG + p.From.Offset = 2 * int64(c.ctxt.Arch.PtrSize) // G.stackguard0 + if c.cursym.CFunc() { + p.From.Offset = 3 * int64(c.ctxt.Arch.PtrSize) // G.stackguard1 + } + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_R1 + + // Mark the stack bound check and morestack call async nonpreemptible. + // If we get preempted here, when resumed the preemption request is + // cleared, but we'll still call morestack, which will double the stack + // unnecessarily. See issue #35470. + p = c.ctxt.StartUnsafePoint(p, c.newprog) + + if framesize <= objabi.StackSmall { + // small stack: SP < stackguard + // CMP stackguard, SP + p = obj.Appendp(p, c.newprog) + + p.As = ACMP + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_R1 + p.Reg = REGSP + } else if framesize <= objabi.StackBig { + // large stack: SP-framesize < stackguard-StackSmall + // MOVW $-(framesize-StackSmall)(SP), R2 + // CMP stackguard, R2 + p = obj.Appendp(p, c.newprog) + + p.As = AMOVW + p.From.Type = obj.TYPE_ADDR + p.From.Reg = REGSP + p.From.Offset = -(int64(framesize) - objabi.StackSmall) + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_R2 + + p = obj.Appendp(p, c.newprog) + p.As = ACMP + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_R1 + p.Reg = REG_R2 + } else { + // Such a large stack we need to protect against underflow. + // The runtime guarantees SP > objabi.StackBig, but + // framesize is large enough that SP-framesize may + // underflow, causing a direct comparison with the + // stack guard to incorrectly succeed. We explicitly + // guard against underflow. + // + // // Try subtracting from SP and check for underflow. + // // If this underflows, it sets C to 0. + // SUB.S $(framesize-StackSmall), SP, R2 + // // If C is 1 (unsigned >=), compare with guard. 
+ // CMP.HS stackguard, R2 + + p = obj.Appendp(p, c.newprog) + p.As = ASUB + p.Scond = C_SBIT + p.From.Type = obj.TYPE_CONST + p.From.Offset = int64(framesize) - objabi.StackSmall + p.Reg = REGSP + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_R2 + + p = obj.Appendp(p, c.newprog) + p.As = ACMP + p.Scond = C_SCOND_HS + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_R1 + p.Reg = REG_R2 + } + + // BLS call-to-morestack (C is 0 or Z is 1) + bls := obj.Appendp(p, c.newprog) + bls.As = ABLS + bls.To.Type = obj.TYPE_BRANCH + + end := c.ctxt.EndUnsafePoint(bls, c.newprog, -1) + + var last *obj.Prog + for last = c.cursym.Func().Text; last.Link != nil; last = last.Link { + } + + // Now we are at the end of the function, but logically + // we are still in function prologue. We need to fix the + // SP data and PCDATA. + spfix := obj.Appendp(last, c.newprog) + spfix.As = obj.ANOP + spfix.Spadj = -framesize + + pcdata := c.ctxt.EmitEntryStackMap(c.cursym, spfix, c.newprog) + pcdata = c.ctxt.StartUnsafePoint(pcdata, c.newprog) + + // MOVW LR, R3 + movw := obj.Appendp(pcdata, c.newprog) + movw.As = AMOVW + movw.From.Type = obj.TYPE_REG + movw.From.Reg = REGLINK + movw.To.Type = obj.TYPE_REG + movw.To.Reg = REG_R3 + + bls.To.SetTarget(movw) + + // BL runtime.morestack + call := obj.Appendp(movw, c.newprog) + call.As = obj.ACALL + call.To.Type = obj.TYPE_BRANCH + morestack := "runtime.morestack" + switch { + case c.cursym.CFunc(): + morestack = "runtime.morestackc" + case !c.cursym.Func().Text.From.Sym.NeedCtxt(): + morestack = "runtime.morestack_noctxt" + } + call.To.Sym = c.ctxt.Lookup(morestack) + + pcdata = c.ctxt.EndUnsafePoint(call, c.newprog, -1) + + // B start + b := obj.Appendp(pcdata, c.newprog) + b.As = obj.AJMP + b.To.Type = obj.TYPE_BRANCH + b.To.SetTarget(startPred.Link) + b.Spadj = +framesize + + return end +} + +var unaryDst = map[obj.As]bool{ + ASWI: true, + AWORD: true, +} + +var Linkarm = obj.LinkArch{ + Arch: sys.ArchARM, + Init: buildop, + Preprocess: preprocess, + 
Assemble: span5, + Progedit: progedit, + UnaryDst: unaryDst, + DWARFRegisters: ARMDWARFRegisters, +} diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go new file mode 100644 index 0000000..3bfc675 --- /dev/null +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -0,0 +1,1213 @@ +// cmd/7c/7.out.h from Vita Nuova. +// https://code.google.com/p/ken-cc/source/browse/src/cmd/7c/7.out.h +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package arm64 + +import "cmd/internal/obj" + +const ( + NSNAME = 8 + NSYM = 50 + NREG = 32 /* number of general registers */ + NFREG = 32 /* number of floating point registers */ +) + +// General purpose registers, kept in the low bits of Prog.Reg. +const ( + // integer + REG_R0 = obj.RBaseARM64 + iota + REG_R1 + REG_R2 + REG_R3 + REG_R4 + REG_R5 + REG_R6 + REG_R7 + REG_R8 + REG_R9 + REG_R10 + REG_R11 + REG_R12 + REG_R13 + REG_R14 + REG_R15 + REG_R16 + REG_R17 + REG_R18 + REG_R19 + REG_R20 + REG_R21 + REG_R22 + REG_R23 + REG_R24 + REG_R25 + REG_R26 + REG_R27 + REG_R28 + REG_R29 + REG_R30 + REG_R31 + + // scalar floating point + REG_F0 + REG_F1 + REG_F2 + REG_F3 + REG_F4 + REG_F5 + REG_F6 + REG_F7 + REG_F8 + REG_F9 + REG_F10 + REG_F11 + REG_F12 + REG_F13 + REG_F14 + REG_F15 + REG_F16 + REG_F17 + REG_F18 + REG_F19 + REG_F20 + REG_F21 + REG_F22 + REG_F23 + REG_F24 + REG_F25 + REG_F26 + REG_F27 + REG_F28 + REG_F29 + REG_F30 + REG_F31 + + // SIMD + REG_V0 + REG_V1 + REG_V2 + REG_V3 + REG_V4 + REG_V5 + REG_V6 + REG_V7 + REG_V8 + REG_V9 + REG_V10 + REG_V11 + REG_V12 + REG_V13 + REG_V14 + REG_V15 + REG_V16 + REG_V17 + REG_V18 + REG_V19 + REG_V20 + REG_V21 + REG_V22 + REG_V23 + REG_V24 + REG_V25 + REG_V26 + REG_V27 + REG_V28 + REG_V29 + REG_V30 + REG_V31 + + REG_RSP = REG_V31 + 32 // to differentiate ZR/SP, REG_RSP&0x1f = 31 +) + +// bits 0-4 indicates register: Vn +// bits 5-8 indicates arrangement: <T> +const ( + REG_ARNG = obj.RBaseARM64 + 1<<10 + iota<<9 // Vn.<T> + REG_ELEM // Vn.<T>[index] + REG_ELEM_END +) + +// Not registers, but flags that can be combined with regular register +// constants to indicate extended register conversion. When checking, +// you should subtract obj.RBaseARM64 first. 
From this difference, bit 11 +// indicates extended register, bits 8-10 select the conversion mode. +// REG_LSL is the index shift specifier, bit 9 indicates shifted offset register. +const REG_LSL = obj.RBaseARM64 + 1<<9 +const REG_EXT = obj.RBaseARM64 + 1<<11 + +const ( + REG_UXTB = REG_EXT + iota<<8 + REG_UXTH + REG_UXTW + REG_UXTX + REG_SXTB + REG_SXTH + REG_SXTW + REG_SXTX +) + +// Special registers, after subtracting obj.RBaseARM64, bit 12 indicates +// a special register and the low bits select the register. +// SYSREG_END is the last item in the automatically generated system register +// declaration, and it is defined in the sysRegEnc.go file. +// Define the special register after REG_SPECIAL, the first value of it should be +// REG_{name} = SYSREG_END + iota. +const ( + REG_SPECIAL = obj.RBaseARM64 + 1<<12 +) + +// Register assignments: +// +// compiler allocates R0 up as temps +// compiler allocates register variables R7-R25 +// compiler allocates external registers R26 down +// +// compiler allocates register variables F7-F26 +// compiler allocates external registers F26 down +const ( + REGMIN = REG_R7 // register variables allocated from here to REGMAX + REGRT1 = REG_R16 // ARM64 IP0, external linker may use as a scrach register in trampoline + REGRT2 = REG_R17 // ARM64 IP1, external linker may use as a scrach register in trampoline + REGPR = REG_R18 // ARM64 platform register, unused in the Go toolchain + REGMAX = REG_R25 + + REGCTXT = REG_R26 // environment for closures + REGTMP = REG_R27 // reserved for liblink + REGG = REG_R28 // G + REGFP = REG_R29 // frame pointer + REGLINK = REG_R30 + + // ARM64 uses R31 as both stack pointer and zero register, + // depending on the instruction. To differentiate RSP from ZR, + // we use a different numeric value for REGZERO and REGSP. 
+ REGZERO = REG_R31 + REGSP = REG_RSP + + FREGRET = REG_F0 + FREGMIN = REG_F7 // first register variable + FREGMAX = REG_F26 // last register variable for 7g only + FREGEXT = REG_F26 // first external register +) + +// http://infocenter.arm.com/help/topic/com.arm.doc.ecm0665627/abi_sve_aadwarf_100985_0000_00_en.pdf +var ARM64DWARFRegisters = map[int16]int16{ + REG_R0: 0, + REG_R1: 1, + REG_R2: 2, + REG_R3: 3, + REG_R4: 4, + REG_R5: 5, + REG_R6: 6, + REG_R7: 7, + REG_R8: 8, + REG_R9: 9, + REG_R10: 10, + REG_R11: 11, + REG_R12: 12, + REG_R13: 13, + REG_R14: 14, + REG_R15: 15, + REG_R16: 16, + REG_R17: 17, + REG_R18: 18, + REG_R19: 19, + REG_R20: 20, + REG_R21: 21, + REG_R22: 22, + REG_R23: 23, + REG_R24: 24, + REG_R25: 25, + REG_R26: 26, + REG_R27: 27, + REG_R28: 28, + REG_R29: 29, + REG_R30: 30, + + // floating point + REG_F0: 64, + REG_F1: 65, + REG_F2: 66, + REG_F3: 67, + REG_F4: 68, + REG_F5: 69, + REG_F6: 70, + REG_F7: 71, + REG_F8: 72, + REG_F9: 73, + REG_F10: 74, + REG_F11: 75, + REG_F12: 76, + REG_F13: 77, + REG_F14: 78, + REG_F15: 79, + REG_F16: 80, + REG_F17: 81, + REG_F18: 82, + REG_F19: 83, + REG_F20: 84, + REG_F21: 85, + REG_F22: 86, + REG_F23: 87, + REG_F24: 88, + REG_F25: 89, + REG_F26: 90, + REG_F27: 91, + REG_F28: 92, + REG_F29: 93, + REG_F30: 94, + REG_F31: 95, + + // SIMD + REG_V0: 64, + REG_V1: 65, + REG_V2: 66, + REG_V3: 67, + REG_V4: 68, + REG_V5: 69, + REG_V6: 70, + REG_V7: 71, + REG_V8: 72, + REG_V9: 73, + REG_V10: 74, + REG_V11: 75, + REG_V12: 76, + REG_V13: 77, + REG_V14: 78, + REG_V15: 79, + REG_V16: 80, + REG_V17: 81, + REG_V18: 82, + REG_V19: 83, + REG_V20: 84, + REG_V21: 85, + REG_V22: 86, + REG_V23: 87, + REG_V24: 88, + REG_V25: 89, + REG_V26: 90, + REG_V27: 91, + REG_V28: 92, + REG_V29: 93, + REG_V30: 94, + REG_V31: 95, +} + +const ( + BIG = 2048 - 8 +) + +const ( + /* mark flags */ + LABEL = 1 << iota + LEAF + FLOAT + BRANCH + LOAD + FCMP + SYNC + LIST + FOLL + NOSCHED +) + +const ( + // optab is sorted based on the order of these 
constants + // and the first match is chosen. + // The more specific class needs to come earlier. + C_NONE = iota + C_REG // R0..R30 + C_ZREG // R0..R30, ZR + C_RSP // R0..R30, RSP + C_FREG // F0..F31 + C_VREG // V0..V31 + C_PAIR // (Rn, Rm) + C_SHIFT // Rn<<2 + C_EXTREG // Rn.UXTB[<<3] + C_SPR // REG_NZCV + C_COND // condition code, EQ, NE, etc. + C_SPOP // special operand, PLDL1KEEP, VMALLE1IS, etc. + C_ARNG // Vn.<T> + C_ELEM // Vn.<T>[index] + C_LIST // [V1, V2, V3] + + C_ZCON // $0 + C_ABCON0 // could be C_ADDCON0 or C_BITCON + C_ADDCON0 // 12-bit unsigned, unshifted + C_ABCON // could be C_ADDCON or C_BITCON + C_AMCON // could be C_ADDCON or C_MOVCON + C_ADDCON // 12-bit unsigned, shifted left by 0 or 12 + C_MBCON // could be C_MOVCON or C_BITCON + C_MOVCON // generated by a 16-bit constant, optionally inverted and/or shifted by multiple of 16 + C_BITCON // bitfield and logical immediate masks + C_ADDCON2 // 24-bit constant + C_LCON // 32-bit constant + C_MOVCON2 // a constant that can be loaded with one MOVZ/MOVN and one MOVK + C_MOVCON3 // a constant that can be loaded with one MOVZ/MOVN and two MOVKs + C_VCON // 64-bit constant + C_FCON // floating-point constant + C_VCONADDR // 64-bit memory address + + C_AACON // ADDCON offset in auto constant $a(FP) + C_AACON2 // 24-bit offset in auto constant $a(FP) + C_LACON // 32-bit offset in auto constant $a(FP) + C_AECON // ADDCON offset in extern constant $e(SB) + + // TODO(aram): only one branch class should be enough + C_SBRA // for TYPE_BRANCH + C_LBRA + + C_ZAUTO // 0(RSP) + C_NSAUTO_16 // -256 <= x < 0, 0 mod 16 + C_NSAUTO_8 // -256 <= x < 0, 0 mod 8 + C_NSAUTO_4 // -256 <= x < 0, 0 mod 4 + C_NSAUTO // -256 <= x < 0 + C_NPAUTO_16 // -512 <= x < 0, 0 mod 16 + C_NPAUTO // -512 <= x < 0, 0 mod 8 + C_NQAUTO_16 // -1024 <= x < 0, 0 mod 16 + C_NAUTO4K // -4095 <= x < 0 + C_PSAUTO_16 // 0 to 255, 0 mod 16 + C_PSAUTO_8 // 0 to 255, 0 mod 8 + C_PSAUTO_4 // 0 to 255, 0 mod 4 + C_PSAUTO // 0 to 255 + C_PPAUTO_16 // 0 
to 504, 0 mod 16 + C_PPAUTO // 0 to 504, 0 mod 8 + C_PQAUTO_16 // 0 to 1008, 0 mod 16 + C_UAUTO4K_16 // 0 to 4095, 0 mod 16 + C_UAUTO4K_8 // 0 to 4095, 0 mod 8 + C_UAUTO4K_4 // 0 to 4095, 0 mod 4 + C_UAUTO4K_2 // 0 to 4095, 0 mod 2 + C_UAUTO4K // 0 to 4095 + C_UAUTO8K_16 // 0 to 8190, 0 mod 16 + C_UAUTO8K_8 // 0 to 8190, 0 mod 8 + C_UAUTO8K_4 // 0 to 8190, 0 mod 4 + C_UAUTO8K // 0 to 8190, 0 mod 2 + C_PSAUTO + C_UAUTO16K_16 // 0 to 16380, 0 mod 16 + C_UAUTO16K_8 // 0 to 16380, 0 mod 8 + C_UAUTO16K // 0 to 16380, 0 mod 4 + C_PSAUTO + C_UAUTO32K_16 // 0 to 32760, 0 mod 16 + C_PSAUTO + C_UAUTO32K // 0 to 32760, 0 mod 8 + C_PSAUTO + C_UAUTO64K // 0 to 65520, 0 mod 16 + C_PSAUTO + C_LAUTO // any other 32-bit constant + + C_SEXT1 // 0 to 4095, direct + C_SEXT2 // 0 to 8190 + C_SEXT4 // 0 to 16380 + C_SEXT8 // 0 to 32760 + C_SEXT16 // 0 to 65520 + C_LEXT + + C_ZOREG // 0(R) + C_NSOREG_16 // must mirror C_NSAUTO_16, etc + C_NSOREG_8 + C_NSOREG_4 + C_NSOREG + C_NPOREG_16 + C_NPOREG + C_NQOREG_16 + C_NOREG4K + C_PSOREG_16 + C_PSOREG_8 + C_PSOREG_4 + C_PSOREG + C_PPOREG_16 + C_PPOREG + C_PQOREG_16 + C_UOREG4K_16 + C_UOREG4K_8 + C_UOREG4K_4 + C_UOREG4K_2 + C_UOREG4K + C_UOREG8K_16 + C_UOREG8K_8 + C_UOREG8K_4 + C_UOREG8K + C_UOREG16K_16 + C_UOREG16K_8 + C_UOREG16K + C_UOREG32K_16 + C_UOREG32K + C_UOREG64K + C_LOREG + + C_ADDR // TODO(aram): explain difference from C_VCONADDR + + // The GOT slot for a symbol in -dynlink mode. + C_GOTADDR + + // TLS "var" in local exec mode: will become a constant offset from + // thread local base that is ultimately chosen by the program linker. + C_TLS_LE + + // TLS "var" in initial exec mode: will become a memory address (chosen + // by the program linker) that the dynamic linker will fill with the + // offset from the thread local base. 
+ C_TLS_IE + + C_ROFF // register offset (including register extended) + + C_GOK + C_TEXTSIZE + C_NCLASS // must be last +) + +const ( + C_XPRE = 1 << 6 // match arm.C_WBIT, so Prog.String know how to print it + C_XPOST = 1 << 5 // match arm.C_PBIT, so Prog.String know how to print it +) + +//go:generate go run ../stringer.go -i $GOFILE -o anames.go -p arm64 + +const ( + AADC = obj.ABaseARM64 + obj.A_ARCHSPECIFIC + iota + AADCS + AADCSW + AADCW + AADD + AADDS + AADDSW + AADDW + AADR + AADRP + AAND + AANDS + AANDSW + AANDW + AASR + AASRW + AAT + ABFI + ABFIW + ABFM + ABFMW + ABFXIL + ABFXILW + ABIC + ABICS + ABICSW + ABICW + ABRK + ACBNZ + ACBNZW + ACBZ + ACBZW + ACCMN + ACCMNW + ACCMP + ACCMPW + ACINC + ACINCW + ACINV + ACINVW + ACLREX + ACLS + ACLSW + ACLZ + ACLZW + ACMN + ACMNW + ACMP + ACMPW + ACNEG + ACNEGW + ACRC32B + ACRC32CB + ACRC32CH + ACRC32CW + ACRC32CX + ACRC32H + ACRC32W + ACRC32X + ACSEL + ACSELW + ACSET + ACSETM + ACSETMW + ACSETW + ACSINC + ACSINCW + ACSINV + ACSINVW + ACSNEG + ACSNEGW + ADC + ADCPS1 + ADCPS2 + ADCPS3 + ADMB + ADRPS + ADSB + AEON + AEONW + AEOR + AEORW + AERET + AEXTR + AEXTRW + AHINT + AHLT + AHVC + AIC + AISB + ALDADDAB + ALDADDAD + ALDADDAH + ALDADDAW + ALDADDALB + ALDADDALD + ALDADDALH + ALDADDALW + ALDADDB + ALDADDD + ALDADDH + ALDADDW + ALDADDLB + ALDADDLD + ALDADDLH + ALDADDLW + ALDAR + ALDARB + ALDARH + ALDARW + ALDAXP + ALDAXPW + ALDAXR + ALDAXRB + ALDAXRH + ALDAXRW + ALDCLRAB + ALDCLRAD + ALDCLRAH + ALDCLRAW + ALDCLRALB + ALDCLRALD + ALDCLRALH + ALDCLRALW + ALDCLRB + ALDCLRD + ALDCLRH + ALDCLRW + ALDCLRLB + ALDCLRLD + ALDCLRLH + ALDCLRLW + ALDEORAB + ALDEORAD + ALDEORAH + ALDEORAW + ALDEORALB + ALDEORALD + ALDEORALH + ALDEORALW + ALDEORB + ALDEORD + ALDEORH + ALDEORW + ALDEORLB + ALDEORLD + ALDEORLH + ALDEORLW + ALDORAB + ALDORAD + ALDORAH + ALDORAW + ALDORALB + ALDORALD + ALDORALH + ALDORALW + ALDORB + ALDORD + ALDORH + ALDORW + ALDORLB + ALDORLD + ALDORLH + ALDORLW + ALDP + ALDPW + ALDPSW + ALDXR + ALDXRB + ALDXRH + 
ALDXRW + ALDXP + ALDXPW + ALSL + ALSLW + ALSR + ALSRW + AMADD + AMADDW + AMNEG + AMNEGW + AMOVK + AMOVKW + AMOVN + AMOVNW + AMOVZ + AMOVZW + AMRS + AMSR + AMSUB + AMSUBW + AMUL + AMULW + AMVN + AMVNW + ANEG + ANEGS + ANEGSW + ANEGW + ANGC + ANGCS + ANGCSW + ANGCW + ANOOP + AORN + AORNW + AORR + AORRW + APRFM + APRFUM + ARBIT + ARBITW + AREM + AREMW + AREV + AREV16 + AREV16W + AREV32 + AREVW + AROR + ARORW + ASBC + ASBCS + ASBCSW + ASBCW + ASBFIZ + ASBFIZW + ASBFM + ASBFMW + ASBFX + ASBFXW + ASDIV + ASDIVW + ASEV + ASEVL + ASMADDL + ASMC + ASMNEGL + ASMSUBL + ASMULH + ASMULL + ASTXR + ASTXRB + ASTXRH + ASTXP + ASTXPW + ASTXRW + ASTLP + ASTLPW + ASTLR + ASTLRB + ASTLRH + ASTLRW + ASTLXP + ASTLXPW + ASTLXR + ASTLXRB + ASTLXRH + ASTLXRW + ASTP + ASTPW + ASUB + ASUBS + ASUBSW + ASUBW + ASVC + ASXTB + ASXTBW + ASXTH + ASXTHW + ASXTW + ASYS + ASYSL + ATBNZ + ATBZ + ATLBI + ATST + ATSTW + AUBFIZ + AUBFIZW + AUBFM + AUBFMW + AUBFX + AUBFXW + AUDIV + AUDIVW + AUMADDL + AUMNEGL + AUMSUBL + AUMULH + AUMULL + AUREM + AUREMW + AUXTB + AUXTH + AUXTW + AUXTBW + AUXTHW + AWFE + AWFI + AYIELD + AMOVB + AMOVBU + AMOVH + AMOVHU + AMOVW + AMOVWU + AMOVD + AMOVNP + AMOVNPW + AMOVP + AMOVPD + AMOVPQ + AMOVPS + AMOVPSW + AMOVPW + ASWPAD + ASWPAW + ASWPAH + ASWPAB + ASWPALD + ASWPALW + ASWPALH + ASWPALB + ASWPD + ASWPW + ASWPH + ASWPB + ASWPLD + ASWPLW + ASWPLH + ASWPLB + ACASD + ACASW + ACASH + ACASB + ACASAD + ACASAW + ACASLD + ACASLW + ACASALD + ACASALW + ACASALH + ACASALB + ACASPD + ACASPW + ABEQ + ABNE + ABCS + ABHS + ABCC + ABLO + ABMI + ABPL + ABVS + ABVC + ABHI + ABLS + ABGE + ABLT + ABGT + ABLE + AFABSD + AFABSS + AFADDD + AFADDS + AFCCMPD + AFCCMPED + AFCCMPS + AFCCMPES + AFCMPD + AFCMPED + AFCMPES + AFCMPS + AFCVTSD + AFCVTDS + AFCVTZSD + AFCVTZSDW + AFCVTZSS + AFCVTZSSW + AFCVTZUD + AFCVTZUDW + AFCVTZUS + AFCVTZUSW + AFDIVD + AFDIVS + AFLDPD + AFLDPQ + AFLDPS + AFMOVQ + AFMOVD + AFMOVS + AVMOVQ + AVMOVD + AVMOVS + AFMULD + AFMULS + AFNEGD + AFNEGS + AFSQRTD + AFSQRTS + AFSTPD + 
AFSTPQ + AFSTPS + AFSUBD + AFSUBS + ASCVTFD + ASCVTFS + ASCVTFWD + ASCVTFWS + AUCVTFD + AUCVTFS + AUCVTFWD + AUCVTFWS + AWORD + ADWORD + AFCSELS + AFCSELD + AFMAXS + AFMINS + AFMAXD + AFMIND + AFMAXNMS + AFMAXNMD + AFNMULS + AFNMULD + AFRINTNS + AFRINTND + AFRINTPS + AFRINTPD + AFRINTMS + AFRINTMD + AFRINTZS + AFRINTZD + AFRINTAS + AFRINTAD + AFRINTXS + AFRINTXD + AFRINTIS + AFRINTID + AFMADDS + AFMADDD + AFMSUBS + AFMSUBD + AFNMADDS + AFNMADDD + AFNMSUBS + AFNMSUBD + AFMINNMS + AFMINNMD + AFCVTDH + AFCVTHS + AFCVTHD + AFCVTSH + AAESD + AAESE + AAESIMC + AAESMC + ASHA1C + ASHA1H + ASHA1M + ASHA1P + ASHA1SU0 + ASHA1SU1 + ASHA256H + ASHA256H2 + ASHA256SU0 + ASHA256SU1 + ASHA512H + ASHA512H2 + ASHA512SU0 + ASHA512SU1 + AVADD + AVADDP + AVAND + AVBIF + AVBCAX + AVCMEQ + AVCNT + AVEOR + AVEOR3 + AVMOV + AVLD1 + AVLD2 + AVLD3 + AVLD4 + AVLD1R + AVLD2R + AVLD3R + AVLD4R + AVORR + AVREV16 + AVREV32 + AVREV64 + AVST1 + AVST2 + AVST3 + AVST4 + AVDUP + AVADDV + AVMOVI + AVUADDLV + AVSUB + AVFMLA + AVFMLS + AVPMULL + AVPMULL2 + AVEXT + AVRBIT + AVRAX1 + AVUMAX + AVUMIN + AVUSHR + AVUSHLL + AVUSHLL2 + AVUXTL + AVUXTL2 + AVUZP1 + AVUZP2 + AVSHL + AVSRI + AVSLI + AVBSL + AVBIT + AVTBL + AVTBX + AVXAR + AVZIP1 + AVZIP2 + AVCMTST + AVUADDW2 + AVUADDW + AVUSRA + AVTRN1 + AVTRN2 + ALAST + AB = obj.AJMP + ABL = obj.ACALL +) + +const ( + // shift types + SHIFT_LL = 0 << 22 + SHIFT_LR = 1 << 22 + SHIFT_AR = 2 << 22 + SHIFT_ROR = 3 << 22 +) + +// Arrangement for ARM64 SIMD instructions +const ( + // arrangement types + ARNG_8B = iota + ARNG_16B + ARNG_1D + ARNG_4H + ARNG_8H + ARNG_2S + ARNG_4S + ARNG_2D + ARNG_1Q + ARNG_B + ARNG_H + ARNG_S + ARNG_D +) + +//go:generate stringer -type SpecialOperand -trimprefix SPOP_ +type SpecialOperand int + +const ( + // PRFM + SPOP_PLDL1KEEP SpecialOperand = iota // must be the first one + SPOP_BEGIN SpecialOperand = iota - 1 // set as the lower bound + SPOP_PLDL1STRM + SPOP_PLDL2KEEP + SPOP_PLDL2STRM + SPOP_PLDL3KEEP + SPOP_PLDL3STRM + SPOP_PLIL1KEEP 
+ SPOP_PLIL1STRM + SPOP_PLIL2KEEP + SPOP_PLIL2STRM + SPOP_PLIL3KEEP + SPOP_PLIL3STRM + SPOP_PSTL1KEEP + SPOP_PSTL1STRM + SPOP_PSTL2KEEP + SPOP_PSTL2STRM + SPOP_PSTL3KEEP + SPOP_PSTL3STRM + + // TLBI + SPOP_VMALLE1IS + SPOP_VAE1IS + SPOP_ASIDE1IS + SPOP_VAAE1IS + SPOP_VALE1IS + SPOP_VAALE1IS + SPOP_VMALLE1 + SPOP_VAE1 + SPOP_ASIDE1 + SPOP_VAAE1 + SPOP_VALE1 + SPOP_VAALE1 + SPOP_IPAS2E1IS + SPOP_IPAS2LE1IS + SPOP_ALLE2IS + SPOP_VAE2IS + SPOP_ALLE1IS + SPOP_VALE2IS + SPOP_VMALLS12E1IS + SPOP_IPAS2E1 + SPOP_IPAS2LE1 + SPOP_ALLE2 + SPOP_VAE2 + SPOP_ALLE1 + SPOP_VALE2 + SPOP_VMALLS12E1 + SPOP_ALLE3IS + SPOP_VAE3IS + SPOP_VALE3IS + SPOP_ALLE3 + SPOP_VAE3 + SPOP_VALE3 + SPOP_VMALLE1OS + SPOP_VAE1OS + SPOP_ASIDE1OS + SPOP_VAAE1OS + SPOP_VALE1OS + SPOP_VAALE1OS + SPOP_RVAE1IS + SPOP_RVAAE1IS + SPOP_RVALE1IS + SPOP_RVAALE1IS + SPOP_RVAE1OS + SPOP_RVAAE1OS + SPOP_RVALE1OS + SPOP_RVAALE1OS + SPOP_RVAE1 + SPOP_RVAAE1 + SPOP_RVALE1 + SPOP_RVAALE1 + SPOP_RIPAS2E1IS + SPOP_RIPAS2LE1IS + SPOP_ALLE2OS + SPOP_VAE2OS + SPOP_ALLE1OS + SPOP_VALE2OS + SPOP_VMALLS12E1OS + SPOP_RVAE2IS + SPOP_RVALE2IS + SPOP_IPAS2E1OS + SPOP_RIPAS2E1 + SPOP_RIPAS2E1OS + SPOP_IPAS2LE1OS + SPOP_RIPAS2LE1 + SPOP_RIPAS2LE1OS + SPOP_RVAE2OS + SPOP_RVALE2OS + SPOP_RVAE2 + SPOP_RVALE2 + SPOP_ALLE3OS + SPOP_VAE3OS + SPOP_VALE3OS + SPOP_RVAE3IS + SPOP_RVALE3IS + SPOP_RVAE3OS + SPOP_RVALE3OS + SPOP_RVAE3 + SPOP_RVALE3 + + // DC + SPOP_IVAC + SPOP_ISW + SPOP_CSW + SPOP_CISW + SPOP_ZVA + SPOP_CVAC + SPOP_CVAU + SPOP_CIVAC + SPOP_IGVAC + SPOP_IGSW + SPOP_IGDVAC + SPOP_IGDSW + SPOP_CGSW + SPOP_CGDSW + SPOP_CIGSW + SPOP_CIGDSW + SPOP_GVA + SPOP_GZVA + SPOP_CGVAC + SPOP_CGDVAC + SPOP_CGVAP + SPOP_CGDVAP + SPOP_CGVADP + SPOP_CGDVADP + SPOP_CIGVAC + SPOP_CIGDVAC + SPOP_CVAP + SPOP_CVADP + + // PSTATE fields + SPOP_DAIFSet + SPOP_DAIFClr + + // Condition code, EQ, NE, etc. Their order relative to EQ matters.
+ SPOP_EQ + SPOP_NE + SPOP_HS + SPOP_LO + SPOP_MI + SPOP_PL + SPOP_VS + SPOP_VC + SPOP_HI + SPOP_LS + SPOP_GE + SPOP_LT + SPOP_GT + SPOP_LE + SPOP_AL + SPOP_NV + // Condition code end. + + SPOP_END +) diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go new file mode 100644 index 0000000..03222f9 --- /dev/null +++ b/src/cmd/internal/obj/arm64/anames.go @@ -0,0 +1,544 @@ +// Code generated by stringer -i a.out.go -o anames.go -p arm64; DO NOT EDIT. + +package arm64 + +import "cmd/internal/obj" + +var Anames = []string{ + obj.A_ARCHSPECIFIC: "ADC", + "ADCS", + "ADCSW", + "ADCW", + "ADD", + "ADDS", + "ADDSW", + "ADDW", + "ADR", + "ADRP", + "AND", + "ANDS", + "ANDSW", + "ANDW", + "ASR", + "ASRW", + "AT", + "BFI", + "BFIW", + "BFM", + "BFMW", + "BFXIL", + "BFXILW", + "BIC", + "BICS", + "BICSW", + "BICW", + "BRK", + "CBNZ", + "CBNZW", + "CBZ", + "CBZW", + "CCMN", + "CCMNW", + "CCMP", + "CCMPW", + "CINC", + "CINCW", + "CINV", + "CINVW", + "CLREX", + "CLS", + "CLSW", + "CLZ", + "CLZW", + "CMN", + "CMNW", + "CMP", + "CMPW", + "CNEG", + "CNEGW", + "CRC32B", + "CRC32CB", + "CRC32CH", + "CRC32CW", + "CRC32CX", + "CRC32H", + "CRC32W", + "CRC32X", + "CSEL", + "CSELW", + "CSET", + "CSETM", + "CSETMW", + "CSETW", + "CSINC", + "CSINCW", + "CSINV", + "CSINVW", + "CSNEG", + "CSNEGW", + "DC", + "DCPS1", + "DCPS2", + "DCPS3", + "DMB", + "DRPS", + "DSB", + "EON", + "EONW", + "EOR", + "EORW", + "ERET", + "EXTR", + "EXTRW", + "HINT", + "HLT", + "HVC", + "IC", + "ISB", + "LDADDAB", + "LDADDAD", + "LDADDAH", + "LDADDAW", + "LDADDALB", + "LDADDALD", + "LDADDALH", + "LDADDALW", + "LDADDB", + "LDADDD", + "LDADDH", + "LDADDW", + "LDADDLB", + "LDADDLD", + "LDADDLH", + "LDADDLW", + "LDAR", + "LDARB", + "LDARH", + "LDARW", + "LDAXP", + "LDAXPW", + "LDAXR", + "LDAXRB", + "LDAXRH", + "LDAXRW", + "LDCLRAB", + "LDCLRAD", + "LDCLRAH", + "LDCLRAW", + "LDCLRALB", + "LDCLRALD", + "LDCLRALH", + "LDCLRALW", + "LDCLRB", + "LDCLRD", + "LDCLRH", + "LDCLRW", + "LDCLRLB", + 
"LDCLRLD", + "LDCLRLH", + "LDCLRLW", + "LDEORAB", + "LDEORAD", + "LDEORAH", + "LDEORAW", + "LDEORALB", + "LDEORALD", + "LDEORALH", + "LDEORALW", + "LDEORB", + "LDEORD", + "LDEORH", + "LDEORW", + "LDEORLB", + "LDEORLD", + "LDEORLH", + "LDEORLW", + "LDORAB", + "LDORAD", + "LDORAH", + "LDORAW", + "LDORALB", + "LDORALD", + "LDORALH", + "LDORALW", + "LDORB", + "LDORD", + "LDORH", + "LDORW", + "LDORLB", + "LDORLD", + "LDORLH", + "LDORLW", + "LDP", + "LDPW", + "LDPSW", + "LDXR", + "LDXRB", + "LDXRH", + "LDXRW", + "LDXP", + "LDXPW", + "LSL", + "LSLW", + "LSR", + "LSRW", + "MADD", + "MADDW", + "MNEG", + "MNEGW", + "MOVK", + "MOVKW", + "MOVN", + "MOVNW", + "MOVZ", + "MOVZW", + "MRS", + "MSR", + "MSUB", + "MSUBW", + "MUL", + "MULW", + "MVN", + "MVNW", + "NEG", + "NEGS", + "NEGSW", + "NEGW", + "NGC", + "NGCS", + "NGCSW", + "NGCW", + "NOOP", + "ORN", + "ORNW", + "ORR", + "ORRW", + "PRFM", + "PRFUM", + "RBIT", + "RBITW", + "REM", + "REMW", + "REV", + "REV16", + "REV16W", + "REV32", + "REVW", + "ROR", + "RORW", + "SBC", + "SBCS", + "SBCSW", + "SBCW", + "SBFIZ", + "SBFIZW", + "SBFM", + "SBFMW", + "SBFX", + "SBFXW", + "SDIV", + "SDIVW", + "SEV", + "SEVL", + "SMADDL", + "SMC", + "SMNEGL", + "SMSUBL", + "SMULH", + "SMULL", + "STXR", + "STXRB", + "STXRH", + "STXP", + "STXPW", + "STXRW", + "STLP", + "STLPW", + "STLR", + "STLRB", + "STLRH", + "STLRW", + "STLXP", + "STLXPW", + "STLXR", + "STLXRB", + "STLXRH", + "STLXRW", + "STP", + "STPW", + "SUB", + "SUBS", + "SUBSW", + "SUBW", + "SVC", + "SXTB", + "SXTBW", + "SXTH", + "SXTHW", + "SXTW", + "SYS", + "SYSL", + "TBNZ", + "TBZ", + "TLBI", + "TST", + "TSTW", + "UBFIZ", + "UBFIZW", + "UBFM", + "UBFMW", + "UBFX", + "UBFXW", + "UDIV", + "UDIVW", + "UMADDL", + "UMNEGL", + "UMSUBL", + "UMULH", + "UMULL", + "UREM", + "UREMW", + "UXTB", + "UXTH", + "UXTW", + "UXTBW", + "UXTHW", + "WFE", + "WFI", + "YIELD", + "MOVB", + "MOVBU", + "MOVH", + "MOVHU", + "MOVW", + "MOVWU", + "MOVD", + "MOVNP", + "MOVNPW", + "MOVP", + "MOVPD", + "MOVPQ", + "MOVPS", + 
"MOVPSW", + "MOVPW", + "SWPAD", + "SWPAW", + "SWPAH", + "SWPAB", + "SWPALD", + "SWPALW", + "SWPALH", + "SWPALB", + "SWPD", + "SWPW", + "SWPH", + "SWPB", + "SWPLD", + "SWPLW", + "SWPLH", + "SWPLB", + "CASD", + "CASW", + "CASH", + "CASB", + "CASAD", + "CASAW", + "CASLD", + "CASLW", + "CASALD", + "CASALW", + "CASALH", + "CASALB", + "CASPD", + "CASPW", + "BEQ", + "BNE", + "BCS", + "BHS", + "BCC", + "BLO", + "BMI", + "BPL", + "BVS", + "BVC", + "BHI", + "BLS", + "BGE", + "BLT", + "BGT", + "BLE", + "FABSD", + "FABSS", + "FADDD", + "FADDS", + "FCCMPD", + "FCCMPED", + "FCCMPS", + "FCCMPES", + "FCMPD", + "FCMPED", + "FCMPES", + "FCMPS", + "FCVTSD", + "FCVTDS", + "FCVTZSD", + "FCVTZSDW", + "FCVTZSS", + "FCVTZSSW", + "FCVTZUD", + "FCVTZUDW", + "FCVTZUS", + "FCVTZUSW", + "FDIVD", + "FDIVS", + "FLDPD", + "FLDPQ", + "FLDPS", + "FMOVQ", + "FMOVD", + "FMOVS", + "VMOVQ", + "VMOVD", + "VMOVS", + "FMULD", + "FMULS", + "FNEGD", + "FNEGS", + "FSQRTD", + "FSQRTS", + "FSTPD", + "FSTPQ", + "FSTPS", + "FSUBD", + "FSUBS", + "SCVTFD", + "SCVTFS", + "SCVTFWD", + "SCVTFWS", + "UCVTFD", + "UCVTFS", + "UCVTFWD", + "UCVTFWS", + "WORD", + "DWORD", + "FCSELS", + "FCSELD", + "FMAXS", + "FMINS", + "FMAXD", + "FMIND", + "FMAXNMS", + "FMAXNMD", + "FNMULS", + "FNMULD", + "FRINTNS", + "FRINTND", + "FRINTPS", + "FRINTPD", + "FRINTMS", + "FRINTMD", + "FRINTZS", + "FRINTZD", + "FRINTAS", + "FRINTAD", + "FRINTXS", + "FRINTXD", + "FRINTIS", + "FRINTID", + "FMADDS", + "FMADDD", + "FMSUBS", + "FMSUBD", + "FNMADDS", + "FNMADDD", + "FNMSUBS", + "FNMSUBD", + "FMINNMS", + "FMINNMD", + "FCVTDH", + "FCVTHS", + "FCVTHD", + "FCVTSH", + "AESD", + "AESE", + "AESIMC", + "AESMC", + "SHA1C", + "SHA1H", + "SHA1M", + "SHA1P", + "SHA1SU0", + "SHA1SU1", + "SHA256H", + "SHA256H2", + "SHA256SU0", + "SHA256SU1", + "SHA512H", + "SHA512H2", + "SHA512SU0", + "SHA512SU1", + "VADD", + "VADDP", + "VAND", + "VBIF", + "VBCAX", + "VCMEQ", + "VCNT", + "VEOR", + "VEOR3", + "VMOV", + "VLD1", + "VLD2", + "VLD3", + "VLD4", + "VLD1R", + "VLD2R", 
+ "VLD3R", + "VLD4R", + "VORR", + "VREV16", + "VREV32", + "VREV64", + "VST1", + "VST2", + "VST3", + "VST4", + "VDUP", + "VADDV", + "VMOVI", + "VUADDLV", + "VSUB", + "VFMLA", + "VFMLS", + "VPMULL", + "VPMULL2", + "VEXT", + "VRBIT", + "VRAX1", + "VUMAX", + "VUMIN", + "VUSHR", + "VUSHLL", + "VUSHLL2", + "VUXTL", + "VUXTL2", + "VUZP1", + "VUZP2", + "VSHL", + "VSRI", + "VSLI", + "VBSL", + "VBIT", + "VTBL", + "VTBX", + "VXAR", + "VZIP1", + "VZIP2", + "VCMTST", + "VUADDW2", + "VUADDW", + "VUSRA", + "VTRN1", + "VTRN2", + "LAST", +} diff --git a/src/cmd/internal/obj/arm64/anames7.go b/src/cmd/internal/obj/arm64/anames7.go new file mode 100644 index 0000000..2f20dfe --- /dev/null +++ b/src/cmd/internal/obj/arm64/anames7.go @@ -0,0 +1,124 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package arm64 + +// This order should be strictly consistent with that in a.out.go +var cnames7 = []string{ + "NONE", + "REG", + "ZREG", + "RSP", + "FREG", + "VREG", + "PAIR", + "SHIFT", + "EXTREG", + "SPR", + "SPOP", + "COND", + "ARNG", + "ELEM", + "LIST", + "ZCON", + "ABCON0", + "ADDCON0", + "ABCON", + "AMCON", + "ADDCON", + "MBCON", + "MOVCON", + "BITCON", + "ADDCON2", + "LCON", + "MOVCON2", + "MOVCON3", + "VCON", + "FCON", + "VCONADDR", + "AACON", + "AACON2", + "LACON", + "AECON", + "SBRA", + "LBRA", + "ZAUTO", + "NSAUTO_16", + "NSAUTO_8", + "NSAUTO_4", + "NSAUTO", + "NPAUTO_16", + "NPAUTO", + "NQAUTO_16", + "NAUTO4K", + "PSAUTO_16", + "PSAUTO_8", + "PSAUTO_4", + "PSAUTO", + "PPAUTO_16", + "PPAUTO", + "PQAUTO_16", + "UAUTO4K_16", + "UAUTO4K_8", + "UAUTO4K_4", + "UAUTO4K_2", + "UAUTO4K", + "UAUTO8K_16", + "UAUTO8K_8", + "UAUTO8K_4", + "UAUTO8K", + "UAUTO16K_16", + "UAUTO16K_8", + "UAUTO16K", + "UAUTO32K_16", + "UAUTO32K", + "UAUTO64K", + "LAUTO", + "SEXT1", + "SEXT2", + "SEXT4", + "SEXT8", + "SEXT16", + "LEXT", + "ZOREG", + "NSOREG_16", + "NSOREG_8", + "NSOREG_4", + "NSOREG",
+ "NPOREG_16", + "NPOREG", + "NQOREG_16", + "NOREG4K", + "PSOREG_16", + "PSOREG_8", + "PSOREG_4", + "PSOREG", + "PPOREG_16", + "PPOREG", + "PQOREG_16", + "UOREG4K_16", + "UOREG4K_8", + "UOREG4K_4", + "UOREG4K_2", + "UOREG4K", + "UOREG8K_16", + "UOREG8K_8", + "UOREG8K_4", + "UOREG8K", + "UOREG16K_16", + "UOREG16K_8", + "UOREG16K", + "UOREG32K_16", + "UOREG32K", + "UOREG64K", + "LOREG", + "ADDR", + "GOTADDR", + "TLS_LE", + "TLS_IE", + "ROFF", + "GOK", + "TEXTSIZE", + "NCLASS", +} diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go new file mode 100644 index 0000000..db18bc8 --- /dev/null +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -0,0 +1,7657 @@ +// cmd/7l/asm.c, cmd/7l/asmout.c, cmd/7l/optab.c, cmd/7l/span.c, cmd/ld/sub.c, cmd/ld/mod.c, from Vita Nuova. +// https://code.google.com/p/ken-cc/source/browse/ +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package arm64 + +import ( + "cmd/internal/obj" + "cmd/internal/objabi" + "fmt" + "log" + "math" + "sort" + "strings" +) + +// ctxt7 holds state while assembling a single function. +// Each function gets a fresh ctxt7. +// This allows for multiple functions to be safely concurrently assembled. +type ctxt7 struct { + ctxt *obj.Link + newprog obj.ProgAlloc + cursym *obj.LSym + blitrl *obj.Prog + elitrl *obj.Prog + autosize int32 + extrasize int32 + instoffset int64 + pc int64 + pool struct { + start uint32 + size uint32 + } +} + +const ( + funcAlign = 16 +) + +const ( + REGFROM = 1 +) + +type Optab struct { + as obj.As + a1 uint8 + a2 uint8 + a3 uint8 + a4 uint8 + type_ int8 + size_ int8 // the value of this field is not static, use the size() method to return the value + param int16 + flag int8 + scond uint16 +} + +func IsAtomicInstruction(as obj.As) bool { + if _, ok := atomicLDADD[as]; ok { + return true + } + if _, ok := atomicSWP[as]; ok { + return true + } + return false +} + +// known field values of an instruction. 
+var atomicLDADD = map[obj.As]uint32{ + ALDADDAD: 3<<30 | 0x1c5<<21 | 0x00<<10, + ALDADDAW: 2<<30 | 0x1c5<<21 | 0x00<<10, + ALDADDAH: 1<<30 | 0x1c5<<21 | 0x00<<10, + ALDADDAB: 0<<30 | 0x1c5<<21 | 0x00<<10, + ALDADDALD: 3<<30 | 0x1c7<<21 | 0x00<<10, + ALDADDALW: 2<<30 | 0x1c7<<21 | 0x00<<10, + ALDADDALH: 1<<30 | 0x1c7<<21 | 0x00<<10, + ALDADDALB: 0<<30 | 0x1c7<<21 | 0x00<<10, + ALDADDD: 3<<30 | 0x1c1<<21 | 0x00<<10, + ALDADDW: 2<<30 | 0x1c1<<21 | 0x00<<10, + ALDADDH: 1<<30 | 0x1c1<<21 | 0x00<<10, + ALDADDB: 0<<30 | 0x1c1<<21 | 0x00<<10, + ALDADDLD: 3<<30 | 0x1c3<<21 | 0x00<<10, + ALDADDLW: 2<<30 | 0x1c3<<21 | 0x00<<10, + ALDADDLH: 1<<30 | 0x1c3<<21 | 0x00<<10, + ALDADDLB: 0<<30 | 0x1c3<<21 | 0x00<<10, + ALDCLRAD: 3<<30 | 0x1c5<<21 | 0x04<<10, + ALDCLRAW: 2<<30 | 0x1c5<<21 | 0x04<<10, + ALDCLRAH: 1<<30 | 0x1c5<<21 | 0x04<<10, + ALDCLRAB: 0<<30 | 0x1c5<<21 | 0x04<<10, + ALDCLRALD: 3<<30 | 0x1c7<<21 | 0x04<<10, + ALDCLRALW: 2<<30 | 0x1c7<<21 | 0x04<<10, + ALDCLRALH: 1<<30 | 0x1c7<<21 | 0x04<<10, + ALDCLRALB: 0<<30 | 0x1c7<<21 | 0x04<<10, + ALDCLRD: 3<<30 | 0x1c1<<21 | 0x04<<10, + ALDCLRW: 2<<30 | 0x1c1<<21 | 0x04<<10, + ALDCLRH: 1<<30 | 0x1c1<<21 | 0x04<<10, + ALDCLRB: 0<<30 | 0x1c1<<21 | 0x04<<10, + ALDCLRLD: 3<<30 | 0x1c3<<21 | 0x04<<10, + ALDCLRLW: 2<<30 | 0x1c3<<21 | 0x04<<10, + ALDCLRLH: 1<<30 | 0x1c3<<21 | 0x04<<10, + ALDCLRLB: 0<<30 | 0x1c3<<21 | 0x04<<10, + ALDEORAD: 3<<30 | 0x1c5<<21 | 0x08<<10, + ALDEORAW: 2<<30 | 0x1c5<<21 | 0x08<<10, + ALDEORAH: 1<<30 | 0x1c5<<21 | 0x08<<10, + ALDEORAB: 0<<30 | 0x1c5<<21 | 0x08<<10, + ALDEORALD: 3<<30 | 0x1c7<<21 | 0x08<<10, + ALDEORALW: 2<<30 | 0x1c7<<21 | 0x08<<10, + ALDEORALH: 1<<30 | 0x1c7<<21 | 0x08<<10, + ALDEORALB: 0<<30 | 0x1c7<<21 | 0x08<<10, + ALDEORD: 3<<30 | 0x1c1<<21 | 0x08<<10, + ALDEORW: 2<<30 | 0x1c1<<21 | 0x08<<10, + ALDEORH: 1<<30 | 0x1c1<<21 | 0x08<<10, + ALDEORB: 0<<30 | 0x1c1<<21 | 0x08<<10, + ALDEORLD: 3<<30 | 0x1c3<<21 | 0x08<<10, + ALDEORLW: 2<<30 | 0x1c3<<21 | 0x08<<10, + ALDEORLH: 1<<30 | 0x1c3<<21 
| 0x08<<10, + ALDEORLB: 0<<30 | 0x1c3<<21 | 0x08<<10, + ALDORAD: 3<<30 | 0x1c5<<21 | 0x0c<<10, + ALDORAW: 2<<30 | 0x1c5<<21 | 0x0c<<10, + ALDORAH: 1<<30 | 0x1c5<<21 | 0x0c<<10, + ALDORAB: 0<<30 | 0x1c5<<21 | 0x0c<<10, + ALDORALD: 3<<30 | 0x1c7<<21 | 0x0c<<10, + ALDORALW: 2<<30 | 0x1c7<<21 | 0x0c<<10, + ALDORALH: 1<<30 | 0x1c7<<21 | 0x0c<<10, + ALDORALB: 0<<30 | 0x1c7<<21 | 0x0c<<10, + ALDORD: 3<<30 | 0x1c1<<21 | 0x0c<<10, + ALDORW: 2<<30 | 0x1c1<<21 | 0x0c<<10, + ALDORH: 1<<30 | 0x1c1<<21 | 0x0c<<10, + ALDORB: 0<<30 | 0x1c1<<21 | 0x0c<<10, + ALDORLD: 3<<30 | 0x1c3<<21 | 0x0c<<10, + ALDORLW: 2<<30 | 0x1c3<<21 | 0x0c<<10, + ALDORLH: 1<<30 | 0x1c3<<21 | 0x0c<<10, + ALDORLB: 0<<30 | 0x1c3<<21 | 0x0c<<10, +} + +var atomicSWP = map[obj.As]uint32{ + ASWPAD: 3<<30 | 0x1c5<<21 | 0x20<<10, + ASWPAW: 2<<30 | 0x1c5<<21 | 0x20<<10, + ASWPAH: 1<<30 | 0x1c5<<21 | 0x20<<10, + ASWPAB: 0<<30 | 0x1c5<<21 | 0x20<<10, + ASWPALD: 3<<30 | 0x1c7<<21 | 0x20<<10, + ASWPALW: 2<<30 | 0x1c7<<21 | 0x20<<10, + ASWPALH: 1<<30 | 0x1c7<<21 | 0x20<<10, + ASWPALB: 0<<30 | 0x1c7<<21 | 0x20<<10, + ASWPD: 3<<30 | 0x1c1<<21 | 0x20<<10, + ASWPW: 2<<30 | 0x1c1<<21 | 0x20<<10, + ASWPH: 1<<30 | 0x1c1<<21 | 0x20<<10, + ASWPB: 0<<30 | 0x1c1<<21 | 0x20<<10, + ASWPLD: 3<<30 | 0x1c3<<21 | 0x20<<10, + ASWPLW: 2<<30 | 0x1c3<<21 | 0x20<<10, + ASWPLH: 1<<30 | 0x1c3<<21 | 0x20<<10, + ASWPLB: 0<<30 | 0x1c3<<21 | 0x20<<10, + ACASD: 3<<30 | 0x45<<21 | 0x1f<<10, + ACASW: 2<<30 | 0x45<<21 | 0x1f<<10, + ACASH: 1<<30 | 0x45<<21 | 0x1f<<10, + ACASB: 0<<30 | 0x45<<21 | 0x1f<<10, + ACASAD: 3<<30 | 0x47<<21 | 0x1f<<10, + ACASAW: 2<<30 | 0x47<<21 | 0x1f<<10, + ACASLD: 3<<30 | 0x45<<21 | 0x3f<<10, + ACASLW: 2<<30 | 0x45<<21 | 0x3f<<10, + ACASALD: 3<<30 | 0x47<<21 | 0x3f<<10, + ACASALW: 2<<30 | 0x47<<21 | 0x3f<<10, + ACASALH: 1<<30 | 0x47<<21 | 0x3f<<10, + ACASALB: 0<<30 | 0x47<<21 | 0x3f<<10, +} +var atomicCASP = map[obj.As]uint32{ + ACASPD: 1<<30 | 0x41<<21 | 0x1f<<10, + ACASPW: 0<<30 | 0x41<<21 | 0x1f<<10, +} + +var oprange 
[ALAST & obj.AMask][]Optab + +var xcmp [C_NCLASS][C_NCLASS]bool + +const ( + S32 = 0 << 31 + S64 = 1 << 31 + Sbit = 1 << 29 + LSL0_32 = 2 << 13 + LSL0_64 = 3 << 13 +) + +func OPDP2(x uint32) uint32 { + return 0<<30 | 0<<29 | 0xd6<<21 | x<<10 +} + +func OPDP3(sf uint32, op54 uint32, op31 uint32, o0 uint32) uint32 { + return sf<<31 | op54<<29 | 0x1B<<24 | op31<<21 | o0<<15 +} + +func OPBcc(x uint32) uint32 { + return 0x2A<<25 | 0<<24 | 0<<4 | x&15 +} + +func OPBLR(x uint32) uint32 { + /* x=0, JMP; 1, CALL; 2, RET */ + return 0x6B<<25 | 0<<23 | x<<21 | 0x1F<<16 | 0<<10 +} + +func SYSOP(l uint32, op0 uint32, op1 uint32, crn uint32, crm uint32, op2 uint32, rt uint32) uint32 { + return 0x354<<22 | l<<21 | op0<<19 | op1<<16 | crn&15<<12 | crm&15<<8 | op2<<5 | rt +} + +func SYSHINT(x uint32) uint32 { + return SYSOP(0, 0, 3, 2, 0, x, 0x1F) +} + +func LDSTR(sz uint32, v uint32, opc uint32) uint32 { + return sz<<30 | 7<<27 | v<<26 | opc<<22 +} + +func LD2STR(o uint32) uint32 { + return o &^ (3 << 22) +} + +func LDSTX(sz uint32, o2 uint32, l uint32, o1 uint32, o0 uint32) uint32 { + return sz<<30 | 0x8<<24 | o2<<23 | l<<22 | o1<<21 | o0<<15 +} + +func FPCMP(m uint32, s uint32, type_ uint32, op uint32, op2 uint32) uint32 { + return m<<31 | s<<29 | 0x1E<<24 | type_<<22 | 1<<21 | op<<14 | 8<<10 | op2 +} + +func FPCCMP(m uint32, s uint32, type_ uint32, op uint32) uint32 { + return m<<31 | s<<29 | 0x1E<<24 | type_<<22 | 1<<21 | 1<<10 | op<<4 +} + +func FPOP1S(m uint32, s uint32, type_ uint32, op uint32) uint32 { + return m<<31 | s<<29 | 0x1E<<24 | type_<<22 | 1<<21 | op<<15 | 0x10<<10 +} + +func FPOP2S(m uint32, s uint32, type_ uint32, op uint32) uint32 { + return m<<31 | s<<29 | 0x1E<<24 | type_<<22 | 1<<21 | op<<12 | 2<<10 +} + +func FPOP3S(m uint32, s uint32, type_ uint32, op uint32, op2 uint32) uint32 { + return m<<31 | s<<29 | 0x1F<<24 | type_<<22 | op<<21 | op2<<15 +} + +func FPCVTI(sf uint32, s uint32, type_ uint32, rmode uint32, op uint32) uint32 { + return sf<<31 | s<<29 | 
0x1E<<24 | type_<<22 | 1<<21 | rmode<<19 | op<<16 | 0<<10 +} + +func ADR(p uint32, o uint32, rt uint32) uint32 { + return p<<31 | (o&3)<<29 | 0x10<<24 | ((o>>2)&0x7FFFF)<<5 | rt&31 +} + +func OPBIT(x uint32) uint32 { + return 1<<30 | 0<<29 | 0xD6<<21 | 0<<16 | x<<10 +} + +func MOVCONST(d int64, s int, rt int) uint32 { + return uint32(((d>>uint(s*16))&0xFFFF)<<5) | uint32(s)&3<<21 | uint32(rt&31) +} + +const ( + // Optab.flag + LFROM = 1 << iota // p.From uses constant pool + LFROM128 // p.From3<<64+p.From forms a 128-bit constant in literal pool + LTO // p.To uses constant pool + NOTUSETMP // p expands to multiple instructions, but does NOT use REGTMP + BRANCH14BITS // branch instruction encodes 14 bits + BRANCH19BITS // branch instruction encodes 19 bits +) + +var optab = []Optab{ + /* struct Optab: + OPCODE, from, prog->reg, from3, to, type,size,param,flag,scond */ + {obj.ATEXT, C_ADDR, C_NONE, C_NONE, C_TEXTSIZE, 0, 0, 0, 0, 0}, + + /* arithmetic operations */ + {AADD, C_ZREG, C_ZREG, C_NONE, C_ZREG, 1, 4, 0, 0, 0}, + {AADD, C_ZREG, C_NONE, C_NONE, C_ZREG, 1, 4, 0, 0, 0}, + {AADC, C_ZREG, C_ZREG, C_NONE, C_ZREG, 1, 4, 0, 0, 0}, + {AADC, C_ZREG, C_NONE, C_NONE, C_ZREG, 1, 4, 0, 0, 0}, + {ANEG, C_ZREG, C_NONE, C_NONE, C_ZREG, 25, 4, 0, 0, 0}, + {ANEG, C_NONE, C_NONE, C_NONE, C_ZREG, 25, 4, 0, 0, 0}, + {ANGC, C_ZREG, C_NONE, C_NONE, C_ZREG, 17, 4, 0, 0, 0}, + {ACMP, C_ZREG, C_ZREG, C_NONE, C_NONE, 1, 4, 0, 0, 0}, + {AADD, C_ADDCON, C_RSP, C_NONE, C_RSP, 2, 4, 0, 0, 0}, + {AADD, C_ADDCON, C_NONE, C_NONE, C_RSP, 2, 4, 0, 0, 0}, + {ACMP, C_ADDCON, C_RSP, C_NONE, C_NONE, 2, 4, 0, 0, 0}, + {AADD, C_MOVCON, C_RSP, C_NONE, C_RSP, 62, 8, 0, 0, 0}, + {AADD, C_MOVCON, C_NONE, C_NONE, C_RSP, 62, 8, 0, 0, 0}, + {ACMP, C_MOVCON, C_RSP, C_NONE, C_NONE, 62, 8, 0, 0, 0}, + {AADD, C_BITCON, C_RSP, C_NONE, C_RSP, 62, 8, 0, 0, 0}, + {AADD, C_BITCON, C_NONE, C_NONE, C_RSP, 62, 8, 0, 0, 0}, + {ACMP, C_BITCON, C_RSP, C_NONE, C_NONE, 62, 8, 0, 0, 0}, + {AADD, C_ADDCON2, C_RSP, C_NONE, 
C_RSP, 48, 8, 0, NOTUSETMP, 0}, + {AADD, C_ADDCON2, C_NONE, C_NONE, C_RSP, 48, 8, 0, NOTUSETMP, 0}, + {AADD, C_MOVCON2, C_RSP, C_NONE, C_RSP, 13, 12, 0, 0, 0}, + {AADD, C_MOVCON2, C_NONE, C_NONE, C_RSP, 13, 12, 0, 0, 0}, + {AADD, C_MOVCON3, C_RSP, C_NONE, C_RSP, 13, 16, 0, 0, 0}, + {AADD, C_MOVCON3, C_NONE, C_NONE, C_RSP, 13, 16, 0, 0, 0}, + {AADD, C_VCON, C_RSP, C_NONE, C_RSP, 13, 20, 0, 0, 0}, + {AADD, C_VCON, C_NONE, C_NONE, C_RSP, 13, 20, 0, 0, 0}, + {ACMP, C_MOVCON2, C_ZREG, C_NONE, C_NONE, 13, 12, 0, 0, 0}, + {ACMP, C_MOVCON3, C_ZREG, C_NONE, C_NONE, 13, 16, 0, 0, 0}, + {ACMP, C_VCON, C_ZREG, C_NONE, C_NONE, 13, 20, 0, 0, 0}, + {AADD, C_SHIFT, C_ZREG, C_NONE, C_ZREG, 3, 4, 0, 0, 0}, + {AADD, C_SHIFT, C_NONE, C_NONE, C_ZREG, 3, 4, 0, 0, 0}, + {AMVN, C_SHIFT, C_NONE, C_NONE, C_ZREG, 3, 4, 0, 0, 0}, + {ACMP, C_SHIFT, C_ZREG, C_NONE, C_NONE, 3, 4, 0, 0, 0}, + {ANEG, C_SHIFT, C_NONE, C_NONE, C_ZREG, 3, 4, 0, 0, 0}, + {AADD, C_ZREG, C_RSP, C_NONE, C_RSP, 27, 4, 0, 0, 0}, + {AADD, C_ZREG, C_NONE, C_NONE, C_RSP, 27, 4, 0, 0, 0}, + {ACMP, C_ZREG, C_RSP, C_NONE, C_NONE, 27, 4, 0, 0, 0}, + {AADD, C_EXTREG, C_RSP, C_NONE, C_RSP, 27, 4, 0, 0, 0}, + {AADD, C_EXTREG, C_NONE, C_NONE, C_RSP, 27, 4, 0, 0, 0}, + {ACMP, C_EXTREG, C_RSP, C_NONE, C_NONE, 27, 4, 0, 0, 0}, + {AADD, C_ZREG, C_ZREG, C_NONE, C_ZREG, 1, 4, 0, 0, 0}, + {AADD, C_ZREG, C_NONE, C_NONE, C_ZREG, 1, 4, 0, 0, 0}, + {AMUL, C_ZREG, C_ZREG, C_NONE, C_ZREG, 15, 4, 0, 0, 0}, + {AMUL, C_ZREG, C_NONE, C_NONE, C_ZREG, 15, 4, 0, 0, 0}, + {AMADD, C_ZREG, C_ZREG, C_ZREG, C_ZREG, 15, 4, 0, 0, 0}, + {AREM, C_ZREG, C_ZREG, C_NONE, C_ZREG, 16, 8, 0, 0, 0}, + {AREM, C_ZREG, C_NONE, C_NONE, C_ZREG, 16, 8, 0, 0, 0}, + {ASDIV, C_ZREG, C_NONE, C_NONE, C_ZREG, 1, 4, 0, 0, 0}, + {ASDIV, C_ZREG, C_ZREG, C_NONE, C_ZREG, 1, 4, 0, 0, 0}, + + {AFADDS, C_FREG, C_NONE, C_NONE, C_FREG, 54, 4, 0, 0, 0}, + {AFADDS, C_FREG, C_FREG, C_NONE, C_FREG, 54, 4, 0, 0, 0}, + {AFMSUBD, C_FREG, C_FREG, C_FREG, C_FREG, 15, 4, 0, 0, 0}, + {AFCMPS, C_FREG, 
C_FREG, C_NONE, C_NONE, 56, 4, 0, 0, 0}, + {AFCMPS, C_FCON, C_FREG, C_NONE, C_NONE, 56, 4, 0, 0, 0}, + {AVADDP, C_ARNG, C_ARNG, C_NONE, C_ARNG, 72, 4, 0, 0, 0}, + {AVADD, C_ARNG, C_ARNG, C_NONE, C_ARNG, 72, 4, 0, 0, 0}, + {AVADD, C_VREG, C_VREG, C_NONE, C_VREG, 89, 4, 0, 0, 0}, + {AVADD, C_VREG, C_NONE, C_NONE, C_VREG, 89, 4, 0, 0, 0}, + {AVADDV, C_ARNG, C_NONE, C_NONE, C_VREG, 85, 4, 0, 0, 0}, + + /* logical operations */ + {AAND, C_ZREG, C_ZREG, C_NONE, C_ZREG, 1, 4, 0, 0, 0}, + {AAND, C_ZREG, C_NONE, C_NONE, C_ZREG, 1, 4, 0, 0, 0}, + {AANDS, C_ZREG, C_ZREG, C_NONE, C_ZREG, 1, 4, 0, 0, 0}, + {AANDS, C_ZREG, C_NONE, C_NONE, C_ZREG, 1, 4, 0, 0, 0}, + {ATST, C_ZREG, C_ZREG, C_NONE, C_NONE, 1, 4, 0, 0, 0}, + {AAND, C_MBCON, C_ZREG, C_NONE, C_RSP, 53, 4, 0, 0, 0}, + {AAND, C_MBCON, C_NONE, C_NONE, C_RSP, 53, 4, 0, 0, 0}, + {AANDS, C_MBCON, C_ZREG, C_NONE, C_ZREG, 53, 4, 0, 0, 0}, + {AANDS, C_MBCON, C_NONE, C_NONE, C_ZREG, 53, 4, 0, 0, 0}, + {ATST, C_MBCON, C_ZREG, C_NONE, C_NONE, 53, 4, 0, 0, 0}, + {AAND, C_BITCON, C_ZREG, C_NONE, C_RSP, 53, 4, 0, 0, 0}, + {AAND, C_BITCON, C_NONE, C_NONE, C_RSP, 53, 4, 0, 0, 0}, + {AANDS, C_BITCON, C_ZREG, C_NONE, C_ZREG, 53, 4, 0, 0, 0}, + {AANDS, C_BITCON, C_NONE, C_NONE, C_ZREG, 53, 4, 0, 0, 0}, + {ATST, C_BITCON, C_ZREG, C_NONE, C_NONE, 53, 4, 0, 0, 0}, + {AAND, C_MOVCON, C_ZREG, C_NONE, C_ZREG, 62, 8, 0, 0, 0}, + {AAND, C_MOVCON, C_NONE, C_NONE, C_ZREG, 62, 8, 0, 0, 0}, + {AANDS, C_MOVCON, C_ZREG, C_NONE, C_ZREG, 62, 8, 0, 0, 0}, + {AANDS, C_MOVCON, C_NONE, C_NONE, C_ZREG, 62, 8, 0, 0, 0}, + {ATST, C_MOVCON, C_ZREG, C_NONE, C_NONE, 62, 8, 0, 0, 0}, + {AAND, C_MOVCON2, C_ZREG, C_NONE, C_ZREG, 28, 12, 0, 0, 0}, + {AAND, C_MOVCON2, C_NONE, C_NONE, C_ZREG, 28, 12, 0, 0, 0}, + {AAND, C_MOVCON3, C_ZREG, C_NONE, C_ZREG, 28, 16, 0, 0, 0}, + {AAND, C_MOVCON3, C_NONE, C_NONE, C_ZREG, 28, 16, 0, 0, 0}, + {AAND, C_VCON, C_ZREG, C_NONE, C_ZREG, 28, 20, 0, 0, 0}, + {AAND, C_VCON, C_NONE, C_NONE, C_ZREG, 28, 20, 0, 0, 0}, + {AANDS, C_MOVCON2, 
C_ZREG, C_NONE, C_ZREG, 28, 12, 0, 0, 0}, + {AANDS, C_MOVCON2, C_NONE, C_NONE, C_ZREG, 28, 12, 0, 0, 0}, + {AANDS, C_MOVCON3, C_ZREG, C_NONE, C_ZREG, 28, 16, 0, 0, 0}, + {AANDS, C_MOVCON3, C_NONE, C_NONE, C_ZREG, 28, 16, 0, 0, 0}, + {AANDS, C_VCON, C_ZREG, C_NONE, C_ZREG, 28, 20, 0, 0, 0}, + {AANDS, C_VCON, C_NONE, C_NONE, C_ZREG, 28, 20, 0, 0, 0}, + {ATST, C_MOVCON2, C_ZREG, C_NONE, C_NONE, 28, 12, 0, 0, 0}, + {ATST, C_MOVCON3, C_ZREG, C_NONE, C_NONE, 28, 16, 0, 0, 0}, + {ATST, C_VCON, C_ZREG, C_NONE, C_NONE, 28, 20, 0, 0, 0}, + {AAND, C_SHIFT, C_ZREG, C_NONE, C_ZREG, 3, 4, 0, 0, 0}, + {AAND, C_SHIFT, C_NONE, C_NONE, C_ZREG, 3, 4, 0, 0, 0}, + {AANDS, C_SHIFT, C_ZREG, C_NONE, C_ZREG, 3, 4, 0, 0, 0}, + {AANDS, C_SHIFT, C_NONE, C_NONE, C_ZREG, 3, 4, 0, 0, 0}, + {ATST, C_SHIFT, C_ZREG, C_NONE, C_NONE, 3, 4, 0, 0, 0}, + {AMOVD, C_RSP, C_NONE, C_NONE, C_RSP, 24, 4, 0, 0, 0}, + {AMOVD, C_ZREG, C_NONE, C_NONE, C_ZREG, 24, 4, 0, 0, 0}, + {AMVN, C_ZREG, C_NONE, C_NONE, C_ZREG, 24, 4, 0, 0, 0}, + {AMOVB, C_ZREG, C_NONE, C_NONE, C_ZREG, 45, 4, 0, 0, 0}, /* also MOVBU */ + {AMOVH, C_ZREG, C_NONE, C_NONE, C_ZREG, 45, 4, 0, 0, 0}, /* also MOVHU */ + {AMOVW, C_ZREG, C_NONE, C_NONE, C_ZREG, 45, 4, 0, 0, 0}, /* also MOVWU */ + /* TODO: MVN C_SHIFT */ + + /* MOVs that become MOVK/MOVN/MOVZ/ADD/SUB/OR */ + {AMOVW, C_MBCON, C_NONE, C_NONE, C_ZREG, 32, 4, 0, 0, 0}, + {AMOVD, C_MBCON, C_NONE, C_NONE, C_ZREG, 32, 4, 0, 0, 0}, + {AMOVW, C_MOVCON, C_NONE, C_NONE, C_ZREG, 32, 4, 0, 0, 0}, + {AMOVD, C_MOVCON, C_NONE, C_NONE, C_ZREG, 32, 4, 0, 0, 0}, + {AMOVW, C_BITCON, C_NONE, C_NONE, C_RSP, 32, 4, 0, 0, 0}, + {AMOVD, C_BITCON, C_NONE, C_NONE, C_RSP, 32, 4, 0, 0, 0}, + {AMOVW, C_MOVCON2, C_NONE, C_NONE, C_ZREG, 12, 8, 0, NOTUSETMP, 0}, + {AMOVD, C_MOVCON2, C_NONE, C_NONE, C_ZREG, 12, 8, 0, NOTUSETMP, 0}, + {AMOVD, C_MOVCON3, C_NONE, C_NONE, C_ZREG, 12, 12, 0, NOTUSETMP, 0}, + {AMOVD, C_VCON, C_NONE, C_NONE, C_ZREG, 12, 16, 0, NOTUSETMP, 0}, + + {AMOVK, C_VCON, C_NONE, C_NONE, C_ZREG, 33, 4, 
0, 0, 0}, + {AMOVD, C_AACON, C_NONE, C_NONE, C_RSP, 4, 4, REGFROM, 0, 0}, + {AMOVD, C_AACON2, C_NONE, C_NONE, C_RSP, 4, 8, REGFROM, NOTUSETMP, 0}, + + /* load long effective stack address (load int32 offset and add) */ + {AMOVD, C_LACON, C_NONE, C_NONE, C_RSP, 34, 8, REGSP, LFROM, 0}, + + // Move a large constant to a vector register. + {AVMOVQ, C_VCON, C_NONE, C_VCON, C_VREG, 101, 4, 0, LFROM128, 0}, + {AVMOVD, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, + {AVMOVS, C_LCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, + + /* jump operations */ + {AB, C_NONE, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0}, + {ABL, C_NONE, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0}, + {AB, C_NONE, C_NONE, C_NONE, C_ZOREG, 6, 4, 0, 0, 0}, + {ABL, C_NONE, C_NONE, C_NONE, C_ZREG, 6, 4, 0, 0, 0}, + {ABL, C_NONE, C_NONE, C_NONE, C_ZOREG, 6, 4, 0, 0, 0}, + {obj.ARET, C_NONE, C_NONE, C_NONE, C_ZREG, 6, 4, 0, 0, 0}, + {obj.ARET, C_NONE, C_NONE, C_NONE, C_ZOREG, 6, 4, 0, 0, 0}, + {ABEQ, C_NONE, C_NONE, C_NONE, C_SBRA, 7, 4, 0, BRANCH19BITS, 0}, + {ACBZ, C_ZREG, C_NONE, C_NONE, C_SBRA, 39, 4, 0, BRANCH19BITS, 0}, + {ATBZ, C_VCON, C_ZREG, C_NONE, C_SBRA, 40, 4, 0, BRANCH14BITS, 0}, + {AERET, C_NONE, C_NONE, C_NONE, C_NONE, 41, 4, 0, 0, 0}, + + // get a PC-relative address + {AADRP, C_SBRA, C_NONE, C_NONE, C_ZREG, 60, 4, 0, 0, 0}, + {AADR, C_SBRA, C_NONE, C_NONE, C_ZREG, 61, 4, 0, 0, 0}, + + {ACLREX, C_NONE, C_NONE, C_NONE, C_VCON, 38, 4, 0, 0, 0}, + {ACLREX, C_NONE, C_NONE, C_NONE, C_NONE, 38, 4, 0, 0, 0}, + {ABFM, C_VCON, C_ZREG, C_VCON, C_ZREG, 42, 4, 0, 0, 0}, + {ABFI, C_VCON, C_ZREG, C_VCON, C_ZREG, 43, 4, 0, 0, 0}, + {AEXTR, C_VCON, C_ZREG, C_ZREG, C_ZREG, 44, 4, 0, 0, 0}, + {ASXTB, C_ZREG, C_NONE, C_NONE, C_ZREG, 45, 4, 0, 0, 0}, + {ACLS, C_ZREG, C_NONE, C_NONE, C_ZREG, 46, 4, 0, 0, 0}, + {ALSL, C_VCON, C_ZREG, C_NONE, C_ZREG, 8, 4, 0, 0, 0}, + {ALSL, C_VCON, C_NONE, C_NONE, C_ZREG, 8, 4, 0, 0, 0}, + {ALSL, C_ZREG, C_NONE, C_NONE, C_ZREG, 9, 4, 0, 0, 0}, + {ALSL, C_ZREG, C_ZREG, C_NONE, 
C_ZREG, 9, 4, 0, 0, 0}, + {ASVC, C_VCON, C_NONE, C_NONE, C_NONE, 10, 4, 0, 0, 0}, + {ASVC, C_NONE, C_NONE, C_NONE, C_NONE, 10, 4, 0, 0, 0}, + {ADWORD, C_NONE, C_NONE, C_NONE, C_VCON, 11, 8, 0, NOTUSETMP, 0}, + {ADWORD, C_NONE, C_NONE, C_NONE, C_LEXT, 11, 8, 0, NOTUSETMP, 0}, + {ADWORD, C_NONE, C_NONE, C_NONE, C_ADDR, 11, 8, 0, NOTUSETMP, 0}, + {ADWORD, C_NONE, C_NONE, C_NONE, C_LACON, 11, 8, 0, NOTUSETMP, 0}, + {AWORD, C_NONE, C_NONE, C_NONE, C_LCON, 14, 4, 0, 0, 0}, + {AWORD, C_NONE, C_NONE, C_NONE, C_LEXT, 14, 4, 0, 0, 0}, + {AWORD, C_NONE, C_NONE, C_NONE, C_ADDR, 14, 4, 0, 0, 0}, + {AMOVW, C_VCONADDR, C_NONE, C_NONE, C_ZREG, 68, 8, 0, NOTUSETMP, 0}, + {AMOVD, C_VCONADDR, C_NONE, C_NONE, C_ZREG, 68, 8, 0, NOTUSETMP, 0}, + {AMOVB, C_ZREG, C_NONE, C_NONE, C_ADDR, 64, 12, 0, 0, 0}, + {AMOVH, C_ZREG, C_NONE, C_NONE, C_ADDR, 64, 12, 0, 0, 0}, + {AMOVW, C_ZREG, C_NONE, C_NONE, C_ADDR, 64, 12, 0, 0, 0}, + {AMOVD, C_ZREG, C_NONE, C_NONE, C_ADDR, 64, 12, 0, 0, 0}, + {AMOVB, C_ADDR, C_NONE, C_NONE, C_ZREG, 65, 12, 0, 0, 0}, + {AMOVH, C_ADDR, C_NONE, C_NONE, C_ZREG, 65, 12, 0, 0, 0}, + {AMOVW, C_ADDR, C_NONE, C_NONE, C_ZREG, 65, 12, 0, 0, 0}, + {AMOVD, C_ADDR, C_NONE, C_NONE, C_ZREG, 65, 12, 0, 0, 0}, + {AMOVD, C_GOTADDR, C_NONE, C_NONE, C_ZREG, 71, 8, 0, 0, 0}, + {AMOVD, C_TLS_LE, C_NONE, C_NONE, C_ZREG, 69, 4, 0, 0, 0}, + {AMOVD, C_TLS_IE, C_NONE, C_NONE, C_ZREG, 70, 8, 0, 0, 0}, + + {AFMOVS, C_FREG, C_NONE, C_NONE, C_ADDR, 64, 12, 0, 0, 0}, + {AFMOVS, C_ADDR, C_NONE, C_NONE, C_FREG, 65, 12, 0, 0, 0}, + {AFMOVD, C_FREG, C_NONE, C_NONE, C_ADDR, 64, 12, 0, 0, 0}, + {AFMOVD, C_ADDR, C_NONE, C_NONE, C_FREG, 65, 12, 0, 0, 0}, + {AFMOVS, C_FCON, C_NONE, C_NONE, C_FREG, 55, 4, 0, 0, 0}, + {AFMOVS, C_FREG, C_NONE, C_NONE, C_FREG, 54, 4, 0, 0, 0}, + {AFMOVD, C_FCON, C_NONE, C_NONE, C_FREG, 55, 4, 0, 0, 0}, + {AFMOVD, C_FREG, C_NONE, C_NONE, C_FREG, 54, 4, 0, 0, 0}, + {AFMOVS, C_ZREG, C_NONE, C_NONE, C_FREG, 29, 4, 0, 0, 0}, + {AFMOVS, C_FREG, C_NONE, C_NONE, C_ZREG, 29, 4, 0, 0, 
0}, + {AFMOVD, C_ZREG, C_NONE, C_NONE, C_FREG, 29, 4, 0, 0, 0}, + {AFMOVD, C_FREG, C_NONE, C_NONE, C_ZREG, 29, 4, 0, 0, 0}, + {AFCVTZSD, C_FREG, C_NONE, C_NONE, C_ZREG, 29, 4, 0, 0, 0}, + {ASCVTFD, C_ZREG, C_NONE, C_NONE, C_FREG, 29, 4, 0, 0, 0}, + {AFCVTSD, C_FREG, C_NONE, C_NONE, C_FREG, 29, 4, 0, 0, 0}, + {AVMOV, C_ELEM, C_NONE, C_NONE, C_ZREG, 73, 4, 0, 0, 0}, + {AVMOV, C_ELEM, C_NONE, C_NONE, C_ELEM, 92, 4, 0, 0, 0}, + {AVMOV, C_ELEM, C_NONE, C_NONE, C_VREG, 80, 4, 0, 0, 0}, + {AVMOV, C_ZREG, C_NONE, C_NONE, C_ARNG, 82, 4, 0, 0, 0}, + {AVMOV, C_ZREG, C_NONE, C_NONE, C_ELEM, 78, 4, 0, 0, 0}, + {AVMOV, C_ARNG, C_NONE, C_NONE, C_ARNG, 83, 4, 0, 0, 0}, + {AVDUP, C_ELEM, C_NONE, C_NONE, C_ARNG, 79, 4, 0, 0, 0}, + {AVDUP, C_ELEM, C_NONE, C_NONE, C_VREG, 80, 4, 0, 0, 0}, + {AVDUP, C_ZREG, C_NONE, C_NONE, C_ARNG, 82, 4, 0, 0, 0}, + {AVMOVI, C_ADDCON, C_NONE, C_NONE, C_ARNG, 86, 4, 0, 0, 0}, + {AVFMLA, C_ARNG, C_ARNG, C_NONE, C_ARNG, 72, 4, 0, 0, 0}, + {AVEXT, C_VCON, C_ARNG, C_ARNG, C_ARNG, 94, 4, 0, 0, 0}, + {AVTBL, C_ARNG, C_NONE, C_LIST, C_ARNG, 100, 4, 0, 0, 0}, + {AVUSHR, C_VCON, C_ARNG, C_NONE, C_ARNG, 95, 4, 0, 0, 0}, + {AVZIP1, C_ARNG, C_ARNG, C_NONE, C_ARNG, 72, 4, 0, 0, 0}, + {AVUSHLL, C_VCON, C_ARNG, C_NONE, C_ARNG, 102, 4, 0, 0, 0}, + {AVUXTL, C_ARNG, C_NONE, C_NONE, C_ARNG, 102, 4, 0, 0, 0}, + {AVUADDW, C_ARNG, C_ARNG, C_NONE, C_ARNG, 105, 4, 0, 0, 0}, + + /* conditional operations */ + {ACSEL, C_COND, C_ZREG, C_ZREG, C_ZREG, 18, 4, 0, 0, 0}, + {ACINC, C_COND, C_ZREG, C_NONE, C_ZREG, 18, 4, 0, 0, 0}, + {ACSET, C_COND, C_NONE, C_NONE, C_ZREG, 18, 4, 0, 0, 0}, + {AFCSELD, C_COND, C_FREG, C_FREG, C_FREG, 18, 4, 0, 0, 0}, + {ACCMN, C_COND, C_ZREG, C_ZREG, C_VCON, 19, 4, 0, 0, 0}, + {ACCMN, C_COND, C_ZREG, C_VCON, C_VCON, 19, 4, 0, 0, 0}, + {AFCCMPS, C_COND, C_FREG, C_FREG, C_VCON, 57, 4, 0, 0, 0}, + + /* scaled 12-bit unsigned displacement store */ + {AMOVB, C_ZREG, C_NONE, C_NONE, C_UAUTO4K, 20, 4, REGSP, 0, 0}, + {AMOVB, C_ZREG, C_NONE, C_NONE, C_UOREG4K, 
20, 4, 0, 0, 0}, + {AMOVH, C_ZREG, C_NONE, C_NONE, C_UAUTO8K, 20, 4, REGSP, 0, 0}, + {AMOVH, C_ZREG, C_NONE, C_NONE, C_UOREG8K, 20, 4, 0, 0, 0}, + {AMOVW, C_ZREG, C_NONE, C_NONE, C_UAUTO16K, 20, 4, REGSP, 0, 0}, + {AMOVW, C_ZREG, C_NONE, C_NONE, C_UOREG16K, 20, 4, 0, 0, 0}, + {AMOVD, C_ZREG, C_NONE, C_NONE, C_UAUTO32K, 20, 4, REGSP, 0, 0}, + {AMOVD, C_ZREG, C_NONE, C_NONE, C_UOREG32K, 20, 4, 0, 0, 0}, + + {AFMOVS, C_FREG, C_NONE, C_NONE, C_UAUTO16K, 20, 4, REGSP, 0, 0}, + {AFMOVS, C_FREG, C_NONE, C_NONE, C_UOREG16K, 20, 4, 0, 0, 0}, + {AFMOVD, C_FREG, C_NONE, C_NONE, C_UAUTO32K, 20, 4, REGSP, 0, 0}, + {AFMOVD, C_FREG, C_NONE, C_NONE, C_UOREG32K, 20, 4, 0, 0, 0}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_UAUTO64K, 20, 4, REGSP, 0, 0}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_UOREG64K, 20, 4, 0, 0, 0}, + + /* unscaled 9-bit signed displacement store */ + {AMOVB, C_ZREG, C_NONE, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0}, + {AMOVB, C_ZREG, C_NONE, C_NONE, C_NSOREG, 20, 4, 0, 0, 0}, + {AMOVH, C_ZREG, C_NONE, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0}, + {AMOVH, C_ZREG, C_NONE, C_NONE, C_NSOREG, 20, 4, 0, 0, 0}, + {AMOVW, C_ZREG, C_NONE, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0}, + {AMOVW, C_ZREG, C_NONE, C_NONE, C_NSOREG, 20, 4, 0, 0, 0}, + {AMOVD, C_ZREG, C_NONE, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0}, + {AMOVD, C_ZREG, C_NONE, C_NONE, C_NSOREG, 20, 4, 0, 0, 0}, + + {AFMOVS, C_FREG, C_NONE, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0}, + {AFMOVS, C_FREG, C_NONE, C_NONE, C_NSOREG, 20, 4, 0, 0, 0}, + {AFMOVD, C_FREG, C_NONE, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0}, + {AFMOVD, C_FREG, C_NONE, C_NONE, C_NSOREG, 20, 4, 0, 0, 0}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_NSOREG, 20, 4, 0, 0, 0}, + + /* scaled 12-bit unsigned displacement load */ + {AMOVB, C_UAUTO4K, C_NONE, C_NONE, C_ZREG, 21, 4, REGSP, 0, 0}, + {AMOVB, C_UOREG4K, C_NONE, C_NONE, C_ZREG, 21, 4, 0, 0, 0}, + {AMOVH, C_UAUTO8K, C_NONE, C_NONE, C_ZREG, 21, 4, REGSP, 0, 0}, + {AMOVH, 
C_UOREG8K, C_NONE, C_NONE, C_ZREG, 21, 4, 0, 0, 0}, + {AMOVW, C_UAUTO16K, C_NONE, C_NONE, C_ZREG, 21, 4, REGSP, 0, 0}, + {AMOVW, C_UOREG16K, C_NONE, C_NONE, C_ZREG, 21, 4, 0, 0, 0}, + {AMOVD, C_UAUTO32K, C_NONE, C_NONE, C_ZREG, 21, 4, REGSP, 0, 0}, + {AMOVD, C_UOREG32K, C_NONE, C_NONE, C_ZREG, 21, 4, 0, 0, 0}, + + {AFMOVS, C_UAUTO16K, C_NONE, C_NONE, C_FREG, 21, 4, REGSP, 0, 0}, + {AFMOVS, C_UOREG16K, C_NONE, C_NONE, C_FREG, 21, 4, 0, 0, 0}, + {AFMOVD, C_UAUTO32K, C_NONE, C_NONE, C_FREG, 21, 4, REGSP, 0, 0}, + {AFMOVD, C_UOREG32K, C_NONE, C_NONE, C_FREG, 21, 4, 0, 0, 0}, + {AFMOVQ, C_UAUTO64K, C_NONE, C_NONE, C_FREG, 21, 4, REGSP, 0, 0}, + {AFMOVQ, C_UOREG64K, C_NONE, C_NONE, C_FREG, 21, 4, 0, 0, 0}, + + /* unscaled 9-bit signed displacement load */ + {AMOVB, C_NSAUTO, C_NONE, C_NONE, C_ZREG, 21, 4, REGSP, 0, 0}, + {AMOVB, C_NSOREG, C_NONE, C_NONE, C_ZREG, 21, 4, 0, 0, 0}, + {AMOVH, C_NSAUTO, C_NONE, C_NONE, C_ZREG, 21, 4, REGSP, 0, 0}, + {AMOVH, C_NSOREG, C_NONE, C_NONE, C_ZREG, 21, 4, 0, 0, 0}, + {AMOVW, C_NSAUTO, C_NONE, C_NONE, C_ZREG, 21, 4, REGSP, 0, 0}, + {AMOVW, C_NSOREG, C_NONE, C_NONE, C_ZREG, 21, 4, 0, 0, 0}, + {AMOVD, C_NSAUTO, C_NONE, C_NONE, C_ZREG, 21, 4, REGSP, 0, 0}, + {AMOVD, C_NSOREG, C_NONE, C_NONE, C_ZREG, 21, 4, 0, 0, 0}, + + {AFMOVS, C_NSAUTO, C_NONE, C_NONE, C_FREG, 21, 4, REGSP, 0, 0}, + {AFMOVS, C_NSOREG, C_NONE, C_NONE, C_FREG, 21, 4, 0, 0, 0}, + {AFMOVD, C_NSAUTO, C_NONE, C_NONE, C_FREG, 21, 4, REGSP, 0, 0}, + {AFMOVD, C_NSOREG, C_NONE, C_NONE, C_FREG, 21, 4, 0, 0, 0}, + {AFMOVQ, C_NSAUTO, C_NONE, C_NONE, C_FREG, 21, 4, REGSP, 0, 0}, + {AFMOVQ, C_NSOREG, C_NONE, C_NONE, C_FREG, 21, 4, 0, 0, 0}, + + /* long displacement store */ + {AMOVB, C_ZREG, C_NONE, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0}, + {AMOVB, C_ZREG, C_NONE, C_NONE, C_LOREG, 30, 8, 0, LTO, 0}, + {AMOVH, C_ZREG, C_NONE, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0}, + {AMOVH, C_ZREG, C_NONE, C_NONE, C_LOREG, 30, 8, 0, LTO, 0}, + {AMOVW, C_ZREG, C_NONE, C_NONE, C_LAUTO, 30, 8, REGSP, 
LTO, 0}, + {AMOVW, C_ZREG, C_NONE, C_NONE, C_LOREG, 30, 8, 0, LTO, 0}, + {AMOVD, C_ZREG, C_NONE, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0}, + {AMOVD, C_ZREG, C_NONE, C_NONE, C_LOREG, 30, 8, 0, LTO, 0}, + + {AFMOVS, C_FREG, C_NONE, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0}, + {AFMOVS, C_FREG, C_NONE, C_NONE, C_LOREG, 30, 8, 0, LTO, 0}, + {AFMOVD, C_FREG, C_NONE, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0}, + {AFMOVD, C_FREG, C_NONE, C_NONE, C_LOREG, 30, 8, 0, LTO, 0}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_LOREG, 30, 8, 0, LTO, 0}, + + /* long displacement load */ + {AMOVB, C_LAUTO, C_NONE, C_NONE, C_ZREG, 31, 8, REGSP, LFROM, 0}, + {AMOVB, C_LOREG, C_NONE, C_NONE, C_ZREG, 31, 8, 0, LFROM, 0}, + {AMOVH, C_LAUTO, C_NONE, C_NONE, C_ZREG, 31, 8, REGSP, LFROM, 0}, + {AMOVH, C_LOREG, C_NONE, C_NONE, C_ZREG, 31, 8, 0, LFROM, 0}, + {AMOVW, C_LAUTO, C_NONE, C_NONE, C_ZREG, 31, 8, REGSP, LFROM, 0}, + {AMOVW, C_LOREG, C_NONE, C_NONE, C_ZREG, 31, 8, 0, LFROM, 0}, + {AMOVD, C_LAUTO, C_NONE, C_NONE, C_ZREG, 31, 8, REGSP, LFROM, 0}, + {AMOVD, C_LOREG, C_NONE, C_NONE, C_ZREG, 31, 8, 0, LFROM, 0}, + + {AFMOVS, C_LAUTO, C_NONE, C_NONE, C_FREG, 31, 8, REGSP, LFROM, 0}, + {AFMOVS, C_LOREG, C_NONE, C_NONE, C_FREG, 31, 8, 0, LFROM, 0}, + {AFMOVD, C_LAUTO, C_NONE, C_NONE, C_FREG, 31, 8, REGSP, LFROM, 0}, + {AFMOVD, C_LOREG, C_NONE, C_NONE, C_FREG, 31, 8, 0, LFROM, 0}, + {AFMOVQ, C_LAUTO, C_NONE, C_NONE, C_FREG, 31, 8, REGSP, LFROM, 0}, + {AFMOVQ, C_LOREG, C_NONE, C_NONE, C_FREG, 31, 8, 0, LFROM, 0}, + + /* pre/post-indexed load (unscaled, signed 9-bit offset) */ + {AMOVD, C_LOREG, C_NONE, C_NONE, C_ZREG, 22, 4, 0, 0, C_XPOST}, + {AMOVW, C_LOREG, C_NONE, C_NONE, C_ZREG, 22, 4, 0, 0, C_XPOST}, + {AMOVH, C_LOREG, C_NONE, C_NONE, C_ZREG, 22, 4, 0, 0, C_XPOST}, + {AMOVB, C_LOREG, C_NONE, C_NONE, C_ZREG, 22, 4, 0, 0, C_XPOST}, + {AFMOVS, C_LOREG, C_NONE, C_NONE, C_FREG, 22, 4, 0, 0, C_XPOST}, + {AFMOVD, C_LOREG, C_NONE, C_NONE, C_FREG, 22, 
4, 0, 0, C_XPOST}, + {AFMOVQ, C_LOREG, C_NONE, C_NONE, C_FREG, 22, 4, 0, 0, C_XPOST}, + + {AMOVD, C_LOREG, C_NONE, C_NONE, C_ZREG, 22, 4, 0, 0, C_XPRE}, + {AMOVW, C_LOREG, C_NONE, C_NONE, C_ZREG, 22, 4, 0, 0, C_XPRE}, + {AMOVH, C_LOREG, C_NONE, C_NONE, C_ZREG, 22, 4, 0, 0, C_XPRE}, + {AMOVB, C_LOREG, C_NONE, C_NONE, C_ZREG, 22, 4, 0, 0, C_XPRE}, + {AFMOVS, C_LOREG, C_NONE, C_NONE, C_FREG, 22, 4, 0, 0, C_XPRE}, + {AFMOVD, C_LOREG, C_NONE, C_NONE, C_FREG, 22, 4, 0, 0, C_XPRE}, + {AFMOVQ, C_LOREG, C_NONE, C_NONE, C_FREG, 22, 4, 0, 0, C_XPRE}, + + /* pre/post-indexed store (unscaled, signed 9-bit offset) */ + {AMOVD, C_ZREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, + {AMOVW, C_ZREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, + {AMOVH, C_ZREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, + {AMOVB, C_ZREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, + {AFMOVS, C_FREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, + {AFMOVD, C_FREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, + + {AMOVD, C_ZREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, + {AMOVW, C_ZREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, + {AMOVH, C_ZREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, + {AMOVB, C_ZREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, + {AFMOVS, C_FREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, + {AFMOVD, C_FREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, + + /* load with shifted or extended register offset */ + {AMOVD, C_ROFF, C_NONE, C_NONE, C_ZREG, 98, 4, 0, 0, 0}, + {AMOVW, C_ROFF, C_NONE, C_NONE, C_ZREG, 98, 4, 0, 0, 0}, + {AMOVH, C_ROFF, C_NONE, C_NONE, C_ZREG, 98, 4, 0, 0, 0}, + {AMOVB, C_ROFF, C_NONE, C_NONE, C_ZREG, 98, 4, 0, 0, 0}, + {AFMOVS, C_ROFF, C_NONE, C_NONE, C_FREG, 98, 4, 0, 0, 0}, + {AFMOVD, C_ROFF, C_NONE, C_NONE, C_FREG, 98, 4, 0, 0, 0}, + + /* store with extended register offset */ + 
{AMOVD, C_ZREG, C_NONE, C_NONE, C_ROFF, 99, 4, 0, 0, 0}, + {AMOVW, C_ZREG, C_NONE, C_NONE, C_ROFF, 99, 4, 0, 0, 0}, + {AMOVH, C_ZREG, C_NONE, C_NONE, C_ROFF, 99, 4, 0, 0, 0}, + {AMOVB, C_ZREG, C_NONE, C_NONE, C_ROFF, 99, 4, 0, 0, 0}, + {AFMOVS, C_FREG, C_NONE, C_NONE, C_ROFF, 99, 4, 0, 0, 0}, + {AFMOVD, C_FREG, C_NONE, C_NONE, C_ROFF, 99, 4, 0, 0, 0}, + + /* pre/post-indexed/signed-offset load/store register pair + (unscaled, signed 10-bit quad-aligned and long offset). + The pre/post-indexed format only supports OREG cases because + the RSP and pseudo registers are not allowed to be modified + in this way. */ + {AFLDPQ, C_NQAUTO_16, C_NONE, C_NONE, C_PAIR, 66, 4, REGSP, 0, 0}, + {AFLDPQ, C_PQAUTO_16, C_NONE, C_NONE, C_PAIR, 66, 4, REGSP, 0, 0}, + {AFLDPQ, C_UAUTO4K, C_NONE, C_NONE, C_PAIR, 74, 8, REGSP, 0, 0}, + {AFLDPQ, C_NAUTO4K, C_NONE, C_NONE, C_PAIR, 74, 8, REGSP, 0, 0}, + {AFLDPQ, C_LAUTO, C_NONE, C_NONE, C_PAIR, 75, 12, REGSP, LFROM, 0}, + {AFLDPQ, C_NQOREG_16, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, 0}, + {AFLDPQ, C_NQOREG_16, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPRE}, + {AFLDPQ, C_NQOREG_16, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPOST}, + {AFLDPQ, C_PQOREG_16, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, 0}, + {AFLDPQ, C_PQOREG_16, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPRE}, + {AFLDPQ, C_PQOREG_16, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPOST}, + {AFLDPQ, C_UOREG4K, C_NONE, C_NONE, C_PAIR, 74, 8, 0, 0, 0}, + {AFLDPQ, C_NOREG4K, C_NONE, C_NONE, C_PAIR, 74, 8, 0, 0, 0}, + {AFLDPQ, C_LOREG, C_NONE, C_NONE, C_PAIR, 75, 12, 0, LFROM, 0}, + {AFLDPQ, C_ADDR, C_NONE, C_NONE, C_PAIR, 88, 12, 0, 0, 0}, + + {AFSTPQ, C_PAIR, C_NONE, C_NONE, C_NQAUTO_16, 67, 4, REGSP, 0, 0}, + {AFSTPQ, C_PAIR, C_NONE, C_NONE, C_PQAUTO_16, 67, 4, REGSP, 0, 0}, + {AFSTPQ, C_PAIR, C_NONE, C_NONE, C_UAUTO4K, 76, 8, REGSP, 0, 0}, + {AFSTPQ, C_PAIR, C_NONE, C_NONE, C_NAUTO4K, 76, 8, REGSP, 0, 0}, + {AFSTPQ, C_PAIR, C_NONE, C_NONE, C_LAUTO, 77, 12, REGSP, LTO, 0}, + {AFSTPQ, C_PAIR, C_NONE, 
C_NONE, C_NQOREG_16, 67, 4, 0, 0, 0}, + {AFSTPQ, C_PAIR, C_NONE, C_NONE, C_NQOREG_16, 67, 4, 0, 0, C_XPRE}, + {AFSTPQ, C_PAIR, C_NONE, C_NONE, C_NQOREG_16, 67, 4, 0, 0, C_XPOST}, + {AFSTPQ, C_PAIR, C_NONE, C_NONE, C_PQOREG_16, 67, 4, 0, 0, 0}, + {AFSTPQ, C_PAIR, C_NONE, C_NONE, C_PQOREG_16, 67, 4, 0, 0, C_XPRE}, + {AFSTPQ, C_PAIR, C_NONE, C_NONE, C_PQOREG_16, 67, 4, 0, 0, C_XPOST}, + {AFSTPQ, C_PAIR, C_NONE, C_NONE, C_UOREG4K, 76, 8, 0, 0, 0}, + {AFSTPQ, C_PAIR, C_NONE, C_NONE, C_NOREG4K, 76, 8, 0, 0, 0}, + {AFSTPQ, C_PAIR, C_NONE, C_NONE, C_LOREG, 77, 12, 0, LTO, 0}, + {AFSTPQ, C_PAIR, C_NONE, C_NONE, C_ADDR, 87, 12, 0, 0, 0}, + + {ALDP, C_NPAUTO, C_NONE, C_NONE, C_PAIR, 66, 4, REGSP, 0, 0}, + {ALDP, C_PPAUTO, C_NONE, C_NONE, C_PAIR, 66, 4, REGSP, 0, 0}, + {ALDP, C_UAUTO4K, C_NONE, C_NONE, C_PAIR, 74, 8, REGSP, 0, 0}, + {ALDP, C_NAUTO4K, C_NONE, C_NONE, C_PAIR, 74, 8, REGSP, 0, 0}, + {ALDP, C_LAUTO, C_NONE, C_NONE, C_PAIR, 75, 12, REGSP, LFROM, 0}, + {ALDP, C_NPOREG, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, 0}, + {ALDP, C_NPOREG, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPRE}, + {ALDP, C_NPOREG, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPOST}, + {ALDP, C_PPOREG, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, 0}, + {ALDP, C_PPOREG, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPRE}, + {ALDP, C_PPOREG, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPOST}, + {ALDP, C_UOREG4K, C_NONE, C_NONE, C_PAIR, 74, 8, 0, 0, 0}, + {ALDP, C_NOREG4K, C_NONE, C_NONE, C_PAIR, 74, 8, 0, 0, 0}, + {ALDP, C_LOREG, C_NONE, C_NONE, C_PAIR, 75, 12, 0, LFROM, 0}, + {ALDP, C_ADDR, C_NONE, C_NONE, C_PAIR, 88, 12, 0, 0, 0}, + + {ASTP, C_PAIR, C_NONE, C_NONE, C_NPAUTO, 67, 4, REGSP, 0, 0}, + {ASTP, C_PAIR, C_NONE, C_NONE, C_PPAUTO, 67, 4, REGSP, 0, 0}, + {ASTP, C_PAIR, C_NONE, C_NONE, C_UAUTO4K, 76, 8, REGSP, 0, 0}, + {ASTP, C_PAIR, C_NONE, C_NONE, C_NAUTO4K, 76, 8, REGSP, 0, 0}, + {ASTP, C_PAIR, C_NONE, C_NONE, C_LAUTO, 77, 12, REGSP, LTO, 0}, + {ASTP, C_PAIR, C_NONE, C_NONE, C_NPOREG, 67, 4, 0, 0, 0}, + {ASTP, C_PAIR, 
C_NONE, C_NONE, C_NPOREG, 67, 4, 0, 0, C_XPRE}, + {ASTP, C_PAIR, C_NONE, C_NONE, C_NPOREG, 67, 4, 0, 0, C_XPOST}, + {ASTP, C_PAIR, C_NONE, C_NONE, C_PPOREG, 67, 4, 0, 0, 0}, + {ASTP, C_PAIR, C_NONE, C_NONE, C_PPOREG, 67, 4, 0, 0, C_XPRE}, + {ASTP, C_PAIR, C_NONE, C_NONE, C_PPOREG, 67, 4, 0, 0, C_XPOST}, + {ASTP, C_PAIR, C_NONE, C_NONE, C_UOREG4K, 76, 8, 0, 0, 0}, + {ASTP, C_PAIR, C_NONE, C_NONE, C_NOREG4K, 76, 8, 0, 0, 0}, + {ASTP, C_PAIR, C_NONE, C_NONE, C_LOREG, 77, 12, 0, LTO, 0}, + {ASTP, C_PAIR, C_NONE, C_NONE, C_ADDR, 87, 12, 0, 0, 0}, + + // differ from LDP/STP for C_NSAUTO_4/C_PSAUTO_4/C_NSOREG_4/C_PSOREG_4 + {ALDPW, C_NSAUTO_4, C_NONE, C_NONE, C_PAIR, 66, 4, REGSP, 0, 0}, + {ALDPW, C_PSAUTO_4, C_NONE, C_NONE, C_PAIR, 66, 4, REGSP, 0, 0}, + {ALDPW, C_UAUTO4K, C_NONE, C_NONE, C_PAIR, 74, 8, REGSP, 0, 0}, + {ALDPW, C_NAUTO4K, C_NONE, C_NONE, C_PAIR, 74, 8, REGSP, 0, 0}, + {ALDPW, C_LAUTO, C_NONE, C_NONE, C_PAIR, 75, 12, REGSP, LFROM, 0}, + {ALDPW, C_NSOREG_4, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, 0}, + {ALDPW, C_NSOREG_4, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPRE}, + {ALDPW, C_NSOREG_4, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPOST}, + {ALDPW, C_PSOREG_4, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, 0}, + {ALDPW, C_PSOREG_4, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPRE}, + {ALDPW, C_PSOREG_4, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPOST}, + {ALDPW, C_UOREG4K, C_NONE, C_NONE, C_PAIR, 74, 8, 0, 0, 0}, + {ALDPW, C_NOREG4K, C_NONE, C_NONE, C_PAIR, 74, 8, 0, 0, 0}, + {ALDPW, C_LOREG, C_NONE, C_NONE, C_PAIR, 75, 12, 0, LFROM, 0}, + {ALDPW, C_ADDR, C_NONE, C_NONE, C_PAIR, 88, 12, 0, 0, 0}, + + {ASTPW, C_PAIR, C_NONE, C_NONE, C_NSAUTO_4, 67, 4, REGSP, 0, 0}, + {ASTPW, C_PAIR, C_NONE, C_NONE, C_PSAUTO_4, 67, 4, REGSP, 0, 0}, + {ASTPW, C_PAIR, C_NONE, C_NONE, C_UAUTO4K, 76, 8, REGSP, 0, 0}, + {ASTPW, C_PAIR, C_NONE, C_NONE, C_NAUTO4K, 76, 8, REGSP, 0, 0}, + {ASTPW, C_PAIR, C_NONE, C_NONE, C_LAUTO, 77, 12, REGSP, LTO, 0}, + {ASTPW, C_PAIR, C_NONE, C_NONE, C_NSOREG_4, 67, 4, 0, 
0, 0}, + {ASTPW, C_PAIR, C_NONE, C_NONE, C_NSOREG_4, 67, 4, 0, 0, C_XPRE}, + {ASTPW, C_PAIR, C_NONE, C_NONE, C_NSOREG_4, 67, 4, 0, 0, C_XPOST}, + {ASTPW, C_PAIR, C_NONE, C_NONE, C_PSOREG_4, 67, 4, 0, 0, 0}, + {ASTPW, C_PAIR, C_NONE, C_NONE, C_PSOREG_4, 67, 4, 0, 0, C_XPRE}, + {ASTPW, C_PAIR, C_NONE, C_NONE, C_PSOREG_4, 67, 4, 0, 0, C_XPOST}, + {ASTPW, C_PAIR, C_NONE, C_NONE, C_UOREG4K, 76, 8, 0, 0, 0}, + {ASTPW, C_PAIR, C_NONE, C_NONE, C_NOREG4K, 76, 8, 0, 0, 0}, + {ASTPW, C_PAIR, C_NONE, C_NONE, C_LOREG, 77, 12, 0, LTO, 0}, + {ASTPW, C_PAIR, C_NONE, C_NONE, C_ADDR, 87, 12, 0, 0, 0}, + + {ASWPD, C_ZREG, C_NONE, C_NONE, C_ZOREG, 47, 4, 0, 0, 0}, // RegTo2=C_REG + {ASWPD, C_ZREG, C_NONE, C_NONE, C_ZAUTO, 47, 4, REGSP, 0, 0}, // RegTo2=C_REG + {ACASPD, C_PAIR, C_NONE, C_NONE, C_ZOREG, 106, 4, 0, 0, 0}, // RegTo2=C_REGREG + {ACASPD, C_PAIR, C_NONE, C_NONE, C_ZAUTO, 106, 4, REGSP, 0, 0}, // RegTo2=C_REGREG + {ALDAR, C_ZOREG, C_NONE, C_NONE, C_ZREG, 58, 4, 0, 0, 0}, + {ALDXR, C_ZOREG, C_NONE, C_NONE, C_ZREG, 58, 4, 0, 0, 0}, + {ALDAXR, C_ZOREG, C_NONE, C_NONE, C_ZREG, 58, 4, 0, 0, 0}, + {ALDXP, C_ZOREG, C_NONE, C_NONE, C_PAIR, 58, 4, 0, 0, 0}, + {ASTLR, C_ZREG, C_NONE, C_NONE, C_ZOREG, 59, 4, 0, 0, 0}, // RegTo2=C_NONE + {ASTXR, C_ZREG, C_NONE, C_NONE, C_ZOREG, 59, 4, 0, 0, 0}, // RegTo2=C_REG + {ASTLXR, C_ZREG, C_NONE, C_NONE, C_ZOREG, 59, 4, 0, 0, 0}, // RegTo2=C_REG + {ASTXP, C_PAIR, C_NONE, C_NONE, C_ZOREG, 59, 4, 0, 0, 0}, + + /* VLD[1-4]/VST[1-4] */ + {AVLD1, C_ZOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, 0}, + {AVLD1, C_LOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST}, + {AVLD1, C_ROFF, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST}, + {AVLD1R, C_ZOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, 0}, + {AVLD1R, C_LOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST}, + {AVLD1R, C_ROFF, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST}, + {AVLD1, C_LOREG, C_NONE, C_NONE, C_ELEM, 97, 4, 0, 0, C_XPOST}, + {AVLD1, C_ROFF, C_NONE, C_NONE, C_ELEM, 97, 4, 0, 0, C_XPOST}, + {AVLD1, 
C_LOREG, C_NONE, C_NONE, C_ELEM, 97, 4, 0, 0, 0}, + {AVST1, C_LIST, C_NONE, C_NONE, C_ZOREG, 84, 4, 0, 0, 0}, + {AVST1, C_LIST, C_NONE, C_NONE, C_LOREG, 84, 4, 0, 0, C_XPOST}, + {AVST1, C_LIST, C_NONE, C_NONE, C_ROFF, 84, 4, 0, 0, C_XPOST}, + {AVST2, C_LIST, C_NONE, C_NONE, C_ZOREG, 84, 4, 0, 0, 0}, + {AVST2, C_LIST, C_NONE, C_NONE, C_LOREG, 84, 4, 0, 0, C_XPOST}, + {AVST2, C_LIST, C_NONE, C_NONE, C_ROFF, 84, 4, 0, 0, C_XPOST}, + {AVST3, C_LIST, C_NONE, C_NONE, C_ZOREG, 84, 4, 0, 0, 0}, + {AVST3, C_LIST, C_NONE, C_NONE, C_LOREG, 84, 4, 0, 0, C_XPOST}, + {AVST3, C_LIST, C_NONE, C_NONE, C_ROFF, 84, 4, 0, 0, C_XPOST}, + {AVST4, C_LIST, C_NONE, C_NONE, C_ZOREG, 84, 4, 0, 0, 0}, + {AVST4, C_LIST, C_NONE, C_NONE, C_LOREG, 84, 4, 0, 0, C_XPOST}, + {AVST4, C_LIST, C_NONE, C_NONE, C_ROFF, 84, 4, 0, 0, C_XPOST}, + {AVST1, C_ELEM, C_NONE, C_NONE, C_LOREG, 96, 4, 0, 0, C_XPOST}, + {AVST1, C_ELEM, C_NONE, C_NONE, C_ROFF, 96, 4, 0, 0, C_XPOST}, + {AVST1, C_ELEM, C_NONE, C_NONE, C_LOREG, 96, 4, 0, 0, 0}, + + /* special */ + {AMOVD, C_SPR, C_NONE, C_NONE, C_ZREG, 35, 4, 0, 0, 0}, + {AMRS, C_SPR, C_NONE, C_NONE, C_ZREG, 35, 4, 0, 0, 0}, + {AMOVD, C_ZREG, C_NONE, C_NONE, C_SPR, 36, 4, 0, 0, 0}, + {AMSR, C_ZREG, C_NONE, C_NONE, C_SPR, 36, 4, 0, 0, 0}, + {AMOVD, C_VCON, C_NONE, C_NONE, C_SPR, 37, 4, 0, 0, 0}, + {AMSR, C_VCON, C_NONE, C_NONE, C_SPR, 37, 4, 0, 0, 0}, + {AMSR, C_VCON, C_NONE, C_NONE, C_SPOP, 37, 4, 0, 0, 0}, + {APRFM, C_UOREG32K, C_NONE, C_NONE, C_SPOP, 91, 4, 0, 0, 0}, + {APRFM, C_UOREG32K, C_NONE, C_NONE, C_LCON, 91, 4, 0, 0, 0}, + {ADMB, C_VCON, C_NONE, C_NONE, C_NONE, 51, 4, 0, 0, 0}, + {AHINT, C_VCON, C_NONE, C_NONE, C_NONE, 52, 4, 0, 0, 0}, + {ASYS, C_VCON, C_NONE, C_NONE, C_NONE, 50, 4, 0, 0, 0}, + {ASYS, C_VCON, C_NONE, C_NONE, C_ZREG, 50, 4, 0, 0, 0}, + {ASYSL, C_VCON, C_NONE, C_NONE, C_ZREG, 50, 4, 0, 0, 0}, + {ATLBI, C_SPOP, C_NONE, C_NONE, C_NONE, 107, 4, 0, 0, 0}, + {ATLBI, C_SPOP, C_NONE, C_NONE, C_ZREG, 107, 4, 0, 0, 0}, + + /* encryption instructions */ + 
{AAESD, C_VREG, C_NONE, C_NONE, C_VREG, 29, 4, 0, 0, 0}, // for compatibility with old code + {AAESD, C_ARNG, C_NONE, C_NONE, C_ARNG, 29, 4, 0, 0, 0}, // recommend using the new one for better readability + {ASHA1C, C_VREG, C_ZREG, C_NONE, C_VREG, 1, 4, 0, 0, 0}, + {ASHA1C, C_ARNG, C_VREG, C_NONE, C_VREG, 1, 4, 0, 0, 0}, + {ASHA1H, C_VREG, C_NONE, C_NONE, C_VREG, 29, 4, 0, 0, 0}, + {ASHA1SU0, C_ARNG, C_ARNG, C_NONE, C_ARNG, 1, 4, 0, 0, 0}, + {ASHA256H, C_ARNG, C_VREG, C_NONE, C_VREG, 1, 4, 0, 0, 0}, + {AVREV32, C_ARNG, C_NONE, C_NONE, C_ARNG, 83, 4, 0, 0, 0}, + {AVPMULL, C_ARNG, C_ARNG, C_NONE, C_ARNG, 93, 4, 0, 0, 0}, + {AVEOR3, C_ARNG, C_ARNG, C_ARNG, C_ARNG, 103, 4, 0, 0, 0}, + {AVXAR, C_VCON, C_ARNG, C_ARNG, C_ARNG, 104, 4, 0, 0, 0}, + + {obj.AUNDEF, C_NONE, C_NONE, C_NONE, C_NONE, 90, 4, 0, 0, 0}, + {obj.APCDATA, C_VCON, C_NONE, C_NONE, C_VCON, 0, 0, 0, 0, 0}, + {obj.AFUNCDATA, C_VCON, C_NONE, C_NONE, C_ADDR, 0, 0, 0, 0, 0}, + {obj.ANOP, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0}, + {obj.ANOP, C_LCON, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0}, // nop variants, see #40689 + {obj.ANOP, C_ZREG, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0}, + {obj.ANOP, C_VREG, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0}, + {obj.ADUFFZERO, C_NONE, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0}, // same as AB/ABL + {obj.ADUFFCOPY, C_NONE, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0}, // same as AB/ABL + {obj.APCALIGN, C_LCON, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0}, // align code + + {obj.AXXX, C_NONE, C_NONE, C_NONE, C_NONE, 0, 4, 0, 0, 0}, +} + +// Valid pstate field values, and value to use in instruction. +// Doesn't include special registers. 
+var pstatefield = []struct { + opd SpecialOperand + enc uint32 +}{ + {SPOP_DAIFSet, 3<<16 | 4<<12 | 6<<5}, + {SPOP_DAIFClr, 3<<16 | 4<<12 | 7<<5}, +} + +var prfopfield = map[SpecialOperand]uint32{ + SPOP_PLDL1KEEP: 0, + SPOP_PLDL1STRM: 1, + SPOP_PLDL2KEEP: 2, + SPOP_PLDL2STRM: 3, + SPOP_PLDL3KEEP: 4, + SPOP_PLDL3STRM: 5, + SPOP_PLIL1KEEP: 8, + SPOP_PLIL1STRM: 9, + SPOP_PLIL2KEEP: 10, + SPOP_PLIL2STRM: 11, + SPOP_PLIL3KEEP: 12, + SPOP_PLIL3STRM: 13, + SPOP_PSTL1KEEP: 16, + SPOP_PSTL1STRM: 17, + SPOP_PSTL2KEEP: 18, + SPOP_PSTL2STRM: 19, + SPOP_PSTL3KEEP: 20, + SPOP_PSTL3STRM: 21, +} + +// sysInstFields helps convert SYS alias instructions to SYS instructions. +// For example, the format of TLBI is: TLBI <tlbi_op>{, <Xt>}. +// It's equivalent to: SYS #<op1>, C8, <Cm>, #<op2>{, <Xt>}. +// The field hasOperand2 indicates whether Xt is required. It helps to check +// some combinations that may be undefined, such as TLBI VMALLE1IS, R0. +var sysInstFields = map[SpecialOperand]struct { + op1 uint8 + cn uint8 + cm uint8 + op2 uint8 + hasOperand2 bool +}{ + // TLBI + SPOP_VMALLE1IS: {0, 8, 3, 0, false}, + SPOP_VAE1IS: {0, 8, 3, 1, true}, + SPOP_ASIDE1IS: {0, 8, 3, 2, true}, + SPOP_VAAE1IS: {0, 8, 3, 3, true}, + SPOP_VALE1IS: {0, 8, 3, 5, true}, + SPOP_VAALE1IS: {0, 8, 3, 7, true}, + SPOP_VMALLE1: {0, 8, 7, 0, false}, + SPOP_VAE1: {0, 8, 7, 1, true}, + SPOP_ASIDE1: {0, 8, 7, 2, true}, + SPOP_VAAE1: {0, 8, 7, 3, true}, + SPOP_VALE1: {0, 8, 7, 5, true}, + SPOP_VAALE1: {0, 8, 7, 7, true}, + SPOP_IPAS2E1IS: {4, 8, 0, 1, true}, + SPOP_IPAS2LE1IS: {4, 8, 0, 5, true}, + SPOP_ALLE2IS: {4, 8, 3, 0, false}, + SPOP_VAE2IS: {4, 8, 3, 1, true}, + SPOP_ALLE1IS: {4, 8, 3, 4, false}, + SPOP_VALE2IS: {4, 8, 3, 5, true}, + SPOP_VMALLS12E1IS: {4, 8, 3, 6, false}, + SPOP_IPAS2E1: {4, 8, 4, 1, true}, + SPOP_IPAS2LE1: {4, 8, 4, 5, true}, + SPOP_ALLE2: {4, 8, 7, 0, false}, + SPOP_VAE2: {4, 8, 7, 1, true}, + SPOP_ALLE1: {4, 8, 7, 4, false}, + SPOP_VALE2: {4, 8, 7, 5, true}, + SPOP_VMALLS12E1: {4, 8, 
7, 6, false}, + SPOP_ALLE3IS: {6, 8, 3, 0, false}, + SPOP_VAE3IS: {6, 8, 3, 1, true}, + SPOP_VALE3IS: {6, 8, 3, 5, true}, + SPOP_ALLE3: {6, 8, 7, 0, false}, + SPOP_VAE3: {6, 8, 7, 1, true}, + SPOP_VALE3: {6, 8, 7, 5, true}, + SPOP_VMALLE1OS: {0, 8, 1, 0, false}, + SPOP_VAE1OS: {0, 8, 1, 1, true}, + SPOP_ASIDE1OS: {0, 8, 1, 2, true}, + SPOP_VAAE1OS: {0, 8, 1, 3, true}, + SPOP_VALE1OS: {0, 8, 1, 5, true}, + SPOP_VAALE1OS: {0, 8, 1, 7, true}, + SPOP_RVAE1IS: {0, 8, 2, 1, true}, + SPOP_RVAAE1IS: {0, 8, 2, 3, true}, + SPOP_RVALE1IS: {0, 8, 2, 5, true}, + SPOP_RVAALE1IS: {0, 8, 2, 7, true}, + SPOP_RVAE1OS: {0, 8, 5, 1, true}, + SPOP_RVAAE1OS: {0, 8, 5, 3, true}, + SPOP_RVALE1OS: {0, 8, 5, 5, true}, + SPOP_RVAALE1OS: {0, 8, 5, 7, true}, + SPOP_RVAE1: {0, 8, 6, 1, true}, + SPOP_RVAAE1: {0, 8, 6, 3, true}, + SPOP_RVALE1: {0, 8, 6, 5, true}, + SPOP_RVAALE1: {0, 8, 6, 7, true}, + SPOP_RIPAS2E1IS: {4, 8, 0, 2, true}, + SPOP_RIPAS2LE1IS: {4, 8, 0, 6, true}, + SPOP_ALLE2OS: {4, 8, 1, 0, false}, + SPOP_VAE2OS: {4, 8, 1, 1, true}, + SPOP_ALLE1OS: {4, 8, 1, 4, false}, + SPOP_VALE2OS: {4, 8, 1, 5, true}, + SPOP_VMALLS12E1OS: {4, 8, 1, 6, false}, + SPOP_RVAE2IS: {4, 8, 2, 1, true}, + SPOP_RVALE2IS: {4, 8, 2, 5, true}, + SPOP_IPAS2E1OS: {4, 8, 4, 0, true}, + SPOP_RIPAS2E1: {4, 8, 4, 2, true}, + SPOP_RIPAS2E1OS: {4, 8, 4, 3, true}, + SPOP_IPAS2LE1OS: {4, 8, 4, 4, true}, + SPOP_RIPAS2LE1: {4, 8, 4, 6, true}, + SPOP_RIPAS2LE1OS: {4, 8, 4, 7, true}, + SPOP_RVAE2OS: {4, 8, 5, 1, true}, + SPOP_RVALE2OS: {4, 8, 5, 5, true}, + SPOP_RVAE2: {4, 8, 6, 1, true}, + SPOP_RVALE2: {4, 8, 6, 5, true}, + SPOP_ALLE3OS: {6, 8, 1, 0, false}, + SPOP_VAE3OS: {6, 8, 1, 1, true}, + SPOP_VALE3OS: {6, 8, 1, 5, true}, + SPOP_RVAE3IS: {6, 8, 2, 1, true}, + SPOP_RVALE3IS: {6, 8, 2, 5, true}, + SPOP_RVAE3OS: {6, 8, 5, 1, true}, + SPOP_RVALE3OS: {6, 8, 5, 5, true}, + SPOP_RVAE3: {6, 8, 6, 1, true}, + SPOP_RVALE3: {6, 8, 6, 5, true}, + // DC + SPOP_IVAC: {0, 7, 6, 1, true}, + SPOP_ISW: {0, 7, 6, 2, true}, + SPOP_CSW: 
{0, 7, 10, 2, true}, + SPOP_CISW: {0, 7, 14, 2, true}, + SPOP_ZVA: {3, 7, 4, 1, true}, + SPOP_CVAC: {3, 7, 10, 1, true}, + SPOP_CVAU: {3, 7, 11, 1, true}, + SPOP_CIVAC: {3, 7, 14, 1, true}, + SPOP_IGVAC: {0, 7, 6, 3, true}, + SPOP_IGSW: {0, 7, 6, 4, true}, + SPOP_IGDVAC: {0, 7, 6, 5, true}, + SPOP_IGDSW: {0, 7, 6, 6, true}, + SPOP_CGSW: {0, 7, 10, 4, true}, + SPOP_CGDSW: {0, 7, 10, 6, true}, + SPOP_CIGSW: {0, 7, 14, 4, true}, + SPOP_CIGDSW: {0, 7, 14, 6, true}, + SPOP_GVA: {3, 7, 4, 3, true}, + SPOP_GZVA: {3, 7, 4, 4, true}, + SPOP_CGVAC: {3, 7, 10, 3, true}, + SPOP_CGDVAC: {3, 7, 10, 5, true}, + SPOP_CGVAP: {3, 7, 12, 3, true}, + SPOP_CGDVAP: {3, 7, 12, 5, true}, + SPOP_CGVADP: {3, 7, 13, 3, true}, + SPOP_CGDVADP: {3, 7, 13, 5, true}, + SPOP_CIGVAC: {3, 7, 14, 3, true}, + SPOP_CIGDVAC: {3, 7, 14, 5, true}, + SPOP_CVAP: {3, 7, 12, 1, true}, + SPOP_CVADP: {3, 7, 13, 1, true}, +} + +// Used for padinng NOOP instruction +const OP_NOOP = 0xd503201f + +// align code to a certain length by padding bytes. +func pcAlignPadLength(pc int64, alignedValue int64, ctxt *obj.Link) int { + if !((alignedValue&(alignedValue-1) == 0) && 8 <= alignedValue && alignedValue <= 2048) { + ctxt.Diag("alignment value of an instruction must be a power of two and in the range [8, 2048], got %d\n", alignedValue) + } + return int(-pc & (alignedValue - 1)) +} + +// size returns the size of the sequence of machine instructions when p is encoded with o. +// Usually it just returns o.size directly, in some cases it checks whether the optimization +// conditions are met, and if so returns the size of the optimized instruction sequence. +// These optimizations need to be synchronized with the asmout function. +func (o *Optab) size(ctxt *obj.Link, p *obj.Prog) int { + // Optimize adrp+add+ld/st to adrp+ld/st(offset). 
+ sz := movesize(p.As) + if sz != -1 { + // Relocations R_AARCH64_LDST{64,32,16,8}_ABS_LO12_NC can only generate 8-byte, 4-byte, + // 2-byte and 1-byte aligned addresses, so the address of load/store must be aligned. + // Also symbols with prefix of "go:string." are Go strings, which will go into + // the symbol table, their addresses are not necessary aligned, rule this out. + align := int64(1 << sz) + if o.a1 == C_ADDR && p.From.Offset%align == 0 && !strings.HasPrefix(p.From.Sym.Name, "go:string.") || + o.a4 == C_ADDR && p.To.Offset%align == 0 && !strings.HasPrefix(p.To.Sym.Name, "go:string.") { + return 8 + } + } + return int(o.size_) +} + +func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { + if ctxt.Retpoline { + ctxt.Diag("-spectre=ret not supported on arm64") + ctxt.Retpoline = false // don't keep printing + } + + p := cursym.Func().Text + if p == nil || p.Link == nil { // handle external functions and ELF section symbols + return + } + + if oprange[AAND&obj.AMask] == nil { + ctxt.Diag("arm64 ops not initialized, call arm64.buildop first") + } + + c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset & 0xffffffff), extrasize: int32(p.To.Offset >> 32)} + p.To.Offset &= 0xffffffff // extrasize is no longer needed + + bflag := 1 + pc := int64(0) + p.Pc = pc + var m int + var o *Optab + for p = p.Link; p != nil; p = p.Link { + p.Pc = pc + o = c.oplook(p) + m = o.size(c.ctxt, p) + if m == 0 { + switch p.As { + case obj.APCALIGN: + alignedValue := p.From.Offset + m = pcAlignPadLength(pc, alignedValue, ctxt) + // Update the current text symbol alignment value. 
+ if int32(alignedValue) > cursym.Func().Align { + cursym.Func().Align = int32(alignedValue) + } + break + case obj.ANOP, obj.AFUNCDATA, obj.APCDATA: + continue + default: + c.ctxt.Diag("zero-width instruction\n%v", p) + } + } + pc += int64(m) + + if o.flag&LFROM != 0 { + c.addpool(p, &p.From) + } + if o.flag&LFROM128 != 0 { + c.addpool128(p, &p.From, p.GetFrom3()) + } + if o.flag<O != 0 { + c.addpool(p, &p.To) + } + if c.blitrl != nil { + c.checkpool(p) + } + } + + c.cursym.Size = pc + + /* + * if any procedure is large enough to + * generate a large SBRA branch, then + * generate extra passes putting branches + * around jmps to fix. this is rare. + */ + for bflag != 0 { + bflag = 0 + pc = 0 + for p = c.cursym.Func().Text.Link; p != nil; p = p.Link { + p.Pc = pc + o = c.oplook(p) + + /* very large branches */ + if (o.flag&BRANCH14BITS != 0 || o.flag&BRANCH19BITS != 0) && p.To.Target() != nil { + otxt := p.To.Target().Pc - pc + var toofar bool + if o.flag&BRANCH14BITS != 0 { // branch instruction encodes 14 bits + toofar = otxt <= -(1<<15)+10 || otxt >= (1<<15)-10 + } else if o.flag&BRANCH19BITS != 0 { // branch instruction encodes 19 bits + toofar = otxt <= -(1<<20)+10 || otxt >= (1<<20)-10 + } + if toofar { + q := c.newprog() + q.Link = p.Link + p.Link = q + q.As = AB + q.To.Type = obj.TYPE_BRANCH + q.To.SetTarget(p.To.Target()) + p.To.SetTarget(q) + q = c.newprog() + q.Link = p.Link + p.Link = q + q.As = AB + q.To.Type = obj.TYPE_BRANCH + q.To.SetTarget(q.Link.Link) + bflag = 1 + } + } + m = o.size(c.ctxt, p) + + if m == 0 { + switch p.As { + case obj.APCALIGN: + alignedValue := p.From.Offset + m = pcAlignPadLength(pc, alignedValue, ctxt) + break + case obj.ANOP, obj.AFUNCDATA, obj.APCDATA: + continue + default: + c.ctxt.Diag("zero-width instruction\n%v", p) + } + } + + pc += int64(m) + } + } + + pc += -pc & (funcAlign - 1) + c.cursym.Size = pc + + /* + * lay out the code, emitting code and data relocations. 
+ */ + c.cursym.Grow(c.cursym.Size) + bp := c.cursym.P + psz := int32(0) + var i int + var out [6]uint32 + for p := c.cursym.Func().Text.Link; p != nil; p = p.Link { + c.pc = p.Pc + o = c.oplook(p) + sz := o.size(c.ctxt, p) + if sz > 4*len(out) { + log.Fatalf("out array in span7 is too small, need at least %d for %v", sz/4, p) + } + if p.As == obj.APCALIGN { + alignedValue := p.From.Offset + v := pcAlignPadLength(p.Pc, alignedValue, c.ctxt) + for i = 0; i < int(v/4); i++ { + // emit ANOOP instruction by the padding size + c.ctxt.Arch.ByteOrder.PutUint32(bp, OP_NOOP) + bp = bp[4:] + psz += 4 + } + } else { + c.asmout(p, o, out[:]) + for i = 0; i < sz/4; i++ { + c.ctxt.Arch.ByteOrder.PutUint32(bp, out[i]) + bp = bp[4:] + psz += 4 + } + } + } + + // Mark nonpreemptible instruction sequences. + // We use REGTMP as a scratch register during call injection, + // so instruction sequences that use REGTMP are unsafe to + // preempt asynchronously. + obj.MarkUnsafePoints(c.ctxt, c.cursym.Func().Text, c.newprog, c.isUnsafePoint, c.isRestartable) + + // Now that we know byte offsets, we can generate jump table entries. + for _, jt := range cursym.Func().JumpTables { + for i, p := range jt.Targets { + // The ith jumptable entry points to the p.Pc'th + // byte in the function symbol s. + // TODO: try using relative PCs. + jt.Sym.WriteAddr(ctxt, int64(i)*8, 8, cursym, p.Pc) + } + } +} + +// isUnsafePoint returns whether p is an unsafe point. +func (c *ctxt7) isUnsafePoint(p *obj.Prog) bool { + // If p explicitly uses REGTMP, it's unsafe to preempt, because the + // preemption sequence clobbers REGTMP. + return p.From.Reg == REGTMP || p.To.Reg == REGTMP || p.Reg == REGTMP || + p.From.Type == obj.TYPE_REGREG && p.From.Offset == REGTMP || + p.To.Type == obj.TYPE_REGREG && p.To.Offset == REGTMP +} + +// isRestartable returns whether p is a multi-instruction sequence that, +// if preempted, can be restarted. 
+func (c *ctxt7) isRestartable(p *obj.Prog) bool { + if c.isUnsafePoint(p) { + return false + } + // If p is a multi-instruction sequence with uses REGTMP inserted by + // the assembler in order to materialize a large constant/offset, we + // can restart p (at the start of the instruction sequence), recompute + // the content of REGTMP, upon async preemption. Currently, all cases + // of assembler-inserted REGTMP fall into this category. + // If p doesn't use REGTMP, it can be simply preempted, so we don't + // mark it. + o := c.oplook(p) + return o.size(c.ctxt, p) > 4 && o.flag&NOTUSETMP == 0 +} + +/* + * when the first reference to the literal pool threatens + * to go out of range of a 1Mb PC-relative offset + * drop the pool now. + */ +func (c *ctxt7) checkpool(p *obj.Prog) { + // If the pool is going to go out of range or p is the last instruction of the function, + // flush the pool. + if c.pool.size >= 0xffff0 || !ispcdisp(int32(p.Pc+4+int64(c.pool.size)-int64(c.pool.start)+8)) || p.Link == nil { + c.flushpool(p) + } +} + +func (c *ctxt7) flushpool(p *obj.Prog) { + // Needs to insert a branch before flushing the pool. + // We don't need the jump if following an unconditional branch. + // TODO: other unconditional operations. + if !(p.As == AB || p.As == obj.ARET || p.As == AERET) { + if c.ctxt.Debugvlog { + fmt.Printf("note: flush literal pool at %#x: len=%d ref=%x\n", uint64(p.Pc+4), c.pool.size, c.pool.start) + } + q := c.newprog() + if p.Link == nil { + // If p is the last instruction of the function, insert an UNDEF instruction in case the + // exection fall through to the pool. + q.As = obj.AUNDEF + } else { + // Else insert a branch to the next instruction of p. + q.As = AB + q.To.Type = obj.TYPE_BRANCH + q.To.SetTarget(p.Link) + } + q.Link = c.blitrl + q.Pos = p.Pos + c.blitrl = q + } + + // The line number for constant pool entries doesn't really matter. 
+ // We set it to the line number of the preceding instruction so that + // there are no deltas to encode in the pc-line tables. + for q := c.blitrl; q != nil; q = q.Link { + q.Pos = p.Pos + } + + c.elitrl.Link = p.Link + p.Link = c.blitrl + + c.blitrl = nil /* BUG: should refer back to values until out-of-range */ + c.elitrl = nil + c.pool.size = 0 + c.pool.start = 0 +} + +// addpool128 adds a 128-bit constant to literal pool by two consecutive DWORD +// instructions, the 128-bit constant is formed by ah.Offset<<64+al.Offset. +func (c *ctxt7) addpool128(p *obj.Prog, al, ah *obj.Addr) { + q := c.newprog() + q.As = ADWORD + q.To.Type = obj.TYPE_CONST + q.To.Offset = al.Offset // q.Pc is lower than t.Pc, so al.Offset is stored in q. + + t := c.newprog() + t.As = ADWORD + t.To.Type = obj.TYPE_CONST + t.To.Offset = ah.Offset + + q.Link = t + + if c.blitrl == nil { + c.blitrl = q + c.pool.start = uint32(p.Pc) + } else { + c.elitrl.Link = q + } + + c.elitrl = t + c.pool.size = roundUp(c.pool.size, 16) + c.pool.size += 16 + p.Pool = q +} + +/* + * MOVD foo(SB), R is actually + * MOVD addr, REGTMP + * MOVD REGTMP, R + * where addr is the address of the DWORD containing the address of foo. + * + * TODO: hash + */ +func (c *ctxt7) addpool(p *obj.Prog, a *obj.Addr) { + cls := c.aclass(a) + lit := c.instoffset + t := c.newprog() + t.As = AWORD + sz := 4 + + if a.Type == obj.TYPE_CONST { + if (lit != int64(int32(lit)) && uint64(lit) != uint64(uint32(lit))) || p.As == AVMOVQ || p.As == AVMOVD { + // out of range -0x80000000 ~ 0xffffffff or VMOVQ or VMOVD operand, must store 64-bit. + t.As = ADWORD + sz = 8 + } // else store 32-bit + } else if p.As == AMOVD && a.Type != obj.TYPE_MEM || cls == C_ADDR || cls == C_VCON || lit != int64(int32(lit)) || uint64(lit) != uint64(uint32(lit)) { + // conservative: don't know if we want signed or unsigned extension. 
+ // in case of ambiguity, store 64-bit + t.As = ADWORD + sz = 8 + } + + t.To.Type = obj.TYPE_CONST + t.To.Offset = lit + + for q := c.blitrl; q != nil; q = q.Link { /* could hash on t.t0.offset */ + if q.To == t.To { + p.Pool = q + return + } + } + + if c.blitrl == nil { + c.blitrl = t + c.pool.start = uint32(p.Pc) + } else { + c.elitrl.Link = t + } + c.elitrl = t + if t.As == ADWORD { + // make DWORD 8-byte aligned, this is not required by ISA, + // just to avoid performance penalties when loading from + // the constant pool across a cache line. + c.pool.size = roundUp(c.pool.size, 8) + } + c.pool.size += uint32(sz) + p.Pool = t +} + +// roundUp rounds up x to "to". +func roundUp(x, to uint32) uint32 { + if to == 0 || to&(to-1) != 0 { + log.Fatalf("rounded up to a value that is not a power of 2: %d\n", to) + } + return (x + to - 1) &^ (to - 1) +} + +func (c *ctxt7) regoff(a *obj.Addr) uint32 { + c.instoffset = 0 + c.aclass(a) + return uint32(c.instoffset) +} + +func isSTLXRop(op obj.As) bool { + switch op { + case ASTLXR, ASTLXRW, ASTLXRB, ASTLXRH, + ASTXR, ASTXRW, ASTXRB, ASTXRH: + return true + } + return false +} + +func isSTXPop(op obj.As) bool { + switch op { + case ASTXP, ASTLXP, ASTXPW, ASTLXPW: + return true + } + return false +} + +func isANDop(op obj.As) bool { + switch op { + case AAND, AORR, AEOR, AANDS, ATST, + ABIC, AEON, AORN, ABICS: + return true + } + return false +} + +func isANDWop(op obj.As) bool { + switch op { + case AANDW, AORRW, AEORW, AANDSW, ATSTW, + ABICW, AEONW, AORNW, ABICSW: + return true + } + return false +} + +func isADDop(op obj.As) bool { + switch op { + case AADD, AADDS, ASUB, ASUBS, ACMN, ACMP: + return true + } + return false +} + +func isADDWop(op obj.As) bool { + switch op { + case AADDW, AADDSW, ASUBW, ASUBSW, ACMNW, ACMPW: + return true + } + return false +} + +func isADDSop(op obj.As) bool { + switch op { + case AADDS, AADDSW, ASUBS, ASUBSW: + return true + } + return false +} + +func isNEGop(op obj.As) bool { + switch 
op { + case ANEG, ANEGW, ANEGS, ANEGSW: + return true + } + return false +} + +func isRegShiftOrExt(a *obj.Addr) bool { + return (a.Index-obj.RBaseARM64)®_EXT != 0 || (a.Index-obj.RBaseARM64)®_LSL != 0 +} + +// Maximum PC-relative displacement. +// The actual limit is ±2²⁰, but we are conservative +// to avoid needing to recompute the literal pool flush points +// as span-dependent jumps are enlarged. +const maxPCDisp = 512 * 1024 + +// ispcdisp reports whether v is a valid PC-relative displacement. +func ispcdisp(v int32) bool { + return -maxPCDisp < v && v < maxPCDisp && v&3 == 0 +} + +func isaddcon(v int64) bool { + /* uimm12 or uimm24? */ + if v < 0 { + return false + } + if (v & 0xFFF) == 0 { + v >>= 12 + } + return v <= 0xFFF +} + +func isaddcon2(v int64) bool { + return 0 <= v && v <= 0xFFFFFF +} + +// isbitcon reports whether a constant can be encoded into a logical instruction. +// bitcon has a binary form of repetition of a bit sequence of length 2, 4, 8, 16, 32, or 64, +// which itself is a rotate (w.r.t. the length of the unit) of a sequence of ones. +// special cases: 0 and -1 are not bitcon. +// this function needs to run against virtually all the constants, so it needs to be fast. +// for this reason, bitcon testing and bitcon encoding are separate functions. 
func isbitcon(x uint64) bool {
	// All-zeros and all-ones are never encodable.
	if x == 0 || x == 1<<64-1 {
		return false
	}

	// Find the repetition period and sign-extend one unit of the
	// pattern to 64 bits. A period of 64 needs no adjustment.
	unit := x
	if x == x>>32|x<<32 {
		if x != x>>16|x<<48 {
			// period is 32
			unit = uint64(int64(int32(x)))
		} else if x != x>>8|x<<56 {
			// period is 16
			unit = uint64(int64(int16(x)))
		} else if x != x>>4|x<<60 {
			// period is 8
			unit = uint64(int64(int8(x)))
		} else {
			// period is 4 or 2, always true
			// 0001, 0010, 0100, 1000 -- 0001 rotate
			// 0011, 0110, 1100, 1001 -- 0011 rotate
			// 0111, 1011, 1101, 1110 -- 0111 rotate
			// 0101, 1010             -- 01   rotate, repeat
			return true
		}
	}

	// The unit is acceptable when either it or its complement is a
	// single contiguous run of ones.
	return sequenceOfOnes(unit) || sequenceOfOnes(^unit)
}

// sequenceOfOnes reports whether the set bits of x form one contiguous run,
// possibly surrounded by leading and trailing zeros.
func sequenceOfOnes(x uint64) bool {
	// Adding the lowest set bit to x carries through the bottom run of ones;
	// x is good iff the sum has at most one bit set.
	lowest := x & -x
	sum := x + lowest
	return sum&(sum-1) == 0
}

// bitconEncode returns the encoding of a bitcon used in logical instructions
// x is known to be a bitcon
// a bitcon is a sequence of n ones at low bits (i.e. 1<<n-1), right rotated
// by R bits, and repeated with period of 64, 32, 16, 8, 4, or 2.
+// it is encoded in logical instructions with 3 bitfields +// N (1 bit) : R (6 bits) : S (6 bits), where +// N=1 -- period=64 +// N=0, S=0xxxxx -- period=32 +// N=0, S=10xxxx -- period=16 +// N=0, S=110xxx -- period=8 +// N=0, S=1110xx -- period=4 +// N=0, S=11110x -- period=2 +// R is the shift amount, low bits of S = n-1 +func bitconEncode(x uint64, mode int) uint32 { + if mode == 32 { + x &= 0xffffffff + x = x<<32 | x + } + var period uint32 + // determine the period and sign-extend a unit to 64 bits + switch { + case x != x>>32|x<<32: + period = 64 + case x != x>>16|x<<48: + period = 32 + x = uint64(int64(int32(x))) + case x != x>>8|x<<56: + period = 16 + x = uint64(int64(int16(x))) + case x != x>>4|x<<60: + period = 8 + x = uint64(int64(int8(x))) + case x != x>>2|x<<62: + period = 4 + x = uint64(int64(x<<60) >> 60) + default: + period = 2 + x = uint64(int64(x<<62) >> 62) + } + neg := false + if int64(x) < 0 { + x = ^x + neg = true + } + y := x & -x // lowest set bit of x. + s := log2(y) + n := log2(x+y) - s // x (or ^x) is a sequence of n ones left shifted by s bits + if neg { + // ^x is a sequence of n ones left shifted by s bits + // adjust n, s for x + s = n + s + n = period - n + } + + N := uint32(0) + if mode == 64 && period == 64 { + N = 1 + } + R := (period - s) & (period - 1) & uint32(mode-1) // shift amount of right rotate + S := (n - 1) | 63&^(period<<1-1) // low bits = #ones - 1, high bits encodes period + return N<<22 | R<<16 | S<<10 +} + +func log2(x uint64) uint32 { + if x == 0 { + panic("log2 of 0") + } + n := uint32(0) + if x >= 1<<32 { + x >>= 32 + n += 32 + } + if x >= 1<<16 { + x >>= 16 + n += 16 + } + if x >= 1<<8 { + x >>= 8 + n += 8 + } + if x >= 1<<4 { + x >>= 4 + n += 4 + } + if x >= 1<<2 { + x >>= 2 + n += 2 + } + if x >= 1<<1 { + x >>= 1 + n += 1 + } + return n +} + +func autoclass(l int64) int { + if l == 0 { + return C_ZAUTO + } + + if l < 0 { + if l >= -256 && (l&15) == 0 { + return C_NSAUTO_16 + } + if l >= -256 && (l&7) == 0 { + 
return C_NSAUTO_8 + } + if l >= -256 && (l&3) == 0 { + return C_NSAUTO_4 + } + if l >= -256 { + return C_NSAUTO + } + if l >= -512 && (l&15) == 0 { + return C_NPAUTO_16 + } + if l >= -512 && (l&7) == 0 { + return C_NPAUTO + } + if l >= -1024 && (l&15) == 0 { + return C_NQAUTO_16 + } + if l >= -4095 { + return C_NAUTO4K + } + return C_LAUTO + } + + if l <= 255 { + if (l & 15) == 0 { + return C_PSAUTO_16 + } + if (l & 7) == 0 { + return C_PSAUTO_8 + } + if (l & 3) == 0 { + return C_PSAUTO_4 + } + return C_PSAUTO + } + if l <= 504 { + if l&15 == 0 { + return C_PPAUTO_16 + } + if l&7 == 0 { + return C_PPAUTO + } + } + if l <= 1008 { + if l&15 == 0 { + return C_PQAUTO_16 + } + } + if l <= 4095 { + if l&15 == 0 { + return C_UAUTO4K_16 + } + if l&7 == 0 { + return C_UAUTO4K_8 + } + if l&3 == 0 { + return C_UAUTO4K_4 + } + if l&1 == 0 { + return C_UAUTO4K_2 + } + return C_UAUTO4K + } + if l <= 8190 { + if l&15 == 0 { + return C_UAUTO8K_16 + } + if l&7 == 0 { + return C_UAUTO8K_8 + } + if l&3 == 0 { + return C_UAUTO8K_4 + } + if l&1 == 0 { + return C_UAUTO8K + } + } + if l <= 16380 { + if l&15 == 0 { + return C_UAUTO16K_16 + } + if l&7 == 0 { + return C_UAUTO16K_8 + } + if l&3 == 0 { + return C_UAUTO16K + } + } + if l <= 32760 { + if l&15 == 0 { + return C_UAUTO32K_16 + } + if l&7 == 0 { + return C_UAUTO32K + } + } + if l <= 65520 && (l&15) == 0 { + return C_UAUTO64K + } + return C_LAUTO +} + +func oregclass(l int64) int { + return autoclass(l) - C_ZAUTO + C_ZOREG +} + +/* + * given an offset v and a class c (see above) + * return the offset value to use in the instruction, + * scaled if necessary + */ +func (c *ctxt7) offsetshift(p *obj.Prog, v int64, cls int) int64 { + s := 0 + if cls >= C_SEXT1 && cls <= C_SEXT16 { + s = cls - C_SEXT1 + } else { + switch cls { + case C_UAUTO4K, C_UOREG4K, C_ZOREG: + s = 0 + case C_UAUTO8K, C_UOREG8K: + s = 1 + case C_UAUTO16K, C_UOREG16K: + s = 2 + case C_UAUTO32K, C_UOREG32K: + s = 3 + case C_UAUTO64K, C_UOREG64K: + s = 4 + default: + 
c.ctxt.Diag("bad class: %v\n%v", DRconv(cls), p) + } + } + vs := v >> uint(s) + if vs<<uint(s) != v { + c.ctxt.Diag("odd offset: %d\n%v", v, p) + } + return vs +} + +/* + * if v contains a single 16-bit value aligned + * on a 16-bit field, and thus suitable for movk/movn, + * return the field index 0 to 3; otherwise return -1. + */ +func movcon(v int64) int { + for s := 0; s < 64; s += 16 { + if (uint64(v) &^ (uint64(0xFFFF) << uint(s))) == 0 { + return s / 16 + } + } + return -1 +} + +func rclass(r int16) int { + switch { + case REG_R0 <= r && r <= REG_R30: // not 31 + return C_REG + case r == REGZERO: + return C_ZREG + case REG_F0 <= r && r <= REG_F31: + return C_FREG + case REG_V0 <= r && r <= REG_V31: + return C_VREG + case r == REGSP: + return C_RSP + case r >= REG_ARNG && r < REG_ELEM: + return C_ARNG + case r >= REG_ELEM && r < REG_ELEM_END: + return C_ELEM + case r >= REG_UXTB && r < REG_SPECIAL, + r >= REG_LSL && r < REG_ARNG: + return C_EXTREG + case r >= REG_SPECIAL: + return C_SPR + } + return C_GOK +} + +// con32class reclassifies the constant of 32-bit instruction. Because the constant type is 32-bit, +// but saved in Offset which type is int64, con32class treats it as uint32 type and reclassifies it. +func (c *ctxt7) con32class(a *obj.Addr) int { + v := uint32(a.Offset) + // For 32-bit instruction with constant, rewrite + // the high 32-bit to be a repetition of the low + // 32-bit, so that the BITCON test can be shared + // for both 32-bit and 64-bit. 32-bit ops will + // zero the high 32-bit of the destination register + // anyway. 
+ vbitcon := uint64(v)<<32 | uint64(v) + if v == 0 { + return C_ZCON + } + if isaddcon(int64(v)) { + if v <= 0xFFF { + if isbitcon(vbitcon) { + return C_ABCON0 + } + return C_ADDCON0 + } + if isbitcon(vbitcon) { + return C_ABCON + } + if movcon(int64(v)) >= 0 { + return C_AMCON + } + if movcon(int64(^v)) >= 0 { + return C_AMCON + } + return C_ADDCON + } + + t := movcon(int64(v)) + if t >= 0 { + if isbitcon(vbitcon) { + return C_MBCON + } + return C_MOVCON + } + + t = movcon(int64(^v)) + if t >= 0 { + if isbitcon(vbitcon) { + return C_MBCON + } + return C_MOVCON + } + + if isbitcon(vbitcon) { + return C_BITCON + } + + if 0 <= v && v <= 0xffffff { + return C_ADDCON2 + } + return C_LCON +} + +// con64class reclassifies the constant of C_VCON and C_LCON class. +func (c *ctxt7) con64class(a *obj.Addr) int { + zeroCount := 0 + negCount := 0 + for i := uint(0); i < 4; i++ { + immh := uint32(a.Offset >> (i * 16) & 0xffff) + if immh == 0 { + zeroCount++ + } else if immh == 0xffff { + negCount++ + } + } + if zeroCount >= 3 || negCount >= 3 { + return C_MOVCON + } else if zeroCount == 2 || negCount == 2 { + return C_MOVCON2 + } else if zeroCount == 1 || negCount == 1 { + return C_MOVCON3 + } else { + return C_VCON + } +} + +func (c *ctxt7) aclass(a *obj.Addr) int { + switch a.Type { + case obj.TYPE_NONE: + return C_NONE + + case obj.TYPE_REG: + return rclass(a.Reg) + + case obj.TYPE_REGREG: + return C_PAIR + + case obj.TYPE_SHIFT: + return C_SHIFT + + case obj.TYPE_REGLIST: + return C_LIST + + case obj.TYPE_MEM: + // The base register should be an integer register. 
+ if int16(REG_F0) <= a.Reg && a.Reg <= int16(REG_V31) { + break + } + switch a.Name { + case obj.NAME_EXTERN, obj.NAME_STATIC: + if a.Sym == nil { + break + } + c.instoffset = a.Offset + if a.Sym != nil { // use relocation + if a.Sym.Type == objabi.STLSBSS { + if c.ctxt.Flag_shared { + return C_TLS_IE + } else { + return C_TLS_LE + } + } + return C_ADDR + } + return C_LEXT + + case obj.NAME_GOTREF: + return C_GOTADDR + + case obj.NAME_AUTO: + if a.Reg == REGSP { + // unset base register for better printing, since + // a.Offset is still relative to pseudo-SP. + a.Reg = obj.REG_NONE + } + // The frame top 8 or 16 bytes are for FP + c.instoffset = int64(c.autosize) + a.Offset - int64(c.extrasize) + return autoclass(c.instoffset) + + case obj.NAME_PARAM: + if a.Reg == REGSP { + // unset base register for better printing, since + // a.Offset is still relative to pseudo-FP. + a.Reg = obj.REG_NONE + } + c.instoffset = int64(c.autosize) + a.Offset + 8 + return autoclass(c.instoffset) + + case obj.NAME_NONE: + if a.Index != 0 { + if a.Offset != 0 { + if isRegShiftOrExt(a) { + // extended or shifted register offset, (Rn)(Rm.UXTW<<2) or (Rn)(Rm<<2). 
+ return C_ROFF + } + return C_GOK + } + // register offset, (Rn)(Rm) + return C_ROFF + } + c.instoffset = a.Offset + return oregclass(c.instoffset) + } + return C_GOK + + case obj.TYPE_FCONST: + return C_FCON + + case obj.TYPE_TEXTSIZE: + return C_TEXTSIZE + + case obj.TYPE_CONST, obj.TYPE_ADDR: + switch a.Name { + case obj.NAME_NONE: + c.instoffset = a.Offset + if a.Reg != 0 && a.Reg != REGZERO { + break + } + v := c.instoffset + if v == 0 { + return C_ZCON + } + if isaddcon(v) { + if v <= 0xFFF { + if isbitcon(uint64(v)) { + return C_ABCON0 + } + return C_ADDCON0 + } + if isbitcon(uint64(v)) { + return C_ABCON + } + if movcon(v) >= 0 { + return C_AMCON + } + if movcon(^v) >= 0 { + return C_AMCON + } + return C_ADDCON + } + + t := movcon(v) + if t >= 0 { + if isbitcon(uint64(v)) { + return C_MBCON + } + return C_MOVCON + } + + t = movcon(^v) + if t >= 0 { + if isbitcon(uint64(v)) { + return C_MBCON + } + return C_MOVCON + } + + if isbitcon(uint64(v)) { + return C_BITCON + } + + if 0 <= v && v <= 0xffffff { + return C_ADDCON2 + } + + if uint64(v) == uint64(uint32(v)) || v == int64(int32(v)) { + return C_LCON + } + return C_VCON + + case obj.NAME_EXTERN, obj.NAME_STATIC: + if a.Sym == nil { + return C_GOK + } + if a.Sym.Type == objabi.STLSBSS { + c.ctxt.Diag("taking address of TLS variable is not supported") + } + c.instoffset = a.Offset + return C_VCONADDR + + case obj.NAME_AUTO: + if a.Reg == REGSP { + // unset base register for better printing, since + // a.Offset is still relative to pseudo-SP. + a.Reg = obj.REG_NONE + } + // The frame top 8 or 16 bytes are for FP + c.instoffset = int64(c.autosize) + a.Offset - int64(c.extrasize) + + case obj.NAME_PARAM: + if a.Reg == REGSP { + // unset base register for better printing, since + // a.Offset is still relative to pseudo-FP. 
+ a.Reg = obj.REG_NONE + } + c.instoffset = int64(c.autosize) + a.Offset + 8 + default: + return C_GOK + } + cf := c.instoffset + if isaddcon(cf) || isaddcon(-cf) { + return C_AACON + } + if isaddcon2(cf) { + return C_AACON2 + } + + return C_LACON + + case obj.TYPE_BRANCH: + return C_SBRA + + case obj.TYPE_SPECIAL: + opd := SpecialOperand(a.Offset) + if SPOP_EQ <= opd && opd <= SPOP_NV { + return C_COND + } + return C_SPOP + } + return C_GOK +} + +func oclass(a *obj.Addr) int { + return int(a.Class) - 1 +} + +func (c *ctxt7) oplook(p *obj.Prog) *Optab { + a1 := int(p.Optab) + if a1 != 0 { + return &optab[a1-1] + } + a1 = int(p.From.Class) + if a1 == 0 { + a0 := c.aclass(&p.From) + // do not break C_ADDCON2 when S bit is set + if (p.As == AADDS || p.As == AADDSW || p.As == ASUBS || p.As == ASUBSW) && a0 == C_ADDCON2 { + a0 = C_LCON + } + a1 = a0 + 1 + p.From.Class = int8(a1) + if p.From.Type == obj.TYPE_CONST && p.From.Name == obj.NAME_NONE { + if p.As == AMOVW || isADDWop(p.As) || isANDWop(p.As) { + // For 32-bit instruction with constant, we need to + // treat its offset value as 32 bits to classify it. 
+ ra0 := c.con32class(&p.From) + // do not break C_ADDCON2 when S bit is set + if (p.As == AADDSW || p.As == ASUBSW) && ra0 == C_ADDCON2 { + ra0 = C_LCON + } + a1 = ra0 + 1 + p.From.Class = int8(a1) + } + if ((p.As == AMOVD) || isANDop(p.As) || isADDop(p.As)) && (a0 == C_LCON || a0 == C_VCON) { + // more specific classification of 64-bit integers + a1 = c.con64class(&p.From) + 1 + p.From.Class = int8(a1) + } + } + } + + a1-- + a3 := C_NONE + 1 + if p.GetFrom3() != nil && p.RestArgs[0].Pos == 0 { + a3 = int(p.GetFrom3().Class) + if a3 == 0 { + a3 = c.aclass(p.GetFrom3()) + 1 + p.GetFrom3().Class = int8(a3) + } + } + + a3-- + a4 := int(p.To.Class) + if a4 == 0 { + a4 = c.aclass(&p.To) + 1 + p.To.Class = int8(a4) + } + + a4-- + a2 := C_NONE + if p.Reg != 0 { + a2 = rclass(p.Reg) + } + + if false { + fmt.Printf("oplook %v %d %d %d %d\n", p.As, a1, a2, a3, a4) + fmt.Printf("\t\t%d %d\n", p.From.Type, p.To.Type) + } + + ops := oprange[p.As&obj.AMask] + c1 := &xcmp[a1] + c2 := &xcmp[a2] + c3 := &xcmp[a3] + c4 := &xcmp[a4] + c5 := &xcmp[p.Scond>>5] + for i := range ops { + op := &ops[i] + if (int(op.a2) == a2 || c2[op.a2]) && c5[op.scond>>5] && c1[op.a1] && c3[op.a3] && c4[op.a4] { + p.Optab = uint16(cap(optab) - cap(ops) + i + 1) + return op + } + } + + c.ctxt.Diag("illegal combination: %v %v %v %v %v, %d %d", p, DRconv(a1), DRconv(a2), DRconv(a3), DRconv(a4), p.From.Type, p.To.Type) + // Turn illegal instruction into an UNDEF, avoid crashing in asmout + return &Optab{obj.AUNDEF, C_NONE, C_NONE, C_NONE, C_NONE, 90, 4, 0, 0, 0} +} + +func cmp(a int, b int) bool { + if a == b { + return true + } + switch a { + case C_RSP: + if b == C_REG { + return true + } + + case C_ZREG: + if b == C_REG { + return true + } + + case C_ADDCON0: + if b == C_ZCON || b == C_ABCON0 { + return true + } + + case C_ADDCON: + if b == C_ZCON || b == C_ABCON0 || b == C_ADDCON0 || b == C_ABCON || b == C_AMCON { + return true + } + + case C_MBCON: + if b == C_ABCON0 { + return true + } + + case 
C_BITCON: + if b == C_ABCON0 || b == C_ABCON || b == C_MBCON { + return true + } + + case C_MOVCON: + if b == C_MBCON || b == C_ZCON || b == C_ADDCON0 || b == C_ABCON0 || b == C_AMCON { + return true + } + + case C_ADDCON2: + if b == C_ZCON || b == C_ADDCON || b == C_ADDCON0 { + return true + } + + case C_LCON: + if b == C_ZCON || b == C_BITCON || b == C_ADDCON || b == C_ADDCON0 || b == C_ABCON || b == C_ABCON0 || b == C_MBCON || b == C_MOVCON || b == C_ADDCON2 || b == C_AMCON { + return true + } + + case C_MOVCON2: + return cmp(C_LCON, b) + + case C_VCON: + return cmp(C_LCON, b) + + case C_LACON: + if b == C_AACON || b == C_AACON2 { + return true + } + + case C_SEXT2: + if b == C_SEXT1 { + return true + } + + case C_SEXT4: + if b == C_SEXT1 || b == C_SEXT2 { + return true + } + + case C_SEXT8: + if b >= C_SEXT1 && b <= C_SEXT4 { + return true + } + + case C_SEXT16: + if b >= C_SEXT1 && b <= C_SEXT8 { + return true + } + + case C_LEXT: + if b >= C_SEXT1 && b <= C_SEXT16 { + return true + } + + case C_NSAUTO_8: + if b == C_NSAUTO_16 { + return true + } + + case C_NSAUTO_4: + if b == C_NSAUTO_16 || b == C_NSAUTO_8 { + return true + } + + case C_NSAUTO: + switch b { + case C_NSAUTO_4, C_NSAUTO_8, C_NSAUTO_16: + return true + } + + case C_NPAUTO_16: + switch b { + case C_NSAUTO_16: + return true + } + + case C_NPAUTO: + switch b { + case C_NSAUTO_16, C_NSAUTO_8, C_NPAUTO_16: + return true + } + + case C_NQAUTO_16: + switch b { + case C_NSAUTO_16, C_NPAUTO_16: + return true + } + + case C_NAUTO4K: + switch b { + case C_NSAUTO_16, C_NSAUTO_8, C_NSAUTO_4, C_NSAUTO, C_NPAUTO_16, + C_NPAUTO, C_NQAUTO_16: + return true + } + + case C_PSAUTO_16: + if b == C_ZAUTO { + return true + } + + case C_PSAUTO_8: + if b == C_ZAUTO || b == C_PSAUTO_16 { + return true + } + + case C_PSAUTO_4: + switch b { + case C_ZAUTO, C_PSAUTO_16, C_PSAUTO_8: + return true + } + + case C_PSAUTO: + switch b { + case C_ZAUTO, C_PSAUTO_16, C_PSAUTO_8, C_PSAUTO_4: + return true + } + + case C_PPAUTO_16: + 
switch b { + case C_ZAUTO, C_PSAUTO_16: + return true + } + + case C_PPAUTO: + switch b { + case C_ZAUTO, C_PSAUTO_16, C_PSAUTO_8, C_PPAUTO_16: + return true + } + + case C_PQAUTO_16: + switch b { + case C_ZAUTO, C_PSAUTO_16, C_PPAUTO_16: + return true + } + + case C_UAUTO4K: + switch b { + case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, C_PSAUTO_16, + C_PPAUTO, C_PPAUTO_16, C_PQAUTO_16, + C_UAUTO4K_2, C_UAUTO4K_4, C_UAUTO4K_8, C_UAUTO4K_16: + return true + } + + case C_UAUTO8K: + switch b { + case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, C_PSAUTO_16, + C_PPAUTO, C_PPAUTO_16, C_PQAUTO_16, + C_UAUTO4K_2, C_UAUTO4K_4, C_UAUTO4K_8, C_UAUTO4K_16, + C_UAUTO8K_4, C_UAUTO8K_8, C_UAUTO8K_16: + return true + } + + case C_UAUTO16K: + switch b { + case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, C_PSAUTO_16, + C_PPAUTO, C_PPAUTO_16, C_PQAUTO_16, + C_UAUTO4K_4, C_UAUTO4K_8, C_UAUTO4K_16, + C_UAUTO8K_4, C_UAUTO8K_8, C_UAUTO8K_16, + C_UAUTO16K_8, C_UAUTO16K_16: + return true + } + + case C_UAUTO32K: + switch b { + case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, C_PSAUTO_16, + C_PPAUTO, C_PPAUTO_16, C_PQAUTO_16, + C_UAUTO4K_8, C_UAUTO4K_16, + C_UAUTO8K_8, C_UAUTO8K_16, + C_UAUTO16K_8, C_UAUTO16K_16, + C_UAUTO32K_16: + return true + } + + case C_UAUTO64K: + switch b { + case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, C_PSAUTO_16, + C_PPAUTO_16, C_PQAUTO_16, C_UAUTO4K_16, C_UAUTO8K_16, C_UAUTO16K_16, + C_UAUTO32K_16: + return true + } + + case C_LAUTO: + switch b { + case C_ZAUTO, C_NSAUTO, C_NSAUTO_4, C_NSAUTO_8, C_NSAUTO_16, C_NPAUTO_16, C_NPAUTO, C_NQAUTO_16, C_NAUTO4K, + C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, C_PSAUTO_16, + C_PPAUTO, C_PPAUTO_16, C_PQAUTO_16, + C_UAUTO4K, C_UAUTO4K_2, C_UAUTO4K_4, C_UAUTO4K_8, C_UAUTO4K_16, + C_UAUTO8K, C_UAUTO8K_4, C_UAUTO8K_8, C_UAUTO8K_16, + C_UAUTO16K, C_UAUTO16K_8, C_UAUTO16K_16, + C_UAUTO32K, C_UAUTO32K_16, + C_UAUTO64K: + return true + } + + case C_NSOREG_8: + if b == C_NSOREG_16 { + return true + } + + case C_NSOREG_4: + if b == C_NSOREG_8 
|| b == C_NSOREG_16 { + return true + } + + case C_NSOREG: + switch b { + case C_NSOREG_4, C_NSOREG_8, C_NSOREG_16: + return true + } + + case C_NPOREG_16: + switch b { + case C_NSOREG_16: + return true + } + + case C_NPOREG: + switch b { + case C_NSOREG_16, C_NSOREG_8, C_NPOREG_16: + return true + } + + case C_NQOREG_16: + switch b { + case C_NSOREG_16, C_NPOREG_16: + return true + } + + case C_NOREG4K: + switch b { + case C_NSOREG_16, C_NSOREG_8, C_NSOREG_4, C_NSOREG, C_NPOREG_16, C_NPOREG, C_NQOREG_16: + return true + } + + case C_PSOREG_16: + if b == C_ZOREG { + return true + } + + case C_PSOREG_8: + if b == C_ZOREG || b == C_PSOREG_16 { + return true + } + + case C_PSOREG_4: + switch b { + case C_ZOREG, C_PSOREG_16, C_PSOREG_8: + return true + } + + case C_PSOREG: + switch b { + case C_ZOREG, C_PSOREG_16, C_PSOREG_8, C_PSOREG_4: + return true + } + + case C_PPOREG_16: + switch b { + case C_ZOREG, C_PSOREG_16: + return true + } + + case C_PPOREG: + switch b { + case C_ZOREG, C_PSOREG_16, C_PSOREG_8, C_PPOREG_16: + return true + } + + case C_PQOREG_16: + switch b { + case C_ZOREG, C_PSOREG_16, C_PPOREG_16: + return true + } + + case C_UOREG4K: + switch b { + case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8, C_PSOREG_16, + C_PPOREG, C_PPOREG_16, C_PQOREG_16, + C_UOREG4K_2, C_UOREG4K_4, C_UOREG4K_8, C_UOREG4K_16: + return true + } + + case C_UOREG8K: + switch b { + case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8, C_PSOREG_16, + C_PPOREG, C_PPOREG_16, C_PQOREG_16, + C_UOREG4K_2, C_UOREG4K_4, C_UOREG4K_8, C_UOREG4K_16, + C_UOREG8K_4, C_UOREG8K_8, C_UOREG8K_16: + return true + } + + case C_UOREG16K: + switch b { + case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8, C_PSOREG_16, + C_PPOREG, C_PPOREG_16, C_PQOREG_16, + C_UOREG4K_4, C_UOREG4K_8, C_UOREG4K_16, + C_UOREG8K_4, C_UOREG8K_8, C_UOREG8K_16, + C_UOREG16K_8, C_UOREG16K_16: + return true + } + + case C_UOREG32K: + switch b { + case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8, C_PSOREG_16, + C_PPOREG, C_PPOREG_16, 
C_PQOREG_16, + C_UOREG4K_8, C_UOREG4K_16, + C_UOREG8K_8, C_UOREG8K_16, + C_UOREG16K_8, C_UOREG16K_16, + C_UOREG32K_16: + return true + } + + case C_UOREG64K: + switch b { + case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8, C_PSOREG_16, + C_PPOREG_16, C_PQOREG_16, C_UOREG4K_16, C_UOREG8K_16, C_UOREG16K_16, + C_UOREG32K_16: + return true + } + + case C_LOREG: + switch b { + case C_ZOREG, C_NSOREG, C_NSOREG_4, C_NSOREG_8, C_NSOREG_16, C_NPOREG, C_NPOREG_16, C_NQOREG_16, C_NOREG4K, + C_PSOREG, C_PSOREG_4, C_PSOREG_8, C_PSOREG_16, + C_PPOREG, C_PPOREG_16, C_PQOREG_16, + C_UOREG4K, C_UOREG4K_2, C_UOREG4K_4, C_UOREG4K_8, C_UOREG4K_16, + C_UOREG8K, C_UOREG8K_4, C_UOREG8K_8, C_UOREG8K_16, + C_UOREG16K, C_UOREG16K_8, C_UOREG16K_16, + C_UOREG32K, C_UOREG32K_16, + C_UOREG64K: + return true + } + + case C_LBRA: + if b == C_SBRA { + return true + } + } + + return false +} + +type ocmp []Optab + +func (x ocmp) Len() int { + return len(x) +} + +func (x ocmp) Swap(i, j int) { + x[i], x[j] = x[j], x[i] +} + +func (x ocmp) Less(i, j int) bool { + p1 := &x[i] + p2 := &x[j] + if p1.as != p2.as { + return p1.as < p2.as + } + if p1.a1 != p2.a1 { + return p1.a1 < p2.a1 + } + if p1.a2 != p2.a2 { + return p1.a2 < p2.a2 + } + if p1.a3 != p2.a3 { + return p1.a3 < p2.a3 + } + if p1.a4 != p2.a4 { + return p1.a4 < p2.a4 + } + if p1.scond != p2.scond { + return p1.scond < p2.scond + } + return false +} + +func oprangeset(a obj.As, t []Optab) { + oprange[a&obj.AMask] = t +} + +func buildop(ctxt *obj.Link) { + if oprange[AAND&obj.AMask] != nil { + // Already initialized; stop now. + // This happens in the cmd/asm tests, + // each of which re-initializes the arch. 
+ return + } + + var n int + for i := 0; i < C_GOK; i++ { + for n = 0; n < C_GOK; n++ { + if cmp(n, i) { + xcmp[i][n] = true + } + } + } + for n = 0; optab[n].as != obj.AXXX; n++ { + } + sort.Sort(ocmp(optab[:n])) + for i := 0; i < n; i++ { + r := optab[i].as + start := i + for optab[i].as == r { + i++ + } + t := optab[start:i] + i-- + oprangeset(r, t) + switch r { + default: + ctxt.Diag("unknown op in build: %v", r) + ctxt.DiagFlush() + log.Fatalf("bad code") + + case AADD: + oprangeset(AADDS, t) + oprangeset(ASUB, t) + oprangeset(ASUBS, t) + oprangeset(AADDW, t) + oprangeset(AADDSW, t) + oprangeset(ASUBW, t) + oprangeset(ASUBSW, t) + + case AAND: /* logical immediate, logical shifted register */ + oprangeset(AANDW, t) + oprangeset(AEOR, t) + oprangeset(AEORW, t) + oprangeset(AORR, t) + oprangeset(AORRW, t) + oprangeset(ABIC, t) + oprangeset(ABICW, t) + oprangeset(AEON, t) + oprangeset(AEONW, t) + oprangeset(AORN, t) + oprangeset(AORNW, t) + + case AANDS: /* logical immediate, logical shifted register, set flags, cannot target RSP */ + oprangeset(AANDSW, t) + oprangeset(ABICS, t) + oprangeset(ABICSW, t) + + case ANEG: + oprangeset(ANEGS, t) + oprangeset(ANEGSW, t) + oprangeset(ANEGW, t) + + case AADC: /* rn=Rd */ + oprangeset(AADCW, t) + + oprangeset(AADCS, t) + oprangeset(AADCSW, t) + oprangeset(ASBC, t) + oprangeset(ASBCW, t) + oprangeset(ASBCS, t) + oprangeset(ASBCSW, t) + + case ANGC: /* rn=REGZERO */ + oprangeset(ANGCW, t) + + oprangeset(ANGCS, t) + oprangeset(ANGCSW, t) + + case ACMP: + oprangeset(ACMPW, t) + oprangeset(ACMN, t) + oprangeset(ACMNW, t) + + case ATST: + oprangeset(ATSTW, t) + + /* register/register, and shifted */ + case AMVN: + oprangeset(AMVNW, t) + + case AMOVK: + oprangeset(AMOVKW, t) + oprangeset(AMOVN, t) + oprangeset(AMOVNW, t) + oprangeset(AMOVZ, t) + oprangeset(AMOVZW, t) + + case ASWPD: + for i := range atomicLDADD { + oprangeset(i, t) + } + for i := range atomicSWP { + if i == ASWPD { + continue + } + oprangeset(i, t) + } + + case 
ACASPD: + oprangeset(ACASPW, t) + case ABEQ: + oprangeset(ABNE, t) + oprangeset(ABCS, t) + oprangeset(ABHS, t) + oprangeset(ABCC, t) + oprangeset(ABLO, t) + oprangeset(ABMI, t) + oprangeset(ABPL, t) + oprangeset(ABVS, t) + oprangeset(ABVC, t) + oprangeset(ABHI, t) + oprangeset(ABLS, t) + oprangeset(ABGE, t) + oprangeset(ABLT, t) + oprangeset(ABGT, t) + oprangeset(ABLE, t) + + case ALSL: + oprangeset(ALSLW, t) + oprangeset(ALSR, t) + oprangeset(ALSRW, t) + oprangeset(AASR, t) + oprangeset(AASRW, t) + oprangeset(AROR, t) + oprangeset(ARORW, t) + + case ACLS: + oprangeset(ACLSW, t) + oprangeset(ACLZ, t) + oprangeset(ACLZW, t) + oprangeset(ARBIT, t) + oprangeset(ARBITW, t) + oprangeset(AREV, t) + oprangeset(AREVW, t) + oprangeset(AREV16, t) + oprangeset(AREV16W, t) + oprangeset(AREV32, t) + + case ASDIV: + oprangeset(ASDIVW, t) + oprangeset(AUDIV, t) + oprangeset(AUDIVW, t) + oprangeset(ACRC32B, t) + oprangeset(ACRC32CB, t) + oprangeset(ACRC32CH, t) + oprangeset(ACRC32CW, t) + oprangeset(ACRC32CX, t) + oprangeset(ACRC32H, t) + oprangeset(ACRC32W, t) + oprangeset(ACRC32X, t) + + case AMADD: + oprangeset(AMADDW, t) + oprangeset(AMSUB, t) + oprangeset(AMSUBW, t) + oprangeset(ASMADDL, t) + oprangeset(ASMSUBL, t) + oprangeset(AUMADDL, t) + oprangeset(AUMSUBL, t) + + case AREM: + oprangeset(AREMW, t) + oprangeset(AUREM, t) + oprangeset(AUREMW, t) + + case AMUL: + oprangeset(AMULW, t) + oprangeset(AMNEG, t) + oprangeset(AMNEGW, t) + oprangeset(ASMNEGL, t) + oprangeset(ASMULL, t) + oprangeset(ASMULH, t) + oprangeset(AUMNEGL, t) + oprangeset(AUMULH, t) + oprangeset(AUMULL, t) + + case AMOVB: + oprangeset(AMOVBU, t) + + case AMOVH: + oprangeset(AMOVHU, t) + + case AMOVW: + oprangeset(AMOVWU, t) + + case ABFM: + oprangeset(ABFMW, t) + oprangeset(ASBFM, t) + oprangeset(ASBFMW, t) + oprangeset(AUBFM, t) + oprangeset(AUBFMW, t) + + case ABFI: + oprangeset(ABFIW, t) + oprangeset(ABFXIL, t) + oprangeset(ABFXILW, t) + oprangeset(ASBFIZ, t) + oprangeset(ASBFIZW, t) + oprangeset(ASBFX, 
t) + oprangeset(ASBFXW, t) + oprangeset(AUBFIZ, t) + oprangeset(AUBFIZW, t) + oprangeset(AUBFX, t) + oprangeset(AUBFXW, t) + + case AEXTR: + oprangeset(AEXTRW, t) + + case ASXTB: + oprangeset(ASXTBW, t) + oprangeset(ASXTH, t) + oprangeset(ASXTHW, t) + oprangeset(ASXTW, t) + oprangeset(AUXTB, t) + oprangeset(AUXTH, t) + oprangeset(AUXTW, t) + oprangeset(AUXTBW, t) + oprangeset(AUXTHW, t) + + case ACCMN: + oprangeset(ACCMNW, t) + oprangeset(ACCMP, t) + oprangeset(ACCMPW, t) + + case ACSEL: + oprangeset(ACSELW, t) + oprangeset(ACSINC, t) + oprangeset(ACSINCW, t) + oprangeset(ACSINV, t) + oprangeset(ACSINVW, t) + oprangeset(ACSNEG, t) + oprangeset(ACSNEGW, t) + + case ACINC: + // aliases Rm=Rn, !cond + oprangeset(ACINCW, t) + oprangeset(ACINV, t) + oprangeset(ACINVW, t) + oprangeset(ACNEG, t) + oprangeset(ACNEGW, t) + + // aliases, Rm=Rn=REGZERO, !cond + case ACSET: + oprangeset(ACSETW, t) + + oprangeset(ACSETM, t) + oprangeset(ACSETMW, t) + + case AMOVD, + AB, + ABL, + AWORD, + ADWORD, + obj.ARET, + obj.ATEXT: + break + + case AFLDPQ: + break + case AFSTPQ: + break + case ALDP: + oprangeset(AFLDPD, t) + + case ASTP: + oprangeset(AFSTPD, t) + + case ASTPW: + oprangeset(AFSTPS, t) + + case ALDPW: + oprangeset(ALDPSW, t) + oprangeset(AFLDPS, t) + + case AERET: + oprangeset(AWFE, t) + oprangeset(AWFI, t) + oprangeset(AYIELD, t) + oprangeset(ASEV, t) + oprangeset(ASEVL, t) + oprangeset(ANOOP, t) + oprangeset(ADRPS, t) + + case ACBZ: + oprangeset(ACBZW, t) + oprangeset(ACBNZ, t) + oprangeset(ACBNZW, t) + + case ATBZ: + oprangeset(ATBNZ, t) + + case AADR, AADRP: + break + + case ACLREX: + break + + case ASVC: + oprangeset(AHVC, t) + oprangeset(AHLT, t) + oprangeset(ASMC, t) + oprangeset(ABRK, t) + oprangeset(ADCPS1, t) + oprangeset(ADCPS2, t) + oprangeset(ADCPS3, t) + + case AFADDS: + oprangeset(AFADDD, t) + oprangeset(AFSUBS, t) + oprangeset(AFSUBD, t) + oprangeset(AFMULS, t) + oprangeset(AFMULD, t) + oprangeset(AFNMULS, t) + oprangeset(AFNMULD, t) + oprangeset(AFDIVS, t) + 
oprangeset(AFMAXD, t) + oprangeset(AFMAXS, t) + oprangeset(AFMIND, t) + oprangeset(AFMINS, t) + oprangeset(AFMAXNMD, t) + oprangeset(AFMAXNMS, t) + oprangeset(AFMINNMD, t) + oprangeset(AFMINNMS, t) + oprangeset(AFDIVD, t) + + case AFMSUBD: + oprangeset(AFMSUBS, t) + oprangeset(AFMADDS, t) + oprangeset(AFMADDD, t) + oprangeset(AFNMSUBS, t) + oprangeset(AFNMSUBD, t) + oprangeset(AFNMADDS, t) + oprangeset(AFNMADDD, t) + + case AFCVTSD: + oprangeset(AFCVTDS, t) + oprangeset(AFABSD, t) + oprangeset(AFABSS, t) + oprangeset(AFNEGD, t) + oprangeset(AFNEGS, t) + oprangeset(AFSQRTD, t) + oprangeset(AFSQRTS, t) + oprangeset(AFRINTNS, t) + oprangeset(AFRINTND, t) + oprangeset(AFRINTPS, t) + oprangeset(AFRINTPD, t) + oprangeset(AFRINTMS, t) + oprangeset(AFRINTMD, t) + oprangeset(AFRINTZS, t) + oprangeset(AFRINTZD, t) + oprangeset(AFRINTAS, t) + oprangeset(AFRINTAD, t) + oprangeset(AFRINTXS, t) + oprangeset(AFRINTXD, t) + oprangeset(AFRINTIS, t) + oprangeset(AFRINTID, t) + oprangeset(AFCVTDH, t) + oprangeset(AFCVTHS, t) + oprangeset(AFCVTHD, t) + oprangeset(AFCVTSH, t) + + case AFCMPS: + oprangeset(AFCMPD, t) + oprangeset(AFCMPES, t) + oprangeset(AFCMPED, t) + + case AFCCMPS: + oprangeset(AFCCMPD, t) + oprangeset(AFCCMPES, t) + oprangeset(AFCCMPED, t) + + case AFCSELD: + oprangeset(AFCSELS, t) + + case AFMOVQ, AFMOVD, AFMOVS, + AVMOVQ, AVMOVD, AVMOVS: + break + + case AFCVTZSD: + oprangeset(AFCVTZSDW, t) + oprangeset(AFCVTZSS, t) + oprangeset(AFCVTZSSW, t) + oprangeset(AFCVTZUD, t) + oprangeset(AFCVTZUDW, t) + oprangeset(AFCVTZUS, t) + oprangeset(AFCVTZUSW, t) + + case ASCVTFD: + oprangeset(ASCVTFS, t) + oprangeset(ASCVTFWD, t) + oprangeset(ASCVTFWS, t) + oprangeset(AUCVTFD, t) + oprangeset(AUCVTFS, t) + oprangeset(AUCVTFWD, t) + oprangeset(AUCVTFWS, t) + + case ASYS: + oprangeset(AAT, t) + oprangeset(AIC, t) + + case ATLBI: + oprangeset(ADC, t) + + case ASYSL, AHINT: + break + + case ADMB: + oprangeset(ADSB, t) + oprangeset(AISB, t) + + case AMRS, AMSR: + break + + case ALDAR: 
+ oprangeset(ALDARW, t) + oprangeset(ALDARB, t) + oprangeset(ALDARH, t) + fallthrough + + case ALDXR: + oprangeset(ALDXRB, t) + oprangeset(ALDXRH, t) + oprangeset(ALDXRW, t) + + case ALDAXR: + oprangeset(ALDAXRB, t) + oprangeset(ALDAXRH, t) + oprangeset(ALDAXRW, t) + + case ALDXP: + oprangeset(ALDXPW, t) + oprangeset(ALDAXP, t) + oprangeset(ALDAXPW, t) + + case ASTLR: + oprangeset(ASTLRB, t) + oprangeset(ASTLRH, t) + oprangeset(ASTLRW, t) + + case ASTXR: + oprangeset(ASTXRB, t) + oprangeset(ASTXRH, t) + oprangeset(ASTXRW, t) + + case ASTLXR: + oprangeset(ASTLXRB, t) + oprangeset(ASTLXRH, t) + oprangeset(ASTLXRW, t) + + case ASTXP: + oprangeset(ASTLXP, t) + oprangeset(ASTLXPW, t) + oprangeset(ASTXPW, t) + + case AVADDP: + oprangeset(AVAND, t) + oprangeset(AVCMEQ, t) + oprangeset(AVORR, t) + oprangeset(AVEOR, t) + oprangeset(AVBSL, t) + oprangeset(AVBIT, t) + oprangeset(AVCMTST, t) + oprangeset(AVUMAX, t) + oprangeset(AVUMIN, t) + oprangeset(AVUZP1, t) + oprangeset(AVUZP2, t) + oprangeset(AVBIF, t) + + case AVADD: + oprangeset(AVSUB, t) + oprangeset(AVRAX1, t) + + case AAESD: + oprangeset(AAESE, t) + oprangeset(AAESMC, t) + oprangeset(AAESIMC, t) + oprangeset(ASHA1SU1, t) + oprangeset(ASHA256SU0, t) + oprangeset(ASHA512SU0, t) + + case ASHA1C: + oprangeset(ASHA1P, t) + oprangeset(ASHA1M, t) + + case ASHA256H: + oprangeset(ASHA256H2, t) + oprangeset(ASHA512H, t) + oprangeset(ASHA512H2, t) + + case ASHA1SU0: + oprangeset(ASHA256SU1, t) + oprangeset(ASHA512SU1, t) + + case AVADDV: + oprangeset(AVUADDLV, t) + + case AVFMLA: + oprangeset(AVFMLS, t) + + case AVPMULL: + oprangeset(AVPMULL2, t) + + case AVUSHR: + oprangeset(AVSHL, t) + oprangeset(AVSRI, t) + oprangeset(AVSLI, t) + oprangeset(AVUSRA, t) + + case AVREV32: + oprangeset(AVCNT, t) + oprangeset(AVRBIT, t) + oprangeset(AVREV64, t) + oprangeset(AVREV16, t) + + case AVZIP1: + oprangeset(AVZIP2, t) + oprangeset(AVTRN1, t) + oprangeset(AVTRN2, t) + + case AVUXTL: + oprangeset(AVUXTL2, t) + + case AVUSHLL: + 
oprangeset(AVUSHLL2, t) + + case AVLD1R: + oprangeset(AVLD2, t) + oprangeset(AVLD2R, t) + oprangeset(AVLD3, t) + oprangeset(AVLD3R, t) + oprangeset(AVLD4, t) + oprangeset(AVLD4R, t) + + case AVEOR3: + oprangeset(AVBCAX, t) + + case AVUADDW: + oprangeset(AVUADDW2, t) + + case AVTBL: + oprangeset(AVTBX, t) + + case ASHA1H, + AVCNT, + AVMOV, + AVLD1, + AVST1, + AVST2, + AVST3, + AVST4, + AVDUP, + AVMOVI, + APRFM, + AVEXT, + AVXAR: + break + + case obj.ANOP, + obj.AUNDEF, + obj.AFUNCDATA, + obj.APCALIGN, + obj.APCDATA, + obj.ADUFFZERO, + obj.ADUFFCOPY: + break + } + } +} + +// chipfloat7() checks if the immediate constants available in FMOVS/FMOVD instructions. +// For details of the range of constants available, see +// http://infocenter.arm.com/help/topic/com.arm.doc.dui0473m/dom1359731199385.html. +func (c *ctxt7) chipfloat7(e float64) int { + ei := math.Float64bits(e) + l := uint32(int32(ei)) + h := uint32(int32(ei >> 32)) + + if l != 0 || h&0xffff != 0 { + return -1 + } + h1 := h & 0x7fc00000 + if h1 != 0x40000000 && h1 != 0x3fc00000 { + return -1 + } + n := 0 + + // sign bit (a) + if h&0x80000000 != 0 { + n |= 1 << 7 + } + + // exp sign bit (b) + if h1 == 0x3fc00000 { + n |= 1 << 6 + } + + // rest of exp and mantissa (cd-efgh) + n |= int((h >> 16) & 0x3f) + + //print("match %.8lux %.8lux %d\n", l, h, n); + return n +} + +/* form offset parameter to SYS; special register number */ +func SYSARG5(op0 int, op1 int, Cn int, Cm int, op2 int) int { + return op0<<19 | op1<<16 | Cn<<12 | Cm<<8 | op2<<5 +} + +func SYSARG4(op1 int, Cn int, Cm int, op2 int) int { + return SYSARG5(0, op1, Cn, Cm, op2) +} + +// checkUnpredictable checks if the source and transfer registers are the same register. +// ARM64 manual says it is "constrained unpredictable" if the src and dst registers of STP/LDP are same. 
+func (c *ctxt7) checkUnpredictable(p *obj.Prog, isload bool, wback bool, rn int16, rt1 int16, rt2 int16) { + if wback && rn != REGSP && (rn == rt1 || rn == rt2) { + c.ctxt.Diag("constrained unpredictable behavior: %v", p) + } + if isload && rt1 == rt2 { + c.ctxt.Diag("constrained unpredictable behavior: %v", p) + } +} + +/* checkindex checks if index >= 0 && index <= maxindex */ +func (c *ctxt7) checkindex(p *obj.Prog, index, maxindex int) { + if index < 0 || index > maxindex { + c.ctxt.Diag("register element index out of range 0 to %d: %v", maxindex, p) + } +} + +/* checkoffset checks whether the immediate offset is valid for VLD[1-4].P and VST[1-4].P */ +func (c *ctxt7) checkoffset(p *obj.Prog, as obj.As) { + var offset, list, n, expect int64 + switch as { + case AVLD1, AVLD2, AVLD3, AVLD4, AVLD1R, AVLD2R, AVLD3R, AVLD4R: + offset = p.From.Offset + list = p.To.Offset + case AVST1, AVST2, AVST3, AVST4: + offset = p.To.Offset + list = p.From.Offset + default: + c.ctxt.Diag("invalid operation on op %v", p.As) + } + opcode := (list >> 12) & 15 + q := (list >> 30) & 1 + size := (list >> 10) & 3 + if offset == 0 { + return + } + switch opcode { + case 0x7: + n = 1 // one register + case 0xa: + n = 2 // two registers + case 0x6: + n = 3 // three registers + case 0x2: + n = 4 // four registers + default: + c.ctxt.Diag("invalid register numbers in ARM64 register list: %v", p) + } + + switch as { + case AVLD1R, AVLD2R, AVLD3R, AVLD4R: + if offset != n*(1<<uint(size)) { + c.ctxt.Diag("invalid post-increment offset: %v", p) + } + default: + if !(q == 0 && offset == n*8) && !(q == 1 && offset == n*16) { + c.ctxt.Diag("invalid post-increment offset: %v", p) + } + } + + switch as { + case AVLD1, AVST1: + return + case AVLD1R: + expect = 1 + case AVLD2, AVST2, AVLD2R: + expect = 2 + case AVLD3, AVST3, AVLD3R: + expect = 3 + case AVLD4, AVST4, AVLD4R: + expect = 4 + } + + if expect != n { + c.ctxt.Diag("expected %d registers, got %d: %v.", expect, n, p) + } +} + +/* 
checkShiftAmount checks whether the index shift amount is valid */ +/* for load with register offset instructions */ +func (c *ctxt7) checkShiftAmount(p *obj.Prog, a *obj.Addr) { + var amount int16 + amount = (a.Index >> 5) & 7 + switch p.As { + case AMOVB, AMOVBU: + if amount != 0 { + c.ctxt.Diag("invalid index shift amount: %v", p) + } + case AMOVH, AMOVHU: + if amount != 1 && amount != 0 { + c.ctxt.Diag("invalid index shift amount: %v", p) + } + case AMOVW, AMOVWU, AFMOVS: + if amount != 2 && amount != 0 { + c.ctxt.Diag("invalid index shift amount: %v", p) + } + case AMOVD, AFMOVD: + if amount != 3 && amount != 0 { + c.ctxt.Diag("invalid index shift amount: %v", p) + } + default: + panic("invalid operation") + } +} + +func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { + var os [5]uint32 + o1 := uint32(0) + o2 := uint32(0) + o3 := uint32(0) + o4 := uint32(0) + o5 := uint32(0) + if false { /*debug['P']*/ + fmt.Printf("%x: %v\ttype %d\n", uint32(p.Pc), p, o.type_) + } + switch o.type_ { + default: + c.ctxt.Diag("%v: unknown asm %d", p, o.type_) + + case 0: /* pseudo ops */ + break + + case 1: /* op Rm,[Rn],Rd; default Rn=Rd -> op Rm<<0,[Rn,]Rd (shifted register) */ + o1 = c.oprrr(p, p.As) + + rf := int(p.From.Reg) + rt := int(p.To.Reg) + r := int(p.Reg) + if p.To.Type == obj.TYPE_NONE { + rt = REGZERO + } + if r == 0 { + r = rt + } + o1 |= (uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31) + + case 2: /* add/sub $(uimm12|uimm24)[,R],R; cmp $(uimm12|uimm24),R */ + if p.To.Reg == REG_RSP && isADDSop(p.As) { + c.ctxt.Diag("illegal destination register: %v\n", p) + } + o1 = c.opirr(p, p.As) + + rt := int(p.To.Reg) + if p.To.Type == obj.TYPE_NONE { + if (o1 & Sbit) == 0 { + c.ctxt.Diag("ineffective ZR destination\n%v", p) + } + rt = REGZERO + } + + r := int(p.Reg) + if r == 0 { + r = rt + } + v := int32(c.regoff(&p.From)) + o1 = c.oaddi(p, int32(o1), v, r, rt) + + case 3: /* op R<<n[,R],R (shifted register) */ + o1 = c.oprrr(p, p.As) + + amount := 
(p.From.Offset >> 10) & 63 + is64bit := o1 & (1 << 31) + if is64bit == 0 && amount >= 32 { + c.ctxt.Diag("shift amount out of range 0 to 31: %v", p) + } + shift := (p.From.Offset >> 22) & 3 + if (shift > 2 || shift < 0) && (isADDop(p.As) || isADDWop(p.As) || isNEGop(p.As)) { + c.ctxt.Diag("unsupported shift operator: %v", p) + } + o1 |= uint32(p.From.Offset) /* includes reg, op, etc */ + rt := int(p.To.Reg) + if p.To.Type == obj.TYPE_NONE { + rt = REGZERO + } + r := int(p.Reg) + if p.As == AMVN || p.As == AMVNW || isNEGop(p.As) { + r = REGZERO + } else if r == 0 { + r = rt + } + o1 |= (uint32(r&31) << 5) | uint32(rt&31) + + case 4: /* mov $addcon, R; mov $recon, R; mov $racon, R; mov $addcon2, R */ + rt := int(p.To.Reg) + r := int(o.param) + + if r == 0 { + r = REGZERO + } else if r == REGFROM { + r = int(p.From.Reg) + } + if r == 0 { + r = REGSP + } + + v := int32(c.regoff(&p.From)) + var op int32 + if v < 0 { + v = -v + op = int32(c.opirr(p, ASUB)) + } else { + op = int32(c.opirr(p, AADD)) + } + + if int(o.size(c.ctxt, p)) == 8 { + // NOTE: this case does not use REGTMP. If it ever does, + // remove the NOTUSETMP flag in optab. 
+ o1 = c.oaddi(p, op, v&0xfff000, r, rt) + o2 = c.oaddi(p, op, v&0x000fff, rt, rt) + break + } + + o1 = c.oaddi(p, op, v, r, rt) + + case 5: /* b s; bl s */ + o1 = c.opbra(p, p.As) + + if p.To.Sym == nil { + o1 |= uint32(c.brdist(p, 0, 26, 2)) + break + } + + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 4 + rel.Sym = p.To.Sym + rel.Add = p.To.Offset + rel.Type = objabi.R_CALLARM64 + + case 6: /* b ,O(R); bl ,O(R) */ + o1 = c.opbrr(p, p.As) + o1 |= uint32(p.To.Reg&31) << 5 + if p.As == obj.ACALL { + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 0 + rel.Type = objabi.R_CALLIND + } + + case 7: /* beq s */ + o1 = c.opbra(p, p.As) + + o1 |= uint32(c.brdist(p, 0, 19, 2) << 5) + + case 8: /* lsl $c,[R],R -> ubfm $(W-1)-c,$(-c MOD (W-1)),Rn,Rd */ + rt := int(p.To.Reg) + + rf := int(p.Reg) + if rf == 0 { + rf = rt + } + v := int32(p.From.Offset) + switch p.As { + case AASR: + o1 = c.opbfm(p, ASBFM, int(v), 63, rf, rt) + + case AASRW: + o1 = c.opbfm(p, ASBFMW, int(v), 31, rf, rt) + + case ALSL: + o1 = c.opbfm(p, AUBFM, int((64-v)&63), int(63-v), rf, rt) + + case ALSLW: + o1 = c.opbfm(p, AUBFMW, int((32-v)&31), int(31-v), rf, rt) + + case ALSR: + o1 = c.opbfm(p, AUBFM, int(v), 63, rf, rt) + + case ALSRW: + o1 = c.opbfm(p, AUBFMW, int(v), 31, rf, rt) + + case AROR: + o1 = c.opextr(p, AEXTR, v, rf, rf, rt) + + case ARORW: + o1 = c.opextr(p, AEXTRW, v, rf, rf, rt) + + default: + c.ctxt.Diag("bad shift $con\n%v", p) + break + } + + case 9: /* lsl Rm,[Rn],Rd -> lslv Rm, Rn, Rd */ + o1 = c.oprrr(p, p.As) + + r := int(p.Reg) + if r == 0 { + r = int(p.To.Reg) + } + o1 |= (uint32(p.From.Reg&31) << 16) | (uint32(r&31) << 5) | uint32(p.To.Reg&31) + + case 10: /* brk/hvc/.../svc [$con] */ + o1 = c.opimm(p, p.As) + + if p.From.Type != obj.TYPE_NONE { + o1 |= uint32((p.From.Offset & 0xffff) << 5) + } + + case 11: /* dword */ + c.aclass(&p.To) + + o1 = uint32(c.instoffset) + o2 = uint32(c.instoffset >> 32) + if p.To.Sym != nil { + rel := 
obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 8 + rel.Sym = p.To.Sym + rel.Add = p.To.Offset + rel.Type = objabi.R_ADDR + o2 = 0 + o1 = o2 + } + + case 12: /* movT $vcon, reg */ + // NOTE: this case does not use REGTMP. If it ever does, + // remove the NOTUSETMP flag in optab. + num := c.omovlconst(p.As, p, &p.From, int(p.To.Reg), os[:]) + if num == 0 { + c.ctxt.Diag("invalid constant: %v", p) + } + o1 = os[0] + o2 = os[1] + o3 = os[2] + o4 = os[3] + + case 13: /* addop $vcon, [R], R (64 bit literal); cmp $lcon,R -> addop $lcon,R, ZR */ + if p.Reg == REGTMP { + c.ctxt.Diag("cannot use REGTMP as source: %v\n", p) + } + if p.To.Reg == REG_RSP && isADDSop(p.As) { + c.ctxt.Diag("illegal destination register: %v\n", p) + } + o := uint32(0) + num := uint8(0) + cls := oclass(&p.From) + if isADDWop(p.As) { + if !cmp(C_LCON, cls) { + c.ctxt.Diag("illegal combination: %v", p) + } + num = c.omovlconst(AMOVW, p, &p.From, REGTMP, os[:]) + } else { + num = c.omovlconst(AMOVD, p, &p.From, REGTMP, os[:]) + } + if num == 0 { + c.ctxt.Diag("invalid constant: %v", p) + } + rt := int(p.To.Reg) + if p.To.Type == obj.TYPE_NONE { + rt = REGZERO + } + r := int(p.Reg) + if r == 0 { + r = rt + } + if p.To.Type != obj.TYPE_NONE && (p.To.Reg == REGSP || r == REGSP) { + o = c.opxrrr(p, p.As, false) + o |= REGTMP & 31 << 16 + o |= LSL0_64 + } else { + o = c.oprrr(p, p.As) + o |= REGTMP & 31 << 16 /* shift is 0 */ + } + + o |= uint32(r&31) << 5 + o |= uint32(rt & 31) + + os[num] = o + o1 = os[0] + o2 = os[1] + o3 = os[2] + o4 = os[3] + o5 = os[4] + + case 14: /* word */ + if c.aclass(&p.To) == C_ADDR { + c.ctxt.Diag("address constant needs DWORD\n%v", p) + } + o1 = uint32(c.instoffset) + if p.To.Sym != nil { + // This case happens with words generated + // in the PC stream as part of the literal pool. 
+ rel := obj.Addrel(c.cursym) + + rel.Off = int32(c.pc) + rel.Siz = 4 + rel.Sym = p.To.Sym + rel.Add = p.To.Offset + rel.Type = objabi.R_ADDR + o1 = 0 + } + + case 15: /* mul/mneg/umulh/umull r,[r,]r; madd/msub/fmadd/fmsub/fnmadd/fnmsub Rm,Ra,Rn,Rd */ + o1 = c.oprrr(p, p.As) + + rf := int(p.From.Reg) + rt := int(p.To.Reg) + var r int + var ra int + if p.From3Type() == obj.TYPE_REG { + r = int(p.GetFrom3().Reg) + ra = int(p.Reg) + if ra == 0 { + ra = REGZERO + } + } else { + r = int(p.Reg) + if r == 0 { + r = rt + } + ra = REGZERO + } + + o1 |= (uint32(rf&31) << 16) | (uint32(ra&31) << 10) | (uint32(r&31) << 5) | uint32(rt&31) + + case 16: /* XremY R[,R],R -> XdivY; XmsubY */ + o1 = c.oprrr(p, p.As) + + rf := int(p.From.Reg) + rt := int(p.To.Reg) + r := int(p.Reg) + if r == 0 { + r = rt + } + o1 |= (uint32(rf&31) << 16) | (uint32(r&31) << 5) | REGTMP&31 + o2 = c.oprrr(p, AMSUBW) + o2 |= o1 & (1 << 31) /* same size */ + o2 |= (uint32(rf&31) << 16) | (uint32(r&31) << 10) | (REGTMP & 31 << 5) | uint32(rt&31) + + case 17: /* op Rm,[Rn],Rd; default Rn=ZR */ + o1 = c.oprrr(p, p.As) + + rf := int(p.From.Reg) + rt := int(p.To.Reg) + r := int(p.Reg) + if p.To.Type == obj.TYPE_NONE { + rt = REGZERO + } + if r == 0 { + r = REGZERO + } + o1 |= (uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31) + + case 18: /* csel cond,Rn,Rm,Rd; cinc/cinv/cneg cond,Rn,Rd; cset cond,Rd */ + o1 = c.oprrr(p, p.As) + + cond := SpecialOperand(p.From.Offset) + if cond < SPOP_EQ || cond > SPOP_NV || (cond == SPOP_AL || cond == SPOP_NV) && p.From3Type() == obj.TYPE_NONE { + c.ctxt.Diag("invalid condition: %v", p) + } else { + cond -= SPOP_EQ + } + + r := int(p.Reg) + var rf int = r + if p.From3Type() == obj.TYPE_NONE { + /* CINC/CINV/CNEG or CSET/CSETM*/ + if r == 0 { + /* CSET/CSETM */ + rf = REGZERO + r = rf + } + cond ^= 1 + } else { + rf = int(p.GetFrom3().Reg) /* CSEL */ + } + + rt := int(p.To.Reg) + o1 |= (uint32(rf&31) << 16) | (uint32(cond&15) << 12) | (uint32(r&31) << 5) | 
uint32(rt&31) + + case 19: /* CCMN cond, (Rm|uimm5),Rn, uimm4 -> ccmn Rn,Rm,uimm4,cond */ + nzcv := int(p.To.Offset) + + cond := SpecialOperand(p.From.Offset) + if cond < SPOP_EQ || cond > SPOP_NV { + c.ctxt.Diag("invalid condition\n%v", p) + } else { + cond -= SPOP_EQ + } + var rf int + if p.GetFrom3().Type == obj.TYPE_REG { + o1 = c.oprrr(p, p.As) + rf = int(p.GetFrom3().Reg) /* Rm */ + } else { + o1 = c.opirr(p, p.As) + rf = int(p.GetFrom3().Offset & 0x1F) + } + + o1 |= (uint32(rf&31) << 16) | (uint32(cond&15) << 12) | (uint32(p.Reg&31) << 5) | uint32(nzcv) + + case 20: /* movT R,O(R) -> strT */ + v := int32(c.regoff(&p.To)) + sz := int32(1 << uint(movesize(p.As))) + + r := int(p.To.Reg) + if r == 0 { + r = int(o.param) + } + if v < 0 || v%sz != 0 { /* unscaled 9-bit signed */ + o1 = c.olsr9s(p, int32(c.opstr(p, p.As)), v, r, int(p.From.Reg)) + } else { + v = int32(c.offsetshift(p, int64(v), int(o.a4))) + o1 = c.olsr12u(p, int32(c.opstr(p, p.As)), v, r, int(p.From.Reg)) + } + + case 21: /* movT O(R),R -> ldrT */ + v := int32(c.regoff(&p.From)) + sz := int32(1 << uint(movesize(p.As))) + + r := int(p.From.Reg) + if r == 0 { + r = int(o.param) + } + if v < 0 || v%sz != 0 { /* unscaled 9-bit signed */ + o1 = c.olsr9s(p, int32(c.opldr(p, p.As)), v, r, int(p.To.Reg)) + } else { + v = int32(c.offsetshift(p, int64(v), int(o.a1))) + //print("offset=%lld v=%ld a1=%d\n", instoffset, v, o->a1); + o1 = c.olsr12u(p, int32(c.opldr(p, p.As)), v, r, int(p.To.Reg)) + } + + case 22: /* movT (R)O!,R; movT O(R)!, R -> ldrT */ + if p.From.Reg != REGSP && p.From.Reg == p.To.Reg { + c.ctxt.Diag("constrained unpredictable behavior: %v", p) + } + + v := int32(p.From.Offset) + + if v < -256 || v > 255 { + c.ctxt.Diag("offset out of range [-256,255]: %v", p) + } + o1 = c.opldr(p, p.As) + if o.scond == C_XPOST { + o1 |= 1 << 10 + } else { + o1 |= 3 << 10 + } + o1 |= ((uint32(v) & 0x1FF) << 12) | (uint32(p.From.Reg&31) << 5) | uint32(p.To.Reg&31) + + case 23: /* movT R,(R)O!; movT O(R)!, R 
-> strT */ + if p.To.Reg != REGSP && p.From.Reg == p.To.Reg { + c.ctxt.Diag("constrained unpredictable behavior: %v", p) + } + + v := int32(p.To.Offset) + + if v < -256 || v > 255 { + c.ctxt.Diag("offset out of range [-256,255]: %v", p) + } + o1 = c.opstr(p, p.As) + if o.scond == C_XPOST { + o1 |= 1 << 10 + } else { + o1 |= 3 << 10 + } + o1 |= ((uint32(v) & 0x1FF) << 12) | (uint32(p.To.Reg&31) << 5) | uint32(p.From.Reg&31) + + case 24: /* mov/mvn Rs,Rd -> add $0,Rs,Rd or orr Rs,ZR,Rd */ + rf := int(p.From.Reg) + rt := int(p.To.Reg) + s := rf == REGSP || rt == REGSP + if p.As == AMVN || p.As == AMVNW { + if s { + c.ctxt.Diag("illegal SP reference\n%v", p) + } + o1 = c.oprrr(p, p.As) + o1 |= (uint32(rf&31) << 16) | (REGZERO & 31 << 5) | uint32(rt&31) + } else if s { + o1 = c.opirr(p, p.As) + o1 |= (uint32(rf&31) << 5) | uint32(rt&31) + } else { + o1 = c.oprrr(p, p.As) + o1 |= (uint32(rf&31) << 16) | (REGZERO & 31 << 5) | uint32(rt&31) + } + + case 25: /* negX Rs, Rd -> subX Rs<<0, ZR, Rd */ + o1 = c.oprrr(p, p.As) + + rf := int(p.From.Reg) + if rf == C_NONE { + rf = int(p.To.Reg) + } + rt := int(p.To.Reg) + o1 |= (uint32(rf&31) << 16) | (REGZERO & 31 << 5) | uint32(rt&31) + + case 27: /* op Rm<<n[,Rn],Rd (extended register) */ + if p.To.Reg == REG_RSP && isADDSop(p.As) { + c.ctxt.Diag("illegal destination register: %v\n", p) + } + if (p.From.Reg-obj.RBaseARM64)®_EXT != 0 || + (p.From.Reg >= REG_LSL && p.From.Reg < REG_ARNG) { + amount := (p.From.Reg >> 5) & 7 + if amount > 4 { + c.ctxt.Diag("shift amount out of range 0 to 4: %v", p) + } + o1 = c.opxrrr(p, p.As, true) + o1 |= c.encRegShiftOrExt(p, &p.From, p.From.Reg) /* includes reg, op, etc */ + } else { + o1 = c.opxrrr(p, p.As, false) + o1 |= uint32(p.From.Reg&31) << 16 + } + rt := int(p.To.Reg) + if p.To.Type == obj.TYPE_NONE { + rt = REGZERO + } + r := int(p.Reg) + if r == 0 { + r = rt + } + o1 |= (uint32(r&31) << 5) | uint32(rt&31) + + case 28: /* logop $vcon, [R], R (64 bit literal) */ + if p.Reg == REGTMP { + 
c.ctxt.Diag("cannot use REGTMP as source: %v\n", p) + } + o := uint32(0) + num := uint8(0) + cls := oclass(&p.From) + if isANDWop(p.As) { + if !cmp(C_LCON, cls) { + c.ctxt.Diag("illegal combination: %v", p) + } + num = c.omovlconst(AMOVW, p, &p.From, REGTMP, os[:]) + } else { + num = c.omovlconst(AMOVD, p, &p.From, REGTMP, os[:]) + } + + if num == 0 { + c.ctxt.Diag("invalid constant: %v", p) + } + rt := int(p.To.Reg) + if p.To.Type == obj.TYPE_NONE { + rt = REGZERO + } + r := int(p.Reg) + if r == 0 { + r = rt + } + o = c.oprrr(p, p.As) + o |= REGTMP & 31 << 16 /* shift is 0 */ + o |= uint32(r&31) << 5 + o |= uint32(rt & 31) + + os[num] = o + o1 = os[0] + o2 = os[1] + o3 = os[2] + o4 = os[3] + o5 = os[4] + + case 29: /* op Rn, Rd */ + fc := c.aclass(&p.From) + tc := c.aclass(&p.To) + if (p.As == AFMOVD || p.As == AFMOVS) && (fc == C_REG || fc == C_ZREG || tc == C_REG) { + // FMOV Rx, Fy or FMOV Fy, Rx + o1 = FPCVTI(0, 0, 0, 0, 6) + if p.As == AFMOVD { + o1 |= 1<<31 | 1<<22 // 64-bit + } + if fc == C_REG || fc == C_ZREG { + o1 |= 1 << 16 // FMOV Rx, Fy + } + } else { + o1 = c.oprrr(p, p.As) + } + o1 |= uint32(p.From.Reg&31)<<5 | uint32(p.To.Reg&31) + + case 30: /* movT R,L(R) -> strT */ + // if offset L can be split into hi+lo, and both fit into instructions, do + // add $hi, R, Rtmp + // str R, lo(Rtmp) + // otherwise, use constant pool + // mov $L, Rtmp (from constant pool) + // str R, (R+Rtmp) + s := movesize(o.as) + if s < 0 { + c.ctxt.Diag("unexpected long move, op %v tab %v\n%v", p.As, o.as, p) + } + + r := int(p.To.Reg) + if r == 0 { + r = int(o.param) + } + + v := int32(c.regoff(&p.To)) + var hi int32 + if v < 0 || (v&((1<<uint(s))-1)) != 0 { + // negative or unaligned offset, use constant pool + goto storeusepool + } + + hi = v - (v & (0xFFF << uint(s))) + if hi&0xFFF != 0 { + c.ctxt.Diag("internal: miscalculated offset %d [%d]\n%v", v, s, p) + } + if hi&^0xFFF000 != 0 { + // hi doesn't fit into an ADD instruction + goto storeusepool + } + + o1 = c.oaddi(p, 
int32(c.opirr(p, AADD)), hi, r, REGTMP) + o2 = c.olsr12u(p, int32(c.opstr(p, p.As)), ((v-hi)>>uint(s))&0xFFF, REGTMP, int(p.From.Reg)) + break + + storeusepool: + if r == REGTMP || p.From.Reg == REGTMP { + c.ctxt.Diag("REGTMP used in large offset store: %v", p) + } + o1 = c.omovlit(AMOVD, p, &p.To, REGTMP) + o2 = c.olsxrr(p, int32(c.opstrr(p, p.As, false)), int(p.From.Reg), r, REGTMP) + + case 31: /* movT L(R), R -> ldrT */ + // if offset L can be split into hi+lo, and both fit into instructions, do + // add $hi, R, Rtmp + // ldr lo(Rtmp), R + // otherwise, use constant pool + // mov $L, Rtmp (from constant pool) + // ldr (R+Rtmp), R + s := movesize(o.as) + if s < 0 { + c.ctxt.Diag("unexpected long move, op %v tab %v\n%v", p.As, o.as, p) + } + + r := int(p.From.Reg) + if r == 0 { + r = int(o.param) + } + + v := int32(c.regoff(&p.From)) + var hi int32 + if v < 0 || (v&((1<<uint(s))-1)) != 0 { + // negative or unaligned offset, use constant pool + goto loadusepool + } + + hi = v - (v & (0xFFF << uint(s))) + if (hi & 0xFFF) != 0 { + c.ctxt.Diag("internal: miscalculated offset %d [%d]\n%v", v, s, p) + } + if hi&^0xFFF000 != 0 { + // hi doesn't fit into an ADD instruction + goto loadusepool + } + + o1 = c.oaddi(p, int32(c.opirr(p, AADD)), hi, r, REGTMP) + o2 = c.olsr12u(p, int32(c.opldr(p, p.As)), ((v-hi)>>uint(s))&0xFFF, REGTMP, int(p.To.Reg)) + break + + loadusepool: + if r == REGTMP || p.From.Reg == REGTMP { + c.ctxt.Diag("REGTMP used in large offset load: %v", p) + } + o1 = c.omovlit(AMOVD, p, &p.From, REGTMP) + o2 = c.olsxrr(p, int32(c.opldrr(p, p.As, false)), int(p.To.Reg), r, REGTMP) + + case 32: /* mov $con, R -> movz/movn */ + o1 = c.omovconst(p.As, p, &p.From, int(p.To.Reg)) + + case 33: /* movk $uimm16 << pos */ + o1 = c.opirr(p, p.As) + + d := p.From.Offset + if d == 0 { + c.ctxt.Diag("zero shifts cannot be handled correctly: %v", p) + } + s := movcon(d) + if s < 0 || s >= 4 { + c.ctxt.Diag("bad constant for MOVK: %#x\n%v", uint64(d), p) + } + if (o1&S64) == 
0 && s >= 2 { + c.ctxt.Diag("illegal bit position\n%v", p) + } + if ((uint64(d) >> uint(s*16)) >> 16) != 0 { + c.ctxt.Diag("requires uimm16\n%v", p) + } + rt := int(p.To.Reg) + + o1 |= uint32((((d >> uint(s*16)) & 0xFFFF) << 5) | int64((uint32(s)&3)<<21) | int64(rt&31)) + + case 34: /* mov $lacon,R */ + o1 = c.omovlit(AMOVD, p, &p.From, REGTMP) + + if o1 == 0 { + break + } + o2 = c.opxrrr(p, AADD, false) + o2 |= REGTMP & 31 << 16 + o2 |= LSL0_64 + r := int(p.From.Reg) + if r == 0 { + r = int(o.param) + } + o2 |= uint32(r&31) << 5 + o2 |= uint32(p.To.Reg & 31) + + case 35: /* mov SPR,R -> mrs */ + o1 = c.oprrr(p, AMRS) + + // SysRegEnc function returns the system register encoding and accessFlags. + _, v, accessFlags := SysRegEnc(p.From.Reg) + if v == 0 { + c.ctxt.Diag("illegal system register:\n%v", p) + } + if (o1 & (v &^ (3 << 19))) != 0 { + c.ctxt.Diag("MRS register value overlap\n%v", p) + } + if accessFlags&SR_READ == 0 { + c.ctxt.Diag("system register is not readable: %v", p) + } + + o1 |= v + o1 |= uint32(p.To.Reg & 31) + + case 36: /* mov R,SPR */ + o1 = c.oprrr(p, AMSR) + + // SysRegEnc function returns the system register encoding and accessFlags. + _, v, accessFlags := SysRegEnc(p.To.Reg) + if v == 0 { + c.ctxt.Diag("illegal system register:\n%v", p) + } + if (o1 & (v &^ (3 << 19))) != 0 { + c.ctxt.Diag("MSR register value overlap\n%v", p) + } + if accessFlags&SR_WRITE == 0 { + c.ctxt.Diag("system register is not writable: %v", p) + } + + o1 |= v + o1 |= uint32(p.From.Reg & 31) + + case 37: /* mov $con,PSTATEfield -> MSR [immediate] */ + if (uint64(p.From.Offset) &^ uint64(0xF)) != 0 { + c.ctxt.Diag("illegal immediate for PSTATE field\n%v", p) + } + o1 = c.opirr(p, AMSR) + o1 |= uint32((p.From.Offset & 0xF) << 8) /* Crm */ + v := uint32(0) + // PSTATEfield can be special registers and special operands. 
+ if p.To.Type == obj.TYPE_REG && p.To.Reg == REG_SPSel { + v = 0<<16 | 4<<12 | 5<<5 + } else if p.To.Type == obj.TYPE_SPECIAL { + opd := SpecialOperand(p.To.Offset) + for _, pf := range pstatefield { + if pf.opd == opd { + v = pf.enc + break + } + } + } + + if v == 0 { + c.ctxt.Diag("illegal PSTATE field for immediate move\n%v", p) + } + o1 |= v + + case 38: /* clrex [$imm] */ + o1 = c.opimm(p, p.As) + + if p.To.Type == obj.TYPE_NONE { + o1 |= 0xF << 8 + } else { + o1 |= uint32((p.To.Offset & 0xF) << 8) + } + + case 39: /* cbz R, rel */ + o1 = c.opirr(p, p.As) + + o1 |= uint32(p.From.Reg & 31) + o1 |= uint32(c.brdist(p, 0, 19, 2) << 5) + + case 40: /* tbz */ + o1 = c.opirr(p, p.As) + + v := int32(p.From.Offset) + if v < 0 || v > 63 { + c.ctxt.Diag("illegal bit number\n%v", p) + } + o1 |= ((uint32(v) & 0x20) << (31 - 5)) | ((uint32(v) & 0x1F) << 19) + o1 |= uint32(c.brdist(p, 0, 14, 2) << 5) + o1 |= uint32(p.Reg & 31) + + case 41: /* eret, nop, others with no operands */ + o1 = c.op0(p, p.As) + + case 42: /* bfm R,r,s,R */ + o1 = c.opbfm(p, p.As, int(p.From.Offset), int(p.GetFrom3().Offset), int(p.Reg), int(p.To.Reg)) + + case 43: /* bfm aliases */ + r := int(p.From.Offset) + s := int(p.GetFrom3().Offset) + rf := int(p.Reg) + rt := int(p.To.Reg) + if rf == 0 { + rf = rt + } + switch p.As { + case ABFI: + if r != 0 { + r = 64 - r + } + o1 = c.opbfm(p, ABFM, r, s-1, rf, rt) + + case ABFIW: + if r != 0 { + r = 32 - r + } + o1 = c.opbfm(p, ABFMW, r, s-1, rf, rt) + + case ABFXIL: + o1 = c.opbfm(p, ABFM, r, r+s-1, rf, rt) + + case ABFXILW: + o1 = c.opbfm(p, ABFMW, r, r+s-1, rf, rt) + + case ASBFIZ: + if r != 0 { + r = 64 - r + } + o1 = c.opbfm(p, ASBFM, r, s-1, rf, rt) + + case ASBFIZW: + if r != 0 { + r = 32 - r + } + o1 = c.opbfm(p, ASBFMW, r, s-1, rf, rt) + + case ASBFX: + o1 = c.opbfm(p, ASBFM, r, r+s-1, rf, rt) + + case ASBFXW: + o1 = c.opbfm(p, ASBFMW, r, r+s-1, rf, rt) + + case AUBFIZ: + if r != 0 { + r = 64 - r + } + o1 = c.opbfm(p, AUBFM, r, s-1, rf, rt) + + 
case AUBFIZW: + if r != 0 { + r = 32 - r + } + o1 = c.opbfm(p, AUBFMW, r, s-1, rf, rt) + + case AUBFX: + o1 = c.opbfm(p, AUBFM, r, r+s-1, rf, rt) + + case AUBFXW: + o1 = c.opbfm(p, AUBFMW, r, r+s-1, rf, rt) + + default: + c.ctxt.Diag("bad bfm alias\n%v", p) + break + } + + case 44: /* extr $b, Rn, Rm, Rd */ + o1 = c.opextr(p, p.As, int32(p.From.Offset), int(p.GetFrom3().Reg), int(p.Reg), int(p.To.Reg)) + + case 45: /* sxt/uxt[bhw] R,R; movT R,R -> sxtT R,R */ + rf := int(p.From.Reg) + + rt := int(p.To.Reg) + as := p.As + if rf == REGZERO { + as = AMOVWU /* clearer in disassembly */ + } + switch as { + case AMOVB, ASXTB: + o1 = c.opbfm(p, ASBFM, 0, 7, rf, rt) + + case AMOVH, ASXTH: + o1 = c.opbfm(p, ASBFM, 0, 15, rf, rt) + + case AMOVW, ASXTW: + o1 = c.opbfm(p, ASBFM, 0, 31, rf, rt) + + case AMOVBU, AUXTB: + o1 = c.opbfm(p, AUBFM, 0, 7, rf, rt) + + case AMOVHU, AUXTH: + o1 = c.opbfm(p, AUBFM, 0, 15, rf, rt) + + case AMOVWU: + o1 = c.oprrr(p, as) | (uint32(rf&31) << 16) | (REGZERO & 31 << 5) | uint32(rt&31) + + case AUXTW: + o1 = c.opbfm(p, AUBFM, 0, 31, rf, rt) + + case ASXTBW: + o1 = c.opbfm(p, ASBFMW, 0, 7, rf, rt) + + case ASXTHW: + o1 = c.opbfm(p, ASBFMW, 0, 15, rf, rt) + + case AUXTBW: + o1 = c.opbfm(p, AUBFMW, 0, 7, rf, rt) + + case AUXTHW: + o1 = c.opbfm(p, AUBFMW, 0, 15, rf, rt) + + default: + c.ctxt.Diag("bad sxt %v", as) + break + } + + case 46: /* cls */ + o1 = c.opbit(p, p.As) + + o1 |= uint32(p.From.Reg&31) << 5 + o1 |= uint32(p.To.Reg & 31) + + case 47: // SWPx/LDADDx/LDCLRx/LDEORx/LDORx/CASx Rs, (Rb), Rt + rs := p.From.Reg + rt := p.RegTo2 + rb := p.To.Reg + + // rt can't be sp. + if rt == REG_RSP { + c.ctxt.Diag("illegal destination register: %v\n", p) + } + if enc, ok := atomicLDADD[p.As]; ok { + // for LDADDx-like instructions, rt can't be r31 when field.enc A is 0, A bit is the 23rd bit. 
+ if (rt == REGZERO) && (enc&(1<<23) == 0) { + c.ctxt.Diag("illegal destination register: %v\n", p) + } + o1 |= enc + } else if enc, ok := atomicSWP[p.As]; ok { + o1 |= enc + } else { + c.ctxt.Diag("invalid atomic instructions: %v\n", p) + } + o1 |= uint32(rs&31)<<16 | uint32(rb&31)<<5 | uint32(rt&31) + + case 48: /* ADD $C_ADDCON2, Rm, Rd */ + // NOTE: this case does not use REGTMP. If it ever does, + // remove the NOTUSETMP flag in optab. + op := c.opirr(p, p.As) + if op&Sbit != 0 { + c.ctxt.Diag("can not break addition/subtraction when S bit is set", p) + } + rt := int(p.To.Reg) + r := int(p.Reg) + if r == 0 { + r = rt + } + o1 = c.oaddi(p, int32(op), int32(c.regoff(&p.From))&0x000fff, r, rt) + o2 = c.oaddi(p, int32(op), int32(c.regoff(&p.From))&0xfff000, rt, rt) + + case 50: /* sys/sysl */ + o1 = c.opirr(p, p.As) + + if (p.From.Offset &^ int64(SYSARG4(0x7, 0xF, 0xF, 0x7))) != 0 { + c.ctxt.Diag("illegal SYS argument\n%v", p) + } + o1 |= uint32(p.From.Offset) + if p.To.Type == obj.TYPE_REG { + o1 |= uint32(p.To.Reg & 31) + } else { + o1 |= 0x1F + } + + case 51: /* dmb */ + o1 = c.opirr(p, p.As) + + if p.From.Type == obj.TYPE_CONST { + o1 |= uint32((p.From.Offset & 0xF) << 8) + } + + case 52: /* hint */ + o1 = c.opirr(p, p.As) + + o1 |= uint32((p.From.Offset & 0x7F) << 5) + + case 53: /* and/or/eor/bic/tst/... 
$bitcon, Rn, Rd */ + a := p.As + rt := int(p.To.Reg) + if p.To.Type == obj.TYPE_NONE { + rt = REGZERO + } + r := int(p.Reg) + if r == 0 { + r = rt + } + if r == REG_RSP { + c.ctxt.Diag("illegal source register: %v", p) + break + } + mode := 64 + v := uint64(p.From.Offset) + switch p.As { + case AANDW, AORRW, AEORW, AANDSW, ATSTW: + mode = 32 + case ABIC, AORN, AEON, ABICS: + v = ^v + case ABICW, AORNW, AEONW, ABICSW: + v = ^v + mode = 32 + } + o1 = c.opirr(p, a) + o1 |= bitconEncode(v, mode) | uint32(r&31)<<5 | uint32(rt&31) + + case 54: /* floating point arith */ + o1 = c.oprrr(p, p.As) + rf := int(p.From.Reg) + rt := int(p.To.Reg) + r := int(p.Reg) + if (o1&(0x1F<<24)) == (0x1E<<24) && (o1&(1<<11)) == 0 { /* monadic */ + r = rf + rf = 0 + } else if r == 0 { + r = rt + } + o1 |= (uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31) + + case 55: /* floating-point constant */ + var rf int + o1 = 0xf<<25 | 1<<21 | 1<<12 + rf = c.chipfloat7(p.From.Val.(float64)) + if rf < 0 { + c.ctxt.Diag("invalid floating-point immediate\n%v", p) + } + if p.As == AFMOVD { + o1 |= 1 << 22 + } + o1 |= (uint32(rf&0xff) << 13) | uint32(p.To.Reg&31) + + case 56: /* floating point compare */ + o1 = c.oprrr(p, p.As) + + var rf int + if p.From.Type == obj.TYPE_FCONST { + o1 |= 8 /* zero */ + rf = 0 + } else { + rf = int(p.From.Reg) + } + rt := int(p.Reg) + o1 |= uint32(rf&31)<<16 | uint32(rt&31)<<5 + + case 57: /* floating point conditional compare */ + o1 = c.oprrr(p, p.As) + + cond := SpecialOperand(p.From.Offset) + if cond < SPOP_EQ || cond > SPOP_NV { + c.ctxt.Diag("invalid condition\n%v", p) + } else { + cond -= SPOP_EQ + } + + nzcv := int(p.To.Offset) + if nzcv&^0xF != 0 { + c.ctxt.Diag("implausible condition\n%v", p) + } + rf := int(p.Reg) + if p.GetFrom3() == nil || p.GetFrom3().Reg < REG_F0 || p.GetFrom3().Reg > REG_F31 { + c.ctxt.Diag("illegal FCCMP\n%v", p) + break + } + rt := int(p.GetFrom3().Reg) + o1 |= uint32(rf&31)<<16 | uint32(cond&15)<<12 | uint32(rt&31)<<5 | 
uint32(nzcv) + + case 58: /* ldar/ldarb/ldarh/ldaxp/ldxp/ldaxr/ldxr */ + o1 = c.opload(p, p.As) + + o1 |= 0x1F << 16 + o1 |= uint32(p.From.Reg&31) << 5 + if p.As == ALDXP || p.As == ALDXPW || p.As == ALDAXP || p.As == ALDAXPW { + if int(p.To.Reg) == int(p.To.Offset) { + c.ctxt.Diag("constrained unpredictable behavior: %v", p) + } + o1 |= uint32(p.To.Offset&31) << 10 + } else { + o1 |= 0x1F << 10 + } + o1 |= uint32(p.To.Reg & 31) + + case 59: /* stxr/stlxr/stxp/stlxp */ + s := p.RegTo2 + n := p.To.Reg + t := p.From.Reg + if isSTLXRop(p.As) { + if s == t || (s == n && n != REGSP) { + c.ctxt.Diag("constrained unpredictable behavior: %v", p) + } + } else if isSTXPop(p.As) { + t2 := int16(p.From.Offset) + if (s == t || s == t2) || (s == n && n != REGSP) { + c.ctxt.Diag("constrained unpredictable behavior: %v", p) + } + } + if s == REG_RSP { + c.ctxt.Diag("illegal destination register: %v\n", p) + } + o1 = c.opstore(p, p.As) + + if p.RegTo2 != obj.REG_NONE { + o1 |= uint32(p.RegTo2&31) << 16 + } else { + o1 |= 0x1F << 16 + } + if isSTXPop(p.As) { + o1 |= uint32(p.From.Offset&31) << 10 + } + o1 |= uint32(p.To.Reg&31)<<5 | uint32(p.From.Reg&31) + + case 60: /* adrp label,r */ + d := c.brdist(p, 12, 21, 0) + + o1 = ADR(1, uint32(d), uint32(p.To.Reg)) + + case 61: /* adr label, r */ + d := c.brdist(p, 0, 21, 0) + + o1 = ADR(0, uint32(d), uint32(p.To.Reg)) + + case 62: /* op $movcon, [R], R -> mov $movcon, REGTMP + op REGTMP, [R], R */ + if p.Reg == REGTMP { + c.ctxt.Diag("cannot use REGTMP as source: %v\n", p) + } + if p.To.Reg == REG_RSP && isADDSop(p.As) { + c.ctxt.Diag("illegal destination register: %v\n", p) + } + lsl0 := LSL0_64 + if isADDWop(p.As) || isANDWop(p.As) { + o1 = c.omovconst(AMOVW, p, &p.From, REGTMP) + lsl0 = LSL0_32 + } else { + o1 = c.omovconst(AMOVD, p, &p.From, REGTMP) + } + + rt := int(p.To.Reg) + if p.To.Type == obj.TYPE_NONE { + rt = REGZERO + } + r := int(p.Reg) + if r == 0 { + r = rt + } + if p.To.Reg == REGSP || r == REGSP { + o2 = c.opxrrr(p, 
p.As, false) + o2 |= REGTMP & 31 << 16 + o2 |= uint32(lsl0) + } else { + o2 = c.oprrr(p, p.As) + o2 |= REGTMP & 31 << 16 /* shift is 0 */ + } + o2 |= uint32(r&31) << 5 + o2 |= uint32(rt & 31) + + /* reloc ops */ + case 64: /* movT R,addr -> adrp + movT R, (REGTMP) */ + if p.From.Reg == REGTMP { + c.ctxt.Diag("cannot use REGTMP as source: %v\n", p) + } + o1 = ADR(1, 0, REGTMP) + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 8 + rel.Sym = p.To.Sym + rel.Add = p.To.Offset + // For unaligned access, fall back to adrp + add + movT R, (REGTMP). + if o.size(c.ctxt, p) != 8 { + o2 = c.opirr(p, AADD) | REGTMP&31<<5 | REGTMP&31 + o3 = c.olsr12u(p, int32(c.opstr(p, p.As)), 0, REGTMP, int(p.From.Reg)) + rel.Type = objabi.R_ADDRARM64 + break + } + o2 = c.olsr12u(p, int32(c.opstr(p, p.As)), 0, REGTMP, int(p.From.Reg)) + rel.Type = c.addrRelocType(p) + + case 65: /* movT addr,R -> adrp + movT (REGTMP), R */ + o1 = ADR(1, 0, REGTMP) + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 8 + rel.Sym = p.From.Sym + rel.Add = p.From.Offset + // For unaligned access, fall back to adrp + add + movT (REGTMP), R. + if o.size(c.ctxt, p) != 8 { + o2 = c.opirr(p, AADD) | REGTMP&31<<5 | REGTMP&31 + o3 = c.olsr12u(p, int32(c.opldr(p, p.As)), 0, REGTMP, int(p.To.Reg)) + rel.Type = objabi.R_ADDRARM64 + break + } + o2 = c.olsr12u(p, int32(c.opldr(p, p.As)), 0, REGTMP, int(p.To.Reg)) + rel.Type = c.addrRelocType(p) + + case 66: /* ldp O(R)!, (r1, r2); ldp (R)O!, (r1, r2) */ + v := int32(c.regoff(&p.From)) + r := int(p.From.Reg) + if r == obj.REG_NONE { + r = int(o.param) + } + if r == obj.REG_NONE { + c.ctxt.Diag("invalid ldp source: %v\n", p) + } + o1 |= c.opldpstp(p, o, v, uint32(r), uint32(p.To.Reg), uint32(p.To.Offset), 1) + + case 67: /* stp (r1, r2), O(R)!; stp (r1, r2), (R)O! 
*/ + r := int(p.To.Reg) + if r == obj.REG_NONE { + r = int(o.param) + } + if r == obj.REG_NONE { + c.ctxt.Diag("invalid stp destination: %v\n", p) + } + v := int32(c.regoff(&p.To)) + o1 = c.opldpstp(p, o, v, uint32(r), uint32(p.From.Reg), uint32(p.From.Offset), 0) + + case 68: /* movT $vconaddr(SB), reg -> adrp + add + reloc */ + // NOTE: this case does not use REGTMP. If it ever does, + // remove the NOTUSETMP flag in optab. + if p.As == AMOVW { + c.ctxt.Diag("invalid load of 32-bit address: %v", p) + } + o1 = ADR(1, 0, uint32(p.To.Reg)) + o2 = c.opirr(p, AADD) | uint32(p.To.Reg&31)<<5 | uint32(p.To.Reg&31) + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 8 + rel.Sym = p.From.Sym + rel.Add = p.From.Offset + rel.Type = objabi.R_ADDRARM64 + + case 69: /* LE model movd $tlsvar, reg -> movz reg, 0 + reloc */ + o1 = c.opirr(p, AMOVZ) + o1 |= uint32(p.To.Reg & 31) + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 4 + rel.Sym = p.From.Sym + rel.Type = objabi.R_ARM64_TLS_LE + if p.From.Offset != 0 { + c.ctxt.Diag("invalid offset on MOVW $tlsvar") + } + + case 70: /* IE model movd $tlsvar, reg -> adrp REGTMP, 0; ldr reg, [REGTMP, #0] + relocs */ + o1 = ADR(1, 0, REGTMP) + o2 = c.olsr12u(p, int32(c.opldr(p, AMOVD)), 0, REGTMP, int(p.To.Reg)) + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 8 + rel.Sym = p.From.Sym + rel.Add = 0 + rel.Type = objabi.R_ARM64_TLS_IE + if p.From.Offset != 0 { + c.ctxt.Diag("invalid offset on MOVW $tlsvar") + } + + case 71: /* movd sym@GOT, reg -> adrp REGTMP, #0; ldr reg, [REGTMP, #0] + relocs */ + o1 = ADR(1, 0, REGTMP) + o2 = c.olsr12u(p, int32(c.opldr(p, AMOVD)), 0, REGTMP, int(p.To.Reg)) + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 8 + rel.Sym = p.From.Sym + rel.Add = 0 + rel.Type = objabi.R_ARM64_GOTPCREL + + case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls/vbit/vbsl/vcmtst/vsub/vbif/vuzip1/vuzip2/vrax1 Vm.<T>, Vn.<T>, Vd.<T> */ + af := int((p.From.Reg >> 5) & 15) 
+ af3 := int((p.Reg >> 5) & 15) + at := int((p.To.Reg >> 5) & 15) + if af != af3 || af != at { + c.ctxt.Diag("operand mismatch: %v", p) + break + } + o1 = c.oprrr(p, p.As) + rf := int((p.From.Reg) & 31) + rt := int((p.To.Reg) & 31) + r := int((p.Reg) & 31) + + Q := 0 + size := 0 + switch af { + case ARNG_16B: + Q = 1 + size = 0 + case ARNG_2D: + Q = 1 + size = 3 + case ARNG_2S: + Q = 0 + size = 2 + case ARNG_4H: + Q = 0 + size = 1 + case ARNG_4S: + Q = 1 + size = 2 + case ARNG_8B: + Q = 0 + size = 0 + case ARNG_8H: + Q = 1 + size = 1 + default: + c.ctxt.Diag("invalid arrangement: %v", p) + } + + switch p.As { + case AVORR, AVAND, AVEOR, AVBIT, AVBSL, AVBIF: + if af != ARNG_16B && af != ARNG_8B { + c.ctxt.Diag("invalid arrangement: %v", p) + } + case AVFMLA, AVFMLS: + if af != ARNG_2D && af != ARNG_2S && af != ARNG_4S { + c.ctxt.Diag("invalid arrangement: %v", p) + } + case AVUMAX, AVUMIN: + if af == ARNG_2D { + c.ctxt.Diag("invalid arrangement: %v", p) + } + } + switch p.As { + case AVAND, AVEOR: + size = 0 + case AVBSL: + size = 1 + case AVORR, AVBIT, AVBIF: + size = 2 + case AVFMLA, AVFMLS: + if af == ARNG_2D { + size = 1 + } else { + size = 0 + } + case AVRAX1: + if af != ARNG_2D { + c.ctxt.Diag("invalid arrangement: %v", p) + } + size = 0 + Q = 0 + } + + o1 |= (uint32(Q&1) << 30) | (uint32(size&3) << 22) | (uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31) + + case 73: /* vmov V.<T>[index], R */ + rf := int(p.From.Reg) + rt := int(p.To.Reg) + imm5 := 0 + o1 = 7<<25 | 0xf<<10 + index := int(p.From.Index) + switch (p.From.Reg >> 5) & 15 { + case ARNG_B: + c.checkindex(p, index, 15) + imm5 |= 1 + imm5 |= index << 1 + case ARNG_H: + c.checkindex(p, index, 7) + imm5 |= 2 + imm5 |= index << 2 + case ARNG_S: + c.checkindex(p, index, 3) + imm5 |= 4 + imm5 |= index << 3 + case ARNG_D: + c.checkindex(p, index, 1) + imm5 |= 8 + imm5 |= index << 4 + o1 |= 1 << 30 + default: + c.ctxt.Diag("invalid arrangement: %v", p) + } + o1 |= (uint32(imm5&0x1f) << 16) | 
(uint32(rf&31) << 5) | uint32(rt&31) + + case 74: + // add $O, R, Rtmp or sub $O, R, Rtmp + // ldp (Rtmp), (R1, R2) + r := int(p.From.Reg) + if r == obj.REG_NONE { + r = int(o.param) + } + if r == obj.REG_NONE { + c.ctxt.Diag("invalid ldp source: %v", p) + } + v := int32(c.regoff(&p.From)) + + if v > 0 { + if v > 4095 { + c.ctxt.Diag("offset out of range: %v", p) + } + o1 = c.oaddi(p, int32(c.opirr(p, AADD)), v, r, REGTMP) + } + if v < 0 { + if v < -4095 { + c.ctxt.Diag("offset out of range: %v", p) + } + o1 = c.oaddi(p, int32(c.opirr(p, ASUB)), -v, r, REGTMP) + } + o2 |= c.opldpstp(p, o, 0, uint32(REGTMP), uint32(p.To.Reg), uint32(p.To.Offset), 1) + + case 75: + // mov $L, Rtmp (from constant pool) + // add Rtmp, R, Rtmp + // ldp (Rtmp), (R1, R2) + r := int(p.From.Reg) + if r == REGTMP { + c.ctxt.Diag("REGTMP used in large offset load: %v", p) + } + if r == obj.REG_NONE { + r = int(o.param) + } + if r == obj.REG_NONE { + c.ctxt.Diag("invalid ldp source: %v", p) + } + o1 = c.omovlit(AMOVD, p, &p.From, REGTMP) + o2 = c.opxrrr(p, AADD, false) + o2 |= (REGTMP & 31) << 16 + o2 |= uint32(r&31) << 5 + o2 |= uint32(REGTMP & 31) + o3 |= c.opldpstp(p, o, 0, uint32(REGTMP), uint32(p.To.Reg), uint32(p.To.Offset), 1) + + case 76: + // add $O, R, Rtmp or sub $O, R, Rtmp + // stp (R1, R2), (Rtmp) + if p.From.Reg == REGTMP || p.From.Offset == REGTMP { + c.ctxt.Diag("cannot use REGTMP as source: %v", p) + } + r := int(p.To.Reg) + if r == obj.REG_NONE { + r = int(o.param) + } + if r == obj.REG_NONE { + c.ctxt.Diag("invalid stp destination: %v", p) + } + v := int32(c.regoff(&p.To)) + if v > 0 { + if v > 4095 { + c.ctxt.Diag("offset out of range: %v", p) + } + o1 = c.oaddi(p, int32(c.opirr(p, AADD)), v, r, REGTMP) + } + if v < 0 { + if v < -4095 { + c.ctxt.Diag("offset out of range: %v", p) + } + o1 = c.oaddi(p, int32(c.opirr(p, ASUB)), -v, r, REGTMP) + } + o2 |= c.opldpstp(p, o, 0, uint32(REGTMP), uint32(p.From.Reg), uint32(p.From.Offset), 0) + + case 77: + // mov $L, Rtmp (from 
constant pool) + // add Rtmp, R, Rtmp + // stp (R1, R2), (Rtmp) + r := int(p.To.Reg) + if r == REGTMP || p.From.Reg == REGTMP || p.From.Offset == REGTMP { + c.ctxt.Diag("REGTMP used in large offset store: %v", p) + } + if r == obj.REG_NONE { + r = int(o.param) + } + if r == obj.REG_NONE { + c.ctxt.Diag("invalid stp destination: %v", p) + } + o1 = c.omovlit(AMOVD, p, &p.To, REGTMP) + o2 = c.opxrrr(p, AADD, false) + o2 |= REGTMP & 31 << 16 + o2 |= uint32(r&31) << 5 + o2 |= uint32(REGTMP & 31) + o3 |= c.opldpstp(p, o, 0, uint32(REGTMP), uint32(p.From.Reg), uint32(p.From.Offset), 0) + + case 78: /* vmov R, V.<T>[index] */ + rf := int(p.From.Reg) + rt := int(p.To.Reg) + imm5 := 0 + o1 = 1<<30 | 7<<25 | 7<<10 + index := int(p.To.Index) + switch (p.To.Reg >> 5) & 15 { + case ARNG_B: + c.checkindex(p, index, 15) + imm5 |= 1 + imm5 |= index << 1 + case ARNG_H: + c.checkindex(p, index, 7) + imm5 |= 2 + imm5 |= index << 2 + case ARNG_S: + c.checkindex(p, index, 3) + imm5 |= 4 + imm5 |= index << 3 + case ARNG_D: + c.checkindex(p, index, 1) + imm5 |= 8 + imm5 |= index << 4 + default: + c.ctxt.Diag("invalid arrangement: %v", p) + } + o1 |= (uint32(imm5&0x1f) << 16) | (uint32(rf&31) << 5) | uint32(rt&31) + + case 79: /* vdup Vn.<T>[index], Vd.<T> */ + rf := int(p.From.Reg) + rt := int(p.To.Reg) + o1 = 7<<25 | 1<<10 + var imm5, Q int + index := int(p.From.Index) + switch (p.To.Reg >> 5) & 15 { + case ARNG_16B: + c.checkindex(p, index, 15) + Q = 1 + imm5 = 1 + imm5 |= index << 1 + case ARNG_2D: + c.checkindex(p, index, 1) + Q = 1 + imm5 = 8 + imm5 |= index << 4 + case ARNG_2S: + c.checkindex(p, index, 3) + Q = 0 + imm5 = 4 + imm5 |= index << 3 + case ARNG_4H: + c.checkindex(p, index, 7) + Q = 0 + imm5 = 2 + imm5 |= index << 2 + case ARNG_4S: + c.checkindex(p, index, 3) + Q = 1 + imm5 = 4 + imm5 |= index << 3 + case ARNG_8B: + c.checkindex(p, index, 15) + Q = 0 + imm5 = 1 + imm5 |= index << 1 + case ARNG_8H: + c.checkindex(p, index, 7) + Q = 1 + imm5 = 2 + imm5 |= index << 2 + 
default: + c.ctxt.Diag("invalid arrangement: %v", p) + } + o1 |= (uint32(Q&1) << 30) | (uint32(imm5&0x1f) << 16) + o1 |= (uint32(rf&31) << 5) | uint32(rt&31) + + case 80: /* vmov/vdup V.<T>[index], Vn */ + rf := int(p.From.Reg) + rt := int(p.To.Reg) + imm5 := 0 + index := int(p.From.Index) + switch p.As { + case AVMOV, AVDUP: + o1 = 1<<30 | 15<<25 | 1<<10 + switch (p.From.Reg >> 5) & 15 { + case ARNG_B: + c.checkindex(p, index, 15) + imm5 |= 1 + imm5 |= index << 1 + case ARNG_H: + c.checkindex(p, index, 7) + imm5 |= 2 + imm5 |= index << 2 + case ARNG_S: + c.checkindex(p, index, 3) + imm5 |= 4 + imm5 |= index << 3 + case ARNG_D: + c.checkindex(p, index, 1) + imm5 |= 8 + imm5 |= index << 4 + default: + c.ctxt.Diag("invalid arrangement: %v", p) + } + default: + c.ctxt.Diag("unsupported op %v", p.As) + } + o1 |= (uint32(imm5&0x1f) << 16) | (uint32(rf&31) << 5) | uint32(rt&31) + + case 81: /* vld[1-4]|vld[1-4]r (Rn), [Vt1.<T>, Vt2.<T>, ...] */ + c.checkoffset(p, p.As) + r := int(p.From.Reg) + o1 = c.oprrr(p, p.As) + if o.scond == C_XPOST { + o1 |= 1 << 23 + if p.From.Index == 0 { + // immediate offset variant + o1 |= 0x1f << 16 + } else { + // register offset variant + if isRegShiftOrExt(&p.From) { + c.ctxt.Diag("invalid extended register op: %v\n", p) + } + o1 |= uint32(p.From.Index&0x1f) << 16 + } + } + o1 |= uint32(p.To.Offset) + // cmd/asm/internal/arch/arm64.go:ARM64RegisterListOffset + // add opcode(bit 12-15) for vld1, mask it off if it's not vld1 + o1 = c.maskOpvldvst(p, o1) + o1 |= uint32(r&31) << 5 + + case 82: /* vmov/vdup Rn, Vd.<T> */ + rf := int(p.From.Reg) + rt := int(p.To.Reg) + o1 = 7<<25 | 3<<10 + var imm5, Q uint32 + switch (p.To.Reg >> 5) & 15 { + case ARNG_16B: + Q = 1 + imm5 = 1 + case ARNG_2D: + Q = 1 + imm5 = 8 + case ARNG_2S: + Q = 0 + imm5 = 4 + case ARNG_4H: + Q = 0 + imm5 = 2 + case ARNG_4S: + Q = 1 + imm5 = 4 + case ARNG_8B: + Q = 0 + imm5 = 1 + case ARNG_8H: + Q = 1 + imm5 = 2 + default: + c.ctxt.Diag("invalid arrangement: %v\n", p) + } + 
o1 |= (Q & 1 << 30) | (imm5 & 0x1f << 16) + o1 |= (uint32(rf&31) << 5) | uint32(rt&31) + + case 83: /* vmov Vn.<T>, Vd.<T> */ + af := int((p.From.Reg >> 5) & 15) + at := int((p.To.Reg >> 5) & 15) + if af != at { + c.ctxt.Diag("invalid arrangement: %v\n", p) + } + o1 = c.oprrr(p, p.As) + rf := int((p.From.Reg) & 31) + rt := int((p.To.Reg) & 31) + + var Q, size uint32 + switch af { + case ARNG_8B: + Q = 0 + size = 0 + case ARNG_16B: + Q = 1 + size = 0 + case ARNG_4H: + Q = 0 + size = 1 + case ARNG_8H: + Q = 1 + size = 1 + case ARNG_2S: + Q = 0 + size = 2 + case ARNG_4S: + Q = 1 + size = 2 + default: + c.ctxt.Diag("invalid arrangement: %v\n", p) + } + + if (p.As == AVMOV || p.As == AVRBIT || p.As == AVCNT) && (af != ARNG_16B && af != ARNG_8B) { + c.ctxt.Diag("invalid arrangement: %v", p) + } + + if p.As == AVREV32 && (af == ARNG_2S || af == ARNG_4S) { + c.ctxt.Diag("invalid arrangement: %v", p) + } + + if p.As == AVREV16 && af != ARNG_8B && af != ARNG_16B { + c.ctxt.Diag("invalid arrangement: %v", p) + } + + if p.As == AVMOV { + o1 |= uint32(rf&31) << 16 + } + + if p.As == AVRBIT { + size = 1 + } + + o1 |= (Q&1)<<30 | (size&3)<<22 | uint32(rf&31)<<5 | uint32(rt&31) + + case 84: /* vst[1-4] [Vt1.<T>, Vt2.<T>, ...], (Rn) */ + c.checkoffset(p, p.As) + r := int(p.To.Reg) + o1 = 3 << 26 + if o.scond == C_XPOST { + o1 |= 1 << 23 + if p.To.Index == 0 { + // immediate offset variant + o1 |= 0x1f << 16 + } else { + // register offset variant + if isRegShiftOrExt(&p.To) { + c.ctxt.Diag("invalid extended register: %v\n", p) + } + o1 |= uint32(p.To.Index&31) << 16 + } + } + o1 |= uint32(p.From.Offset) + // cmd/asm/internal/arch/arm64.go:ARM64RegisterListOffset + // add opcode(bit 12-15) for vst1, mask it off if it's not vst1 + o1 = c.maskOpvldvst(p, o1) + o1 |= uint32(r&31) << 5 + + case 85: /* vaddv/vuaddlv Vn.<T>, Vd*/ + af := int((p.From.Reg >> 5) & 15) + o1 = c.oprrr(p, p.As) + rf := int((p.From.Reg) & 31) + rt := int((p.To.Reg) & 31) + Q := 0 + size := 0 + switch af { + case 
ARNG_8B: + Q = 0 + size = 0 + case ARNG_16B: + Q = 1 + size = 0 + case ARNG_4H: + Q = 0 + size = 1 + case ARNG_8H: + Q = 1 + size = 1 + case ARNG_4S: + Q = 1 + size = 2 + default: + c.ctxt.Diag("invalid arrangement: %v\n", p) + } + o1 |= (uint32(Q&1) << 30) | (uint32(size&3) << 22) | (uint32(rf&31) << 5) | uint32(rt&31) + + case 86: /* vmovi $imm8, Vd.<T>*/ + at := int((p.To.Reg >> 5) & 15) + r := int(p.From.Offset) + if r > 255 || r < 0 { + c.ctxt.Diag("immediate constant out of range: %v\n", p) + } + rt := int((p.To.Reg) & 31) + Q := 0 + switch at { + case ARNG_8B: + Q = 0 + case ARNG_16B: + Q = 1 + default: + c.ctxt.Diag("invalid arrangement: %v\n", p) + } + o1 = 0xf<<24 | 0xe<<12 | 1<<10 + o1 |= (uint32(Q&1) << 30) | (uint32((r>>5)&7) << 16) | (uint32(r&0x1f) << 5) | uint32(rt&31) + + case 87: /* stp (r,r), addr(SB) -> adrp + add + stp */ + if p.From.Reg == REGTMP || p.From.Offset == REGTMP { + c.ctxt.Diag("cannot use REGTMP as source: %v", p) + } + o1 = ADR(1, 0, REGTMP) + o2 = c.opirr(p, AADD) | REGTMP&31<<5 | REGTMP&31 + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 8 + rel.Sym = p.To.Sym + rel.Add = p.To.Offset + rel.Type = objabi.R_ADDRARM64 + o3 |= c.opldpstp(p, o, 0, uint32(REGTMP), uint32(p.From.Reg), uint32(p.From.Offset), 0) + + case 88: /* ldp addr(SB), (r,r) -> adrp + add + ldp */ + o1 = ADR(1, 0, REGTMP) + o2 = c.opirr(p, AADD) | REGTMP&31<<5 | REGTMP&31 + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 8 + rel.Sym = p.From.Sym + rel.Add = p.From.Offset + rel.Type = objabi.R_ADDRARM64 + o3 |= c.opldpstp(p, o, 0, uint32(REGTMP), uint32(p.To.Reg), uint32(p.To.Offset), 1) + + case 89: /* vadd/vsub Vm, Vn, Vd */ + switch p.As { + case AVADD: + o1 = 5<<28 | 7<<25 | 7<<21 | 1<<15 | 1<<10 + + case AVSUB: + o1 = 7<<28 | 7<<25 | 7<<21 | 1<<15 | 1<<10 + + default: + c.ctxt.Diag("bad opcode: %v\n", p) + break + } + + rf := int(p.From.Reg) + rt := int(p.To.Reg) + r := int(p.Reg) + if r == 0 { + r = rt + } + o1 |= 
(uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31) + + // This is supposed to be something that stops execution. + // It's not supposed to be reached, ever, but if it is, we'd + // like to be able to tell how we got there. Assemble as + // 0xbea71700 which is guaranteed to raise undefined instruction + // exception. + case 90: + o1 = 0xbea71700 + + case 91: /* prfm imm(Rn), <prfop | $imm5> */ + imm := uint32(p.From.Offset) + r := p.From.Reg + var v uint32 + var ok bool + if p.To.Type == obj.TYPE_CONST { + v = uint32(p.To.Offset) + ok = v <= 31 + } else { + v, ok = prfopfield[SpecialOperand(p.To.Offset)] + } + if !ok { + c.ctxt.Diag("illegal prefetch operation:\n%v", p) + } + + o1 = c.opirr(p, p.As) + o1 |= (uint32(r&31) << 5) | (uint32((imm>>3)&0xfff) << 10) | (uint32(v & 31)) + + case 92: /* vmov Vn.<T>[index], Vd.<T>[index] */ + rf := int(p.From.Reg) + rt := int(p.To.Reg) + imm4 := 0 + imm5 := 0 + o1 = 3<<29 | 7<<25 | 1<<10 + index1 := int(p.To.Index) + index2 := int(p.From.Index) + if ((p.To.Reg >> 5) & 15) != ((p.From.Reg >> 5) & 15) { + c.ctxt.Diag("operand mismatch: %v", p) + } + switch (p.To.Reg >> 5) & 15 { + case ARNG_B: + c.checkindex(p, index1, 15) + c.checkindex(p, index2, 15) + imm5 |= 1 + imm5 |= index1 << 1 + imm4 |= index2 + case ARNG_H: + c.checkindex(p, index1, 7) + c.checkindex(p, index2, 7) + imm5 |= 2 + imm5 |= index1 << 2 + imm4 |= index2 << 1 + case ARNG_S: + c.checkindex(p, index1, 3) + c.checkindex(p, index2, 3) + imm5 |= 4 + imm5 |= index1 << 3 + imm4 |= index2 << 2 + case ARNG_D: + c.checkindex(p, index1, 1) + c.checkindex(p, index2, 1) + imm5 |= 8 + imm5 |= index1 << 4 + imm4 |= index2 << 3 + default: + c.ctxt.Diag("invalid arrangement: %v", p) + } + o1 |= (uint32(imm5&0x1f) << 16) | (uint32(imm4&0xf) << 11) | (uint32(rf&31) << 5) | uint32(rt&31) + + case 93: /* vpmull{2} Vm.<Tb>, Vn.<Tb>, Vd.<Ta> */ + af := uint8((p.From.Reg >> 5) & 15) + at := uint8((p.To.Reg >> 5) & 15) + a := uint8((p.Reg >> 5) & 15) + if af != a { + 
c.ctxt.Diag("invalid arrangement: %v", p) + } + + var Q, size uint32 + if p.As == AVPMULL2 { + Q = 1 + } + switch pack(Q, at, af) { + case pack(0, ARNG_8H, ARNG_8B), pack(1, ARNG_8H, ARNG_16B): + size = 0 + case pack(0, ARNG_1Q, ARNG_1D), pack(1, ARNG_1Q, ARNG_2D): + size = 3 + default: + c.ctxt.Diag("operand mismatch: %v\n", p) + } + + o1 = c.oprrr(p, p.As) + rf := int((p.From.Reg) & 31) + rt := int((p.To.Reg) & 31) + r := int((p.Reg) & 31) + o1 |= ((Q & 1) << 30) | ((size & 3) << 22) | (uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31) + + case 94: /* vext $imm4, Vm.<T>, Vn.<T>, Vd.<T> */ + af := int(((p.GetFrom3().Reg) >> 5) & 15) + at := int((p.To.Reg >> 5) & 15) + a := int((p.Reg >> 5) & 15) + index := int(p.From.Offset) + + if af != a || af != at { + c.ctxt.Diag("invalid arrangement: %v", p) + break + } + + var Q uint32 + var b int + if af == ARNG_8B { + Q = 0 + b = 7 + } else if af == ARNG_16B { + Q = 1 + b = 15 + } else { + c.ctxt.Diag("invalid arrangement, should be B8 or B16: %v", p) + break + } + + if index < 0 || index > b { + c.ctxt.Diag("illegal offset: %v", p) + } + + o1 = c.opirr(p, p.As) + rf := int((p.GetFrom3().Reg) & 31) + rt := int((p.To.Reg) & 31) + r := int((p.Reg) & 31) + + o1 |= ((Q & 1) << 30) | (uint32(r&31) << 16) | (uint32(index&15) << 11) | (uint32(rf&31) << 5) | uint32(rt&31) + + case 95: /* vushr/vshl/vsri/vsli/vusra $shift, Vn.<T>, Vd.<T> */ + at := int((p.To.Reg >> 5) & 15) + af := int((p.Reg >> 5) & 15) + shift := int(p.From.Offset) + + if af != at { + c.ctxt.Diag("invalid arrangement on op Vn.<T>, Vd.<T>: %v", p) + } + + var Q uint32 + var imax, esize int + + switch af { + case ARNG_8B, ARNG_4H, ARNG_2S: + Q = 0 + case ARNG_16B, ARNG_8H, ARNG_4S, ARNG_2D: + Q = 1 + default: + c.ctxt.Diag("invalid arrangement on op Vn.<T>, Vd.<T>: %v", p) + } + + switch af { + case ARNG_8B, ARNG_16B: + imax = 15 + esize = 8 + case ARNG_4H, ARNG_8H: + imax = 31 + esize = 16 + case ARNG_2S, ARNG_4S: + imax = 63 + esize = 32 + case ARNG_2D: 
+ imax = 127 + esize = 64 + } + + imm := 0 + switch p.As { + case AVUSHR, AVSRI, AVUSRA: + imm = esize*2 - shift + if imm < esize || imm > imax { + c.ctxt.Diag("shift out of range: %v", p) + } + case AVSHL, AVSLI: + imm = esize + shift + if imm > imax { + c.ctxt.Diag("shift out of range: %v", p) + } + default: + c.ctxt.Diag("invalid instruction %v\n", p) + } + + o1 = c.opirr(p, p.As) + rt := int((p.To.Reg) & 31) + rf := int((p.Reg) & 31) + + o1 |= ((Q & 1) << 30) | (uint32(imm&0x7f) << 16) | (uint32(rf&31) << 5) | uint32(rt&31) + + case 96: /* vst1 Vt1.<T>[index], offset(Rn) */ + af := int((p.From.Reg >> 5) & 15) + rt := int((p.From.Reg) & 31) + rf := int((p.To.Reg) & 31) + r := int(p.To.Index & 31) + index := int(p.From.Index) + offset := int32(c.regoff(&p.To)) + + if o.scond == C_XPOST { + if (p.To.Index != 0) && (offset != 0) { + c.ctxt.Diag("invalid offset: %v", p) + } + if p.To.Index == 0 && offset == 0 { + c.ctxt.Diag("invalid offset: %v", p) + } + } + + if offset != 0 { + r = 31 + } + + var Q, S, size int + var opcode uint32 + switch af { + case ARNG_B: + c.checkindex(p, index, 15) + if o.scond == C_XPOST && offset != 0 && offset != 1 { + c.ctxt.Diag("invalid offset: %v", p) + } + Q = index >> 3 + S = (index >> 2) & 1 + size = index & 3 + opcode = 0 + case ARNG_H: + c.checkindex(p, index, 7) + if o.scond == C_XPOST && offset != 0 && offset != 2 { + c.ctxt.Diag("invalid offset: %v", p) + } + Q = index >> 2 + S = (index >> 1) & 1 + size = (index & 1) << 1 + opcode = 2 + case ARNG_S: + c.checkindex(p, index, 3) + if o.scond == C_XPOST && offset != 0 && offset != 4 { + c.ctxt.Diag("invalid offset: %v", p) + } + Q = index >> 1 + S = index & 1 + size = 0 + opcode = 4 + case ARNG_D: + c.checkindex(p, index, 1) + if o.scond == C_XPOST && offset != 0 && offset != 8 { + c.ctxt.Diag("invalid offset: %v", p) + } + Q = index + S = 0 + size = 1 + opcode = 4 + default: + c.ctxt.Diag("invalid arrangement: %v", p) + } + + if o.scond == C_XPOST { + o1 |= 27 << 23 + } else { + 
o1 |= 26 << 23 + } + + o1 |= (uint32(Q&1) << 30) | (uint32(r&31) << 16) | ((opcode & 7) << 13) | (uint32(S&1) << 12) | (uint32(size&3) << 10) | (uint32(rf&31) << 5) | uint32(rt&31) + + case 97: /* vld1 offset(Rn), vt.<T>[index] */ + at := int((p.To.Reg >> 5) & 15) + rt := int((p.To.Reg) & 31) + rf := int((p.From.Reg) & 31) + r := int(p.From.Index & 31) + index := int(p.To.Index) + offset := int32(c.regoff(&p.From)) + + if o.scond == C_XPOST { + if (p.From.Index != 0) && (offset != 0) { + c.ctxt.Diag("invalid offset: %v", p) + } + if p.From.Index == 0 && offset == 0 { + c.ctxt.Diag("invalid offset: %v", p) + } + } + + if offset != 0 { + r = 31 + } + + Q := 0 + S := 0 + size := 0 + var opcode uint32 + switch at { + case ARNG_B: + c.checkindex(p, index, 15) + if o.scond == C_XPOST && offset != 0 && offset != 1 { + c.ctxt.Diag("invalid offset: %v", p) + } + Q = index >> 3 + S = (index >> 2) & 1 + size = index & 3 + opcode = 0 + case ARNG_H: + c.checkindex(p, index, 7) + if o.scond == C_XPOST && offset != 0 && offset != 2 { + c.ctxt.Diag("invalid offset: %v", p) + } + Q = index >> 2 + S = (index >> 1) & 1 + size = (index & 1) << 1 + opcode = 2 + case ARNG_S: + c.checkindex(p, index, 3) + if o.scond == C_XPOST && offset != 0 && offset != 4 { + c.ctxt.Diag("invalid offset: %v", p) + } + Q = index >> 1 + S = index & 1 + size = 0 + opcode = 4 + case ARNG_D: + c.checkindex(p, index, 1) + if o.scond == C_XPOST && offset != 0 && offset != 8 { + c.ctxt.Diag("invalid offset: %v", p) + } + Q = index + S = 0 + size = 1 + opcode = 4 + default: + c.ctxt.Diag("invalid arrangement: %v", p) + } + + if o.scond == C_XPOST { + o1 |= 110 << 21 + } else { + o1 |= 106 << 21 + } + + o1 |= (uint32(Q&1) << 30) | (uint32(r&31) << 16) | ((opcode & 7) << 13) | (uint32(S&1) << 12) | (uint32(size&3) << 10) | (uint32(rf&31) << 5) | uint32(rt&31) + + case 98: /* MOVD (Rn)(Rm.SXTW[<<amount]),Rd */ + if isRegShiftOrExt(&p.From) { + // extended or shifted offset register. 
+ c.checkShiftAmount(p, &p.From) + + o1 = c.opldrr(p, p.As, true) + o1 |= c.encRegShiftOrExt(p, &p.From, p.From.Index) /* includes reg, op, etc */ + } else { + // (Rn)(Rm), no extension or shift. + o1 = c.opldrr(p, p.As, false) + o1 |= uint32(p.From.Index&31) << 16 + } + o1 |= uint32(p.From.Reg&31) << 5 + rt := int(p.To.Reg) + o1 |= uint32(rt & 31) + + case 99: /* MOVD Rt, (Rn)(Rm.SXTW[<<amount]) */ + if isRegShiftOrExt(&p.To) { + // extended or shifted offset register. + c.checkShiftAmount(p, &p.To) + + o1 = c.opstrr(p, p.As, true) + o1 |= c.encRegShiftOrExt(p, &p.To, p.To.Index) /* includes reg, op, etc */ + } else { + // (Rn)(Rm), no extension or shift. + o1 = c.opstrr(p, p.As, false) + o1 |= uint32(p.To.Index&31) << 16 + } + o1 |= uint32(p.To.Reg&31) << 5 + rf := int(p.From.Reg) + o1 |= uint32(rf & 31) + + case 100: /* VTBL/VTBX Vn.<T>, [Vt1.<T>, Vt2.<T>, ...], Vd.<T> */ + af := int((p.From.Reg >> 5) & 15) + at := int((p.To.Reg >> 5) & 15) + if af != at { + c.ctxt.Diag("invalid arrangement: %v\n", p) + } + var q, len uint32 + switch af { + case ARNG_8B: + q = 0 + case ARNG_16B: + q = 1 + default: + c.ctxt.Diag("invalid arrangement: %v", p) + } + rf := int(p.From.Reg) + rt := int(p.To.Reg) + offset := int(p.GetFrom3().Offset) + opcode := (offset >> 12) & 15 + switch opcode { + case 0x7: + len = 0 // one register + case 0xa: + len = 1 // two register + case 0x6: + len = 2 // three registers + case 0x2: + len = 3 // four registers + default: + c.ctxt.Diag("invalid register numbers in ARM64 register list: %v", p) + } + var op uint32 + switch p.As { + case AVTBL: + op = 0 + case AVTBX: + op = 1 + } + o1 = q<<30 | 0xe<<24 | len<<13 | op<<12 + o1 |= (uint32(rf&31) << 16) | uint32(offset&31)<<5 | uint32(rt&31) + + case 101: // VMOVQ $vcon1, $vcon2, Vd or VMOVD|VMOVS $vcon, Vd -> FMOVQ/FMOVD/FMOVS pool(PC), Vd: load from constant pool. 
+ o1 = c.omovlit(p.As, p, &p.From, int(p.To.Reg)) + + case 102: /* vushll, vushll2, vuxtl, vuxtl2 */ + o1 = c.opirr(p, p.As) + rf := p.Reg + af := uint8((p.Reg >> 5) & 15) + at := uint8((p.To.Reg >> 5) & 15) + shift := int(p.From.Offset) + if p.As == AVUXTL || p.As == AVUXTL2 { + rf = p.From.Reg + af = uint8((p.From.Reg >> 5) & 15) + shift = 0 + } + + Q := (o1 >> 30) & 1 + var immh, width uint8 + switch pack(Q, af, at) { + case pack(0, ARNG_8B, ARNG_8H): + immh, width = 1, 8 + case pack(1, ARNG_16B, ARNG_8H): + immh, width = 1, 8 + case pack(0, ARNG_4H, ARNG_4S): + immh, width = 2, 16 + case pack(1, ARNG_8H, ARNG_4S): + immh, width = 2, 16 + case pack(0, ARNG_2S, ARNG_2D): + immh, width = 4, 32 + case pack(1, ARNG_4S, ARNG_2D): + immh, width = 4, 32 + default: + c.ctxt.Diag("operand mismatch: %v\n", p) + } + if !(0 <= shift && shift <= int(width-1)) { + c.ctxt.Diag("shift amount out of range: %v\n", p) + } + o1 |= uint32(immh)<<19 | uint32(shift)<<16 | uint32(rf&31)<<5 | uint32(p.To.Reg&31) + + case 103: /* VEOR3/VBCAX Va.B16, Vm.B16, Vn.B16, Vd.B16 */ + ta := (p.From.Reg >> 5) & 15 + tm := (p.Reg >> 5) & 15 + td := (p.To.Reg >> 5) & 15 + tn := ((p.GetFrom3().Reg) >> 5) & 15 + + if ta != tm || ta != tn || ta != td || ta != ARNG_16B { + c.ctxt.Diag("invalid arrangement: %v", p) + break + } + + o1 = c.oprrr(p, p.As) + ra := int(p.From.Reg) + rm := int(p.Reg) + rn := int(p.GetFrom3().Reg) + rd := int(p.To.Reg) + o1 |= uint32(rm&31)<<16 | uint32(ra&31)<<10 | uint32(rn&31)<<5 | uint32(rd)&31 + + case 104: /* vxar $imm4, Vm.<T>, Vn.<T>, Vd.<T> */ + af := ((p.GetFrom3().Reg) >> 5) & 15 + at := (p.To.Reg >> 5) & 15 + a := (p.Reg >> 5) & 15 + index := int(p.From.Offset) + + if af != a || af != at { + c.ctxt.Diag("invalid arrangement: %v", p) + break + } + + if af != ARNG_2D { + c.ctxt.Diag("invalid arrangement, should be D2: %v", p) + break + } + + if index < 0 || index > 63 { + c.ctxt.Diag("illegal offset: %v", p) + } + + o1 = c.opirr(p, p.As) + rf := (p.GetFrom3().Reg) & 
31 + rt := (p.To.Reg) & 31 + r := (p.Reg) & 31 + + o1 |= (uint32(r&31) << 16) | (uint32(index&63) << 10) | (uint32(rf&31) << 5) | uint32(rt&31) + + case 105: /* vuaddw{2} Vm.<Tb>, Vn.<Ta>, Vd.<Ta> */ + af := uint8((p.From.Reg >> 5) & 15) + at := uint8((p.To.Reg >> 5) & 15) + a := uint8((p.Reg >> 5) & 15) + if at != a { + c.ctxt.Diag("invalid arrangement: %v", p) + break + } + + var Q, size uint32 + if p.As == AVUADDW2 { + Q = 1 + } + switch pack(Q, at, af) { + case pack(0, ARNG_8H, ARNG_8B), pack(1, ARNG_8H, ARNG_16B): + size = 0 + case pack(0, ARNG_4S, ARNG_4H), pack(1, ARNG_4S, ARNG_8H): + size = 1 + case pack(0, ARNG_2D, ARNG_2S), pack(1, ARNG_2D, ARNG_4S): + size = 2 + default: + c.ctxt.Diag("operand mismatch: %v\n", p) + } + + o1 = c.oprrr(p, p.As) + rf := int((p.From.Reg) & 31) + rt := int((p.To.Reg) & 31) + r := int((p.Reg) & 31) + o1 |= ((Q & 1) << 30) | ((size & 3) << 22) | (uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31) + + case 106: // CASPx (Rs, Rs+1), (Rb), (Rt, Rt+1) + rs := p.From.Reg + rt := p.GetTo2().Reg + rb := p.To.Reg + rs1 := int16(p.From.Offset) + rt1 := int16(p.GetTo2().Offset) + + enc, ok := atomicCASP[p.As] + if !ok { + c.ctxt.Diag("invalid CASP-like atomic instructions: %v\n", p) + } + // for CASPx-like instructions, Rs<0> != 1 && Rt<0> != 1 + switch { + case rs&1 != 0: + c.ctxt.Diag("source register pair must start from even register: %v\n", p) + break + case rt&1 != 0: + c.ctxt.Diag("destination register pair must start from even register: %v\n", p) + break + case rs != rs1-1: + c.ctxt.Diag("source register pair must be contiguous: %v\n", p) + break + case rt != rt1-1: + c.ctxt.Diag("destination register pair must be contiguous: %v\n", p) + break + } + // rt can't be sp. 
+ if rt == REG_RSP { + c.ctxt.Diag("illegal destination register: %v\n", p) + } + o1 |= enc | uint32(rs&31)<<16 | uint32(rb&31)<<5 | uint32(rt&31) + + case 107: /* tlbi, dc */ + op, ok := sysInstFields[SpecialOperand(p.From.Offset)] + if !ok || (p.As == ATLBI && op.cn != 8) || (p.As == ADC && op.cn != 7) { + c.ctxt.Diag("illegal argument: %v\n", p) + break + } + o1 = c.opirr(p, p.As) + if op.hasOperand2 { + if p.To.Reg == 0 { + c.ctxt.Diag("missing register at operand 2: %v\n", p) + } + o1 |= uint32(p.To.Reg & 0x1F) + } else { + if p.To.Reg != 0 || p.Reg != 0 { + c.ctxt.Diag("extraneous register at operand 2: %v\n", p) + } + o1 |= uint32(0x1F) + } + o1 |= uint32(SYSARG4(int(op.op1), int(op.cn), int(op.cm), int(op.op2))) + } + out[0] = o1 + out[1] = o2 + out[2] = o3 + out[3] = o4 + out[4] = o5 +} + +func (c *ctxt7) addrRelocType(p *obj.Prog) objabi.RelocType { + switch movesize(p.As) { + case 0: + return objabi.R_ARM64_PCREL_LDST8 + case 1: + return objabi.R_ARM64_PCREL_LDST16 + case 2: + return objabi.R_ARM64_PCREL_LDST32 + case 3: + return objabi.R_ARM64_PCREL_LDST64 + default: + c.ctxt.Diag("use R_ADDRARM64 relocation type for: %v\n", p) + } + return -1 +} + +/* + * basic Rm op Rn -> Rd (using shifted register with 0) + * also op Rn -> Rt + * also Rm*Rn op Ra -> Rd + * also Vm op Vn -> Vd + */ +func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 { + switch a { + case AADC: + return S64 | 0<<30 | 0<<29 | 0xd0<<21 | 0<<10 + + case AADCW: + return S32 | 0<<30 | 0<<29 | 0xd0<<21 | 0<<10 + + case AADCS: + return S64 | 0<<30 | 1<<29 | 0xd0<<21 | 0<<10 + + case AADCSW: + return S32 | 0<<30 | 1<<29 | 0xd0<<21 | 0<<10 + + case ANGC, ASBC: + return S64 | 1<<30 | 0<<29 | 0xd0<<21 | 0<<10 + + case ANGCS, ASBCS: + return S64 | 1<<30 | 1<<29 | 0xd0<<21 | 0<<10 + + case ANGCW, ASBCW: + return S32 | 1<<30 | 0<<29 | 0xd0<<21 | 0<<10 + + case ANGCSW, ASBCSW: + return S32 | 1<<30 | 1<<29 | 0xd0<<21 | 0<<10 + + case AADD: + return S64 | 0<<30 | 0<<29 | 0x0b<<24 | 0<<22 | 0<<21 | 
0<<10 + + case AADDW: + return S32 | 0<<30 | 0<<29 | 0x0b<<24 | 0<<22 | 0<<21 | 0<<10 + + case ACMN, AADDS: + return S64 | 0<<30 | 1<<29 | 0x0b<<24 | 0<<22 | 0<<21 | 0<<10 + + case ACMNW, AADDSW: + return S32 | 0<<30 | 1<<29 | 0x0b<<24 | 0<<22 | 0<<21 | 0<<10 + + case ASUB: + return S64 | 1<<30 | 0<<29 | 0x0b<<24 | 0<<22 | 0<<21 | 0<<10 + + case ASUBW: + return S32 | 1<<30 | 0<<29 | 0x0b<<24 | 0<<22 | 0<<21 | 0<<10 + + case ACMP, ASUBS: + return S64 | 1<<30 | 1<<29 | 0x0b<<24 | 0<<22 | 0<<21 | 0<<10 + + case ACMPW, ASUBSW: + return S32 | 1<<30 | 1<<29 | 0x0b<<24 | 0<<22 | 0<<21 | 0<<10 + + case AAND: + return S64 | 0<<29 | 0xA<<24 + + case AANDW: + return S32 | 0<<29 | 0xA<<24 + + case AMOVD, AORR: + return S64 | 1<<29 | 0xA<<24 + + // case AMOVW: + case AMOVWU, AORRW: + return S32 | 1<<29 | 0xA<<24 + + case AEOR: + return S64 | 2<<29 | 0xA<<24 + + case AEORW: + return S32 | 2<<29 | 0xA<<24 + + case AANDS, ATST: + return S64 | 3<<29 | 0xA<<24 + + case AANDSW, ATSTW: + return S32 | 3<<29 | 0xA<<24 + + case ABIC: + return S64 | 0<<29 | 0xA<<24 | 1<<21 + + case ABICW: + return S32 | 0<<29 | 0xA<<24 | 1<<21 + + case ABICS: + return S64 | 3<<29 | 0xA<<24 | 1<<21 + + case ABICSW: + return S32 | 3<<29 | 0xA<<24 | 1<<21 + + case AEON: + return S64 | 2<<29 | 0xA<<24 | 1<<21 + + case AEONW: + return S32 | 2<<29 | 0xA<<24 | 1<<21 + + case AMVN, AORN: + return S64 | 1<<29 | 0xA<<24 | 1<<21 + + case AMVNW, AORNW: + return S32 | 1<<29 | 0xA<<24 | 1<<21 + + case AASR: + return S64 | OPDP2(10) /* also ASRV */ + + case AASRW: + return S32 | OPDP2(10) + + case ALSL: + return S64 | OPDP2(8) + + case ALSLW: + return S32 | OPDP2(8) + + case ALSR: + return S64 | OPDP2(9) + + case ALSRW: + return S32 | OPDP2(9) + + case AROR: + return S64 | OPDP2(11) + + case ARORW: + return S32 | OPDP2(11) + + case ACCMN: + return S64 | 0<<30 | 1<<29 | 0xD2<<21 | 0<<11 | 0<<10 | 0<<4 /* cond<<12 | nzcv<<0 */ + + case ACCMNW: + return S32 | 0<<30 | 1<<29 | 0xD2<<21 | 0<<11 | 0<<10 | 0<<4 + + case ACCMP: 
+ return S64 | 1<<30 | 1<<29 | 0xD2<<21 | 0<<11 | 0<<10 | 0<<4 /* imm5<<16 | cond<<12 | nzcv<<0 */ + + case ACCMPW: + return S32 | 1<<30 | 1<<29 | 0xD2<<21 | 0<<11 | 0<<10 | 0<<4 + + case ACRC32B: + return S32 | OPDP2(16) + + case ACRC32H: + return S32 | OPDP2(17) + + case ACRC32W: + return S32 | OPDP2(18) + + case ACRC32X: + return S64 | OPDP2(19) + + case ACRC32CB: + return S32 | OPDP2(20) + + case ACRC32CH: + return S32 | OPDP2(21) + + case ACRC32CW: + return S32 | OPDP2(22) + + case ACRC32CX: + return S64 | OPDP2(23) + + case ACSEL: + return S64 | 0<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 0<<10 + + case ACSELW: + return S32 | 0<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 0<<10 + + case ACSET: + return S64 | 0<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 1<<10 + + case ACSETW: + return S32 | 0<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 1<<10 + + case ACSETM: + return S64 | 1<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 0<<10 + + case ACSETMW: + return S32 | 1<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 0<<10 + + case ACINC, ACSINC: + return S64 | 0<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 1<<10 + + case ACINCW, ACSINCW: + return S32 | 0<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 1<<10 + + case ACINV, ACSINV: + return S64 | 1<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 0<<10 + + case ACINVW, ACSINVW: + return S32 | 1<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 0<<10 + + case ACNEG, ACSNEG: + return S64 | 1<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 1<<10 + + case ACNEGW, ACSNEGW: + return S32 | 1<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 1<<10 + + case AMUL, AMADD: + return S64 | 0<<29 | 0x1B<<24 | 0<<21 | 0<<15 + + case AMULW, AMADDW: + return S32 | 0<<29 | 0x1B<<24 | 0<<21 | 0<<15 + + case AMNEG, AMSUB: + return S64 | 0<<29 | 0x1B<<24 | 0<<21 | 1<<15 + + case AMNEGW, AMSUBW: + return S32 | 0<<29 | 0x1B<<24 | 0<<21 | 1<<15 + + case AMRS: + return SYSOP(1, 2, 0, 0, 0, 0, 0) + + case AMSR: + return SYSOP(0, 2, 0, 0, 0, 0, 0) + + case ANEG: + return S64 | 1<<30 | 0<<29 | 0xB<<24 | 0<<21 + + case ANEGW: + return S32 | 1<<30 | 0<<29 | 0xB<<24 | 0<<21 + + case ANEGS: + return S64 | 1<<30 
| 1<<29 | 0xB<<24 | 0<<21 + + case ANEGSW: + return S32 | 1<<30 | 1<<29 | 0xB<<24 | 0<<21 + + case AREM, ASDIV: + return S64 | OPDP2(3) + + case AREMW, ASDIVW: + return S32 | OPDP2(3) + + case ASMULL, ASMADDL: + return OPDP3(1, 0, 1, 0) + + case ASMNEGL, ASMSUBL: + return OPDP3(1, 0, 1, 1) + + case ASMULH: + return OPDP3(1, 0, 2, 0) + + case AUMULL, AUMADDL: + return OPDP3(1, 0, 5, 0) + + case AUMNEGL, AUMSUBL: + return OPDP3(1, 0, 5, 1) + + case AUMULH: + return OPDP3(1, 0, 6, 0) + + case AUREM, AUDIV: + return S64 | OPDP2(2) + + case AUREMW, AUDIVW: + return S32 | OPDP2(2) + + case AAESE: + return 0x4E<<24 | 2<<20 | 8<<16 | 4<<12 | 2<<10 + + case AAESD: + return 0x4E<<24 | 2<<20 | 8<<16 | 5<<12 | 2<<10 + + case AAESMC: + return 0x4E<<24 | 2<<20 | 8<<16 | 6<<12 | 2<<10 + + case AAESIMC: + return 0x4E<<24 | 2<<20 | 8<<16 | 7<<12 | 2<<10 + + case ASHA1C: + return 0x5E<<24 | 0<<12 + + case ASHA1P: + return 0x5E<<24 | 1<<12 + + case ASHA1M: + return 0x5E<<24 | 2<<12 + + case ASHA1SU0: + return 0x5E<<24 | 3<<12 + + case ASHA256H: + return 0x5E<<24 | 4<<12 + + case ASHA256H2: + return 0x5E<<24 | 5<<12 + + case ASHA256SU1: + return 0x5E<<24 | 6<<12 + + case ASHA1H: + return 0x5E<<24 | 2<<20 | 8<<16 | 0<<12 | 2<<10 + + case ASHA1SU1: + return 0x5E<<24 | 2<<20 | 8<<16 | 1<<12 | 2<<10 + + case ASHA256SU0: + return 0x5E<<24 | 2<<20 | 8<<16 | 2<<12 | 2<<10 + + case ASHA512H: + return 0xCE<<24 | 3<<21 | 8<<12 + + case ASHA512H2: + return 0xCE<<24 | 3<<21 | 8<<12 | 4<<8 + + case ASHA512SU1: + return 0xCE<<24 | 3<<21 | 8<<12 | 8<<8 + + case ASHA512SU0: + return 0xCE<<24 | 3<<22 | 8<<12 + + case AFCVTZSD: + return FPCVTI(1, 0, 1, 3, 0) + + case AFCVTZSDW: + return FPCVTI(0, 0, 1, 3, 0) + + case AFCVTZSS: + return FPCVTI(1, 0, 0, 3, 0) + + case AFCVTZSSW: + return FPCVTI(0, 0, 0, 3, 0) + + case AFCVTZUD: + return FPCVTI(1, 0, 1, 3, 1) + + case AFCVTZUDW: + return FPCVTI(0, 0, 1, 3, 1) + + case AFCVTZUS: + return FPCVTI(1, 0, 0, 3, 1) + + case AFCVTZUSW: + return FPCVTI(0, 0, 0, 3, 
1) + + case ASCVTFD: + return FPCVTI(1, 0, 1, 0, 2) + + case ASCVTFS: + return FPCVTI(1, 0, 0, 0, 2) + + case ASCVTFWD: + return FPCVTI(0, 0, 1, 0, 2) + + case ASCVTFWS: + return FPCVTI(0, 0, 0, 0, 2) + + case AUCVTFD: + return FPCVTI(1, 0, 1, 0, 3) + + case AUCVTFS: + return FPCVTI(1, 0, 0, 0, 3) + + case AUCVTFWD: + return FPCVTI(0, 0, 1, 0, 3) + + case AUCVTFWS: + return FPCVTI(0, 0, 0, 0, 3) + + case AFADDS: + return FPOP2S(0, 0, 0, 2) + + case AFADDD: + return FPOP2S(0, 0, 1, 2) + + case AFSUBS: + return FPOP2S(0, 0, 0, 3) + + case AFSUBD: + return FPOP2S(0, 0, 1, 3) + + case AFMADDD: + return FPOP3S(0, 0, 1, 0, 0) + + case AFMADDS: + return FPOP3S(0, 0, 0, 0, 0) + + case AFMSUBD: + return FPOP3S(0, 0, 1, 0, 1) + + case AFMSUBS: + return FPOP3S(0, 0, 0, 0, 1) + + case AFNMADDD: + return FPOP3S(0, 0, 1, 1, 0) + + case AFNMADDS: + return FPOP3S(0, 0, 0, 1, 0) + + case AFNMSUBD: + return FPOP3S(0, 0, 1, 1, 1) + + case AFNMSUBS: + return FPOP3S(0, 0, 0, 1, 1) + + case AFMULS: + return FPOP2S(0, 0, 0, 0) + + case AFMULD: + return FPOP2S(0, 0, 1, 0) + + case AFDIVS: + return FPOP2S(0, 0, 0, 1) + + case AFDIVD: + return FPOP2S(0, 0, 1, 1) + + case AFMAXS: + return FPOP2S(0, 0, 0, 4) + + case AFMINS: + return FPOP2S(0, 0, 0, 5) + + case AFMAXD: + return FPOP2S(0, 0, 1, 4) + + case AFMIND: + return FPOP2S(0, 0, 1, 5) + + case AFMAXNMS: + return FPOP2S(0, 0, 0, 6) + + case AFMAXNMD: + return FPOP2S(0, 0, 1, 6) + + case AFMINNMS: + return FPOP2S(0, 0, 0, 7) + + case AFMINNMD: + return FPOP2S(0, 0, 1, 7) + + case AFNMULS: + return FPOP2S(0, 0, 0, 8) + + case AFNMULD: + return FPOP2S(0, 0, 1, 8) + + case AFCMPS: + return FPCMP(0, 0, 0, 0, 0) + + case AFCMPD: + return FPCMP(0, 0, 1, 0, 0) + + case AFCMPES: + return FPCMP(0, 0, 0, 0, 16) + + case AFCMPED: + return FPCMP(0, 0, 1, 0, 16) + + case AFCCMPS: + return FPCCMP(0, 0, 0, 0) + + case AFCCMPD: + return FPCCMP(0, 0, 1, 0) + + case AFCCMPES: + return FPCCMP(0, 0, 0, 1) + + case AFCCMPED: + return FPCCMP(0, 0, 1, 1) + + 
case AFCSELS: + return 0x1E<<24 | 0<<22 | 1<<21 | 3<<10 + + case AFCSELD: + return 0x1E<<24 | 1<<22 | 1<<21 | 3<<10 + + case AFMOVS: + return FPOP1S(0, 0, 0, 0) + + case AFABSS: + return FPOP1S(0, 0, 0, 1) + + case AFNEGS: + return FPOP1S(0, 0, 0, 2) + + case AFSQRTS: + return FPOP1S(0, 0, 0, 3) + + case AFCVTSD: + return FPOP1S(0, 0, 0, 5) + + case AFCVTSH: + return FPOP1S(0, 0, 0, 7) + + case AFRINTNS: + return FPOP1S(0, 0, 0, 8) + + case AFRINTPS: + return FPOP1S(0, 0, 0, 9) + + case AFRINTMS: + return FPOP1S(0, 0, 0, 10) + + case AFRINTZS: + return FPOP1S(0, 0, 0, 11) + + case AFRINTAS: + return FPOP1S(0, 0, 0, 12) + + case AFRINTXS: + return FPOP1S(0, 0, 0, 14) + + case AFRINTIS: + return FPOP1S(0, 0, 0, 15) + + case AFMOVD: + return FPOP1S(0, 0, 1, 0) + + case AFABSD: + return FPOP1S(0, 0, 1, 1) + + case AFNEGD: + return FPOP1S(0, 0, 1, 2) + + case AFSQRTD: + return FPOP1S(0, 0, 1, 3) + + case AFCVTDS: + return FPOP1S(0, 0, 1, 4) + + case AFCVTDH: + return FPOP1S(0, 0, 1, 7) + + case AFRINTND: + return FPOP1S(0, 0, 1, 8) + + case AFRINTPD: + return FPOP1S(0, 0, 1, 9) + + case AFRINTMD: + return FPOP1S(0, 0, 1, 10) + + case AFRINTZD: + return FPOP1S(0, 0, 1, 11) + + case AFRINTAD: + return FPOP1S(0, 0, 1, 12) + + case AFRINTXD: + return FPOP1S(0, 0, 1, 14) + + case AFRINTID: + return FPOP1S(0, 0, 1, 15) + + case AFCVTHS: + return FPOP1S(0, 0, 3, 4) + + case AFCVTHD: + return FPOP1S(0, 0, 3, 5) + + case AVADD: + return 7<<25 | 1<<21 | 1<<15 | 1<<10 + + case AVSUB: + return 0x17<<25 | 1<<21 | 1<<15 | 1<<10 + + case AVADDP: + return 7<<25 | 1<<21 | 1<<15 | 15<<10 + + case AVAND: + return 7<<25 | 1<<21 | 7<<10 + + case AVBCAX: + return 0xCE<<24 | 1<<21 + + case AVCMEQ: + return 1<<29 | 0x71<<21 | 0x23<<10 + + case AVCNT: + return 0xE<<24 | 0x10<<17 | 5<<12 | 2<<10 + + case AVZIP1: + return 0xE<<24 | 3<<12 | 2<<10 + + case AVZIP2: + return 0xE<<24 | 1<<14 | 3<<12 | 2<<10 + + case AVEOR: + return 1<<29 | 0x71<<21 | 7<<10 + + case AVEOR3: + return 0xCE << 24 + + case 
AVORR: + return 7<<25 | 5<<21 | 7<<10 + + case AVREV16: + return 3<<26 | 2<<24 | 1<<21 | 3<<11 + + case AVRAX1: + return 0xCE<<24 | 3<<21 | 1<<15 | 3<<10 + + case AVREV32: + return 11<<26 | 2<<24 | 1<<21 | 1<<11 + + case AVREV64: + return 3<<26 | 2<<24 | 1<<21 | 1<<11 + + case AVMOV: + return 7<<25 | 5<<21 | 7<<10 + + case AVADDV: + return 7<<25 | 3<<20 | 3<<15 | 7<<11 + + case AVUADDLV: + return 1<<29 | 7<<25 | 3<<20 | 7<<11 + + case AVFMLA: + return 7<<25 | 0<<23 | 1<<21 | 3<<14 | 3<<10 + + case AVFMLS: + return 7<<25 | 1<<23 | 1<<21 | 3<<14 | 3<<10 + + case AVPMULL, AVPMULL2: + return 0xE<<24 | 1<<21 | 0x38<<10 + + case AVRBIT: + return 0x2E<<24 | 1<<22 | 0x10<<17 | 5<<12 | 2<<10 + + case AVLD1, AVLD2, AVLD3, AVLD4: + return 3<<26 | 1<<22 + + case AVLD1R, AVLD3R: + return 0xD<<24 | 1<<22 + + case AVLD2R, AVLD4R: + return 0xD<<24 | 3<<21 + + case AVBIF: + return 1<<29 | 7<<25 | 7<<21 | 7<<10 + + case AVBIT: + return 1<<29 | 0x75<<21 | 7<<10 + + case AVBSL: + return 1<<29 | 0x73<<21 | 7<<10 + + case AVCMTST: + return 0xE<<24 | 1<<21 | 0x23<<10 + + case AVUMAX: + return 1<<29 | 7<<25 | 1<<21 | 0x19<<10 + + case AVUMIN: + return 1<<29 | 7<<25 | 1<<21 | 0x1b<<10 + + case AVUZP1: + return 7<<25 | 3<<11 + + case AVUZP2: + return 7<<25 | 1<<14 | 3<<11 + + case AVUADDW, AVUADDW2: + return 0x17<<25 | 1<<21 | 1<<12 + + case AVTRN1: + return 7<<25 | 5<<11 + + case AVTRN2: + return 7<<25 | 1<<14 | 5<<11 + } + + c.ctxt.Diag("%v: bad rrr %d %v", p, a, a) + return 0 +} + +/* + * imm -> Rd + * imm op Rn -> Rd + */ +func (c *ctxt7) opirr(p *obj.Prog, a obj.As) uint32 { + switch a { + /* op $addcon, Rn, Rd */ + case AMOVD, AADD: + return S64 | 0<<30 | 0<<29 | 0x11<<24 + + case ACMN, AADDS: + return S64 | 0<<30 | 1<<29 | 0x11<<24 + + case AMOVW, AADDW: + return S32 | 0<<30 | 0<<29 | 0x11<<24 + + case ACMNW, AADDSW: + return S32 | 0<<30 | 1<<29 | 0x11<<24 + + case ASUB: + return S64 | 1<<30 | 0<<29 | 0x11<<24 + + case ACMP, ASUBS: + return S64 | 1<<30 | 1<<29 | 0x11<<24 + + case 
ASUBW: + return S32 | 1<<30 | 0<<29 | 0x11<<24 + + case ACMPW, ASUBSW: + return S32 | 1<<30 | 1<<29 | 0x11<<24 + + /* op $imm(SB), Rd; op label, Rd */ + case AADR: + return 0<<31 | 0x10<<24 + + case AADRP: + return 1<<31 | 0x10<<24 + + /* op $bimm, Rn, Rd */ + case AAND, ABIC: + return S64 | 0<<29 | 0x24<<23 + + case AANDW, ABICW: + return S32 | 0<<29 | 0x24<<23 | 0<<22 + + case AORR, AORN: + return S64 | 1<<29 | 0x24<<23 + + case AORRW, AORNW: + return S32 | 1<<29 | 0x24<<23 | 0<<22 + + case AEOR, AEON: + return S64 | 2<<29 | 0x24<<23 + + case AEORW, AEONW: + return S32 | 2<<29 | 0x24<<23 | 0<<22 + + case AANDS, ABICS, ATST: + return S64 | 3<<29 | 0x24<<23 + + case AANDSW, ABICSW, ATSTW: + return S32 | 3<<29 | 0x24<<23 | 0<<22 + + case AASR: + return S64 | 0<<29 | 0x26<<23 /* alias of SBFM */ + + case AASRW: + return S32 | 0<<29 | 0x26<<23 | 0<<22 + + /* op $width, $lsb, Rn, Rd */ + case ABFI: + return S64 | 2<<29 | 0x26<<23 | 1<<22 + /* alias of BFM */ + + case ABFIW: + return S32 | 2<<29 | 0x26<<23 | 0<<22 + + /* op $imms, $immr, Rn, Rd */ + case ABFM: + return S64 | 1<<29 | 0x26<<23 | 1<<22 + + case ABFMW: + return S32 | 1<<29 | 0x26<<23 | 0<<22 + + case ASBFM: + return S64 | 0<<29 | 0x26<<23 | 1<<22 + + case ASBFMW: + return S32 | 0<<29 | 0x26<<23 | 0<<22 + + case AUBFM: + return S64 | 2<<29 | 0x26<<23 | 1<<22 + + case AUBFMW: + return S32 | 2<<29 | 0x26<<23 | 0<<22 + + case ABFXIL: + return S64 | 1<<29 | 0x26<<23 | 1<<22 /* alias of BFM */ + + case ABFXILW: + return S32 | 1<<29 | 0x26<<23 | 0<<22 + + case AEXTR: + return S64 | 0<<29 | 0x27<<23 | 1<<22 | 0<<21 + + case AEXTRW: + return S32 | 0<<29 | 0x27<<23 | 0<<22 | 0<<21 + + case ACBNZ: + return S64 | 0x1A<<25 | 1<<24 + + case ACBNZW: + return S32 | 0x1A<<25 | 1<<24 + + case ACBZ: + return S64 | 0x1A<<25 | 0<<24 + + case ACBZW: + return S32 | 0x1A<<25 | 0<<24 + + case ACCMN: + return S64 | 0<<30 | 1<<29 | 0xD2<<21 | 1<<11 | 0<<10 | 0<<4 /* imm5<<16 | cond<<12 | nzcv<<0 */ + + case ACCMNW: + return S32 | 
0<<30 | 1<<29 | 0xD2<<21 | 1<<11 | 0<<10 | 0<<4 + + case ACCMP: + return S64 | 1<<30 | 1<<29 | 0xD2<<21 | 1<<11 | 0<<10 | 0<<4 /* imm5<<16 | cond<<12 | nzcv<<0 */ + + case ACCMPW: + return S32 | 1<<30 | 1<<29 | 0xD2<<21 | 1<<11 | 0<<10 | 0<<4 + + case AMOVK: + return S64 | 3<<29 | 0x25<<23 + + case AMOVKW: + return S32 | 3<<29 | 0x25<<23 + + case AMOVN: + return S64 | 0<<29 | 0x25<<23 + + case AMOVNW: + return S32 | 0<<29 | 0x25<<23 + + case AMOVZ: + return S64 | 2<<29 | 0x25<<23 + + case AMOVZW: + return S32 | 2<<29 | 0x25<<23 + + case AMSR: + return SYSOP(0, 0, 0, 4, 0, 0, 0x1F) /* MSR (immediate) */ + + case AAT, + ADC, + AIC, + ATLBI, + ASYS: + return SYSOP(0, 1, 0, 0, 0, 0, 0) + + case ASYSL: + return SYSOP(1, 1, 0, 0, 0, 0, 0) + + case ATBZ: + return 0x36 << 24 + + case ATBNZ: + return 0x37 << 24 + + case ADSB: + return SYSOP(0, 0, 3, 3, 0, 4, 0x1F) + + case ADMB: + return SYSOP(0, 0, 3, 3, 0, 5, 0x1F) + + case AISB: + return SYSOP(0, 0, 3, 3, 0, 6, 0x1F) + + case AHINT: + return SYSOP(0, 0, 3, 2, 0, 0, 0x1F) + + case AVEXT: + return 0x2E<<24 | 0<<23 | 0<<21 | 0<<15 + + case AVUSHR: + return 0x5E<<23 | 1<<10 + + case AVSHL: + return 0x1E<<23 | 21<<10 + + case AVSRI: + return 0x5E<<23 | 17<<10 + + case AVSLI: + return 0x5E<<23 | 21<<10 + + case AVUSHLL, AVUXTL: + return 1<<29 | 15<<24 | 0x29<<10 + + case AVUSHLL2, AVUXTL2: + return 3<<29 | 15<<24 | 0x29<<10 + + case AVXAR: + return 0xCE<<24 | 1<<23 + + case AVUSRA: + return 1<<29 | 15<<24 | 5<<10 + + case APRFM: + return 0xf9<<24 | 2<<22 + } + + c.ctxt.Diag("%v: bad irr %v", p, a) + return 0 +} + +func (c *ctxt7) opbit(p *obj.Prog, a obj.As) uint32 { + switch a { + case ACLS: + return S64 | OPBIT(5) + + case ACLSW: + return S32 | OPBIT(5) + + case ACLZ: + return S64 | OPBIT(4) + + case ACLZW: + return S32 | OPBIT(4) + + case ARBIT: + return S64 | OPBIT(0) + + case ARBITW: + return S32 | OPBIT(0) + + case AREV: + return S64 | OPBIT(3) + + case AREVW: + return S32 | OPBIT(2) + + case AREV16: + return S64 | 
OPBIT(1) + + case AREV16W: + return S32 | OPBIT(1) + + case AREV32: + return S64 | OPBIT(2) + + default: + c.ctxt.Diag("bad bit op\n%v", p) + return 0 + } +} + +/* + * add/subtract sign or zero-extended register + */ +func (c *ctxt7) opxrrr(p *obj.Prog, a obj.As, extend bool) uint32 { + extension := uint32(0) + if !extend { + if isADDop(a) { + extension = LSL0_64 + } + if isADDWop(a) { + extension = LSL0_32 + } + } + + switch a { + case AADD: + return S64 | 0<<30 | 0<<29 | 0x0b<<24 | 0<<22 | 1<<21 | extension + + case AADDW: + return S32 | 0<<30 | 0<<29 | 0x0b<<24 | 0<<22 | 1<<21 | extension + + case ACMN, AADDS: + return S64 | 0<<30 | 1<<29 | 0x0b<<24 | 0<<22 | 1<<21 | extension + + case ACMNW, AADDSW: + return S32 | 0<<30 | 1<<29 | 0x0b<<24 | 0<<22 | 1<<21 | extension + + case ASUB: + return S64 | 1<<30 | 0<<29 | 0x0b<<24 | 0<<22 | 1<<21 | extension + + case ASUBW: + return S32 | 1<<30 | 0<<29 | 0x0b<<24 | 0<<22 | 1<<21 | extension + + case ACMP, ASUBS: + return S64 | 1<<30 | 1<<29 | 0x0b<<24 | 0<<22 | 1<<21 | extension + + case ACMPW, ASUBSW: + return S32 | 1<<30 | 1<<29 | 0x0b<<24 | 0<<22 | 1<<21 | extension + } + + c.ctxt.Diag("bad opxrrr %v\n%v", a, p) + return 0 +} + +func (c *ctxt7) opimm(p *obj.Prog, a obj.As) uint32 { + switch a { + case ASVC: + return 0xD4<<24 | 0<<21 | 1 /* imm16<<5 */ + + case AHVC: + return 0xD4<<24 | 0<<21 | 2 + + case ASMC: + return 0xD4<<24 | 0<<21 | 3 + + case ABRK: + return 0xD4<<24 | 1<<21 | 0 + + case AHLT: + return 0xD4<<24 | 2<<21 | 0 + + case ADCPS1: + return 0xD4<<24 | 5<<21 | 1 + + case ADCPS2: + return 0xD4<<24 | 5<<21 | 2 + + case ADCPS3: + return 0xD4<<24 | 5<<21 | 3 + + case ACLREX: + return SYSOP(0, 0, 3, 3, 0, 2, 0x1F) + } + + c.ctxt.Diag("%v: bad imm %v", p, a) + return 0 +} + +func (c *ctxt7) brdist(p *obj.Prog, preshift int, flen int, shift int) int64 { + v := int64(0) + t := int64(0) + var q *obj.Prog + if p.To.Type == obj.TYPE_BRANCH { + q = p.To.Target() + } else if p.From.Type == obj.TYPE_BRANCH { // adr, adrp 
+ q = p.From.Target() + } + if q == nil { + // TODO: don't use brdist for this case, as it isn't a branch. + // (Calls from omovlit, and maybe adr/adrp opcodes as well.) + q = p.Pool + } + if q != nil { + v = (q.Pc >> uint(preshift)) - (c.pc >> uint(preshift)) + if (v & ((1 << uint(shift)) - 1)) != 0 { + c.ctxt.Diag("misaligned label\n%v", p) + } + v >>= uint(shift) + t = int64(1) << uint(flen-1) + if v < -t || v >= t { + c.ctxt.Diag("branch too far %#x vs %#x [%p]\n%v\n%v", v, t, c.blitrl, p, q) + panic("branch too far") + } + } + + return v & ((t << 1) - 1) +} + +/* + * pc-relative branches + */ +func (c *ctxt7) opbra(p *obj.Prog, a obj.As) uint32 { + switch a { + case ABEQ: + return OPBcc(0x0) + + case ABNE: + return OPBcc(0x1) + + case ABCS: + return OPBcc(0x2) + + case ABHS: + return OPBcc(0x2) + + case ABCC: + return OPBcc(0x3) + + case ABLO: + return OPBcc(0x3) + + case ABMI: + return OPBcc(0x4) + + case ABPL: + return OPBcc(0x5) + + case ABVS: + return OPBcc(0x6) + + case ABVC: + return OPBcc(0x7) + + case ABHI: + return OPBcc(0x8) + + case ABLS: + return OPBcc(0x9) + + case ABGE: + return OPBcc(0xa) + + case ABLT: + return OPBcc(0xb) + + case ABGT: + return OPBcc(0xc) + + case ABLE: + return OPBcc(0xd) /* imm19<<5 | cond */ + + case AB: + return 0<<31 | 5<<26 /* imm26 */ + + case obj.ADUFFZERO, obj.ADUFFCOPY, ABL: + return 1<<31 | 5<<26 + } + + c.ctxt.Diag("%v: bad bra %v", p, a) + return 0 +} + +func (c *ctxt7) opbrr(p *obj.Prog, a obj.As) uint32 { + switch a { + case ABL: + return OPBLR(1) /* BLR */ + + case AB: + return OPBLR(0) /* BR */ + + case obj.ARET: + return OPBLR(2) /* RET */ + } + + c.ctxt.Diag("%v: bad brr %v", p, a) + return 0 +} + +func (c *ctxt7) op0(p *obj.Prog, a obj.As) uint32 { + switch a { + case ADRPS: + return 0x6B<<25 | 5<<21 | 0x1F<<16 | 0x1F<<5 + + case AERET: + return 0x6B<<25 | 4<<21 | 0x1F<<16 | 0<<10 | 0x1F<<5 + + case ANOOP: + return SYSHINT(0) + + case AYIELD: + return SYSHINT(1) + + case AWFE: + return SYSHINT(2) + + case 
AWFI: + return SYSHINT(3) + + case ASEV: + return SYSHINT(4) + + case ASEVL: + return SYSHINT(5) + } + + c.ctxt.Diag("%v: bad op0 %v", p, a) + return 0 +} + +/* + * register offset + */ +func (c *ctxt7) opload(p *obj.Prog, a obj.As) uint32 { + switch a { + case ALDAR: + return LDSTX(3, 1, 1, 0, 1) | 0x1F<<10 + + case ALDARW: + return LDSTX(2, 1, 1, 0, 1) | 0x1F<<10 + + case ALDARB: + return LDSTX(0, 1, 1, 0, 1) | 0x1F<<10 + + case ALDARH: + return LDSTX(1, 1, 1, 0, 1) | 0x1F<<10 + + case ALDAXP: + return LDSTX(3, 0, 1, 1, 1) + + case ALDAXPW: + return LDSTX(2, 0, 1, 1, 1) + + case ALDAXR: + return LDSTX(3, 0, 1, 0, 1) | 0x1F<<10 + + case ALDAXRW: + return LDSTX(2, 0, 1, 0, 1) | 0x1F<<10 + + case ALDAXRB: + return LDSTX(0, 0, 1, 0, 1) | 0x1F<<10 + + case ALDAXRH: + return LDSTX(1, 0, 1, 0, 1) | 0x1F<<10 + + case ALDXR: + return LDSTX(3, 0, 1, 0, 0) | 0x1F<<10 + + case ALDXRB: + return LDSTX(0, 0, 1, 0, 0) | 0x1F<<10 + + case ALDXRH: + return LDSTX(1, 0, 1, 0, 0) | 0x1F<<10 + + case ALDXRW: + return LDSTX(2, 0, 1, 0, 0) | 0x1F<<10 + + case ALDXP: + return LDSTX(3, 0, 1, 1, 0) + + case ALDXPW: + return LDSTX(2, 0, 1, 1, 0) + + case AMOVNP: + return S64 | 0<<30 | 5<<27 | 0<<26 | 0<<23 | 1<<22 + + case AMOVNPW: + return S32 | 0<<30 | 5<<27 | 0<<26 | 0<<23 | 1<<22 + } + + c.ctxt.Diag("bad opload %v\n%v", a, p) + return 0 +} + +func (c *ctxt7) opstore(p *obj.Prog, a obj.As) uint32 { + switch a { + case ASTLR: + return LDSTX(3, 1, 0, 0, 1) | 0x1F<<10 + + case ASTLRB: + return LDSTX(0, 1, 0, 0, 1) | 0x1F<<10 + + case ASTLRH: + return LDSTX(1, 1, 0, 0, 1) | 0x1F<<10 + + case ASTLP: + return LDSTX(3, 0, 0, 1, 1) + + case ASTLPW: + return LDSTX(2, 0, 0, 1, 1) + + case ASTLRW: + return LDSTX(2, 1, 0, 0, 1) | 0x1F<<10 + + case ASTLXP: + return LDSTX(3, 0, 0, 1, 1) + + case ASTLXPW: + return LDSTX(2, 0, 0, 1, 1) + + case ASTLXR: + return LDSTX(3, 0, 0, 0, 1) | 0x1F<<10 + + case ASTLXRB: + return LDSTX(0, 0, 0, 0, 1) | 0x1F<<10 + + case ASTLXRH: + return LDSTX(1, 0, 0, 0, 1) | 
0x1F<<10 + + case ASTLXRW: + return LDSTX(2, 0, 0, 0, 1) | 0x1F<<10 + + case ASTXR: + return LDSTX(3, 0, 0, 0, 0) | 0x1F<<10 + + case ASTXRB: + return LDSTX(0, 0, 0, 0, 0) | 0x1F<<10 + + case ASTXRH: + return LDSTX(1, 0, 0, 0, 0) | 0x1F<<10 + + case ASTXP: + return LDSTX(3, 0, 0, 1, 0) + + case ASTXPW: + return LDSTX(2, 0, 0, 1, 0) + + case ASTXRW: + return LDSTX(2, 0, 0, 0, 0) | 0x1F<<10 + + case AMOVNP: + return S64 | 0<<30 | 5<<27 | 0<<26 | 0<<23 | 1<<22 + + case AMOVNPW: + return S32 | 0<<30 | 5<<27 | 0<<26 | 0<<23 | 1<<22 + } + + c.ctxt.Diag("bad opstore %v\n%v", a, p) + return 0 +} + +/* + * load/store register (scaled 12-bit unsigned immediate) C3.3.13 + * these produce 64-bit values (when there's an option) + */ +func (c *ctxt7) olsr12u(p *obj.Prog, o int32, v int32, b int, r int) uint32 { + if v < 0 || v >= (1<<12) { + c.ctxt.Diag("offset out of range: %d\n%v", v, p) + } + o |= (v & 0xFFF) << 10 + o |= int32(b&31) << 5 + o |= int32(r & 31) + o |= 1 << 24 + return uint32(o) +} + +/* + * load/store register (unscaled 9-bit signed immediate) C3.3.12 + */ +func (c *ctxt7) olsr9s(p *obj.Prog, o int32, v int32, b int, r int) uint32 { + if v < -256 || v > 255 { + c.ctxt.Diag("offset out of range: %d\n%v", v, p) + } + o |= (v & 0x1FF) << 12 + o |= int32(b&31) << 5 + o |= int32(r & 31) + return uint32(o) +} + +// store(immediate) +// scaled 12-bit unsigned immediate offset. +// unscaled 9-bit signed immediate offset. +// pre/post-indexed store. +// and the 12-bit and 9-bit are distinguished in olsr12u and oslr9s. +func (c *ctxt7) opstr(p *obj.Prog, a obj.As) uint32 { + enc := c.opldr(p, a) + switch p.As { + case AFMOVQ: + enc = enc &^ (1 << 22) + default: + enc = LD2STR(enc) + } + return enc +} + +// load(immediate) +// scaled 12-bit unsigned immediate offset. +// unscaled 9-bit signed immediate offset. +// pre/post-indexed load. +// and the 12-bit and 9-bit are distinguished in olsr12u and oslr9s. 
+func (c *ctxt7) opldr(p *obj.Prog, a obj.As) uint32 { + switch a { + case AMOVD: + return LDSTR(3, 0, 1) /* simm9<<12 | Rn<<5 | Rt */ + + case AMOVW: + return LDSTR(2, 0, 2) + + case AMOVWU: + return LDSTR(2, 0, 1) + + case AMOVH: + return LDSTR(1, 0, 2) + + case AMOVHU: + return LDSTR(1, 0, 1) + + case AMOVB: + return LDSTR(0, 0, 2) + + case AMOVBU: + return LDSTR(0, 0, 1) + + case AFMOVS: + return LDSTR(2, 1, 1) + + case AFMOVD: + return LDSTR(3, 1, 1) + + case AFMOVQ: + return LDSTR(0, 1, 3) + } + + c.ctxt.Diag("bad opldr %v\n%v", a, p) + return 0 +} + +// olsxrr attaches register operands to a load/store opcode supplied in o. +// The result either encodes a load of r from (r1+r2) or a store of r to (r1+r2). +func (c *ctxt7) olsxrr(p *obj.Prog, o int32, r int, r1 int, r2 int) uint32 { + o |= int32(r1&31) << 5 + o |= int32(r2&31) << 16 + o |= int32(r & 31) + return uint32(o) +} + +// opldrr returns the ARM64 opcode encoding corresponding to the obj.As opcode +// for load instruction with register offset. +// The offset register can be (Rn)(Rm.UXTW<<2) or (Rn)(Rm<<2) or (Rn)(Rm). +func (c *ctxt7) opldrr(p *obj.Prog, a obj.As, extension bool) uint32 { + OptionS := uint32(0x1a) + if extension { + OptionS = uint32(0) // option value and S value have been encoded into p.From.Offset. 
+ } + switch a { + case AMOVD: + return OptionS<<10 | 0x3<<21 | 0x1f<<27 + case AMOVW: + return OptionS<<10 | 0x5<<21 | 0x17<<27 + case AMOVWU: + return OptionS<<10 | 0x3<<21 | 0x17<<27 + case AMOVH: + return OptionS<<10 | 0x5<<21 | 0x0f<<27 + case AMOVHU: + return OptionS<<10 | 0x3<<21 | 0x0f<<27 + case AMOVB: + return OptionS<<10 | 0x5<<21 | 0x07<<27 + case AMOVBU: + return OptionS<<10 | 0x3<<21 | 0x07<<27 + case AFMOVS: + return OptionS<<10 | 0x3<<21 | 0x17<<27 | 1<<26 + case AFMOVD: + return OptionS<<10 | 0x3<<21 | 0x1f<<27 | 1<<26 + } + c.ctxt.Diag("bad opldrr %v\n%v", a, p) + return 0 +} + +// opstrr returns the ARM64 opcode encoding corresponding to the obj.As opcode +// for store instruction with register offset. +// The offset register can be (Rn)(Rm.UXTW<<2) or (Rn)(Rm<<2) or (Rn)(Rm). +func (c *ctxt7) opstrr(p *obj.Prog, a obj.As, extension bool) uint32 { + OptionS := uint32(0x1a) + if extension { + OptionS = uint32(0) // option value and S value have been encoded into p.To.Offset. 
+ } + switch a { + case AMOVD: + return OptionS<<10 | 0x1<<21 | 0x1f<<27 + case AMOVW, AMOVWU: + return OptionS<<10 | 0x1<<21 | 0x17<<27 + case AMOVH, AMOVHU: + return OptionS<<10 | 0x1<<21 | 0x0f<<27 + case AMOVB, AMOVBU: + return OptionS<<10 | 0x1<<21 | 0x07<<27 + case AFMOVS: + return OptionS<<10 | 0x1<<21 | 0x17<<27 | 1<<26 + case AFMOVD: + return OptionS<<10 | 0x1<<21 | 0x1f<<27 | 1<<26 + } + c.ctxt.Diag("bad opstrr %v\n%v", a, p) + return 0 +} + +func (c *ctxt7) oaddi(p *obj.Prog, o1 int32, v int32, r int, rt int) uint32 { + if (v & 0xFFF000) != 0 { + if v&0xFFF != 0 { + c.ctxt.Diag("%v misuses oaddi", p) + } + v >>= 12 + o1 |= 1 << 22 + } + + o1 |= ((v & 0xFFF) << 10) | (int32(r&31) << 5) | int32(rt&31) + return uint32(o1) +} + +/* + * load a literal value into dr + */ +func (c *ctxt7) omovlit(as obj.As, p *obj.Prog, a *obj.Addr, dr int) uint32 { + var o1 int32 + if p.Pool == nil { /* not in literal pool */ + c.aclass(a) + c.ctxt.Logf("omovlit add %d (%#x)\n", c.instoffset, uint64(c.instoffset)) + + /* TODO: could be clever, and use general constant builder */ + o1 = int32(c.opirr(p, AADD)) + + v := int32(c.instoffset) + if v != 0 && (v&0xFFF) == 0 { + v >>= 12 + o1 |= 1 << 22 /* shift, by 12 */ + } + + o1 |= ((v & 0xFFF) << 10) | (REGZERO & 31 << 5) | int32(dr&31) + } else { + fp, w := 0, 0 + switch as { + case AFMOVS, AVMOVS: + fp = 1 + w = 0 /* 32-bit SIMD/FP */ + + case AFMOVD, AVMOVD: + fp = 1 + w = 1 /* 64-bit SIMD/FP */ + + case AVMOVQ: + fp = 1 + w = 2 /* 128-bit SIMD/FP */ + + case AMOVD: + if p.Pool.As == ADWORD { + w = 1 /* 64-bit */ + } else if p.Pool.To.Offset < 0 { + w = 2 /* 32-bit, sign-extended to 64-bit */ + } else if p.Pool.To.Offset >= 0 { + w = 0 /* 32-bit, zero-extended to 64-bit */ + } else { + c.ctxt.Diag("invalid operand %v in %v", a, p) + } + + case AMOVBU, AMOVHU, AMOVWU: + w = 0 /* 32-bit, zero-extended to 64-bit */ + + case AMOVB, AMOVH, AMOVW: + w = 2 /* 32-bit, sign-extended to 64-bit */ + + default: + c.ctxt.Diag("invalid 
operation %v in %v", as, p) + } + + v := int32(c.brdist(p, 0, 19, 2)) + o1 = (int32(w) << 30) | (int32(fp) << 26) | (3 << 27) + o1 |= (v & 0x7FFFF) << 5 + o1 |= int32(dr & 31) + } + + return uint32(o1) +} + +// load a constant (MOVCON or BITCON) in a into rt +func (c *ctxt7) omovconst(as obj.As, p *obj.Prog, a *obj.Addr, rt int) (o1 uint32) { + if cls := oclass(a); (cls == C_BITCON || cls == C_ABCON || cls == C_ABCON0) && rt != REGZERO { + // or $bitcon, REGZERO, rt. rt can't be ZR. + mode := 64 + var as1 obj.As + switch as { + case AMOVW: + as1 = AORRW + mode = 32 + case AMOVD: + as1 = AORR + } + o1 = c.opirr(p, as1) + o1 |= bitconEncode(uint64(a.Offset), mode) | uint32(REGZERO&31)<<5 | uint32(rt&31) + return o1 + } + + if as == AMOVW { + d := uint32(a.Offset) + s := movcon(int64(d)) + if s < 0 || 16*s >= 32 { + d = ^d + s = movcon(int64(d)) + if s < 0 || 16*s >= 32 { + c.ctxt.Diag("impossible 32-bit move wide: %#x\n%v", uint32(a.Offset), p) + } + o1 = c.opirr(p, AMOVNW) + } else { + o1 = c.opirr(p, AMOVZW) + } + o1 |= MOVCONST(int64(d), s, rt) + } + if as == AMOVD { + d := a.Offset + s := movcon(d) + if s < 0 || 16*s >= 64 { + d = ^d + s = movcon(d) + if s < 0 || 16*s >= 64 { + c.ctxt.Diag("impossible 64-bit move wide: %#x\n%v", uint64(a.Offset), p) + } + o1 = c.opirr(p, AMOVN) + } else { + o1 = c.opirr(p, AMOVZ) + } + o1 |= MOVCONST(d, s, rt) + } + return o1 +} + +// load a 32-bit/64-bit large constant (LCON or VCON) in a.Offset into rt +// put the instruction sequence in os and return the number of instructions. 
func (c *ctxt7) omovlconst(as obj.As, p *obj.Prog, a *obj.Addr, rt int, os []uint32) (num uint8) {
	// The instruction words are written to os[0..num-1]; os must be large
	// enough for the longest sequence produced here (4 words for AMOVD).
	// Any opcode other than AMOVW/AMOVD produces nothing and returns 0.
	switch as {
	case AMOVW:
		d := uint32(a.Offset)
		// use MOVZW and MOVKW to load a constant to rt
		// (always two instructions: MOVCONST index 0, then index 1).
		os[0] = c.opirr(p, AMOVZW)
		os[0] |= MOVCONST(int64(d), 0, rt)
		os[1] = c.opirr(p, AMOVKW)
		os[1] |= MOVCONST(int64(d), 1, rt)
		return 2

	case AMOVD:
		d := a.Offset
		dn := ^d // bitwise complement, used as the operand of MOVN
		var immh [4]uint64
		var i int
		zeroCount := int(0)
		negCount := int(0)
		// Split d into four 16-bit halfwords and count how many are
		// all-zeros (0x0000) and how many are all-ones (0xffff).
		for i = 0; i < 4; i++ {
			immh[i] = uint64((d >> uint(i*16)) & 0xffff)
			if immh[i] == 0 {
				zeroCount++
			} else if immh[i] == 0xffff {
				negCount++
			}
		}

		// An all-zero or all-ones constant is reported as a
		// misclassification: the diagnostic says it should be MOVCON.
		if zeroCount == 4 || negCount == 4 {
			c.ctxt.Diag("the immediate should be MOVCON: %v", p)
		}
		// Pick the shortest sequence. Zero halfwords are skipped after
		// a MOVZ, 0xffff halfwords are skipped after a MOVN; every
		// remaining halfword gets its own MOVK.
		switch {
		case zeroCount == 3:
			// one MOVZ
			for i = 0; i < 4; i++ {
				if immh[i] != 0 {
					os[0] = c.opirr(p, AMOVZ)
					os[0] |= MOVCONST(d, i, rt)
					break
				}
			}
			return 1

		case negCount == 3:
			// one MOVN
			for i = 0; i < 4; i++ {
				if immh[i] != 0xffff {
					os[0] = c.opirr(p, AMOVN)
					os[0] |= MOVCONST(dn, i, rt)
					break
				}
			}
			return 1

		case zeroCount == 2:
			// one MOVZ and one MOVK
			for i = 0; i < 4; i++ {
				if immh[i] != 0 {
					os[0] = c.opirr(p, AMOVZ)
					os[0] |= MOVCONST(d, i, rt)
					i++ // resume the MOVK scan after this halfword
					break
				}
			}
			for ; i < 4; i++ {
				if immh[i] != 0 {
					os[1] = c.opirr(p, AMOVK)
					os[1] |= MOVCONST(d, i, rt)
				}
			}
			return 2

		case negCount == 2:
			// one MOVN and one MOVK
			for i = 0; i < 4; i++ {
				if immh[i] != 0xffff {
					os[0] = c.opirr(p, AMOVN)
					os[0] |= MOVCONST(dn, i, rt)
					i++ // resume the MOVK scan after this halfword
					break
				}
			}
			for ; i < 4; i++ {
				if immh[i] != 0xffff {
					// MOVK takes the halfword from d, not from dn.
					os[1] = c.opirr(p, AMOVK)
					os[1] |= MOVCONST(d, i, rt)
				}
			}
			return 2

		case zeroCount == 1:
			// one MOVZ and two MOVKs
			for i = 0; i < 4; i++ {
				if immh[i] != 0 {
					os[0] = c.opirr(p, AMOVZ)
					os[0] |= MOVCONST(d, i, rt)
					i++
					break
				}
			}

			// j is the next free slot in os; i keeps walking the halfwords.
			for j := 1; i < 4; i++ {
				if immh[i] != 0 {
					os[j] = c.opirr(p, AMOVK)
					os[j] |= MOVCONST(d, i, rt)
					j++
				}
			}
			return 3

		case negCount == 1:
			// one MOVN and two MOVKs
			for i = 0; i < 4; i++ {
				if immh[i] != 0xffff {
					os[0] = c.opirr(p, AMOVN)
					os[0] |= MOVCONST(dn, i, rt)
					i++
					break
				}
			}

			// j is the next free slot in os; i keeps walking the halfwords.
			for j := 1; i < 4; i++ {
				if immh[i] != 0xffff {
					os[j] = c.opirr(p, AMOVK)
					os[j] |= MOVCONST(d, i, rt)
					j++
				}
			}
			return 3

		default:
			// one MOVZ and 3 MOVKs
			os[0] = c.opirr(p, AMOVZ)
			os[0] |= MOVCONST(d, 0, rt)
			for i = 1; i < 4; i++ {
				os[i] = c.opirr(p, AMOVK)
				os[i] |= MOVCONST(d, i, rt)
			}
			return 4
		}
	default:
		return 0
	}
}

// opbfm returns the c.opirr encoding of a with r placed in bits 16-21,
// s in bits 10-15, rf in bits 5-9 and rt in bits 0-4.
// r and s must be in [0, 32) when bit 31 of the base opcode is clear and in
// [0, 64) when it is set; otherwise a diagnostic is reported (the value is
// still masked and encoded).
func (c *ctxt7) opbfm(p *obj.Prog, a obj.As, r int, s int, rf int, rt int) uint32 {
	var b uint32
	o := c.opirr(p, a)
	if (o & (1 << 31)) == 0 {
		b = 32
	} else {
		b = 64
	}
	if r < 0 || uint32(r) >= b {
		c.ctxt.Diag("illegal bit number\n%v", p)
	}
	o |= (uint32(r) & 0x3F) << 16
	if s < 0 || uint32(s) >= b {
		c.ctxt.Diag("illegal bit number\n%v", p)
	}
	o |= (uint32(s) & 0x3F) << 10
	o |= (uint32(rf&31) << 5) | uint32(rt&31)
	return o
}

// opextr returns the c.opirr encoding of a with v placed at bit 10, rn in
// bits 5-9, rm in bits 16-20 and rt in bits 0-4.
// v must be in [0, 63] when bit 31 of the base opcode is set and in [0, 31]
// otherwise; an out-of-range v is diagnosed but, unlike in opbfm, it is not
// masked before being shifted into the word.
func (c *ctxt7) opextr(p *obj.Prog, a obj.As, v int32, rn int, rm int, rt int) uint32 {
	var b uint32
	o := c.opirr(p, a)
	if (o & (1 << 31)) != 0 {
		b = 63
	} else {
		b = 31
	}
	if v < 0 || uint32(v) > b {
		c.ctxt.Diag("illegal bit number\n%v", p)
	}
	o |= uint32(v) << 10
	o |= uint32(rn&31) << 5
	o |= uint32(rm&31) << 16
	o |= uint32(rt & 31)
	return o
}

/* generate instruction encoding for ldp and stp series */
// vo is the byte offset; it is range-checked, required to be a multiple of the
// per-instruction scale, divided by that scale, and encoded as a 7-bit field at
// bit 15. rbase, rl and rh are encoded in bits 5-9, 0-4 and 10-14. Bit 22 is
// taken from the low bit of ldp. o.scond selects the value stored at bit 23
// (C_XPOST: 1, C_XPRE: 3, anything else: 2).
// Every failed check is reported through c.ctxt.Diag; an encoding is returned
// regardless.
func (c *ctxt7) opldpstp(p *obj.Prog, o *Optab, vo int32, rbase, rl, rh, ldp uint32) uint32 {
	// wback is true for the post-index and pre-index forms.
	wback := false
	if o.scond == C_XPOST || o.scond == C_XPRE {
		wback = true
	}
	// Loads are always passed to checkUnpredictable; integer stores only
	// when wback is set; FP loads are passed with wback forced to false.
	switch p.As {
	case ALDP, ALDPW, ALDPSW:
		c.checkUnpredictable(p, true, wback, p.From.Reg, p.To.Reg, int16(p.To.Offset))
	case ASTP, ASTPW:
		if wback {
			c.checkUnpredictable(p, false, true, p.To.Reg, p.From.Reg, int16(p.From.Offset))
		}
	case AFLDPD, AFLDPQ, AFLDPS:
		c.checkUnpredictable(p, true, false, p.From.Reg, p.To.Reg, int16(p.To.Offset))
	}
	var ret uint32
	// check offset
	// Each case also scales vo and seeds ret with the bits at 30-31 and 26
	// that distinguish the variants.
	switch p.As {
	case AFLDPQ, AFSTPQ:
		if vo < -1024 || vo > 1008 || vo%16 != 0 {
			c.ctxt.Diag("invalid offset %v\n", p)
		}
		vo /= 16
		ret = 2<<30 | 1<<26
	case AFLDPD, AFSTPD:
		if vo < -512 || vo > 504 || vo%8 != 0 {
			c.ctxt.Diag("invalid offset %v\n", p)
		}
		vo /= 8
		ret = 1<<30 | 1<<26
	case AFLDPS, AFSTPS:
		if vo < -256 || vo > 252 || vo%4 != 0 {
			c.ctxt.Diag("invalid offset %v\n", p)
		}
		vo /= 4
		ret = 1 << 26
	case ALDP, ASTP:
		if vo < -512 || vo > 504 || vo%8 != 0 {
			c.ctxt.Diag("invalid offset %v\n", p)
		}
		vo /= 8
		ret = 2 << 30
	case ALDPW, ASTPW:
		if vo < -256 || vo > 252 || vo%4 != 0 {
			c.ctxt.Diag("invalid offset %v\n", p)
		}
		vo /= 4
		ret = 0
	case ALDPSW:
		if vo < -256 || vo > 252 || vo%4 != 0 {
			c.ctxt.Diag("invalid offset %v\n", p)
		}
		vo /= 4
		ret = 1 << 30
	default:
		c.ctxt.Diag("invalid instruction %v\n", p)
	}
	// check register pair
	// FP forms need F0-F31; integer loads accept R0-R30 only, integer
	// stores additionally accept R31.
	switch p.As {
	case AFLDPQ, AFLDPD, AFLDPS, AFSTPQ, AFSTPD, AFSTPS:
		if rl < REG_F0 || REG_F31 < rl || rh < REG_F0 || REG_F31 < rh {
			c.ctxt.Diag("invalid register pair %v\n", p)
		}
	case ALDP, ALDPW, ALDPSW:
		if rl < REG_R0 || REG_R30 < rl || rh < REG_R0 || REG_R30 < rh {
			c.ctxt.Diag("invalid register pair %v\n", p)
		}
	case ASTP, ASTPW:
		if rl < REG_R0 || REG_R31 < rl || rh < REG_R0 || REG_R31 < rh {
			c.ctxt.Diag("invalid register pair %v\n", p)
		}
	}
	// other conditional flag bits
	switch o.scond {
	case C_XPOST:
		ret |= 1 << 23
	case C_XPRE:
		ret |= 3 << 23
	default:
		ret |= 2 << 23
	}
	ret |= 5<<27 | (ldp&1)<<22 | uint32(vo&0x7f)<<15 | (rh&31)<<10 | (rbase&31)<<5 | (rl & 31)
	return ret
}

// maskOpvldvst rewrites bits 12-15 of o1 according to p.As.
// AVLD1 and AVST1 are returned unchanged. For every other supported
// instruction the field is cleared and then set to the per-instruction value
// below (AVLD4/AVST4 leave it zero). Unsupported instructions are diagnosed.
func (c *ctxt7) maskOpvldvst(p *obj.Prog, o1 uint32) uint32 {
	if p.As == AVLD1 || p.As == AVST1 {
		return o1
	}

	o1 &^= 0xf000 // mask out "opcode" field (bit 12-15)
	switch p.As {
	case AVLD1R, AVLD2R:
		o1 |= 0xC << 12
	case AVLD3R, AVLD4R:
		o1 |= 0xE << 12
	case AVLD2, AVST2:
		o1 |= 8 << 12
	case AVLD3, AVST3:
		o1 |= 4 << 12
	case AVLD4, AVST4:
		// field stays 0
	default:
		c.ctxt.Diag("unsupported instruction:%v\n", p.As)
	}
	return o1
}

/*
 * size in log2(bytes)
 */
// movesize returns -1 for any opcode that is not one of the moves listed below.
func movesize(a obj.As) int {
	switch a {
	case AFMOVQ:
		return 4

	case AMOVD, AFMOVD:
		return 3

	case AMOVW, AMOVWU, AFMOVS:
		return 2

	case AMOVH, AMOVHU:
		return 1

	case AMOVB, AMOVBU:
		return 0

	default:
		return -1
	}
}

// rm is the Rm register value, o is the extension, amount is the left shift value.
// The result places rm (low 5 bits) at bit 16, o at bit 13 and amount at bit 10;
// o and amount are not masked.
func roff(rm int16, o uint32, amount int16) uint32 {
	return uint32(rm&31)<<16 | o<<13 | uint32(amount)<<10
}

// encRegShiftOrExt returns the encoding of shifted/extended register, Rx<<n and Rx.UXTW<<n, etc.
// r packs the register number in bits 0-4 and the shift amount in bits 5-7;
// the range r falls in (REG_UXTB, REG_UXTH, ..., REG_LSL) selects the
// extension kind. For memory operands (a.Type == obj.TYPE_MEM) the amount
// passed to roff is a fixed 2 or 6 depending on whether the shift is zero,
// rather than the shift itself.
func (c *ctxt7) encRegShiftOrExt(p *obj.Prog, a *obj.Addr, r int16) uint32 {
	var num, rm int16
	num = (r >> 5) & 7
	rm = r & 31
	switch {
	case REG_UXTB <= r && r < REG_UXTH:
		return roff(rm, 0, num)
	case REG_UXTH <= r && r < REG_UXTW:
		return roff(rm, 1, num)
	case REG_UXTW <= r && r < REG_UXTX:
		if a.Type == obj.TYPE_MEM {
			if num == 0 {
				return roff(rm, 2, 2)
			} else {
				return roff(rm, 2, 6)
			}
		} else {
			return roff(rm, 2, num)
		}
	case REG_UXTX <= r && r < REG_SXTB:
		return roff(rm, 3, num)
	case REG_SXTB <= r && r < REG_SXTH:
		return roff(rm, 4, num)
	case REG_SXTH <= r && r < REG_SXTW:
		return roff(rm, 5, num)
	case REG_SXTW <= r && r < REG_SXTX:
		if a.Type == obj.TYPE_MEM {
			if num == 0 {
				return roff(rm, 6, 2)
			} else {
				return roff(rm, 6, 6)
			}
		} else {
			return roff(rm, 6, num)
		}
	case REG_SXTX <= r && r < REG_SPECIAL:
		if a.Type == obj.TYPE_MEM {
			if num == 0 {
				return roff(rm, 7, 2)
			} else {
				return roff(rm, 7, 6)
			}
		} else {
			return roff(rm, 7, num)
		}
	case REG_LSL <= r && r < REG_ARNG:
		if a.Type == obj.TYPE_MEM { // (R1)(R2<<1)
			return roff(rm, 3, 6)
		} else if isADDWop(p.As) {
			// 32-bit add/sub family: use extension code 2 instead of 3.
			return roff(rm, 2, num)
		}
		return roff(rm, 3, num)
	default:
		c.ctxt.Diag("unsupported register extension type.")
	}

	return 0
}

// pack returns the encoding of the "Q" field and two arrangement specifiers.
// Layout: q at bit 16 and up, arngA in bits 8-15, arngB in bits 0-7.
func pack(q uint32, arngA, arngB uint8) uint32 {
	return uint32(q)<<16 | uint32(arngA)<<8 | uint32(arngB)
}
diff --git a/src/cmd/internal/obj/arm64/asm_arm64_test.go b/src/cmd/internal/obj/arm64/asm_arm64_test.go
new file mode 100644
index 0000000..c52717d
--- /dev/null
+++ b/src/cmd/internal/obj/arm64/asm_arm64_test.go
@@ -0,0 +1,171 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package arm64

import (
	"bytes"
	"fmt"
	"internal/testenv"
	"os"
	"path/filepath"
	"regexp"
	"testing"
)

// TestLarge generates a very large file to verify that large
// program builds successfully, in particular, too-far
// conditional branches are fixed, and also verify that the
// instruction's pc can be correctly aligned even when branches
// need to be fixed.
func TestLarge(t *testing.T) {
	if testing.Short() {
		t.Skip("Skip in short mode")
	}
	testenv.MustHaveGoBuild(t)

	dir, err := os.MkdirTemp("", "testlarge")
	if err != nil {
		t.Fatalf("could not create directory: %v", err)
	}
	defer os.RemoveAll(dir)

	// generate a very large function
	buf := bytes.NewBuffer(make([]byte, 0, 7000000))
	gen(buf)

	tmpfile := filepath.Join(dir, "x.s")
	err = os.WriteFile(tmpfile, buf.Bytes(), 0644)
	if err != nil {
		t.Fatalf("can't write output: %v\n", err)
	}

	// The "-S" listing must show "MOVD $3, R3" (the instruction that
	// follows PCALIGN $128 in gen) at offset 0x80.
	pattern := `0x0080\s00128\s\(.*\)\tMOVD\t\$3,\sR3`

	// assemble generated file
	cmd := testenv.Command(t, testenv.GoToolPath(t), "tool", "asm", "-S", "-o", filepath.Join(dir, "test.o"), tmpfile)
	cmd.Env = append(os.Environ(), "GOOS=linux")
	out, err := cmd.CombinedOutput()
	if err != nil {
		t.Errorf("Assemble failed: %v, output: %s", err, out)
	}
	matched, err := regexp.MatchString(pattern, string(out))
	if err != nil {
		t.Fatal(err)
	}
	if !matched {
		t.Errorf("The alignment is not correct: %t, output:%s\n", matched, out)
	}

	// build generated file
	cmd = testenv.Command(t, testenv.GoToolPath(t), "tool", "asm", "-o", filepath.Join(dir, "x.o"), tmpfile)
	cmd.Env = append(os.Environ(), "GOOS=linux")
	out, err = cmd.CombinedOutput()
	if err != nil {
		t.Errorf("Build failed: %v, output: %s", err, out)
	}
}

// gen generates a very large program, with a very far conditional branch.
// Three branches (TBZ, CBZ, BEQ) target a label placed after 1<<19 MOVD
// instructions; a PCALIGN $128 follows the branches.
func gen(buf *bytes.Buffer) {
	fmt.Fprintln(buf, "TEXT f(SB),0,$0-0")
	fmt.Fprintln(buf, "TBZ $5, R0, label")
	fmt.Fprintln(buf, "CBZ R0, label")
	fmt.Fprintln(buf, "BEQ label")
	fmt.Fprintln(buf, "PCALIGN $128")
	fmt.Fprintln(buf, "MOVD $3, R3")
	for i := 0; i < 1<<19; i++ {
		fmt.Fprintln(buf, "MOVD R0, R1")
	}
	fmt.Fprintln(buf, "label:")
	fmt.Fprintln(buf, "RET")
}

// Issue 20348.
// TestNoRet checks that a function body containing only NOP (no RET)
// assembles without error.
func TestNoRet(t *testing.T) {
	dir, err := os.MkdirTemp("", "testnoret")
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(dir)
	tmpfile := filepath.Join(dir, "x.s")
	if err := os.WriteFile(tmpfile, []byte("TEXT ·stub(SB),$0-0\nNOP\n"), 0644); err != nil {
		t.Fatal(err)
	}
	cmd := testenv.Command(t, testenv.GoToolPath(t), "tool", "asm", "-o", filepath.Join(dir, "x.o"), tmpfile)
	cmd.Env = append(os.Environ(), "GOOS=linux")
	if out, err := cmd.CombinedOutput(); err != nil {
		t.Errorf("%v\n%s", err, out)
	}
}

// TestPCALIGN verifies the correctness of the PCALIGN by checking if the
// code can be aligned to the alignment value.
+func TestPCALIGN(t *testing.T) { + testenv.MustHaveGoBuild(t) + dir, err := os.MkdirTemp("", "testpcalign") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(dir) + tmpfile := filepath.Join(dir, "test.s") + tmpout := filepath.Join(dir, "test.o") + + code1 := []byte("TEXT ·foo(SB),$0-0\nMOVD $0, R0\nPCALIGN $8\nMOVD $1, R1\nRET\n") + code2 := []byte("TEXT ·foo(SB),$0-0\nMOVD $0, R0\nPCALIGN $16\nMOVD $2, R2\nRET\n") + // If the output contains this pattern, the pc-offsite of "MOVD $1, R1" is 8 bytes aligned. + out1 := `0x0008\s00008\s\(.*\)\tMOVD\t\$1,\sR1` + // If the output contains this pattern, the pc-offsite of "MOVD $2, R2" is 16 bytes aligned. + out2 := `0x0010\s00016\s\(.*\)\tMOVD\t\$2,\sR2` + var testCases = []struct { + name string + code []byte + out string + }{ + {"8-byte alignment", code1, out1}, + {"16-byte alignment", code2, out2}, + } + + for _, test := range testCases { + if err := os.WriteFile(tmpfile, test.code, 0644); err != nil { + t.Fatal(err) + } + cmd := testenv.Command(t, testenv.GoToolPath(t), "tool", "asm", "-S", "-o", tmpout, tmpfile) + cmd.Env = append(os.Environ(), "GOOS=linux") + out, err := cmd.CombinedOutput() + if err != nil { + t.Errorf("The %s build failed: %v, output: %s", test.name, err, out) + continue + } + + matched, err := regexp.MatchString(test.out, string(out)) + if err != nil { + t.Fatal(err) + } + if !matched { + t.Errorf("The %s testing failed!\ninput: %s\noutput: %s\n", test.name, test.code, out) + } + } +} + +func testvmovq() (r1, r2 uint64) + +// TestVMOVQ checks if the arm64 VMOVQ instruction is working properly. +func TestVMOVQ(t *testing.T) { + a, b := testvmovq() + if a != 0x7040201008040201 || b != 0x3040201008040201 { + t.Errorf("TestVMOVQ got: a=0x%x, b=0x%x, want: a=0x7040201008040201, b=0x3040201008040201", a, b) + } +} + +func testmovk() uint64 + +// TestMOVK makes sure MOVK with a very large constant works. See issue 52261. 
func TestMOVK(t *testing.T) {
	x := testmovk()
	// The expected value is the immediate 40000 shifted left by 48 bits.
	want := uint64(40000 << 48)
	if x != want {
		t.Errorf("TestMOVK got %x want %x\n", x, want)
	}
}
diff --git a/src/cmd/internal/obj/arm64/asm_arm64_test.s b/src/cmd/internal/obj/arm64/asm_arm64_test.s
new file mode 100644
index 0000000..f85433c
--- /dev/null
+++ b/src/cmd/internal/obj/arm64/asm_arm64_test.s
@@ -0,0 +1,21 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

// testvmovq() (r1, r2 uint64)
TEXT ·testvmovq(SB), NOSPLIT, $0-16
	VMOVQ	$0x7040201008040201, $0x3040201008040201, V1
	VMOV	V1.D[0], R0
	VMOV	V1.D[1], R1
	MOVD	R0, r1+0(FP)
	MOVD	R1, r2+8(FP)
	RET

// testmovk() uint64
TEXT ·testmovk(SB), NOSPLIT, $0-8
	MOVD	$0, R0
	MOVK	$(40000<<48), R0
	MOVD	R0, ret+0(FP)
	RET
diff --git a/src/cmd/internal/obj/arm64/doc.go b/src/cmd/internal/obj/arm64/doc.go
new file mode 100644
index 0000000..4606e98
--- /dev/null
+++ b/src/cmd/internal/obj/arm64/doc.go
@@ -0,0 +1,296 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

/*
Package arm64 implements an ARM64 assembler. Go assembly syntax is different from GNU ARM64
syntax, but we can still follow the general rules to map between them.

# Instruction mnemonics mapping rules

1. Most instructions use width suffixes of instruction names to indicate operand width rather than
using different register names.

Examples:

	ADC R24, R14, R12 <=> adc x12, x14, x24
	ADDW R26->24, R21, R15 <=> add w15, w21, w26, asr #24
	FCMPS F2, F3 <=> fcmp s3, s2
	FCMPD F2, F3 <=> fcmp d3, d2
	FCVTDH F2, F3 <=> fcvt h3, d2

2. Go uses .P and .W suffixes to indicate post-increment and pre-increment.

Examples:

	MOVD.P -8(R10), R8 <=> ldr x8, [x10],#-8
	MOVB.W 16(R16), R10 <=> ldrsb x10, [x16,#16]!
+ MOVBU.W 16(R16), R10 <=> ldrb x10, [x16,#16]! + +3. Go uses a series of MOV instructions as load and store. + +64-bit variant ldr, str, stur => MOVD; +32-bit variant str, stur, ldrsw => MOVW; +32-bit variant ldr => MOVWU; +ldrb => MOVBU; ldrh => MOVHU; +ldrsb, sturb, strb => MOVB; +ldrsh, sturh, strh => MOVH. + +4. Go moves conditions into opcode suffix, like BLT. + +5. Go adds a V prefix for most floating-point and SIMD instructions, except cryptographic extension +instructions and floating-point (scalar) instructions. + +Examples: + + VADD V5.H8, V18.H8, V9.H8 <=> add v9.8h, v18.8h, v5.8h + VLD1.P (R6)(R11), [V31.D1] <=> ld1 {v31.1d}, [x6], x11 + VFMLA V29.S2, V20.S2, V14.S2 <=> fmla v14.2s, v20.2s, v29.2s + AESD V22.B16, V19.B16 <=> aesd v19.16b, v22.16b + SCVTFWS R3, F16 <=> scvtf s16, w3 + +6. Align directive + +Go asm supports the PCALIGN directive, which indicates that the next instruction should be aligned +to a specified boundary by padding with NOOP instructions. The alignment value supported on arm64 +must be a power of 2 and in the range of [8, 2048]. + +Examples: + + PCALIGN $16 + MOVD $2, R0 // This instruction is aligned with 16 bytes. + PCALIGN $1024 + MOVD $3, R1 // This instruction is aligned with 1024 bytes. + +PCALIGN also changes the function alignment. If a function has one or more PCALIGN directives, +its address will be aligned to the same or coarser boundary, which is the maximum of all the +alignment values. + +In the following example, the function Add is aligned with 128 bytes. + +Examples: + + TEXT ·Add(SB),$40-16 + MOVD $2, R0 + PCALIGN $32 + MOVD $4, R1 + PCALIGN $128 + MOVD $8, R2 + RET + +On arm64, functions in Go are aligned to 16 bytes by default; we can also use PCALIGN to set the +function alignment. 
Functions that need to be aligned should preferably use NOFRAME and NOSPLIT +to avoid the impact of the prologues inserted by the assembler, so that the function address will +have the same alignment as the first hand-written instruction. + +In the following example, PCALIGN at the entry of the function Add will align its address to 2048 bytes. + +Examples: + + TEXT ·Add(SB),NOSPLIT|NOFRAME,$0 + PCALIGN $2048 + MOVD $1, R0 + MOVD $1, R1 + RET + +7. Move large constants to vector registers. + +Go asm uses VMOVQ/VMOVD/VMOVS to move 128-bit, 64-bit and 32-bit constants into vector registers, respectively. +For a 128-bit integer, VMOVQ takes two 64-bit operands, for the low and high parts separately. + +Examples: + + VMOVS $0x11223344, V0 + VMOVD $0x1122334455667788, V1 + VMOVQ $0x1122334455667788, $0x99aabbccddeeff00, V2 // V2=0x99aabbccddeeff001122334455667788 + +8. Move an optionally-shifted 16-bit immediate value to a register. + +The instructions are MOVK(W), MOVZ(W) and MOVN(W); the assembly syntax is "op $(uimm16<<shift), <Rd>". The <uimm16> +is the 16-bit unsigned immediate, in the range 0 to 65535. For the 32-bit variant, the <shift> is 0 or 16; for the +64-bit variant, the <shift> is 0, 16, 32 or 48. + +The current Go assembler does not accept zero shifts, such as "op $0, Rd" and "op $(0<<(16|32|48)), Rd" instructions. + +Examples: + + MOVK $(10<<32), R20 <=> movk x20, #10, lsl #32 + MOVZW $(20<<16), R8 <=> movz w8, #20, lsl #16 + MOVK $(0<<16), R10 will be reported as an error by the assembler. + +Special Cases. + +(1) umov is written as VMOV. + +(2) br is renamed JMP, blr is renamed CALL. + +(3) No need to add "W" suffix: LDARB, LDARH, LDAXRB, LDAXRH, LDTRH, LDXRB, LDXRH. + +(4) In Go assembly syntax, NOP is a zero-width pseudo-instruction that serves a generic purpose and is +unrelated to any real ARM64 instruction. NOOP stands for the hardware nop instruction. NOOP is an alias of +HINT $0. 
+ +Examples: + + VMOV V13.B[1], R20 <=> mov x20, v13.b[1] + VMOV V13.H[1], R20 <=> mov w20, v13.h[1] + JMP (R3) <=> br x3 + CALL (R17) <=> blr x17 + LDAXRB (R19), R16 <=> ldaxrb w16, [x19] + NOOP <=> nop + +# Register mapping rules + +1. All basic register names are written as Rn. + +2. Go uses ZR as the zero register and RSP as the stack pointer. + +3. Bn, Hn, Dn, Sn and Qn registers are written as Fn in floating-point instructions and as Vn +in SIMD instructions. + +# Argument mapping rules + +1. The operands appear in left-to-right assignment order. + +Go reverses the arguments of most instructions. + +Examples: + + ADD R11.SXTB<<1, RSP, R25 <=> add x25, sp, w11, sxtb #1 + VADD V16, V19, V14 <=> add d14, d19, d16 + +Special Cases. + +(1) Argument order is the same as in the GNU ARM64 syntax: cbz, cbnz and some store instructions, +such as str, stur, strb, sturb, strh, sturh, stlr, stlrb, stlrh, st1. + +Examples: + + MOVD R29, 384(R19) <=> str x29, [x19,#384] + MOVB.P R30, 30(R4) <=> strb w30, [x4],#30 + STLRH R21, (R19) <=> stlrh w21, [x19] + +(2) MADD, MADDW, MSUB, MSUBW, SMADDL, SMSUBL, UMADDL, UMSUBL <Rm>, <Ra>, <Rn>, <Rd> + +Examples: + + MADD R2, R30, R22, R6 <=> madd x6, x22, x2, x30 + SMSUBL R10, R3, R17, R27 <=> smsubl x27, w17, w10, x3 + +(3) FMADDD, FMADDS, FMSUBD, FMSUBS, FNMADDD, FNMADDS, FNMSUBD, FNMSUBS <Fm>, <Fa>, <Fn>, <Fd> + +Examples: + + FMADDD F30, F20, F3, F29 <=> fmadd d29, d3, d30, d20 + FNMSUBS F7, F25, F7, F22 <=> fnmsub s22, s7, s7, s25 + +(4) BFI, BFXIL, SBFIZ, SBFX, UBFIZ, UBFX $<lsb>, <Rn>, $<width>, <Rd> + +Examples: + + BFIW $16, R20, $6, R0 <=> bfi w0, w20, #16, #6 + UBFIZ $34, R26, $5, R20 <=> ubfiz x20, x26, #34, #5 + +(5) FCCMPD, FCCMPS, FCCMPED, FCCMPES <cond>, Fm, 
Fn, $<nzcv> + +Examples: + + FCCMPD AL, F8, F26, $0 <=> fccmp d26, d8, #0x0, al + FCCMPS VS, F29, F4, $4 <=> fccmp s4, s29, #0x4, vs + FCCMPED LE, F20, F5, $13 <=> fccmpe d5, d20, #0xd, le + FCCMPES NE, F26, F10, $0 <=> fccmpe s10, s26, #0x0, ne + +(6) CCMN, CCMNW, CCMP, CCMPW <cond>, <Rn>, $<imm>, $<nzcv> + +Examples: + + CCMP MI, R22, $12, $13 <=> ccmp x22, #0xc, #0xd, mi + CCMNW AL, R1, $11, $8 <=> ccmn w1, #0xb, #0x8, al + +(7) CCMN, CCMNW, CCMP, CCMPW <cond>, <Rn>, <Rm>, $<nzcv> + +Examples: + + CCMN VS, R13, R22, $10 <=> ccmn x13, x22, #0xa, vs + CCMPW HS, R19, R14, $11 <=> ccmp w19, w14, #0xb, cs + +(9) CSEL, CSELW, CSNEG, CSNEGW, CSINC, CSINCW <cond>, <Rn>, <Rm>, <Rd> ; +FCSELD, FCSELS <cond>, <Fn>, <Fm>, <Fd> + +Examples: + + CSEL GT, R0, R19, R1 <=> csel x1, x0, x19, gt + CSNEGW GT, R7, R17, R8 <=> csneg w8, w7, w17, gt + FCSELD EQ, F15, F18, F16 <=> fcsel d16, d15, d18, eq + +(10) TBNZ, TBZ $<imm>, <Rt>, <label> + +(11) STLXR, STLXRW, STXR, STXRW, STLXRB, STLXRH, STXRB, STXRH <Rf>, (<Rn|RSP>), <Rs> + +Examples: + + STLXR ZR, (R15), R16 <=> stlxr w16, xzr, [x15] + STXRB R9, (R21), R19 <=> stxrb w19, w9, [x21] + +(12) STLXP, STLXPW, STXP, STXPW (<Rf1>, <Rf2>), (<Rn|RSP>), <Rs> + +Examples: + + STLXP (R17, R19), (R4), R5 <=> stlxp w5, x17, x19, [x4] + STXPW (R30, R25), (R22), R13 <=> stxp w13, w30, w25, [x22] + +2. Expressions for special arguments. + +#<immediate> is written as $<immediate>. + +Optionally-shifted immediate. + +Examples: + + ADD $(3151<<12), R14, R20 <=> add x20, x14, #0xc4f, lsl #12 + ADDW $1864, R25, R6 <=> add w6, w25, #0x748 + +Optionally-shifted registers are written as <Rm>{<shift><amount>}. +The <shift> can be <<(lsl), >>(lsr), ->(asr), @>(ror). + +Examples: + + ADD R19>>30, R10, R24 <=> add x24, x10, x19, lsr #30 + ADDW R26->24, R21, R15 <=> add w15, w21, w26, asr #24 + +Extended registers are written as <Rm>{.<extend>{<<<amount>}}. +<extend> can be UXTB, UXTH, UXTW, UXTX, SXTB, SXTH, SXTW or SXTX. 
+ +Examples: + + ADDS R19.UXTB<<4, R9, R26 <=> adds x26, x9, w19, uxtb #4 + ADDSW R14.SXTX, R14, R6 <=> adds w6, w14, w14, sxtx + +Memory references: [<Xn|SP>{,#0}] is written as (Rn|RSP); a base register and an immediate +offset is written as imm(Rn|RSP); a base register and an offset register is written as (Rn|RSP)(Rm). + +Examples: + + LDAR (R22), R9 <=> ldar x9, [x22] + LDP 28(R17), (R15, R23) <=> ldp x15, x23, [x17,#28] + MOVWU (R4)(R12<<2), R8 <=> ldr w8, [x4, x12, lsl #2] + MOVD (R7)(R11.UXTW<<3), R25 <=> ldr x25, [x7,w11,uxtw #3] + MOVBU (R27)(R23), R14 <=> ldrb w14, [x27,x23] + +Register pairs are written as (Rt1, Rt2). + +Examples: + + LDP.P -240(R11), (R12, R26) <=> ldp x12, x26, [x11],#-240 + +Register with arrangement and register with arrangement and index. + +Examples: + + VADD V5.H8, V18.H8, V9.H8 <=> add v9.8h, v18.8h, v5.8h + VLD1 (R2), [V21.B16] <=> ld1 {v21.16b}, [x2] + VST1.P V9.S[1], (R16)(R21) <=> st1 {v9.s}[1], [x16], x21 + VST1.P [V13.H8, V14.H8, V15.H8], (R3)(R14) <=> st1 {v13.8h-v15.8h}, [x3], x14 + VST1.P [V14.D1, V15.D1], (R7)(R23) <=> st1 {v14.1d, v15.1d}, [x7], x23 +*/ +package arm64 diff --git a/src/cmd/internal/obj/arm64/list7.go b/src/cmd/internal/obj/arm64/list7.go new file mode 100644 index 0000000..0187ad3 --- /dev/null +++ b/src/cmd/internal/obj/arm64/list7.go @@ -0,0 +1,255 @@ +// cmd/7l/list.c and cmd/7l/sub.c from Vita Nuova. +// https://code.google.com/p/ken-cc/source/browse/ +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +package arm64 + +import ( + "cmd/internal/obj" + "fmt" +) + +var strcond = [16]string{ + "EQ", + "NE", + "HS", + "LO", + "MI", + "PL", + "VS", + "VC", + "HI", + "LS", + "GE", + "LT", + "GT", + "LE", + "AL", + "NV", +} + +func init() { + obj.RegisterRegister(obj.RBaseARM64, REG_SPECIAL+1024, rconv) + obj.RegisterOpcode(obj.ABaseARM64, Anames) + obj.RegisterRegisterList(obj.RegListARM64Lo, obj.RegListARM64Hi, rlconv) + obj.RegisterOpSuffix("arm64", obj.CConvARM) + obj.RegisterSpecialOperands(int64(SPOP_BEGIN), int64(SPOP_END), SPCconv) +} + +func arrange(a int) string { + switch a { + case ARNG_8B: + return "B8" + case ARNG_16B: + return "B16" + case ARNG_4H: + return "H4" + case ARNG_8H: + return "H8" + case ARNG_2S: + return "S2" + case ARNG_4S: + return "S4" + case ARNG_1D: + return "D1" + case ARNG_2D: + return "D2" + case ARNG_B: + return "B" + case ARNG_H: + return "H" + case ARNG_S: + return "S" + case ARNG_D: + return "D" + case ARNG_1Q: + return "Q1" + default: + return "" + } +} + +func rconv(r int) string { + ext := (r >> 5) & 7 + if r == REGG { + return "g" + } + switch { + case REG_R0 <= r && r <= REG_R30: + return fmt.Sprintf("R%d", r-REG_R0) + case r == REG_R31: + return "ZR" + case REG_F0 <= r && r <= REG_F31: + return fmt.Sprintf("F%d", r-REG_F0) + case REG_V0 <= r && r <= REG_V31: + return fmt.Sprintf("V%d", r-REG_V0) + case r == REGSP: + return "RSP" + case REG_UXTB <= r && r < REG_UXTH: + if ext != 0 { + return fmt.Sprintf("%s.UXTB<<%d", regname(r), ext) + } else { + return fmt.Sprintf("%s.UXTB", regname(r)) + } + case REG_UXTH <= r && r < REG_UXTW: + if ext != 0 { + return fmt.Sprintf("%s.UXTH<<%d", regname(r), ext) + } else { + return fmt.Sprintf("%s.UXTH", regname(r)) + } + case REG_UXTW <= r && r < REG_UXTX: + if ext != 0 { + return fmt.Sprintf("%s.UXTW<<%d", regname(r), ext) + } else { + return fmt.Sprintf("%s.UXTW", regname(r)) + } + case REG_UXTX <= r && r < REG_SXTB: + if ext != 0 { + return fmt.Sprintf("%s.UXTX<<%d", regname(r), ext) + 
} else { + return fmt.Sprintf("%s.UXTX", regname(r)) + } + case REG_SXTB <= r && r < REG_SXTH: + if ext != 0 { + return fmt.Sprintf("%s.SXTB<<%d", regname(r), ext) + } else { + return fmt.Sprintf("%s.SXTB", regname(r)) + } + case REG_SXTH <= r && r < REG_SXTW: + if ext != 0 { + return fmt.Sprintf("%s.SXTH<<%d", regname(r), ext) + } else { + return fmt.Sprintf("%s.SXTH", regname(r)) + } + case REG_SXTW <= r && r < REG_SXTX: + if ext != 0 { + return fmt.Sprintf("%s.SXTW<<%d", regname(r), ext) + } else { + return fmt.Sprintf("%s.SXTW", regname(r)) + } + case REG_SXTX <= r && r < REG_SPECIAL: + if ext != 0 { + return fmt.Sprintf("%s.SXTX<<%d", regname(r), ext) + } else { + return fmt.Sprintf("%s.SXTX", regname(r)) + } + // bits 0-4 indicate register, bits 5-7 indicate shift amount, bit 8 equals to 0. + case REG_LSL <= r && r < (REG_LSL+1<<8): + return fmt.Sprintf("R%d<<%d", r&31, (r>>5)&7) + case REG_ARNG <= r && r < REG_ELEM: + return fmt.Sprintf("V%d.%s", r&31, arrange((r>>5)&15)) + case REG_ELEM <= r && r < REG_ELEM_END: + return fmt.Sprintf("V%d.%s", r&31, arrange((r>>5)&15)) + } + // Return system register name. + name, _, _ := SysRegEnc(int16(r)) + if name != "" { + return name + } + return fmt.Sprintf("badreg(%d)", r) +} + +func DRconv(a int) string { + if a >= C_NONE && a <= C_NCLASS { + return cnames7[a] + } + return "C_??" +} + +func SPCconv(a int64) string { + spc := SpecialOperand(a) + if spc >= SPOP_BEGIN && spc < SPOP_END { + return fmt.Sprintf("%s", spc) + } + return "SPC_??" +} + +func rlconv(list int64) string { + str := "" + + // ARM64 register list follows ARM64 instruction decode schema + // | 31 | 30 | ... | 15 - 12 | 11 - 10 | ... | + // +----+----+-----+---------+---------+-----+ + // | | Q | ... | opcode | size | ... 
| + + firstReg := int(list & 31) + opcode := (list >> 12) & 15 + var regCnt int + var t string + switch opcode { + case 0x7: + regCnt = 1 + case 0xa: + regCnt = 2 + case 0x6: + regCnt = 3 + case 0x2: + regCnt = 4 + default: + regCnt = -1 + } + // Q:size + arng := ((list>>30)&1)<<2 | (list>>10)&3 + switch arng { + case 0: + t = "B8" + case 4: + t = "B16" + case 1: + t = "H4" + case 5: + t = "H8" + case 2: + t = "S2" + case 6: + t = "S4" + case 3: + t = "D1" + case 7: + t = "D2" + } + for i := 0; i < regCnt; i++ { + if str == "" { + str += "[" + } else { + str += "," + } + str += fmt.Sprintf("V%d.", (firstReg+i)&31) + str += t + } + str += "]" + return str +} + +func regname(r int) string { + if r&31 == 31 { + return "ZR" + } + return fmt.Sprintf("R%d", r&31) +} diff --git a/src/cmd/internal/obj/arm64/obj7.go b/src/cmd/internal/obj/arm64/obj7.go new file mode 100644 index 0000000..6c2cb63 --- /dev/null +++ b/src/cmd/internal/obj/arm64/obj7.go @@ -0,0 +1,1095 @@ +// cmd/7l/noop.c, cmd/7l/obj.c, cmd/ld/pass.c from Vita Nuova. +// https://code.google.com/p/ken-cc/source/browse/ +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package arm64 + +import ( + "cmd/internal/obj" + "cmd/internal/objabi" + "cmd/internal/src" + "cmd/internal/sys" + "internal/buildcfg" + "log" + "math" +) + +// zrReplace is the set of instructions for which $0 in the From operand +// should be replaced with REGZERO. +var zrReplace = map[obj.As]bool{ + AMOVD: true, + AMOVW: true, + AMOVWU: true, + AMOVH: true, + AMOVHU: true, + AMOVB: true, + AMOVBU: true, + ASBC: true, + ASBCW: true, + ASBCS: true, + ASBCSW: true, + AADC: true, + AADCW: true, + AADCS: true, + AADCSW: true, + AFMOVD: true, + AFMOVS: true, + AMSR: true, +} + +func (c *ctxt7) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { + if c.ctxt.Flag_maymorestack != "" { + p = c.cursym.Func().SpillRegisterArgs(p, c.newprog) + + // Save LR and make room for FP, REGCTXT. Leave room + // for caller's saved FP. 
+ const frameSize = 32 + p = obj.Appendp(p, c.newprog) + p.As = AMOVD + p.From.Type = obj.TYPE_REG + p.From.Reg = REGLINK + p.To.Type = obj.TYPE_MEM + p.Scond = C_XPRE + p.To.Offset = -frameSize + p.To.Reg = REGSP + p.Spadj = frameSize + + // Save FP. + p = obj.Appendp(p, c.newprog) + p.As = AMOVD + p.From.Type = obj.TYPE_REG + p.From.Reg = REGFP + p.To.Type = obj.TYPE_MEM + p.To.Reg = REGSP + p.To.Offset = -8 + + p = obj.Appendp(p, c.newprog) + p.As = ASUB + p.From.Type = obj.TYPE_CONST + p.From.Offset = 8 + p.Reg = REGSP + p.To.Type = obj.TYPE_REG + p.To.Reg = REGFP + + // Save REGCTXT (for simplicity we do this whether or + // not we need it.) + p = obj.Appendp(p, c.newprog) + p.As = AMOVD + p.From.Type = obj.TYPE_REG + p.From.Reg = REGCTXT + p.To.Type = obj.TYPE_MEM + p.To.Reg = REGSP + p.To.Offset = 8 + + // BL maymorestack + p = obj.Appendp(p, c.newprog) + p.As = ABL + p.To.Type = obj.TYPE_BRANCH + // See ../x86/obj6.go + p.To.Sym = c.ctxt.LookupABI(c.ctxt.Flag_maymorestack, c.cursym.ABI()) + + // Restore REGCTXT. + p = obj.Appendp(p, c.newprog) + p.As = AMOVD + p.From.Type = obj.TYPE_MEM + p.From.Reg = REGSP + p.From.Offset = 8 + p.To.Type = obj.TYPE_REG + p.To.Reg = REGCTXT + + // Restore FP. + p = obj.Appendp(p, c.newprog) + p.As = AMOVD + p.From.Type = obj.TYPE_MEM + p.From.Reg = REGSP + p.From.Offset = -8 + p.To.Type = obj.TYPE_REG + p.To.Reg = REGFP + + // Restore LR and SP. + p = obj.Appendp(p, c.newprog) + p.As = AMOVD + p.From.Type = obj.TYPE_MEM + p.Scond = C_XPOST + p.From.Offset = frameSize + p.From.Reg = REGSP + p.To.Type = obj.TYPE_REG + p.To.Reg = REGLINK + p.Spadj = -frameSize + + p = c.cursym.Func().UnspillRegisterArgs(p, c.newprog) + } + + // Jump back to here after morestack returns. 
+ startPred := p + + // MOV g_stackguard(g), RT1 + p = obj.Appendp(p, c.newprog) + + p.As = AMOVD + p.From.Type = obj.TYPE_MEM + p.From.Reg = REGG + p.From.Offset = 2 * int64(c.ctxt.Arch.PtrSize) // G.stackguard0 + if c.cursym.CFunc() { + p.From.Offset = 3 * int64(c.ctxt.Arch.PtrSize) // G.stackguard1 + } + p.To.Type = obj.TYPE_REG + p.To.Reg = REGRT1 + + // Mark the stack bound check and morestack call async nonpreemptible. + // If we get preempted here, when resumed the preemption request is + // cleared, but we'll still call morestack, which will double the stack + // unnecessarily. See issue #35470. + p = c.ctxt.StartUnsafePoint(p, c.newprog) + + q := (*obj.Prog)(nil) + if framesize <= objabi.StackSmall { + // small stack: SP < stackguard + // CMP stackguard, SP + + p = obj.Appendp(p, c.newprog) + p.As = ACMP + p.From.Type = obj.TYPE_REG + p.From.Reg = REGRT1 + p.Reg = REGSP + } else if framesize <= objabi.StackBig { + // large stack: SP-framesize < stackguard-StackSmall + // SUB $(framesize-StackSmall), SP, RT2 + // CMP stackguard, RT2 + p = obj.Appendp(p, c.newprog) + + p.As = ASUB + p.From.Type = obj.TYPE_CONST + p.From.Offset = int64(framesize) - objabi.StackSmall + p.Reg = REGSP + p.To.Type = obj.TYPE_REG + p.To.Reg = REGRT2 + + p = obj.Appendp(p, c.newprog) + p.As = ACMP + p.From.Type = obj.TYPE_REG + p.From.Reg = REGRT1 + p.Reg = REGRT2 + } else { + // Such a large stack we need to protect against underflow. + // The runtime guarantees SP > objabi.StackBig, but + // framesize is large enough that SP-framesize may + // underflow, causing a direct comparison with the + // stack guard to incorrectly succeed. We explicitly + // guard against underflow. 
+ // + // SUBS $(framesize-StackSmall), SP, RT2 + // // On underflow, jump to morestack + // BLO label_of_call_to_morestack + // CMP stackguard, RT2 + + p = obj.Appendp(p, c.newprog) + p.As = ASUBS + p.From.Type = obj.TYPE_CONST + p.From.Offset = int64(framesize) - objabi.StackSmall + p.Reg = REGSP + p.To.Type = obj.TYPE_REG + p.To.Reg = REGRT2 + + p = obj.Appendp(p, c.newprog) + q = p + p.As = ABLO + p.To.Type = obj.TYPE_BRANCH + + p = obj.Appendp(p, c.newprog) + p.As = ACMP + p.From.Type = obj.TYPE_REG + p.From.Reg = REGRT1 + p.Reg = REGRT2 + } + + // BLS do-morestack + bls := obj.Appendp(p, c.newprog) + bls.As = ABLS + bls.To.Type = obj.TYPE_BRANCH + + end := c.ctxt.EndUnsafePoint(bls, c.newprog, -1) + + var last *obj.Prog + for last = c.cursym.Func().Text; last.Link != nil; last = last.Link { + } + + // Now we are at the end of the function, but logically + // we are still in function prologue. We need to fix the + // SP data and PCDATA. + spfix := obj.Appendp(last, c.newprog) + spfix.As = obj.ANOP + spfix.Spadj = -framesize + + pcdata := c.ctxt.EmitEntryStackMap(c.cursym, spfix, c.newprog) + pcdata = c.ctxt.StartUnsafePoint(pcdata, c.newprog) + + if q != nil { + q.To.SetTarget(pcdata) + } + bls.To.SetTarget(pcdata) + + spill := c.cursym.Func().SpillRegisterArgs(pcdata, c.newprog) + + // MOV LR, R3 + movlr := obj.Appendp(spill, c.newprog) + movlr.As = AMOVD + movlr.From.Type = obj.TYPE_REG + movlr.From.Reg = REGLINK + movlr.To.Type = obj.TYPE_REG + movlr.To.Reg = REG_R3 + + debug := movlr + if false { + debug = obj.Appendp(debug, c.newprog) + debug.As = AMOVD + debug.From.Type = obj.TYPE_CONST + debug.From.Offset = int64(framesize) + debug.To.Type = obj.TYPE_REG + debug.To.Reg = REGTMP + } + + // BL runtime.morestack(SB) + call := obj.Appendp(debug, c.newprog) + call.As = ABL + call.To.Type = obj.TYPE_BRANCH + morestack := "runtime.morestack" + switch { + case c.cursym.CFunc(): + morestack = "runtime.morestackc" + case !c.cursym.Func().Text.From.Sym.NeedCtxt(): 
+ morestack = "runtime.morestack_noctxt" + } + call.To.Sym = c.ctxt.Lookup(morestack) + + unspill := c.cursym.Func().UnspillRegisterArgs(call, c.newprog) + pcdata = c.ctxt.EndUnsafePoint(unspill, c.newprog, -1) + + // B start + jmp := obj.Appendp(pcdata, c.newprog) + jmp.As = AB + jmp.To.Type = obj.TYPE_BRANCH + jmp.To.SetTarget(startPred.Link) + jmp.Spadj = +framesize + + return end +} + +func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { + c := ctxt7{ctxt: ctxt, newprog: newprog} + + p.From.Class = 0 + p.To.Class = 0 + + // Previously we rewrote $0 to ZR, but we have now removed this change. + // In order to be compatible with some previous legal instruction formats, + // reserve the previous conversion for some specific instructions. + if p.From.Type == obj.TYPE_CONST && p.From.Offset == 0 && zrReplace[p.As] { + p.From.Type = obj.TYPE_REG + p.From.Reg = REGZERO + } + + // Rewrite BR/BL to symbol as TYPE_BRANCH. + switch p.As { + case AB, + ABL, + obj.ARET, + obj.ADUFFZERO, + obj.ADUFFCOPY: + if p.To.Sym != nil { + p.To.Type = obj.TYPE_BRANCH + } + break + } + + // Rewrite float constants to values stored in memory. 
+ switch p.As { + case AFMOVS: + if p.From.Type == obj.TYPE_FCONST { + f64 := p.From.Val.(float64) + f32 := float32(f64) + if c.chipfloat7(f64) > 0 { + break + } + if math.Float32bits(f32) == 0 { + p.From.Type = obj.TYPE_REG + p.From.Reg = REGZERO + break + } + p.From.Type = obj.TYPE_MEM + p.From.Sym = c.ctxt.Float32Sym(f32) + p.From.Name = obj.NAME_EXTERN + p.From.Offset = 0 + } + + case AFMOVD: + if p.From.Type == obj.TYPE_FCONST { + f64 := p.From.Val.(float64) + if c.chipfloat7(f64) > 0 { + break + } + if math.Float64bits(f64) == 0 { + p.From.Type = obj.TYPE_REG + p.From.Reg = REGZERO + break + } + p.From.Type = obj.TYPE_MEM + p.From.Sym = c.ctxt.Float64Sym(f64) + p.From.Name = obj.NAME_EXTERN + p.From.Offset = 0 + } + + break + } + + if c.ctxt.Flag_dynlink { + c.rewriteToUseGot(p) + } +} + +// Rewrite p, if necessary, to access global data via the global offset table. +func (c *ctxt7) rewriteToUseGot(p *obj.Prog) { + if p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO { + // ADUFFxxx $offset + // becomes + // MOVD runtime.duffxxx@GOT, REGTMP + // ADD $offset, REGTMP + // CALL REGTMP + var sym *obj.LSym + if p.As == obj.ADUFFZERO { + sym = c.ctxt.LookupABI("runtime.duffzero", obj.ABIInternal) + } else { + sym = c.ctxt.LookupABI("runtime.duffcopy", obj.ABIInternal) + } + offset := p.To.Offset + p.As = AMOVD + p.From.Type = obj.TYPE_MEM + p.From.Name = obj.NAME_GOTREF + p.From.Sym = sym + p.To.Type = obj.TYPE_REG + p.To.Reg = REGTMP + p.To.Name = obj.NAME_NONE + p.To.Offset = 0 + p.To.Sym = nil + p1 := obj.Appendp(p, c.newprog) + p1.As = AADD + p1.From.Type = obj.TYPE_CONST + p1.From.Offset = offset + p1.To.Type = obj.TYPE_REG + p1.To.Reg = REGTMP + p2 := obj.Appendp(p1, c.newprog) + p2.As = obj.ACALL + p2.To.Type = obj.TYPE_REG + p2.To.Reg = REGTMP + } + + // We only care about global data: NAME_EXTERN means a global + // symbol in the Go sense, and p.Sym.Local is true for a few + // internally defined symbols. 
+ if p.From.Type == obj.TYPE_ADDR && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() { + // MOVD $sym, Rx becomes MOVD sym@GOT, Rx + // MOVD $sym+<off>, Rx becomes MOVD sym@GOT, Rx; ADD <off>, Rx + if p.As != AMOVD { + c.ctxt.Diag("do not know how to handle TYPE_ADDR in %v with -dynlink", p) + } + if p.To.Type != obj.TYPE_REG { + c.ctxt.Diag("do not know how to handle LEAQ-type insn to non-register in %v with -dynlink", p) + } + p.From.Type = obj.TYPE_MEM + p.From.Name = obj.NAME_GOTREF + if p.From.Offset != 0 { + q := obj.Appendp(p, c.newprog) + q.As = AADD + q.From.Type = obj.TYPE_CONST + q.From.Offset = p.From.Offset + q.To = p.To + p.From.Offset = 0 + } + } + if p.GetFrom3() != nil && p.GetFrom3().Name == obj.NAME_EXTERN { + c.ctxt.Diag("don't know how to handle %v with -dynlink", p) + } + var source *obj.Addr + // MOVx sym, Ry becomes MOVD sym@GOT, REGTMP; MOVx (REGTMP), Ry + // MOVx Ry, sym becomes MOVD sym@GOT, REGTMP; MOVD Ry, (REGTMP) + // An addition may be inserted between the two MOVs if there is an offset. 
+ if p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() { + if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() { + c.ctxt.Diag("cannot handle NAME_EXTERN on both sides in %v with -dynlink", p) + } + source = &p.From + } else if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() { + source = &p.To + } else { + return + } + if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ACALL || p.As == obj.ARET || p.As == obj.AJMP { + return + } + if source.Sym.Type == objabi.STLSBSS { + return + } + if source.Type != obj.TYPE_MEM { + c.ctxt.Diag("don't know how to handle %v with -dynlink", p) + } + p1 := obj.Appendp(p, c.newprog) + p2 := obj.Appendp(p1, c.newprog) + p1.As = AMOVD + p1.From.Type = obj.TYPE_MEM + p1.From.Sym = source.Sym + p1.From.Name = obj.NAME_GOTREF + p1.To.Type = obj.TYPE_REG + p1.To.Reg = REGTMP + + p2.As = p.As + p2.From = p.From + p2.To = p.To + if p.From.Name == obj.NAME_EXTERN { + p2.From.Reg = REGTMP + p2.From.Name = obj.NAME_NONE + p2.From.Sym = nil + } else if p.To.Name == obj.NAME_EXTERN { + p2.To.Reg = REGTMP + p2.To.Name = obj.NAME_NONE + p2.To.Sym = nil + } else { + return + } + obj.Nopout(p) +} + +func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { + if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { + return + } + + c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym} + + p := c.cursym.Func().Text + textstksiz := p.To.Offset + if textstksiz == -8 { + // Historical way to mark NOFRAME. 
+ p.From.Sym.Set(obj.AttrNoFrame, true) + textstksiz = 0 + } + if textstksiz < 0 { + c.ctxt.Diag("negative frame size %d - did you mean NOFRAME?", textstksiz) + } + if p.From.Sym.NoFrame() { + if textstksiz != 0 { + c.ctxt.Diag("NOFRAME functions must have a frame size of 0, not %d", textstksiz) + } + } + + c.cursym.Func().Args = p.To.Val.(int32) + c.cursym.Func().Locals = int32(textstksiz) + + /* + * find leaf subroutines + */ + for p := c.cursym.Func().Text; p != nil; p = p.Link { + switch p.As { + case obj.ATEXT: + p.Mark |= LEAF + + case ABL, + obj.ADUFFZERO, + obj.ADUFFCOPY: + c.cursym.Func().Text.Mark &^= LEAF + } + } + + var q *obj.Prog + var q1 *obj.Prog + var retjmp *obj.LSym + for p := c.cursym.Func().Text; p != nil; p = p.Link { + o := p.As + switch o { + case obj.ATEXT: + c.cursym.Func().Text = p + c.autosize = int32(textstksiz) + + if p.Mark&LEAF != 0 && c.autosize == 0 { + // A leaf function with no locals has no frame. + p.From.Sym.Set(obj.AttrNoFrame, true) + } + + if !p.From.Sym.NoFrame() { + // If there is a stack frame at all, it includes + // space to save the LR. 
+ c.autosize += 8 + } + + if c.autosize != 0 { + extrasize := int32(0) + if c.autosize%16 == 8 { + // Allocate extra 8 bytes on the frame top to save FP + extrasize = 8 + } else if c.autosize&(16-1) == 0 { + // Allocate extra 16 bytes to save FP for the old frame whose size is 8 mod 16 + extrasize = 16 + } else { + c.ctxt.Diag("%v: unaligned frame size %d - must be 16 aligned", p, c.autosize-8) + } + c.autosize += extrasize + c.cursym.Func().Locals += extrasize + + // low 32 bits for autosize + // high 32 bits for extrasize + p.To.Offset = int64(c.autosize) | int64(extrasize)<<32 + } else { + // NOFRAME + p.To.Offset = 0 + } + + if c.autosize == 0 && c.cursym.Func().Text.Mark&LEAF == 0 { + if c.ctxt.Debugvlog { + c.ctxt.Logf("save suppressed in: %s\n", c.cursym.Func().Text.From.Sym.Name) + } + c.cursym.Func().Text.Mark |= LEAF + } + + if cursym.Func().Text.Mark&LEAF != 0 { + cursym.Set(obj.AttrLeaf, true) + if p.From.Sym.NoFrame() { + break + } + } + + if p.Mark&LEAF != 0 && c.autosize < objabi.StackSmall { + // A leaf function with a small stack can be marked + // NOSPLIT, avoiding a stack check. + p.From.Sym.Set(obj.AttrNoSplit, true) + } + + if !p.From.Sym.NoSplit() { + p = c.stacksplit(p, c.autosize) // emit split check + } + + var prologueEnd *obj.Prog + + aoffset := c.autosize + if aoffset > 0xf0 { + // MOVD.W offset variant range is -0x100 to 0xf8, SP should be 16-byte aligned. + // so the maximum aoffset value is 0xf0. + aoffset = 0xf0 + } + + // Frame is non-empty. Make sure to save link register, even if + // it is a leaf function, so that traceback works. + q = p + if c.autosize > aoffset { + // Frame size is too large for a MOVD.W instruction. Store the frame pointer + // register and link register before decrementing SP, so if a signal comes + // during the execution of the function prologue, the traceback code will + // not see a half-updated stack frame. 
+ + // SUB $autosize, RSP, R20 + q1 = obj.Appendp(q, c.newprog) + q1.Pos = p.Pos + q1.As = ASUB + q1.From.Type = obj.TYPE_CONST + q1.From.Offset = int64(c.autosize) + q1.Reg = REGSP + q1.To.Type = obj.TYPE_REG + q1.To.Reg = REG_R20 + + prologueEnd = q1 + + // STP (R29, R30), -8(R20) + q1 = obj.Appendp(q1, c.newprog) + q1.Pos = p.Pos + q1.As = ASTP + q1.From.Type = obj.TYPE_REGREG + q1.From.Reg = REGFP + q1.From.Offset = REGLINK + q1.To.Type = obj.TYPE_MEM + q1.To.Reg = REG_R20 + q1.To.Offset = -8 + + // This is not async preemptible, as if we open a frame + // at the current SP, it will clobber the saved LR. + q1 = c.ctxt.StartUnsafePoint(q1, c.newprog) + + // MOVD R20, RSP + q1 = obj.Appendp(q1, c.newprog) + q1.Pos = p.Pos + q1.As = AMOVD + q1.From.Type = obj.TYPE_REG + q1.From.Reg = REG_R20 + q1.To.Type = obj.TYPE_REG + q1.To.Reg = REGSP + q1.Spadj = c.autosize + + q1 = c.ctxt.EndUnsafePoint(q1, c.newprog, -1) + + if buildcfg.GOOS == "ios" { + // iOS does not support SA_ONSTACK. We will run the signal handler + // on the G stack. If we write below SP, it may be clobbered by + // the signal handler. So we save FP and LR after decrementing SP. + // STP (R29, R30), -8(RSP) + q1 = obj.Appendp(q1, c.newprog) + q1.Pos = p.Pos + q1.As = ASTP + q1.From.Type = obj.TYPE_REGREG + q1.From.Reg = REGFP + q1.From.Offset = REGLINK + q1.To.Type = obj.TYPE_MEM + q1.To.Reg = REGSP + q1.To.Offset = -8 + } + } else { + // small frame, update SP and save LR in a single MOVD.W instruction. + // So if a signal comes during the execution of the function prologue, + // the traceback code will not see a half-updated stack frame. + // Also, on Linux, in a cgo binary we may get a SIGSETXID signal + // early on before the signal stack is set, as glibc doesn't allow + // us to block SIGSETXID. So it is important that we don't write below + // the SP until the signal stack is set. + // Luckily, all the functions from thread entry to setting the signal + // stack have small frames. 
+ q1 = obj.Appendp(q, c.newprog) + q1.As = AMOVD + q1.Pos = p.Pos + q1.From.Type = obj.TYPE_REG + q1.From.Reg = REGLINK + q1.To.Type = obj.TYPE_MEM + q1.Scond = C_XPRE + q1.To.Offset = int64(-aoffset) + q1.To.Reg = REGSP + q1.Spadj = aoffset + + prologueEnd = q1 + + // Frame pointer. + q1 = obj.Appendp(q1, c.newprog) + q1.Pos = p.Pos + q1.As = AMOVD + q1.From.Type = obj.TYPE_REG + q1.From.Reg = REGFP + q1.To.Type = obj.TYPE_MEM + q1.To.Reg = REGSP + q1.To.Offset = -8 + } + + prologueEnd.Pos = prologueEnd.Pos.WithXlogue(src.PosPrologueEnd) + + q1 = obj.Appendp(q1, c.newprog) + q1.Pos = p.Pos + q1.As = ASUB + q1.From.Type = obj.TYPE_CONST + q1.From.Offset = 8 + q1.Reg = REGSP + q1.To.Type = obj.TYPE_REG + q1.To.Reg = REGFP + + if c.cursym.Func().Text.From.Sym.Wrapper() { + // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame + // + // MOV g_panic(g), RT1 + // CBNZ checkargp + // end: + // NOP + // ... function body ... + // checkargp: + // MOV panic_argp(RT1), RT2 + // ADD $(autosize+8), RSP, R20 + // CMP RT2, R20 + // BNE end + // ADD $8, RSP, R20 + // MOVD R20, panic_argp(RT1) + // B end + // + // The NOP is needed to give the jumps somewhere to land. + // It is a liblink NOP, not an ARM64 NOP: it encodes to 0 instruction bytes. 
+ q = q1 + + // MOV g_panic(g), RT1 + q = obj.Appendp(q, c.newprog) + q.As = AMOVD + q.From.Type = obj.TYPE_MEM + q.From.Reg = REGG + q.From.Offset = 4 * int64(c.ctxt.Arch.PtrSize) // G.panic + q.To.Type = obj.TYPE_REG + q.To.Reg = REGRT1 + + // CBNZ RT1, checkargp + cbnz := obj.Appendp(q, c.newprog) + cbnz.As = ACBNZ + cbnz.From.Type = obj.TYPE_REG + cbnz.From.Reg = REGRT1 + cbnz.To.Type = obj.TYPE_BRANCH + + // Empty branch target at the top of the function body + end := obj.Appendp(cbnz, c.newprog) + end.As = obj.ANOP + + // find the end of the function + var last *obj.Prog + for last = end; last.Link != nil; last = last.Link { + } + + // MOV panic_argp(RT1), RT2 + mov := obj.Appendp(last, c.newprog) + mov.As = AMOVD + mov.From.Type = obj.TYPE_MEM + mov.From.Reg = REGRT1 + mov.From.Offset = 0 // Panic.argp + mov.To.Type = obj.TYPE_REG + mov.To.Reg = REGRT2 + + // CBNZ branches to the MOV above + cbnz.To.SetTarget(mov) + + // ADD $(autosize+8), SP, R20 + q = obj.Appendp(mov, c.newprog) + q.As = AADD + q.From.Type = obj.TYPE_CONST + q.From.Offset = int64(c.autosize) + 8 + q.Reg = REGSP + q.To.Type = obj.TYPE_REG + q.To.Reg = REG_R20 + + // CMP RT2, R20 + q = obj.Appendp(q, c.newprog) + q.As = ACMP + q.From.Type = obj.TYPE_REG + q.From.Reg = REGRT2 + q.Reg = REG_R20 + + // BNE end + q = obj.Appendp(q, c.newprog) + q.As = ABNE + q.To.Type = obj.TYPE_BRANCH + q.To.SetTarget(end) + + // ADD $8, SP, R20 + q = obj.Appendp(q, c.newprog) + q.As = AADD + q.From.Type = obj.TYPE_CONST + q.From.Offset = 8 + q.Reg = REGSP + q.To.Type = obj.TYPE_REG + q.To.Reg = REG_R20 + + // MOV R20, panic_argp(RT1) + q = obj.Appendp(q, c.newprog) + q.As = AMOVD + q.From.Type = obj.TYPE_REG + q.From.Reg = REG_R20 + q.To.Type = obj.TYPE_MEM + q.To.Reg = REGRT1 + q.To.Offset = 0 // Panic.argp + + // B end + q = obj.Appendp(q, c.newprog) + q.As = AB + q.To.Type = obj.TYPE_BRANCH + q.To.SetTarget(end) + } + + case obj.ARET: + nocache(p) + if p.From.Type == obj.TYPE_CONST { + c.ctxt.Diag("using 
BECOME (%v) is not supported!", p) + break + } + + retjmp = p.To.Sym + p.To = obj.Addr{} + if c.cursym.Func().Text.Mark&LEAF != 0 { + if c.autosize != 0 { + p.As = AADD + p.From.Type = obj.TYPE_CONST + p.From.Offset = int64(c.autosize) + p.To.Type = obj.TYPE_REG + p.To.Reg = REGSP + p.Spadj = -c.autosize + + // Frame pointer. + p = obj.Appendp(p, c.newprog) + p.As = ASUB + p.From.Type = obj.TYPE_CONST + p.From.Offset = 8 + p.Reg = REGSP + p.To.Type = obj.TYPE_REG + p.To.Reg = REGFP + } + } else { + aoffset := c.autosize + // LDP -8(RSP), (R29, R30) + p.As = ALDP + p.From.Type = obj.TYPE_MEM + p.From.Offset = -8 + p.From.Reg = REGSP + p.To.Type = obj.TYPE_REGREG + p.To.Reg = REGFP + p.To.Offset = REGLINK + + // ADD $aoffset, RSP, RSP + q = newprog() + q.As = AADD + q.From.Type = obj.TYPE_CONST + q.From.Offset = int64(aoffset) + q.To.Type = obj.TYPE_REG + q.To.Reg = REGSP + q.Spadj = -aoffset + q.Pos = p.Pos + q.Link = p.Link + p.Link = q + p = q + } + + // If enabled, this code emits 'MOV PC, R27' before every 'MOV LR, PC', + // so that if you are debugging a low-level crash where PC and LR are zero, + // you can look at R27 to see what jumped to the zero. + // This is useful when bringing up Go on a new system. + // (There is similar code in ../ppc64/obj9.go:/if.false.) 
+ const debugRETZERO = false + if debugRETZERO { + if p.As != obj.ARET { + q = newprog() + q.Pos = p.Pos + q.Link = p.Link + p.Link = q + p = q + } + p.As = AADR + p.From.Type = obj.TYPE_BRANCH + p.From.Offset = 0 + p.To.Type = obj.TYPE_REG + p.To.Reg = REGTMP + + } + + if p.As != obj.ARET { + q = newprog() + q.Pos = p.Pos + q.Link = p.Link + p.Link = q + p = q + } + + if retjmp != nil { // retjmp + p.As = AB + p.To.Type = obj.TYPE_BRANCH + p.To.Sym = retjmp + p.Spadj = +c.autosize + break + } + + p.As = obj.ARET + p.To.Type = obj.TYPE_MEM + p.To.Offset = 0 + p.To.Reg = REGLINK + p.Spadj = +c.autosize + + case AADD, ASUB: + if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.From.Type == obj.TYPE_CONST { + if p.As == AADD { + p.Spadj = int32(-p.From.Offset) + } else { + p.Spadj = int32(+p.From.Offset) + } + } + + case obj.AGETCALLERPC: + if cursym.Leaf() { + /* MOVD LR, Rd */ + p.As = AMOVD + p.From.Type = obj.TYPE_REG + p.From.Reg = REGLINK + } else { + /* MOVD (RSP), Rd */ + p.As = AMOVD + p.From.Type = obj.TYPE_MEM + p.From.Reg = REGSP + } + + case obj.ADUFFCOPY: + // ADR ret_addr, R27 + // STP (FP, R27), -24(SP) + // SUB 24, SP, FP + // DUFFCOPY + // ret_addr: + // SUB 8, SP, FP + + q1 := p + // copy DUFFCOPY from q1 to q4 + q4 := obj.Appendp(p, c.newprog) + q4.Pos = p.Pos + q4.As = obj.ADUFFCOPY + q4.To = p.To + + q1.As = AADR + q1.From.Type = obj.TYPE_BRANCH + q1.To.Type = obj.TYPE_REG + q1.To.Reg = REG_R27 + + q2 := obj.Appendp(q1, c.newprog) + q2.Pos = p.Pos + q2.As = ASTP + q2.From.Type = obj.TYPE_REGREG + q2.From.Reg = REGFP + q2.From.Offset = int64(REG_R27) + q2.To.Type = obj.TYPE_MEM + q2.To.Reg = REGSP + q2.To.Offset = -24 + + // maintain FP for DUFFCOPY + q3 := obj.Appendp(q2, c.newprog) + q3.Pos = p.Pos + q3.As = ASUB + q3.From.Type = obj.TYPE_CONST + q3.From.Offset = 24 + q3.Reg = REGSP + q3.To.Type = obj.TYPE_REG + q3.To.Reg = REGFP + + q5 := obj.Appendp(q4, c.newprog) + q5.Pos = p.Pos + q5.As = ASUB + q5.From.Type = obj.TYPE_CONST + 
q5.From.Offset = 8 + q5.Reg = REGSP + q5.To.Type = obj.TYPE_REG + q5.To.Reg = REGFP + q1.From.SetTarget(q5) + p = q5 + + case obj.ADUFFZERO: + // ADR ret_addr, R27 + // STP (FP, R27), -24(SP) + // SUB 24, SP, FP + // DUFFZERO + // ret_addr: + // SUB 8, SP, FP + + q1 := p + // copy DUFFZERO from q1 to q4 + q4 := obj.Appendp(p, c.newprog) + q4.Pos = p.Pos + q4.As = obj.ADUFFZERO + q4.To = p.To + + q1.As = AADR + q1.From.Type = obj.TYPE_BRANCH + q1.To.Type = obj.TYPE_REG + q1.To.Reg = REG_R27 + + q2 := obj.Appendp(q1, c.newprog) + q2.Pos = p.Pos + q2.As = ASTP + q2.From.Type = obj.TYPE_REGREG + q2.From.Reg = REGFP + q2.From.Offset = int64(REG_R27) + q2.To.Type = obj.TYPE_MEM + q2.To.Reg = REGSP + q2.To.Offset = -24 + + // maintain FP for DUFFZERO + q3 := obj.Appendp(q2, c.newprog) + q3.Pos = p.Pos + q3.As = ASUB + q3.From.Type = obj.TYPE_CONST + q3.From.Offset = 24 + q3.Reg = REGSP + q3.To.Type = obj.TYPE_REG + q3.To.Reg = REGFP + + q5 := obj.Appendp(q4, c.newprog) + q5.Pos = p.Pos + q5.As = ASUB + q5.From.Type = obj.TYPE_CONST + q5.From.Offset = 8 + q5.Reg = REGSP + q5.To.Type = obj.TYPE_REG + q5.To.Reg = REGFP + q1.From.SetTarget(q5) + p = q5 + } + + if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.Spadj == 0 { + f := c.cursym.Func() + if f.FuncFlag&objabi.FuncFlag_SPWRITE == 0 { + c.cursym.Func().FuncFlag |= objabi.FuncFlag_SPWRITE + if ctxt.Debugvlog || !ctxt.IsAsm { + ctxt.Logf("auto-SPWRITE: %s %v\n", c.cursym.Name, p) + if !ctxt.IsAsm { + ctxt.Diag("invalid auto-SPWRITE in non-assembly") + ctxt.DiagFlush() + log.Fatalf("bad SPWRITE") + } + } + } + } + if p.From.Type == obj.TYPE_SHIFT && (p.To.Reg == REG_RSP || p.Reg == REG_RSP) { + offset := p.From.Offset + op := offset & (3 << 22) + if op != SHIFT_LL { + ctxt.Diag("illegal combination: %v", p) + } + r := (offset >> 16) & 31 + shift := (offset >> 10) & 63 + if shift > 4 { + // the shift amount is out of range, in order to avoid repeated error + // reportings, don't call ctxt.Diag, because asmout case 27 
has the + // same check. + shift = 7 + } + p.From.Type = obj.TYPE_REG + p.From.Reg = int16(REG_LSL + r + (shift&7)<<5) + p.From.Offset = 0 + } + } +} + +func nocache(p *obj.Prog) { + p.Optab = 0 + p.From.Class = 0 + p.To.Class = 0 +} + +var unaryDst = map[obj.As]bool{ + AWORD: true, + ADWORD: true, + ABL: true, + AB: true, + ACLREX: true, +} + +var Linkarm64 = obj.LinkArch{ + Arch: sys.ArchARM64, + Init: buildop, + Preprocess: preprocess, + Assemble: span7, + Progedit: progedit, + UnaryDst: unaryDst, + DWARFRegisters: ARM64DWARFRegisters, +} diff --git a/src/cmd/internal/obj/arm64/specialoperand_string.go b/src/cmd/internal/obj/arm64/specialoperand_string.go new file mode 100644 index 0000000..0a73c69 --- /dev/null +++ b/src/cmd/internal/obj/arm64/specialoperand_string.go @@ -0,0 +1,166 @@ +// Code generated by "stringer -type SpecialOperand -trimprefix SPOP_"; DO NOT EDIT. + +package arm64 + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. 
+ var x [1]struct{} + _ = x[SPOP_PLDL1KEEP-0] + _ = x[SPOP_BEGIN-0] + _ = x[SPOP_PLDL1STRM-1] + _ = x[SPOP_PLDL2KEEP-2] + _ = x[SPOP_PLDL2STRM-3] + _ = x[SPOP_PLDL3KEEP-4] + _ = x[SPOP_PLDL3STRM-5] + _ = x[SPOP_PLIL1KEEP-6] + _ = x[SPOP_PLIL1STRM-7] + _ = x[SPOP_PLIL2KEEP-8] + _ = x[SPOP_PLIL2STRM-9] + _ = x[SPOP_PLIL3KEEP-10] + _ = x[SPOP_PLIL3STRM-11] + _ = x[SPOP_PSTL1KEEP-12] + _ = x[SPOP_PSTL1STRM-13] + _ = x[SPOP_PSTL2KEEP-14] + _ = x[SPOP_PSTL2STRM-15] + _ = x[SPOP_PSTL3KEEP-16] + _ = x[SPOP_PSTL3STRM-17] + _ = x[SPOP_VMALLE1IS-18] + _ = x[SPOP_VAE1IS-19] + _ = x[SPOP_ASIDE1IS-20] + _ = x[SPOP_VAAE1IS-21] + _ = x[SPOP_VALE1IS-22] + _ = x[SPOP_VAALE1IS-23] + _ = x[SPOP_VMALLE1-24] + _ = x[SPOP_VAE1-25] + _ = x[SPOP_ASIDE1-26] + _ = x[SPOP_VAAE1-27] + _ = x[SPOP_VALE1-28] + _ = x[SPOP_VAALE1-29] + _ = x[SPOP_IPAS2E1IS-30] + _ = x[SPOP_IPAS2LE1IS-31] + _ = x[SPOP_ALLE2IS-32] + _ = x[SPOP_VAE2IS-33] + _ = x[SPOP_ALLE1IS-34] + _ = x[SPOP_VALE2IS-35] + _ = x[SPOP_VMALLS12E1IS-36] + _ = x[SPOP_IPAS2E1-37] + _ = x[SPOP_IPAS2LE1-38] + _ = x[SPOP_ALLE2-39] + _ = x[SPOP_VAE2-40] + _ = x[SPOP_ALLE1-41] + _ = x[SPOP_VALE2-42] + _ = x[SPOP_VMALLS12E1-43] + _ = x[SPOP_ALLE3IS-44] + _ = x[SPOP_VAE3IS-45] + _ = x[SPOP_VALE3IS-46] + _ = x[SPOP_ALLE3-47] + _ = x[SPOP_VAE3-48] + _ = x[SPOP_VALE3-49] + _ = x[SPOP_VMALLE1OS-50] + _ = x[SPOP_VAE1OS-51] + _ = x[SPOP_ASIDE1OS-52] + _ = x[SPOP_VAAE1OS-53] + _ = x[SPOP_VALE1OS-54] + _ = x[SPOP_VAALE1OS-55] + _ = x[SPOP_RVAE1IS-56] + _ = x[SPOP_RVAAE1IS-57] + _ = x[SPOP_RVALE1IS-58] + _ = x[SPOP_RVAALE1IS-59] + _ = x[SPOP_RVAE1OS-60] + _ = x[SPOP_RVAAE1OS-61] + _ = x[SPOP_RVALE1OS-62] + _ = x[SPOP_RVAALE1OS-63] + _ = x[SPOP_RVAE1-64] + _ = x[SPOP_RVAAE1-65] + _ = x[SPOP_RVALE1-66] + _ = x[SPOP_RVAALE1-67] + _ = x[SPOP_RIPAS2E1IS-68] + _ = x[SPOP_RIPAS2LE1IS-69] + _ = x[SPOP_ALLE2OS-70] + _ = x[SPOP_VAE2OS-71] + _ = x[SPOP_ALLE1OS-72] + _ = x[SPOP_VALE2OS-73] + _ = x[SPOP_VMALLS12E1OS-74] + _ = x[SPOP_RVAE2IS-75] + _ = 
x[SPOP_RVALE2IS-76] + _ = x[SPOP_IPAS2E1OS-77] + _ = x[SPOP_RIPAS2E1-78] + _ = x[SPOP_RIPAS2E1OS-79] + _ = x[SPOP_IPAS2LE1OS-80] + _ = x[SPOP_RIPAS2LE1-81] + _ = x[SPOP_RIPAS2LE1OS-82] + _ = x[SPOP_RVAE2OS-83] + _ = x[SPOP_RVALE2OS-84] + _ = x[SPOP_RVAE2-85] + _ = x[SPOP_RVALE2-86] + _ = x[SPOP_ALLE3OS-87] + _ = x[SPOP_VAE3OS-88] + _ = x[SPOP_VALE3OS-89] + _ = x[SPOP_RVAE3IS-90] + _ = x[SPOP_RVALE3IS-91] + _ = x[SPOP_RVAE3OS-92] + _ = x[SPOP_RVALE3OS-93] + _ = x[SPOP_RVAE3-94] + _ = x[SPOP_RVALE3-95] + _ = x[SPOP_IVAC-96] + _ = x[SPOP_ISW-97] + _ = x[SPOP_CSW-98] + _ = x[SPOP_CISW-99] + _ = x[SPOP_ZVA-100] + _ = x[SPOP_CVAC-101] + _ = x[SPOP_CVAU-102] + _ = x[SPOP_CIVAC-103] + _ = x[SPOP_IGVAC-104] + _ = x[SPOP_IGSW-105] + _ = x[SPOP_IGDVAC-106] + _ = x[SPOP_IGDSW-107] + _ = x[SPOP_CGSW-108] + _ = x[SPOP_CGDSW-109] + _ = x[SPOP_CIGSW-110] + _ = x[SPOP_CIGDSW-111] + _ = x[SPOP_GVA-112] + _ = x[SPOP_GZVA-113] + _ = x[SPOP_CGVAC-114] + _ = x[SPOP_CGDVAC-115] + _ = x[SPOP_CGVAP-116] + _ = x[SPOP_CGDVAP-117] + _ = x[SPOP_CGVADP-118] + _ = x[SPOP_CGDVADP-119] + _ = x[SPOP_CIGVAC-120] + _ = x[SPOP_CIGDVAC-121] + _ = x[SPOP_CVAP-122] + _ = x[SPOP_CVADP-123] + _ = x[SPOP_DAIFSet-124] + _ = x[SPOP_DAIFClr-125] + _ = x[SPOP_EQ-126] + _ = x[SPOP_NE-127] + _ = x[SPOP_HS-128] + _ = x[SPOP_LO-129] + _ = x[SPOP_MI-130] + _ = x[SPOP_PL-131] + _ = x[SPOP_VS-132] + _ = x[SPOP_VC-133] + _ = x[SPOP_HI-134] + _ = x[SPOP_LS-135] + _ = x[SPOP_GE-136] + _ = x[SPOP_LT-137] + _ = x[SPOP_GT-138] + _ = x[SPOP_LE-139] + _ = x[SPOP_AL-140] + _ = x[SPOP_NV-141] + _ = x[SPOP_END-142] +} + +const _SpecialOperand_name = 
"PLDL1KEEPPLDL1STRMPLDL2KEEPPLDL2STRMPLDL3KEEPPLDL3STRMPLIL1KEEPPLIL1STRMPLIL2KEEPPLIL2STRMPLIL3KEEPPLIL3STRMPSTL1KEEPPSTL1STRMPSTL2KEEPPSTL2STRMPSTL3KEEPPSTL3STRMVMALLE1ISVAE1ISASIDE1ISVAAE1ISVALE1ISVAALE1ISVMALLE1VAE1ASIDE1VAAE1VALE1VAALE1IPAS2E1ISIPAS2LE1ISALLE2ISVAE2ISALLE1ISVALE2ISVMALLS12E1ISIPAS2E1IPAS2LE1ALLE2VAE2ALLE1VALE2VMALLS12E1ALLE3ISVAE3ISVALE3ISALLE3VAE3VALE3VMALLE1OSVAE1OSASIDE1OSVAAE1OSVALE1OSVAALE1OSRVAE1ISRVAAE1ISRVALE1ISRVAALE1ISRVAE1OSRVAAE1OSRVALE1OSRVAALE1OSRVAE1RVAAE1RVALE1RVAALE1RIPAS2E1ISRIPAS2LE1ISALLE2OSVAE2OSALLE1OSVALE2OSVMALLS12E1OSRVAE2ISRVALE2ISIPAS2E1OSRIPAS2E1RIPAS2E1OSIPAS2LE1OSRIPAS2LE1RIPAS2LE1OSRVAE2OSRVALE2OSRVAE2RVALE2ALLE3OSVAE3OSVALE3OSRVAE3ISRVALE3ISRVAE3OSRVALE3OSRVAE3RVALE3IVACISWCSWCISWZVACVACCVAUCIVACIGVACIGSWIGDVACIGDSWCGSWCGDSWCIGSWCIGDSWGVAGZVACGVACCGDVACCGVAPCGDVAPCGVADPCGDVADPCIGVACCIGDVACCVAPCVADPDAIFSetDAIFClrEQNEHSLOMIPLVSVCHILSGELTGTLEALNVEND" + +var _SpecialOperand_index = [...]uint16{0, 9, 18, 27, 36, 45, 54, 63, 72, 81, 90, 99, 108, 117, 126, 135, 144, 153, 162, 171, 177, 185, 192, 199, 207, 214, 218, 224, 229, 234, 240, 249, 259, 266, 272, 279, 286, 298, 305, 313, 318, 322, 327, 332, 342, 349, 355, 362, 367, 371, 376, 385, 391, 399, 406, 413, 421, 428, 436, 444, 453, 460, 468, 476, 485, 490, 496, 502, 509, 519, 530, 537, 543, 550, 557, 569, 576, 584, 593, 601, 611, 621, 630, 641, 648, 656, 661, 667, 674, 680, 687, 694, 702, 709, 717, 722, 728, 732, 735, 738, 742, 745, 749, 753, 758, 763, 767, 773, 778, 782, 787, 792, 798, 801, 805, 810, 816, 821, 827, 833, 840, 846, 853, 857, 862, 869, 876, 878, 880, 882, 884, 886, 888, 890, 892, 894, 896, 898, 900, 902, 904, 906, 908, 911} + +func (i SpecialOperand) String() string { + if i < 0 || i >= SpecialOperand(len(_SpecialOperand_index)-1) { + return "SpecialOperand(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _SpecialOperand_name[_SpecialOperand_index[i]:_SpecialOperand_index[i+1]] +} diff --git a/src/cmd/internal/obj/arm64/sysRegEnc.go 
b/src/cmd/internal/obj/arm64/sysRegEnc.go new file mode 100644 index 0000000..7760a44 --- /dev/null +++ b/src/cmd/internal/obj/arm64/sysRegEnc.go @@ -0,0 +1,895 @@ +// Code generated by arm64gen -i ./files -o sysRegEnc.go. DO NOT EDIT. + +package arm64 + +const ( + SYSREG_BEGIN = REG_SPECIAL + iota + REG_ACTLR_EL1 + REG_AFSR0_EL1 + REG_AFSR1_EL1 + REG_AIDR_EL1 + REG_AMAIR_EL1 + REG_AMCFGR_EL0 + REG_AMCGCR_EL0 + REG_AMCNTENCLR0_EL0 + REG_AMCNTENCLR1_EL0 + REG_AMCNTENSET0_EL0 + REG_AMCNTENSET1_EL0 + REG_AMCR_EL0 + REG_AMEVCNTR00_EL0 + REG_AMEVCNTR01_EL0 + REG_AMEVCNTR02_EL0 + REG_AMEVCNTR03_EL0 + REG_AMEVCNTR04_EL0 + REG_AMEVCNTR05_EL0 + REG_AMEVCNTR06_EL0 + REG_AMEVCNTR07_EL0 + REG_AMEVCNTR08_EL0 + REG_AMEVCNTR09_EL0 + REG_AMEVCNTR010_EL0 + REG_AMEVCNTR011_EL0 + REG_AMEVCNTR012_EL0 + REG_AMEVCNTR013_EL0 + REG_AMEVCNTR014_EL0 + REG_AMEVCNTR015_EL0 + REG_AMEVCNTR10_EL0 + REG_AMEVCNTR11_EL0 + REG_AMEVCNTR12_EL0 + REG_AMEVCNTR13_EL0 + REG_AMEVCNTR14_EL0 + REG_AMEVCNTR15_EL0 + REG_AMEVCNTR16_EL0 + REG_AMEVCNTR17_EL0 + REG_AMEVCNTR18_EL0 + REG_AMEVCNTR19_EL0 + REG_AMEVCNTR110_EL0 + REG_AMEVCNTR111_EL0 + REG_AMEVCNTR112_EL0 + REG_AMEVCNTR113_EL0 + REG_AMEVCNTR114_EL0 + REG_AMEVCNTR115_EL0 + REG_AMEVTYPER00_EL0 + REG_AMEVTYPER01_EL0 + REG_AMEVTYPER02_EL0 + REG_AMEVTYPER03_EL0 + REG_AMEVTYPER04_EL0 + REG_AMEVTYPER05_EL0 + REG_AMEVTYPER06_EL0 + REG_AMEVTYPER07_EL0 + REG_AMEVTYPER08_EL0 + REG_AMEVTYPER09_EL0 + REG_AMEVTYPER010_EL0 + REG_AMEVTYPER011_EL0 + REG_AMEVTYPER012_EL0 + REG_AMEVTYPER013_EL0 + REG_AMEVTYPER014_EL0 + REG_AMEVTYPER015_EL0 + REG_AMEVTYPER10_EL0 + REG_AMEVTYPER11_EL0 + REG_AMEVTYPER12_EL0 + REG_AMEVTYPER13_EL0 + REG_AMEVTYPER14_EL0 + REG_AMEVTYPER15_EL0 + REG_AMEVTYPER16_EL0 + REG_AMEVTYPER17_EL0 + REG_AMEVTYPER18_EL0 + REG_AMEVTYPER19_EL0 + REG_AMEVTYPER110_EL0 + REG_AMEVTYPER111_EL0 + REG_AMEVTYPER112_EL0 + REG_AMEVTYPER113_EL0 + REG_AMEVTYPER114_EL0 + REG_AMEVTYPER115_EL0 + REG_AMUSERENR_EL0 + REG_APDAKeyHi_EL1 + REG_APDAKeyLo_EL1 + REG_APDBKeyHi_EL1 + 
REG_APDBKeyLo_EL1 + REG_APGAKeyHi_EL1 + REG_APGAKeyLo_EL1 + REG_APIAKeyHi_EL1 + REG_APIAKeyLo_EL1 + REG_APIBKeyHi_EL1 + REG_APIBKeyLo_EL1 + REG_CCSIDR2_EL1 + REG_CCSIDR_EL1 + REG_CLIDR_EL1 + REG_CNTFRQ_EL0 + REG_CNTKCTL_EL1 + REG_CNTP_CTL_EL0 + REG_CNTP_CVAL_EL0 + REG_CNTP_TVAL_EL0 + REG_CNTPCT_EL0 + REG_CNTPS_CTL_EL1 + REG_CNTPS_CVAL_EL1 + REG_CNTPS_TVAL_EL1 + REG_CNTV_CTL_EL0 + REG_CNTV_CVAL_EL0 + REG_CNTV_TVAL_EL0 + REG_CNTVCT_EL0 + REG_CONTEXTIDR_EL1 + REG_CPACR_EL1 + REG_CSSELR_EL1 + REG_CTR_EL0 + REG_CurrentEL + REG_DAIF + REG_DBGAUTHSTATUS_EL1 + REG_DBGBCR0_EL1 + REG_DBGBCR1_EL1 + REG_DBGBCR2_EL1 + REG_DBGBCR3_EL1 + REG_DBGBCR4_EL1 + REG_DBGBCR5_EL1 + REG_DBGBCR6_EL1 + REG_DBGBCR7_EL1 + REG_DBGBCR8_EL1 + REG_DBGBCR9_EL1 + REG_DBGBCR10_EL1 + REG_DBGBCR11_EL1 + REG_DBGBCR12_EL1 + REG_DBGBCR13_EL1 + REG_DBGBCR14_EL1 + REG_DBGBCR15_EL1 + REG_DBGBVR0_EL1 + REG_DBGBVR1_EL1 + REG_DBGBVR2_EL1 + REG_DBGBVR3_EL1 + REG_DBGBVR4_EL1 + REG_DBGBVR5_EL1 + REG_DBGBVR6_EL1 + REG_DBGBVR7_EL1 + REG_DBGBVR8_EL1 + REG_DBGBVR9_EL1 + REG_DBGBVR10_EL1 + REG_DBGBVR11_EL1 + REG_DBGBVR12_EL1 + REG_DBGBVR13_EL1 + REG_DBGBVR14_EL1 + REG_DBGBVR15_EL1 + REG_DBGCLAIMCLR_EL1 + REG_DBGCLAIMSET_EL1 + REG_DBGDTR_EL0 + REG_DBGDTRRX_EL0 + REG_DBGDTRTX_EL0 + REG_DBGPRCR_EL1 + REG_DBGWCR0_EL1 + REG_DBGWCR1_EL1 + REG_DBGWCR2_EL1 + REG_DBGWCR3_EL1 + REG_DBGWCR4_EL1 + REG_DBGWCR5_EL1 + REG_DBGWCR6_EL1 + REG_DBGWCR7_EL1 + REG_DBGWCR8_EL1 + REG_DBGWCR9_EL1 + REG_DBGWCR10_EL1 + REG_DBGWCR11_EL1 + REG_DBGWCR12_EL1 + REG_DBGWCR13_EL1 + REG_DBGWCR14_EL1 + REG_DBGWCR15_EL1 + REG_DBGWVR0_EL1 + REG_DBGWVR1_EL1 + REG_DBGWVR2_EL1 + REG_DBGWVR3_EL1 + REG_DBGWVR4_EL1 + REG_DBGWVR5_EL1 + REG_DBGWVR6_EL1 + REG_DBGWVR7_EL1 + REG_DBGWVR8_EL1 + REG_DBGWVR9_EL1 + REG_DBGWVR10_EL1 + REG_DBGWVR11_EL1 + REG_DBGWVR12_EL1 + REG_DBGWVR13_EL1 + REG_DBGWVR14_EL1 + REG_DBGWVR15_EL1 + REG_DCZID_EL0 + REG_DISR_EL1 + REG_DIT + REG_DLR_EL0 + REG_DSPSR_EL0 + REG_ELR_EL1 + REG_ERRIDR_EL1 + REG_ERRSELR_EL1 + REG_ERXADDR_EL1 + 
REG_ERXCTLR_EL1 + REG_ERXFR_EL1 + REG_ERXMISC0_EL1 + REG_ERXMISC1_EL1 + REG_ERXMISC2_EL1 + REG_ERXMISC3_EL1 + REG_ERXPFGCDN_EL1 + REG_ERXPFGCTL_EL1 + REG_ERXPFGF_EL1 + REG_ERXSTATUS_EL1 + REG_ESR_EL1 + REG_FAR_EL1 + REG_FPCR + REG_FPSR + REG_GCR_EL1 + REG_GMID_EL1 + REG_ICC_AP0R0_EL1 + REG_ICC_AP0R1_EL1 + REG_ICC_AP0R2_EL1 + REG_ICC_AP0R3_EL1 + REG_ICC_AP1R0_EL1 + REG_ICC_AP1R1_EL1 + REG_ICC_AP1R2_EL1 + REG_ICC_AP1R3_EL1 + REG_ICC_ASGI1R_EL1 + REG_ICC_BPR0_EL1 + REG_ICC_BPR1_EL1 + REG_ICC_CTLR_EL1 + REG_ICC_DIR_EL1 + REG_ICC_EOIR0_EL1 + REG_ICC_EOIR1_EL1 + REG_ICC_HPPIR0_EL1 + REG_ICC_HPPIR1_EL1 + REG_ICC_IAR0_EL1 + REG_ICC_IAR1_EL1 + REG_ICC_IGRPEN0_EL1 + REG_ICC_IGRPEN1_EL1 + REG_ICC_PMR_EL1 + REG_ICC_RPR_EL1 + REG_ICC_SGI0R_EL1 + REG_ICC_SGI1R_EL1 + REG_ICC_SRE_EL1 + REG_ICV_AP0R0_EL1 + REG_ICV_AP0R1_EL1 + REG_ICV_AP0R2_EL1 + REG_ICV_AP0R3_EL1 + REG_ICV_AP1R0_EL1 + REG_ICV_AP1R1_EL1 + REG_ICV_AP1R2_EL1 + REG_ICV_AP1R3_EL1 + REG_ICV_BPR0_EL1 + REG_ICV_BPR1_EL1 + REG_ICV_CTLR_EL1 + REG_ICV_DIR_EL1 + REG_ICV_EOIR0_EL1 + REG_ICV_EOIR1_EL1 + REG_ICV_HPPIR0_EL1 + REG_ICV_HPPIR1_EL1 + REG_ICV_IAR0_EL1 + REG_ICV_IAR1_EL1 + REG_ICV_IGRPEN0_EL1 + REG_ICV_IGRPEN1_EL1 + REG_ICV_PMR_EL1 + REG_ICV_RPR_EL1 + REG_ID_AA64AFR0_EL1 + REG_ID_AA64AFR1_EL1 + REG_ID_AA64DFR0_EL1 + REG_ID_AA64DFR1_EL1 + REG_ID_AA64ISAR0_EL1 + REG_ID_AA64ISAR1_EL1 + REG_ID_AA64MMFR0_EL1 + REG_ID_AA64MMFR1_EL1 + REG_ID_AA64MMFR2_EL1 + REG_ID_AA64PFR0_EL1 + REG_ID_AA64PFR1_EL1 + REG_ID_AA64ZFR0_EL1 + REG_ID_AFR0_EL1 + REG_ID_DFR0_EL1 + REG_ID_ISAR0_EL1 + REG_ID_ISAR1_EL1 + REG_ID_ISAR2_EL1 + REG_ID_ISAR3_EL1 + REG_ID_ISAR4_EL1 + REG_ID_ISAR5_EL1 + REG_ID_ISAR6_EL1 + REG_ID_MMFR0_EL1 + REG_ID_MMFR1_EL1 + REG_ID_MMFR2_EL1 + REG_ID_MMFR3_EL1 + REG_ID_MMFR4_EL1 + REG_ID_PFR0_EL1 + REG_ID_PFR1_EL1 + REG_ID_PFR2_EL1 + REG_ISR_EL1 + REG_LORC_EL1 + REG_LOREA_EL1 + REG_LORID_EL1 + REG_LORN_EL1 + REG_LORSA_EL1 + REG_MAIR_EL1 + REG_MDCCINT_EL1 + REG_MDCCSR_EL0 + REG_MDRAR_EL1 + REG_MDSCR_EL1 + REG_MIDR_EL1 + 
REG_MPAM0_EL1 + REG_MPAM1_EL1 + REG_MPAMIDR_EL1 + REG_MPIDR_EL1 + REG_MVFR0_EL1 + REG_MVFR1_EL1 + REG_MVFR2_EL1 + REG_NZCV + REG_OSDLR_EL1 + REG_OSDTRRX_EL1 + REG_OSDTRTX_EL1 + REG_OSECCR_EL1 + REG_OSLAR_EL1 + REG_OSLSR_EL1 + REG_PAN + REG_PAR_EL1 + REG_PMBIDR_EL1 + REG_PMBLIMITR_EL1 + REG_PMBPTR_EL1 + REG_PMBSR_EL1 + REG_PMCCFILTR_EL0 + REG_PMCCNTR_EL0 + REG_PMCEID0_EL0 + REG_PMCEID1_EL0 + REG_PMCNTENCLR_EL0 + REG_PMCNTENSET_EL0 + REG_PMCR_EL0 + REG_PMEVCNTR0_EL0 + REG_PMEVCNTR1_EL0 + REG_PMEVCNTR2_EL0 + REG_PMEVCNTR3_EL0 + REG_PMEVCNTR4_EL0 + REG_PMEVCNTR5_EL0 + REG_PMEVCNTR6_EL0 + REG_PMEVCNTR7_EL0 + REG_PMEVCNTR8_EL0 + REG_PMEVCNTR9_EL0 + REG_PMEVCNTR10_EL0 + REG_PMEVCNTR11_EL0 + REG_PMEVCNTR12_EL0 + REG_PMEVCNTR13_EL0 + REG_PMEVCNTR14_EL0 + REG_PMEVCNTR15_EL0 + REG_PMEVCNTR16_EL0 + REG_PMEVCNTR17_EL0 + REG_PMEVCNTR18_EL0 + REG_PMEVCNTR19_EL0 + REG_PMEVCNTR20_EL0 + REG_PMEVCNTR21_EL0 + REG_PMEVCNTR22_EL0 + REG_PMEVCNTR23_EL0 + REG_PMEVCNTR24_EL0 + REG_PMEVCNTR25_EL0 + REG_PMEVCNTR26_EL0 + REG_PMEVCNTR27_EL0 + REG_PMEVCNTR28_EL0 + REG_PMEVCNTR29_EL0 + REG_PMEVCNTR30_EL0 + REG_PMEVTYPER0_EL0 + REG_PMEVTYPER1_EL0 + REG_PMEVTYPER2_EL0 + REG_PMEVTYPER3_EL0 + REG_PMEVTYPER4_EL0 + REG_PMEVTYPER5_EL0 + REG_PMEVTYPER6_EL0 + REG_PMEVTYPER7_EL0 + REG_PMEVTYPER8_EL0 + REG_PMEVTYPER9_EL0 + REG_PMEVTYPER10_EL0 + REG_PMEVTYPER11_EL0 + REG_PMEVTYPER12_EL0 + REG_PMEVTYPER13_EL0 + REG_PMEVTYPER14_EL0 + REG_PMEVTYPER15_EL0 + REG_PMEVTYPER16_EL0 + REG_PMEVTYPER17_EL0 + REG_PMEVTYPER18_EL0 + REG_PMEVTYPER19_EL0 + REG_PMEVTYPER20_EL0 + REG_PMEVTYPER21_EL0 + REG_PMEVTYPER22_EL0 + REG_PMEVTYPER23_EL0 + REG_PMEVTYPER24_EL0 + REG_PMEVTYPER25_EL0 + REG_PMEVTYPER26_EL0 + REG_PMEVTYPER27_EL0 + REG_PMEVTYPER28_EL0 + REG_PMEVTYPER29_EL0 + REG_PMEVTYPER30_EL0 + REG_PMINTENCLR_EL1 + REG_PMINTENSET_EL1 + REG_PMMIR_EL1 + REG_PMOVSCLR_EL0 + REG_PMOVSSET_EL0 + REG_PMSCR_EL1 + REG_PMSELR_EL0 + REG_PMSEVFR_EL1 + REG_PMSFCR_EL1 + REG_PMSICR_EL1 + REG_PMSIDR_EL1 + REG_PMSIRR_EL1 + REG_PMSLATFR_EL1 + 
REG_PMSWINC_EL0 + REG_PMUSERENR_EL0 + REG_PMXEVCNTR_EL0 + REG_PMXEVTYPER_EL0 + REG_REVIDR_EL1 + REG_RGSR_EL1 + REG_RMR_EL1 + REG_RNDR + REG_RNDRRS + REG_RVBAR_EL1 + REG_SCTLR_EL1 + REG_SCXTNUM_EL0 + REG_SCXTNUM_EL1 + REG_SP_EL0 + REG_SP_EL1 + REG_SPSel + REG_SPSR_abt + REG_SPSR_EL1 + REG_SPSR_fiq + REG_SPSR_irq + REG_SPSR_und + REG_SSBS + REG_TCO + REG_TCR_EL1 + REG_TFSR_EL1 + REG_TFSRE0_EL1 + REG_TPIDR_EL0 + REG_TPIDR_EL1 + REG_TPIDRRO_EL0 + REG_TRFCR_EL1 + REG_TTBR0_EL1 + REG_TTBR1_EL1 + REG_UAO + REG_VBAR_EL1 + REG_ZCR_EL1 + SYSREG_END +) + +const ( + SR_READ = 1 << iota + SR_WRITE +) + +var SystemReg = []struct { + Name string + Reg int16 + Enc uint32 + // AccessFlags is the readable and writeable property of system register. + AccessFlags uint8 +}{ + {"ACTLR_EL1", REG_ACTLR_EL1, 0x181020, SR_READ | SR_WRITE}, + {"AFSR0_EL1", REG_AFSR0_EL1, 0x185100, SR_READ | SR_WRITE}, + {"AFSR1_EL1", REG_AFSR1_EL1, 0x185120, SR_READ | SR_WRITE}, + {"AIDR_EL1", REG_AIDR_EL1, 0x1900e0, SR_READ}, + {"AMAIR_EL1", REG_AMAIR_EL1, 0x18a300, SR_READ | SR_WRITE}, + {"AMCFGR_EL0", REG_AMCFGR_EL0, 0x1bd220, SR_READ}, + {"AMCGCR_EL0", REG_AMCGCR_EL0, 0x1bd240, SR_READ}, + {"AMCNTENCLR0_EL0", REG_AMCNTENCLR0_EL0, 0x1bd280, SR_READ | SR_WRITE}, + {"AMCNTENCLR1_EL0", REG_AMCNTENCLR1_EL0, 0x1bd300, SR_READ | SR_WRITE}, + {"AMCNTENSET0_EL0", REG_AMCNTENSET0_EL0, 0x1bd2a0, SR_READ | SR_WRITE}, + {"AMCNTENSET1_EL0", REG_AMCNTENSET1_EL0, 0x1bd320, SR_READ | SR_WRITE}, + {"AMCR_EL0", REG_AMCR_EL0, 0x1bd200, SR_READ | SR_WRITE}, + {"AMEVCNTR00_EL0", REG_AMEVCNTR00_EL0, 0x1bd400, SR_READ | SR_WRITE}, + {"AMEVCNTR01_EL0", REG_AMEVCNTR01_EL0, 0x1bd420, SR_READ | SR_WRITE}, + {"AMEVCNTR02_EL0", REG_AMEVCNTR02_EL0, 0x1bd440, SR_READ | SR_WRITE}, + {"AMEVCNTR03_EL0", REG_AMEVCNTR03_EL0, 0x1bd460, SR_READ | SR_WRITE}, + {"AMEVCNTR04_EL0", REG_AMEVCNTR04_EL0, 0x1bd480, SR_READ | SR_WRITE}, + {"AMEVCNTR05_EL0", REG_AMEVCNTR05_EL0, 0x1bd4a0, SR_READ | SR_WRITE}, + {"AMEVCNTR06_EL0", REG_AMEVCNTR06_EL0, 
0x1bd4c0, SR_READ | SR_WRITE}, + {"AMEVCNTR07_EL0", REG_AMEVCNTR07_EL0, 0x1bd4e0, SR_READ | SR_WRITE}, + {"AMEVCNTR08_EL0", REG_AMEVCNTR08_EL0, 0x1bd500, SR_READ | SR_WRITE}, + {"AMEVCNTR09_EL0", REG_AMEVCNTR09_EL0, 0x1bd520, SR_READ | SR_WRITE}, + {"AMEVCNTR010_EL0", REG_AMEVCNTR010_EL0, 0x1bd540, SR_READ | SR_WRITE}, + {"AMEVCNTR011_EL0", REG_AMEVCNTR011_EL0, 0x1bd560, SR_READ | SR_WRITE}, + {"AMEVCNTR012_EL0", REG_AMEVCNTR012_EL0, 0x1bd580, SR_READ | SR_WRITE}, + {"AMEVCNTR013_EL0", REG_AMEVCNTR013_EL0, 0x1bd5a0, SR_READ | SR_WRITE}, + {"AMEVCNTR014_EL0", REG_AMEVCNTR014_EL0, 0x1bd5c0, SR_READ | SR_WRITE}, + {"AMEVCNTR015_EL0", REG_AMEVCNTR015_EL0, 0x1bd5e0, SR_READ | SR_WRITE}, + {"AMEVCNTR10_EL0", REG_AMEVCNTR10_EL0, 0x1bdc00, SR_READ | SR_WRITE}, + {"AMEVCNTR11_EL0", REG_AMEVCNTR11_EL0, 0x1bdc20, SR_READ | SR_WRITE}, + {"AMEVCNTR12_EL0", REG_AMEVCNTR12_EL0, 0x1bdc40, SR_READ | SR_WRITE}, + {"AMEVCNTR13_EL0", REG_AMEVCNTR13_EL0, 0x1bdc60, SR_READ | SR_WRITE}, + {"AMEVCNTR14_EL0", REG_AMEVCNTR14_EL0, 0x1bdc80, SR_READ | SR_WRITE}, + {"AMEVCNTR15_EL0", REG_AMEVCNTR15_EL0, 0x1bdca0, SR_READ | SR_WRITE}, + {"AMEVCNTR16_EL0", REG_AMEVCNTR16_EL0, 0x1bdcc0, SR_READ | SR_WRITE}, + {"AMEVCNTR17_EL0", REG_AMEVCNTR17_EL0, 0x1bdce0, SR_READ | SR_WRITE}, + {"AMEVCNTR18_EL0", REG_AMEVCNTR18_EL0, 0x1bdd00, SR_READ | SR_WRITE}, + {"AMEVCNTR19_EL0", REG_AMEVCNTR19_EL0, 0x1bdd20, SR_READ | SR_WRITE}, + {"AMEVCNTR110_EL0", REG_AMEVCNTR110_EL0, 0x1bdd40, SR_READ | SR_WRITE}, + {"AMEVCNTR111_EL0", REG_AMEVCNTR111_EL0, 0x1bdd60, SR_READ | SR_WRITE}, + {"AMEVCNTR112_EL0", REG_AMEVCNTR112_EL0, 0x1bdd80, SR_READ | SR_WRITE}, + {"AMEVCNTR113_EL0", REG_AMEVCNTR113_EL0, 0x1bdda0, SR_READ | SR_WRITE}, + {"AMEVCNTR114_EL0", REG_AMEVCNTR114_EL0, 0x1bddc0, SR_READ | SR_WRITE}, + {"AMEVCNTR115_EL0", REG_AMEVCNTR115_EL0, 0x1bdde0, SR_READ | SR_WRITE}, + {"AMEVTYPER00_EL0", REG_AMEVTYPER00_EL0, 0x1bd600, SR_READ}, + {"AMEVTYPER01_EL0", REG_AMEVTYPER01_EL0, 0x1bd620, SR_READ}, + 
{"AMEVTYPER02_EL0", REG_AMEVTYPER02_EL0, 0x1bd640, SR_READ}, + {"AMEVTYPER03_EL0", REG_AMEVTYPER03_EL0, 0x1bd660, SR_READ}, + {"AMEVTYPER04_EL0", REG_AMEVTYPER04_EL0, 0x1bd680, SR_READ}, + {"AMEVTYPER05_EL0", REG_AMEVTYPER05_EL0, 0x1bd6a0, SR_READ}, + {"AMEVTYPER06_EL0", REG_AMEVTYPER06_EL0, 0x1bd6c0, SR_READ}, + {"AMEVTYPER07_EL0", REG_AMEVTYPER07_EL0, 0x1bd6e0, SR_READ}, + {"AMEVTYPER08_EL0", REG_AMEVTYPER08_EL0, 0x1bd700, SR_READ}, + {"AMEVTYPER09_EL0", REG_AMEVTYPER09_EL0, 0x1bd720, SR_READ}, + {"AMEVTYPER010_EL0", REG_AMEVTYPER010_EL0, 0x1bd740, SR_READ}, + {"AMEVTYPER011_EL0", REG_AMEVTYPER011_EL0, 0x1bd760, SR_READ}, + {"AMEVTYPER012_EL0", REG_AMEVTYPER012_EL0, 0x1bd780, SR_READ}, + {"AMEVTYPER013_EL0", REG_AMEVTYPER013_EL0, 0x1bd7a0, SR_READ}, + {"AMEVTYPER014_EL0", REG_AMEVTYPER014_EL0, 0x1bd7c0, SR_READ}, + {"AMEVTYPER015_EL0", REG_AMEVTYPER015_EL0, 0x1bd7e0, SR_READ}, + {"AMEVTYPER10_EL0", REG_AMEVTYPER10_EL0, 0x1bde00, SR_READ | SR_WRITE}, + {"AMEVTYPER11_EL0", REG_AMEVTYPER11_EL0, 0x1bde20, SR_READ | SR_WRITE}, + {"AMEVTYPER12_EL0", REG_AMEVTYPER12_EL0, 0x1bde40, SR_READ | SR_WRITE}, + {"AMEVTYPER13_EL0", REG_AMEVTYPER13_EL0, 0x1bde60, SR_READ | SR_WRITE}, + {"AMEVTYPER14_EL0", REG_AMEVTYPER14_EL0, 0x1bde80, SR_READ | SR_WRITE}, + {"AMEVTYPER15_EL0", REG_AMEVTYPER15_EL0, 0x1bdea0, SR_READ | SR_WRITE}, + {"AMEVTYPER16_EL0", REG_AMEVTYPER16_EL0, 0x1bdec0, SR_READ | SR_WRITE}, + {"AMEVTYPER17_EL0", REG_AMEVTYPER17_EL0, 0x1bdee0, SR_READ | SR_WRITE}, + {"AMEVTYPER18_EL0", REG_AMEVTYPER18_EL0, 0x1bdf00, SR_READ | SR_WRITE}, + {"AMEVTYPER19_EL0", REG_AMEVTYPER19_EL0, 0x1bdf20, SR_READ | SR_WRITE}, + {"AMEVTYPER110_EL0", REG_AMEVTYPER110_EL0, 0x1bdf40, SR_READ | SR_WRITE}, + {"AMEVTYPER111_EL0", REG_AMEVTYPER111_EL0, 0x1bdf60, SR_READ | SR_WRITE}, + {"AMEVTYPER112_EL0", REG_AMEVTYPER112_EL0, 0x1bdf80, SR_READ | SR_WRITE}, + {"AMEVTYPER113_EL0", REG_AMEVTYPER113_EL0, 0x1bdfa0, SR_READ | SR_WRITE}, + {"AMEVTYPER114_EL0", REG_AMEVTYPER114_EL0, 0x1bdfc0, SR_READ 
| SR_WRITE}, + {"AMEVTYPER115_EL0", REG_AMEVTYPER115_EL0, 0x1bdfe0, SR_READ | SR_WRITE}, + {"AMUSERENR_EL0", REG_AMUSERENR_EL0, 0x1bd260, SR_READ | SR_WRITE}, + {"APDAKeyHi_EL1", REG_APDAKeyHi_EL1, 0x182220, SR_READ | SR_WRITE}, + {"APDAKeyLo_EL1", REG_APDAKeyLo_EL1, 0x182200, SR_READ | SR_WRITE}, + {"APDBKeyHi_EL1", REG_APDBKeyHi_EL1, 0x182260, SR_READ | SR_WRITE}, + {"APDBKeyLo_EL1", REG_APDBKeyLo_EL1, 0x182240, SR_READ | SR_WRITE}, + {"APGAKeyHi_EL1", REG_APGAKeyHi_EL1, 0x182320, SR_READ | SR_WRITE}, + {"APGAKeyLo_EL1", REG_APGAKeyLo_EL1, 0x182300, SR_READ | SR_WRITE}, + {"APIAKeyHi_EL1", REG_APIAKeyHi_EL1, 0x182120, SR_READ | SR_WRITE}, + {"APIAKeyLo_EL1", REG_APIAKeyLo_EL1, 0x182100, SR_READ | SR_WRITE}, + {"APIBKeyHi_EL1", REG_APIBKeyHi_EL1, 0x182160, SR_READ | SR_WRITE}, + {"APIBKeyLo_EL1", REG_APIBKeyLo_EL1, 0x182140, SR_READ | SR_WRITE}, + {"CCSIDR2_EL1", REG_CCSIDR2_EL1, 0x190040, SR_READ}, + {"CCSIDR_EL1", REG_CCSIDR_EL1, 0x190000, SR_READ}, + {"CLIDR_EL1", REG_CLIDR_EL1, 0x190020, SR_READ}, + {"CNTFRQ_EL0", REG_CNTFRQ_EL0, 0x1be000, SR_READ | SR_WRITE}, + {"CNTKCTL_EL1", REG_CNTKCTL_EL1, 0x18e100, SR_READ | SR_WRITE}, + {"CNTP_CTL_EL0", REG_CNTP_CTL_EL0, 0x1be220, SR_READ | SR_WRITE}, + {"CNTP_CVAL_EL0", REG_CNTP_CVAL_EL0, 0x1be240, SR_READ | SR_WRITE}, + {"CNTP_TVAL_EL0", REG_CNTP_TVAL_EL0, 0x1be200, SR_READ | SR_WRITE}, + {"CNTPCT_EL0", REG_CNTPCT_EL0, 0x1be020, SR_READ}, + {"CNTPS_CTL_EL1", REG_CNTPS_CTL_EL1, 0x1fe220, SR_READ | SR_WRITE}, + {"CNTPS_CVAL_EL1", REG_CNTPS_CVAL_EL1, 0x1fe240, SR_READ | SR_WRITE}, + {"CNTPS_TVAL_EL1", REG_CNTPS_TVAL_EL1, 0x1fe200, SR_READ | SR_WRITE}, + {"CNTV_CTL_EL0", REG_CNTV_CTL_EL0, 0x1be320, SR_READ | SR_WRITE}, + {"CNTV_CVAL_EL0", REG_CNTV_CVAL_EL0, 0x1be340, SR_READ | SR_WRITE}, + {"CNTV_TVAL_EL0", REG_CNTV_TVAL_EL0, 0x1be300, SR_READ | SR_WRITE}, + {"CNTVCT_EL0", REG_CNTVCT_EL0, 0x1be040, SR_READ}, + {"CONTEXTIDR_EL1", REG_CONTEXTIDR_EL1, 0x18d020, SR_READ | SR_WRITE}, + {"CPACR_EL1", REG_CPACR_EL1, 0x181040, 
SR_READ | SR_WRITE}, + {"CSSELR_EL1", REG_CSSELR_EL1, 0x1a0000, SR_READ | SR_WRITE}, + {"CTR_EL0", REG_CTR_EL0, 0x1b0020, SR_READ}, + {"CurrentEL", REG_CurrentEL, 0x184240, SR_READ}, + {"DAIF", REG_DAIF, 0x1b4220, SR_READ | SR_WRITE}, + {"DBGAUTHSTATUS_EL1", REG_DBGAUTHSTATUS_EL1, 0x107ec0, SR_READ}, + {"DBGBCR0_EL1", REG_DBGBCR0_EL1, 0x1000a0, SR_READ | SR_WRITE}, + {"DBGBCR1_EL1", REG_DBGBCR1_EL1, 0x1001a0, SR_READ | SR_WRITE}, + {"DBGBCR2_EL1", REG_DBGBCR2_EL1, 0x1002a0, SR_READ | SR_WRITE}, + {"DBGBCR3_EL1", REG_DBGBCR3_EL1, 0x1003a0, SR_READ | SR_WRITE}, + {"DBGBCR4_EL1", REG_DBGBCR4_EL1, 0x1004a0, SR_READ | SR_WRITE}, + {"DBGBCR5_EL1", REG_DBGBCR5_EL1, 0x1005a0, SR_READ | SR_WRITE}, + {"DBGBCR6_EL1", REG_DBGBCR6_EL1, 0x1006a0, SR_READ | SR_WRITE}, + {"DBGBCR7_EL1", REG_DBGBCR7_EL1, 0x1007a0, SR_READ | SR_WRITE}, + {"DBGBCR8_EL1", REG_DBGBCR8_EL1, 0x1008a0, SR_READ | SR_WRITE}, + {"DBGBCR9_EL1", REG_DBGBCR9_EL1, 0x1009a0, SR_READ | SR_WRITE}, + {"DBGBCR10_EL1", REG_DBGBCR10_EL1, 0x100aa0, SR_READ | SR_WRITE}, + {"DBGBCR11_EL1", REG_DBGBCR11_EL1, 0x100ba0, SR_READ | SR_WRITE}, + {"DBGBCR12_EL1", REG_DBGBCR12_EL1, 0x100ca0, SR_READ | SR_WRITE}, + {"DBGBCR13_EL1", REG_DBGBCR13_EL1, 0x100da0, SR_READ | SR_WRITE}, + {"DBGBCR14_EL1", REG_DBGBCR14_EL1, 0x100ea0, SR_READ | SR_WRITE}, + {"DBGBCR15_EL1", REG_DBGBCR15_EL1, 0x100fa0, SR_READ | SR_WRITE}, + {"DBGBVR0_EL1", REG_DBGBVR0_EL1, 0x100080, SR_READ | SR_WRITE}, + {"DBGBVR1_EL1", REG_DBGBVR1_EL1, 0x100180, SR_READ | SR_WRITE}, + {"DBGBVR2_EL1", REG_DBGBVR2_EL1, 0x100280, SR_READ | SR_WRITE}, + {"DBGBVR3_EL1", REG_DBGBVR3_EL1, 0x100380, SR_READ | SR_WRITE}, + {"DBGBVR4_EL1", REG_DBGBVR4_EL1, 0x100480, SR_READ | SR_WRITE}, + {"DBGBVR5_EL1", REG_DBGBVR5_EL1, 0x100580, SR_READ | SR_WRITE}, + {"DBGBVR6_EL1", REG_DBGBVR6_EL1, 0x100680, SR_READ | SR_WRITE}, + {"DBGBVR7_EL1", REG_DBGBVR7_EL1, 0x100780, SR_READ | SR_WRITE}, + {"DBGBVR8_EL1", REG_DBGBVR8_EL1, 0x100880, SR_READ | SR_WRITE}, + {"DBGBVR9_EL1", REG_DBGBVR9_EL1, 
0x100980, SR_READ | SR_WRITE}, + {"DBGBVR10_EL1", REG_DBGBVR10_EL1, 0x100a80, SR_READ | SR_WRITE}, + {"DBGBVR11_EL1", REG_DBGBVR11_EL1, 0x100b80, SR_READ | SR_WRITE}, + {"DBGBVR12_EL1", REG_DBGBVR12_EL1, 0x100c80, SR_READ | SR_WRITE}, + {"DBGBVR13_EL1", REG_DBGBVR13_EL1, 0x100d80, SR_READ | SR_WRITE}, + {"DBGBVR14_EL1", REG_DBGBVR14_EL1, 0x100e80, SR_READ | SR_WRITE}, + {"DBGBVR15_EL1", REG_DBGBVR15_EL1, 0x100f80, SR_READ | SR_WRITE}, + {"DBGCLAIMCLR_EL1", REG_DBGCLAIMCLR_EL1, 0x1079c0, SR_READ | SR_WRITE}, + {"DBGCLAIMSET_EL1", REG_DBGCLAIMSET_EL1, 0x1078c0, SR_READ | SR_WRITE}, + {"DBGDTR_EL0", REG_DBGDTR_EL0, 0x130400, SR_READ | SR_WRITE}, + {"DBGDTRRX_EL0", REG_DBGDTRRX_EL0, 0x130500, SR_READ}, + {"DBGDTRTX_EL0", REG_DBGDTRTX_EL0, 0x130500, SR_WRITE}, + {"DBGPRCR_EL1", REG_DBGPRCR_EL1, 0x101480, SR_READ | SR_WRITE}, + {"DBGWCR0_EL1", REG_DBGWCR0_EL1, 0x1000e0, SR_READ | SR_WRITE}, + {"DBGWCR1_EL1", REG_DBGWCR1_EL1, 0x1001e0, SR_READ | SR_WRITE}, + {"DBGWCR2_EL1", REG_DBGWCR2_EL1, 0x1002e0, SR_READ | SR_WRITE}, + {"DBGWCR3_EL1", REG_DBGWCR3_EL1, 0x1003e0, SR_READ | SR_WRITE}, + {"DBGWCR4_EL1", REG_DBGWCR4_EL1, 0x1004e0, SR_READ | SR_WRITE}, + {"DBGWCR5_EL1", REG_DBGWCR5_EL1, 0x1005e0, SR_READ | SR_WRITE}, + {"DBGWCR6_EL1", REG_DBGWCR6_EL1, 0x1006e0, SR_READ | SR_WRITE}, + {"DBGWCR7_EL1", REG_DBGWCR7_EL1, 0x1007e0, SR_READ | SR_WRITE}, + {"DBGWCR8_EL1", REG_DBGWCR8_EL1, 0x1008e0, SR_READ | SR_WRITE}, + {"DBGWCR9_EL1", REG_DBGWCR9_EL1, 0x1009e0, SR_READ | SR_WRITE}, + {"DBGWCR10_EL1", REG_DBGWCR10_EL1, 0x100ae0, SR_READ | SR_WRITE}, + {"DBGWCR11_EL1", REG_DBGWCR11_EL1, 0x100be0, SR_READ | SR_WRITE}, + {"DBGWCR12_EL1", REG_DBGWCR12_EL1, 0x100ce0, SR_READ | SR_WRITE}, + {"DBGWCR13_EL1", REG_DBGWCR13_EL1, 0x100de0, SR_READ | SR_WRITE}, + {"DBGWCR14_EL1", REG_DBGWCR14_EL1, 0x100ee0, SR_READ | SR_WRITE}, + {"DBGWCR15_EL1", REG_DBGWCR15_EL1, 0x100fe0, SR_READ | SR_WRITE}, + {"DBGWVR0_EL1", REG_DBGWVR0_EL1, 0x1000c0, SR_READ | SR_WRITE}, + {"DBGWVR1_EL1", 
REG_DBGWVR1_EL1, 0x1001c0, SR_READ | SR_WRITE}, + {"DBGWVR2_EL1", REG_DBGWVR2_EL1, 0x1002c0, SR_READ | SR_WRITE}, + {"DBGWVR3_EL1", REG_DBGWVR3_EL1, 0x1003c0, SR_READ | SR_WRITE}, + {"DBGWVR4_EL1", REG_DBGWVR4_EL1, 0x1004c0, SR_READ | SR_WRITE}, + {"DBGWVR5_EL1", REG_DBGWVR5_EL1, 0x1005c0, SR_READ | SR_WRITE}, + {"DBGWVR6_EL1", REG_DBGWVR6_EL1, 0x1006c0, SR_READ | SR_WRITE}, + {"DBGWVR7_EL1", REG_DBGWVR7_EL1, 0x1007c0, SR_READ | SR_WRITE}, + {"DBGWVR8_EL1", REG_DBGWVR8_EL1, 0x1008c0, SR_READ | SR_WRITE}, + {"DBGWVR9_EL1", REG_DBGWVR9_EL1, 0x1009c0, SR_READ | SR_WRITE}, + {"DBGWVR10_EL1", REG_DBGWVR10_EL1, 0x100ac0, SR_READ | SR_WRITE}, + {"DBGWVR11_EL1", REG_DBGWVR11_EL1, 0x100bc0, SR_READ | SR_WRITE}, + {"DBGWVR12_EL1", REG_DBGWVR12_EL1, 0x100cc0, SR_READ | SR_WRITE}, + {"DBGWVR13_EL1", REG_DBGWVR13_EL1, 0x100dc0, SR_READ | SR_WRITE}, + {"DBGWVR14_EL1", REG_DBGWVR14_EL1, 0x100ec0, SR_READ | SR_WRITE}, + {"DBGWVR15_EL1", REG_DBGWVR15_EL1, 0x100fc0, SR_READ | SR_WRITE}, + {"DCZID_EL0", REG_DCZID_EL0, 0x1b00e0, SR_READ}, + {"DISR_EL1", REG_DISR_EL1, 0x18c120, SR_READ | SR_WRITE}, + {"DIT", REG_DIT, 0x1b42a0, SR_READ | SR_WRITE}, + {"DLR_EL0", REG_DLR_EL0, 0x1b4520, SR_READ | SR_WRITE}, + {"DSPSR_EL0", REG_DSPSR_EL0, 0x1b4500, SR_READ | SR_WRITE}, + {"ELR_EL1", REG_ELR_EL1, 0x184020, SR_READ | SR_WRITE}, + {"ERRIDR_EL1", REG_ERRIDR_EL1, 0x185300, SR_READ}, + {"ERRSELR_EL1", REG_ERRSELR_EL1, 0x185320, SR_READ | SR_WRITE}, + {"ERXADDR_EL1", REG_ERXADDR_EL1, 0x185460, SR_READ | SR_WRITE}, + {"ERXCTLR_EL1", REG_ERXCTLR_EL1, 0x185420, SR_READ | SR_WRITE}, + {"ERXFR_EL1", REG_ERXFR_EL1, 0x185400, SR_READ}, + {"ERXMISC0_EL1", REG_ERXMISC0_EL1, 0x185500, SR_READ | SR_WRITE}, + {"ERXMISC1_EL1", REG_ERXMISC1_EL1, 0x185520, SR_READ | SR_WRITE}, + {"ERXMISC2_EL1", REG_ERXMISC2_EL1, 0x185540, SR_READ | SR_WRITE}, + {"ERXMISC3_EL1", REG_ERXMISC3_EL1, 0x185560, SR_READ | SR_WRITE}, + {"ERXPFGCDN_EL1", REG_ERXPFGCDN_EL1, 0x1854c0, SR_READ | SR_WRITE}, + {"ERXPFGCTL_EL1", 
REG_ERXPFGCTL_EL1, 0x1854a0, SR_READ | SR_WRITE}, + {"ERXPFGF_EL1", REG_ERXPFGF_EL1, 0x185480, SR_READ}, + {"ERXSTATUS_EL1", REG_ERXSTATUS_EL1, 0x185440, SR_READ | SR_WRITE}, + {"ESR_EL1", REG_ESR_EL1, 0x185200, SR_READ | SR_WRITE}, + {"FAR_EL1", REG_FAR_EL1, 0x186000, SR_READ | SR_WRITE}, + {"FPCR", REG_FPCR, 0x1b4400, SR_READ | SR_WRITE}, + {"FPSR", REG_FPSR, 0x1b4420, SR_READ | SR_WRITE}, + {"GCR_EL1", REG_GCR_EL1, 0x1810c0, SR_READ | SR_WRITE}, + {"GMID_EL1", REG_GMID_EL1, 0x31400, SR_READ}, + {"ICC_AP0R0_EL1", REG_ICC_AP0R0_EL1, 0x18c880, SR_READ | SR_WRITE}, + {"ICC_AP0R1_EL1", REG_ICC_AP0R1_EL1, 0x18c8a0, SR_READ | SR_WRITE}, + {"ICC_AP0R2_EL1", REG_ICC_AP0R2_EL1, 0x18c8c0, SR_READ | SR_WRITE}, + {"ICC_AP0R3_EL1", REG_ICC_AP0R3_EL1, 0x18c8e0, SR_READ | SR_WRITE}, + {"ICC_AP1R0_EL1", REG_ICC_AP1R0_EL1, 0x18c900, SR_READ | SR_WRITE}, + {"ICC_AP1R1_EL1", REG_ICC_AP1R1_EL1, 0x18c920, SR_READ | SR_WRITE}, + {"ICC_AP1R2_EL1", REG_ICC_AP1R2_EL1, 0x18c940, SR_READ | SR_WRITE}, + {"ICC_AP1R3_EL1", REG_ICC_AP1R3_EL1, 0x18c960, SR_READ | SR_WRITE}, + {"ICC_ASGI1R_EL1", REG_ICC_ASGI1R_EL1, 0x18cbc0, SR_WRITE}, + {"ICC_BPR0_EL1", REG_ICC_BPR0_EL1, 0x18c860, SR_READ | SR_WRITE}, + {"ICC_BPR1_EL1", REG_ICC_BPR1_EL1, 0x18cc60, SR_READ | SR_WRITE}, + {"ICC_CTLR_EL1", REG_ICC_CTLR_EL1, 0x18cc80, SR_READ | SR_WRITE}, + {"ICC_DIR_EL1", REG_ICC_DIR_EL1, 0x18cb20, SR_WRITE}, + {"ICC_EOIR0_EL1", REG_ICC_EOIR0_EL1, 0x18c820, SR_WRITE}, + {"ICC_EOIR1_EL1", REG_ICC_EOIR1_EL1, 0x18cc20, SR_WRITE}, + {"ICC_HPPIR0_EL1", REG_ICC_HPPIR0_EL1, 0x18c840, SR_READ}, + {"ICC_HPPIR1_EL1", REG_ICC_HPPIR1_EL1, 0x18cc40, SR_READ}, + {"ICC_IAR0_EL1", REG_ICC_IAR0_EL1, 0x18c800, SR_READ}, + {"ICC_IAR1_EL1", REG_ICC_IAR1_EL1, 0x18cc00, SR_READ}, + {"ICC_IGRPEN0_EL1", REG_ICC_IGRPEN0_EL1, 0x18ccc0, SR_READ | SR_WRITE}, + {"ICC_IGRPEN1_EL1", REG_ICC_IGRPEN1_EL1, 0x18cce0, SR_READ | SR_WRITE}, + {"ICC_PMR_EL1", REG_ICC_PMR_EL1, 0x184600, SR_READ | SR_WRITE}, + {"ICC_RPR_EL1", REG_ICC_RPR_EL1, 0x18cb60, 
SR_READ}, + {"ICC_SGI0R_EL1", REG_ICC_SGI0R_EL1, 0x18cbe0, SR_WRITE}, + {"ICC_SGI1R_EL1", REG_ICC_SGI1R_EL1, 0x18cba0, SR_WRITE}, + {"ICC_SRE_EL1", REG_ICC_SRE_EL1, 0x18cca0, SR_READ | SR_WRITE}, + {"ICV_AP0R0_EL1", REG_ICV_AP0R0_EL1, 0x18c880, SR_READ | SR_WRITE}, + {"ICV_AP0R1_EL1", REG_ICV_AP0R1_EL1, 0x18c8a0, SR_READ | SR_WRITE}, + {"ICV_AP0R2_EL1", REG_ICV_AP0R2_EL1, 0x18c8c0, SR_READ | SR_WRITE}, + {"ICV_AP0R3_EL1", REG_ICV_AP0R3_EL1, 0x18c8e0, SR_READ | SR_WRITE}, + {"ICV_AP1R0_EL1", REG_ICV_AP1R0_EL1, 0x18c900, SR_READ | SR_WRITE}, + {"ICV_AP1R1_EL1", REG_ICV_AP1R1_EL1, 0x18c920, SR_READ | SR_WRITE}, + {"ICV_AP1R2_EL1", REG_ICV_AP1R2_EL1, 0x18c940, SR_READ | SR_WRITE}, + {"ICV_AP1R3_EL1", REG_ICV_AP1R3_EL1, 0x18c960, SR_READ | SR_WRITE}, + {"ICV_BPR0_EL1", REG_ICV_BPR0_EL1, 0x18c860, SR_READ | SR_WRITE}, + {"ICV_BPR1_EL1", REG_ICV_BPR1_EL1, 0x18cc60, SR_READ | SR_WRITE}, + {"ICV_CTLR_EL1", REG_ICV_CTLR_EL1, 0x18cc80, SR_READ | SR_WRITE}, + {"ICV_DIR_EL1", REG_ICV_DIR_EL1, 0x18cb20, SR_WRITE}, + {"ICV_EOIR0_EL1", REG_ICV_EOIR0_EL1, 0x18c820, SR_WRITE}, + {"ICV_EOIR1_EL1", REG_ICV_EOIR1_EL1, 0x18cc20, SR_WRITE}, + {"ICV_HPPIR0_EL1", REG_ICV_HPPIR0_EL1, 0x18c840, SR_READ}, + {"ICV_HPPIR1_EL1", REG_ICV_HPPIR1_EL1, 0x18cc40, SR_READ}, + {"ICV_IAR0_EL1", REG_ICV_IAR0_EL1, 0x18c800, SR_READ}, + {"ICV_IAR1_EL1", REG_ICV_IAR1_EL1, 0x18cc00, SR_READ}, + {"ICV_IGRPEN0_EL1", REG_ICV_IGRPEN0_EL1, 0x18ccc0, SR_READ | SR_WRITE}, + {"ICV_IGRPEN1_EL1", REG_ICV_IGRPEN1_EL1, 0x18cce0, SR_READ | SR_WRITE}, + {"ICV_PMR_EL1", REG_ICV_PMR_EL1, 0x184600, SR_READ | SR_WRITE}, + {"ICV_RPR_EL1", REG_ICV_RPR_EL1, 0x18cb60, SR_READ}, + {"ID_AA64AFR0_EL1", REG_ID_AA64AFR0_EL1, 0x180580, SR_READ}, + {"ID_AA64AFR1_EL1", REG_ID_AA64AFR1_EL1, 0x1805a0, SR_READ}, + {"ID_AA64DFR0_EL1", REG_ID_AA64DFR0_EL1, 0x180500, SR_READ}, + {"ID_AA64DFR1_EL1", REG_ID_AA64DFR1_EL1, 0x180520, SR_READ}, + {"ID_AA64ISAR0_EL1", REG_ID_AA64ISAR0_EL1, 0x180600, SR_READ}, + {"ID_AA64ISAR1_EL1", 
REG_ID_AA64ISAR1_EL1, 0x180620, SR_READ}, + {"ID_AA64MMFR0_EL1", REG_ID_AA64MMFR0_EL1, 0x180700, SR_READ}, + {"ID_AA64MMFR1_EL1", REG_ID_AA64MMFR1_EL1, 0x180720, SR_READ}, + {"ID_AA64MMFR2_EL1", REG_ID_AA64MMFR2_EL1, 0x180740, SR_READ}, + {"ID_AA64PFR0_EL1", REG_ID_AA64PFR0_EL1, 0x180400, SR_READ}, + {"ID_AA64PFR1_EL1", REG_ID_AA64PFR1_EL1, 0x180420, SR_READ}, + {"ID_AA64ZFR0_EL1", REG_ID_AA64ZFR0_EL1, 0x180480, SR_READ}, + {"ID_AFR0_EL1", REG_ID_AFR0_EL1, 0x180160, SR_READ}, + {"ID_DFR0_EL1", REG_ID_DFR0_EL1, 0x180140, SR_READ}, + {"ID_ISAR0_EL1", REG_ID_ISAR0_EL1, 0x180200, SR_READ}, + {"ID_ISAR1_EL1", REG_ID_ISAR1_EL1, 0x180220, SR_READ}, + {"ID_ISAR2_EL1", REG_ID_ISAR2_EL1, 0x180240, SR_READ}, + {"ID_ISAR3_EL1", REG_ID_ISAR3_EL1, 0x180260, SR_READ}, + {"ID_ISAR4_EL1", REG_ID_ISAR4_EL1, 0x180280, SR_READ}, + {"ID_ISAR5_EL1", REG_ID_ISAR5_EL1, 0x1802a0, SR_READ}, + {"ID_ISAR6_EL1", REG_ID_ISAR6_EL1, 0x1802e0, SR_READ}, + {"ID_MMFR0_EL1", REG_ID_MMFR0_EL1, 0x180180, SR_READ}, + {"ID_MMFR1_EL1", REG_ID_MMFR1_EL1, 0x1801a0, SR_READ}, + {"ID_MMFR2_EL1", REG_ID_MMFR2_EL1, 0x1801c0, SR_READ}, + {"ID_MMFR3_EL1", REG_ID_MMFR3_EL1, 0x1801e0, SR_READ}, + {"ID_MMFR4_EL1", REG_ID_MMFR4_EL1, 0x1802c0, SR_READ}, + {"ID_PFR0_EL1", REG_ID_PFR0_EL1, 0x180100, SR_READ}, + {"ID_PFR1_EL1", REG_ID_PFR1_EL1, 0x180120, SR_READ}, + {"ID_PFR2_EL1", REG_ID_PFR2_EL1, 0x180380, SR_READ}, + {"ISR_EL1", REG_ISR_EL1, 0x18c100, SR_READ}, + {"LORC_EL1", REG_LORC_EL1, 0x18a460, SR_READ | SR_WRITE}, + {"LOREA_EL1", REG_LOREA_EL1, 0x18a420, SR_READ | SR_WRITE}, + {"LORID_EL1", REG_LORID_EL1, 0x18a4e0, SR_READ}, + {"LORN_EL1", REG_LORN_EL1, 0x18a440, SR_READ | SR_WRITE}, + {"LORSA_EL1", REG_LORSA_EL1, 0x18a400, SR_READ | SR_WRITE}, + {"MAIR_EL1", REG_MAIR_EL1, 0x18a200, SR_READ | SR_WRITE}, + {"MDCCINT_EL1", REG_MDCCINT_EL1, 0x100200, SR_READ | SR_WRITE}, + {"MDCCSR_EL0", REG_MDCCSR_EL0, 0x130100, SR_READ}, + {"MDRAR_EL1", REG_MDRAR_EL1, 0x101000, SR_READ}, + {"MDSCR_EL1", REG_MDSCR_EL1, 0x100240, 
SR_READ | SR_WRITE}, + {"MIDR_EL1", REG_MIDR_EL1, 0x180000, SR_READ}, + {"MPAM0_EL1", REG_MPAM0_EL1, 0x18a520, SR_READ | SR_WRITE}, + {"MPAM1_EL1", REG_MPAM1_EL1, 0x18a500, SR_READ | SR_WRITE}, + {"MPAMIDR_EL1", REG_MPAMIDR_EL1, 0x18a480, SR_READ}, + {"MPIDR_EL1", REG_MPIDR_EL1, 0x1800a0, SR_READ}, + {"MVFR0_EL1", REG_MVFR0_EL1, 0x180300, SR_READ}, + {"MVFR1_EL1", REG_MVFR1_EL1, 0x180320, SR_READ}, + {"MVFR2_EL1", REG_MVFR2_EL1, 0x180340, SR_READ}, + {"NZCV", REG_NZCV, 0x1b4200, SR_READ | SR_WRITE}, + {"OSDLR_EL1", REG_OSDLR_EL1, 0x101380, SR_READ | SR_WRITE}, + {"OSDTRRX_EL1", REG_OSDTRRX_EL1, 0x100040, SR_READ | SR_WRITE}, + {"OSDTRTX_EL1", REG_OSDTRTX_EL1, 0x100340, SR_READ | SR_WRITE}, + {"OSECCR_EL1", REG_OSECCR_EL1, 0x100640, SR_READ | SR_WRITE}, + {"OSLAR_EL1", REG_OSLAR_EL1, 0x101080, SR_WRITE}, + {"OSLSR_EL1", REG_OSLSR_EL1, 0x101180, SR_READ}, + {"PAN", REG_PAN, 0x184260, SR_READ | SR_WRITE}, + {"PAR_EL1", REG_PAR_EL1, 0x187400, SR_READ | SR_WRITE}, + {"PMBIDR_EL1", REG_PMBIDR_EL1, 0x189ae0, SR_READ}, + {"PMBLIMITR_EL1", REG_PMBLIMITR_EL1, 0x189a00, SR_READ | SR_WRITE}, + {"PMBPTR_EL1", REG_PMBPTR_EL1, 0x189a20, SR_READ | SR_WRITE}, + {"PMBSR_EL1", REG_PMBSR_EL1, 0x189a60, SR_READ | SR_WRITE}, + {"PMCCFILTR_EL0", REG_PMCCFILTR_EL0, 0x1befe0, SR_READ | SR_WRITE}, + {"PMCCNTR_EL0", REG_PMCCNTR_EL0, 0x1b9d00, SR_READ | SR_WRITE}, + {"PMCEID0_EL0", REG_PMCEID0_EL0, 0x1b9cc0, SR_READ}, + {"PMCEID1_EL0", REG_PMCEID1_EL0, 0x1b9ce0, SR_READ}, + {"PMCNTENCLR_EL0", REG_PMCNTENCLR_EL0, 0x1b9c40, SR_READ | SR_WRITE}, + {"PMCNTENSET_EL0", REG_PMCNTENSET_EL0, 0x1b9c20, SR_READ | SR_WRITE}, + {"PMCR_EL0", REG_PMCR_EL0, 0x1b9c00, SR_READ | SR_WRITE}, + {"PMEVCNTR0_EL0", REG_PMEVCNTR0_EL0, 0x1be800, SR_READ | SR_WRITE}, + {"PMEVCNTR1_EL0", REG_PMEVCNTR1_EL0, 0x1be820, SR_READ | SR_WRITE}, + {"PMEVCNTR2_EL0", REG_PMEVCNTR2_EL0, 0x1be840, SR_READ | SR_WRITE}, + {"PMEVCNTR3_EL0", REG_PMEVCNTR3_EL0, 0x1be860, SR_READ | SR_WRITE}, + {"PMEVCNTR4_EL0", REG_PMEVCNTR4_EL0, 
0x1be880, SR_READ | SR_WRITE}, + {"PMEVCNTR5_EL0", REG_PMEVCNTR5_EL0, 0x1be8a0, SR_READ | SR_WRITE}, + {"PMEVCNTR6_EL0", REG_PMEVCNTR6_EL0, 0x1be8c0, SR_READ | SR_WRITE}, + {"PMEVCNTR7_EL0", REG_PMEVCNTR7_EL0, 0x1be8e0, SR_READ | SR_WRITE}, + {"PMEVCNTR8_EL0", REG_PMEVCNTR8_EL0, 0x1be900, SR_READ | SR_WRITE}, + {"PMEVCNTR9_EL0", REG_PMEVCNTR9_EL0, 0x1be920, SR_READ | SR_WRITE}, + {"PMEVCNTR10_EL0", REG_PMEVCNTR10_EL0, 0x1be940, SR_READ | SR_WRITE}, + {"PMEVCNTR11_EL0", REG_PMEVCNTR11_EL0, 0x1be960, SR_READ | SR_WRITE}, + {"PMEVCNTR12_EL0", REG_PMEVCNTR12_EL0, 0x1be980, SR_READ | SR_WRITE}, + {"PMEVCNTR13_EL0", REG_PMEVCNTR13_EL0, 0x1be9a0, SR_READ | SR_WRITE}, + {"PMEVCNTR14_EL0", REG_PMEVCNTR14_EL0, 0x1be9c0, SR_READ | SR_WRITE}, + {"PMEVCNTR15_EL0", REG_PMEVCNTR15_EL0, 0x1be9e0, SR_READ | SR_WRITE}, + {"PMEVCNTR16_EL0", REG_PMEVCNTR16_EL0, 0x1bea00, SR_READ | SR_WRITE}, + {"PMEVCNTR17_EL0", REG_PMEVCNTR17_EL0, 0x1bea20, SR_READ | SR_WRITE}, + {"PMEVCNTR18_EL0", REG_PMEVCNTR18_EL0, 0x1bea40, SR_READ | SR_WRITE}, + {"PMEVCNTR19_EL0", REG_PMEVCNTR19_EL0, 0x1bea60, SR_READ | SR_WRITE}, + {"PMEVCNTR20_EL0", REG_PMEVCNTR20_EL0, 0x1bea80, SR_READ | SR_WRITE}, + {"PMEVCNTR21_EL0", REG_PMEVCNTR21_EL0, 0x1beaa0, SR_READ | SR_WRITE}, + {"PMEVCNTR22_EL0", REG_PMEVCNTR22_EL0, 0x1beac0, SR_READ | SR_WRITE}, + {"PMEVCNTR23_EL0", REG_PMEVCNTR23_EL0, 0x1beae0, SR_READ | SR_WRITE}, + {"PMEVCNTR24_EL0", REG_PMEVCNTR24_EL0, 0x1beb00, SR_READ | SR_WRITE}, + {"PMEVCNTR25_EL0", REG_PMEVCNTR25_EL0, 0x1beb20, SR_READ | SR_WRITE}, + {"PMEVCNTR26_EL0", REG_PMEVCNTR26_EL0, 0x1beb40, SR_READ | SR_WRITE}, + {"PMEVCNTR27_EL0", REG_PMEVCNTR27_EL0, 0x1beb60, SR_READ | SR_WRITE}, + {"PMEVCNTR28_EL0", REG_PMEVCNTR28_EL0, 0x1beb80, SR_READ | SR_WRITE}, + {"PMEVCNTR29_EL0", REG_PMEVCNTR29_EL0, 0x1beba0, SR_READ | SR_WRITE}, + {"PMEVCNTR30_EL0", REG_PMEVCNTR30_EL0, 0x1bebc0, SR_READ | SR_WRITE}, + {"PMEVTYPER0_EL0", REG_PMEVTYPER0_EL0, 0x1bec00, SR_READ | SR_WRITE}, + {"PMEVTYPER1_EL0", 
REG_PMEVTYPER1_EL0, 0x1bec20, SR_READ | SR_WRITE}, + {"PMEVTYPER2_EL0", REG_PMEVTYPER2_EL0, 0x1bec40, SR_READ | SR_WRITE}, + {"PMEVTYPER3_EL0", REG_PMEVTYPER3_EL0, 0x1bec60, SR_READ | SR_WRITE}, + {"PMEVTYPER4_EL0", REG_PMEVTYPER4_EL0, 0x1bec80, SR_READ | SR_WRITE}, + {"PMEVTYPER5_EL0", REG_PMEVTYPER5_EL0, 0x1beca0, SR_READ | SR_WRITE}, + {"PMEVTYPER6_EL0", REG_PMEVTYPER6_EL0, 0x1becc0, SR_READ | SR_WRITE}, + {"PMEVTYPER7_EL0", REG_PMEVTYPER7_EL0, 0x1bece0, SR_READ | SR_WRITE}, + {"PMEVTYPER8_EL0", REG_PMEVTYPER8_EL0, 0x1bed00, SR_READ | SR_WRITE}, + {"PMEVTYPER9_EL0", REG_PMEVTYPER9_EL0, 0x1bed20, SR_READ | SR_WRITE}, + {"PMEVTYPER10_EL0", REG_PMEVTYPER10_EL0, 0x1bed40, SR_READ | SR_WRITE}, + {"PMEVTYPER11_EL0", REG_PMEVTYPER11_EL0, 0x1bed60, SR_READ | SR_WRITE}, + {"PMEVTYPER12_EL0", REG_PMEVTYPER12_EL0, 0x1bed80, SR_READ | SR_WRITE}, + {"PMEVTYPER13_EL0", REG_PMEVTYPER13_EL0, 0x1beda0, SR_READ | SR_WRITE}, + {"PMEVTYPER14_EL0", REG_PMEVTYPER14_EL0, 0x1bedc0, SR_READ | SR_WRITE}, + {"PMEVTYPER15_EL0", REG_PMEVTYPER15_EL0, 0x1bede0, SR_READ | SR_WRITE}, + {"PMEVTYPER16_EL0", REG_PMEVTYPER16_EL0, 0x1bee00, SR_READ | SR_WRITE}, + {"PMEVTYPER17_EL0", REG_PMEVTYPER17_EL0, 0x1bee20, SR_READ | SR_WRITE}, + {"PMEVTYPER18_EL0", REG_PMEVTYPER18_EL0, 0x1bee40, SR_READ | SR_WRITE}, + {"PMEVTYPER19_EL0", REG_PMEVTYPER19_EL0, 0x1bee60, SR_READ | SR_WRITE}, + {"PMEVTYPER20_EL0", REG_PMEVTYPER20_EL0, 0x1bee80, SR_READ | SR_WRITE}, + {"PMEVTYPER21_EL0", REG_PMEVTYPER21_EL0, 0x1beea0, SR_READ | SR_WRITE}, + {"PMEVTYPER22_EL0", REG_PMEVTYPER22_EL0, 0x1beec0, SR_READ | SR_WRITE}, + {"PMEVTYPER23_EL0", REG_PMEVTYPER23_EL0, 0x1beee0, SR_READ | SR_WRITE}, + {"PMEVTYPER24_EL0", REG_PMEVTYPER24_EL0, 0x1bef00, SR_READ | SR_WRITE}, + {"PMEVTYPER25_EL0", REG_PMEVTYPER25_EL0, 0x1bef20, SR_READ | SR_WRITE}, + {"PMEVTYPER26_EL0", REG_PMEVTYPER26_EL0, 0x1bef40, SR_READ | SR_WRITE}, + {"PMEVTYPER27_EL0", REG_PMEVTYPER27_EL0, 0x1bef60, SR_READ | SR_WRITE}, + {"PMEVTYPER28_EL0", 
REG_PMEVTYPER28_EL0, 0x1bef80, SR_READ | SR_WRITE}, + {"PMEVTYPER29_EL0", REG_PMEVTYPER29_EL0, 0x1befa0, SR_READ | SR_WRITE}, + {"PMEVTYPER30_EL0", REG_PMEVTYPER30_EL0, 0x1befc0, SR_READ | SR_WRITE}, + {"PMINTENCLR_EL1", REG_PMINTENCLR_EL1, 0x189e40, SR_READ | SR_WRITE}, + {"PMINTENSET_EL1", REG_PMINTENSET_EL1, 0x189e20, SR_READ | SR_WRITE}, + {"PMMIR_EL1", REG_PMMIR_EL1, 0x189ec0, SR_READ}, + {"PMOVSCLR_EL0", REG_PMOVSCLR_EL0, 0x1b9c60, SR_READ | SR_WRITE}, + {"PMOVSSET_EL0", REG_PMOVSSET_EL0, 0x1b9e60, SR_READ | SR_WRITE}, + {"PMSCR_EL1", REG_PMSCR_EL1, 0x189900, SR_READ | SR_WRITE}, + {"PMSELR_EL0", REG_PMSELR_EL0, 0x1b9ca0, SR_READ | SR_WRITE}, + {"PMSEVFR_EL1", REG_PMSEVFR_EL1, 0x1899a0, SR_READ | SR_WRITE}, + {"PMSFCR_EL1", REG_PMSFCR_EL1, 0x189980, SR_READ | SR_WRITE}, + {"PMSICR_EL1", REG_PMSICR_EL1, 0x189940, SR_READ | SR_WRITE}, + {"PMSIDR_EL1", REG_PMSIDR_EL1, 0x1899e0, SR_READ}, + {"PMSIRR_EL1", REG_PMSIRR_EL1, 0x189960, SR_READ | SR_WRITE}, + {"PMSLATFR_EL1", REG_PMSLATFR_EL1, 0x1899c0, SR_READ | SR_WRITE}, + {"PMSWINC_EL0", REG_PMSWINC_EL0, 0x1b9c80, SR_WRITE}, + {"PMUSERENR_EL0", REG_PMUSERENR_EL0, 0x1b9e00, SR_READ | SR_WRITE}, + {"PMXEVCNTR_EL0", REG_PMXEVCNTR_EL0, 0x1b9d40, SR_READ | SR_WRITE}, + {"PMXEVTYPER_EL0", REG_PMXEVTYPER_EL0, 0x1b9d20, SR_READ | SR_WRITE}, + {"REVIDR_EL1", REG_REVIDR_EL1, 0x1800c0, SR_READ}, + {"RGSR_EL1", REG_RGSR_EL1, 0x1810a0, SR_READ | SR_WRITE}, + {"RMR_EL1", REG_RMR_EL1, 0x18c040, SR_READ | SR_WRITE}, + {"RNDR", REG_RNDR, 0x1b2400, SR_READ}, + {"RNDRRS", REG_RNDRRS, 0x1b2420, SR_READ}, + {"RVBAR_EL1", REG_RVBAR_EL1, 0x18c020, SR_READ}, + {"SCTLR_EL1", REG_SCTLR_EL1, 0x181000, SR_READ | SR_WRITE}, + {"SCXTNUM_EL0", REG_SCXTNUM_EL0, 0x1bd0e0, SR_READ | SR_WRITE}, + {"SCXTNUM_EL1", REG_SCXTNUM_EL1, 0x18d0e0, SR_READ | SR_WRITE}, + {"SP_EL0", REG_SP_EL0, 0x184100, SR_READ | SR_WRITE}, + {"SP_EL1", REG_SP_EL1, 0x1c4100, SR_READ | SR_WRITE}, + {"SPSel", REG_SPSel, 0x184200, SR_READ | SR_WRITE}, + {"SPSR_abt", 
REG_SPSR_abt, 0x1c4320, SR_READ | SR_WRITE}, + {"SPSR_EL1", REG_SPSR_EL1, 0x184000, SR_READ | SR_WRITE}, + {"SPSR_fiq", REG_SPSR_fiq, 0x1c4360, SR_READ | SR_WRITE}, + {"SPSR_irq", REG_SPSR_irq, 0x1c4300, SR_READ | SR_WRITE}, + {"SPSR_und", REG_SPSR_und, 0x1c4340, SR_READ | SR_WRITE}, + {"SSBS", REG_SSBS, 0x1b42c0, SR_READ | SR_WRITE}, + {"TCO", REG_TCO, 0x1b42e0, SR_READ | SR_WRITE}, + {"TCR_EL1", REG_TCR_EL1, 0x182040, SR_READ | SR_WRITE}, + {"TFSR_EL1", REG_TFSR_EL1, 0x185600, SR_READ | SR_WRITE}, + {"TFSRE0_EL1", REG_TFSRE0_EL1, 0x185620, SR_READ | SR_WRITE}, + {"TPIDR_EL0", REG_TPIDR_EL0, 0x1bd040, SR_READ | SR_WRITE}, + {"TPIDR_EL1", REG_TPIDR_EL1, 0x18d080, SR_READ | SR_WRITE}, + {"TPIDRRO_EL0", REG_TPIDRRO_EL0, 0x1bd060, SR_READ | SR_WRITE}, + {"TRFCR_EL1", REG_TRFCR_EL1, 0x181220, SR_READ | SR_WRITE}, + {"TTBR0_EL1", REG_TTBR0_EL1, 0x182000, SR_READ | SR_WRITE}, + {"TTBR1_EL1", REG_TTBR1_EL1, 0x182020, SR_READ | SR_WRITE}, + {"UAO", REG_UAO, 0x184280, SR_READ | SR_WRITE}, + {"VBAR_EL1", REG_VBAR_EL1, 0x18c000, SR_READ | SR_WRITE}, + {"ZCR_EL1", REG_ZCR_EL1, 0x181200, SR_READ | SR_WRITE}, +} + +func SysRegEnc(r int16) (string, uint32, uint8) { + // The automatic generator guarantees that the order + // of Reg in SystemReg struct is consistent with the + // order of system register declarations + if r <= SYSREG_BEGIN || r >= SYSREG_END { + return "", 0, 0 + } + v := SystemReg[r-SYSREG_BEGIN-1] + return v.Name, v.Enc, v.AccessFlags +} diff --git a/src/cmd/internal/obj/data.go b/src/cmd/internal/obj/data.go new file mode 100644 index 0000000..bcba53c --- /dev/null +++ b/src/cmd/internal/obj/data.go @@ -0,0 +1,207 @@ +// Derived from Inferno utils/6l/obj.c and utils/6l/span.c +// https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/obj.c +// https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/span.c +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. 
+// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package obj + +import ( + "cmd/internal/objabi" + "log" + "math" +) + +// Grow increases the length of s.P to lsiz. +func (s *LSym) Grow(lsiz int64) { + siz := int(lsiz) + if int64(siz) != lsiz { + log.Fatalf("LSym.Grow size %d too long", lsiz) + } + if len(s.P) >= siz { + return + } + s.P = append(s.P, make([]byte, siz-len(s.P))...) +} + +// GrowCap increases the capacity of s.P to c. 
+func (s *LSym) GrowCap(c int64) { + if int64(cap(s.P)) >= c { + return + } + if s.P == nil { + s.P = make([]byte, 0, c) + return + } + b := make([]byte, len(s.P), c) + copy(b, s.P) + s.P = b +} + +// prepwrite prepares to write data of size siz into s at offset off. +func (s *LSym) prepwrite(ctxt *Link, off int64, siz int) { + if off < 0 || siz < 0 || off >= 1<<30 { + ctxt.Diag("prepwrite: bad off=%d siz=%d s=%v", off, siz, s) + } + switch s.Type { + case objabi.Sxxx, objabi.SBSS: + s.Type = objabi.SDATA + case objabi.SNOPTRBSS: + s.Type = objabi.SNOPTRDATA + case objabi.STLSBSS: + ctxt.Diag("cannot supply data for %v var %v", s.Type, s.Name) + } + l := off + int64(siz) + s.Grow(l) + if l > s.Size { + s.Size = l + } +} + +// WriteFloat32 writes f into s at offset off. +func (s *LSym) WriteFloat32(ctxt *Link, off int64, f float32) { + s.prepwrite(ctxt, off, 4) + ctxt.Arch.ByteOrder.PutUint32(s.P[off:], math.Float32bits(f)) +} + +// WriteFloat64 writes f into s at offset off. +func (s *LSym) WriteFloat64(ctxt *Link, off int64, f float64) { + s.prepwrite(ctxt, off, 8) + ctxt.Arch.ByteOrder.PutUint64(s.P[off:], math.Float64bits(f)) +} + +// WriteInt writes an integer i of size siz into s at offset off. +func (s *LSym) WriteInt(ctxt *Link, off int64, siz int, i int64) { + s.prepwrite(ctxt, off, siz) + switch siz { + default: + ctxt.Diag("WriteInt: bad integer size: %d", siz) + case 1: + s.P[off] = byte(i) + case 2: + ctxt.Arch.ByteOrder.PutUint16(s.P[off:], uint16(i)) + case 4: + ctxt.Arch.ByteOrder.PutUint32(s.P[off:], uint32(i)) + case 8: + ctxt.Arch.ByteOrder.PutUint64(s.P[off:], uint64(i)) + } +} + +func (s *LSym) writeAddr(ctxt *Link, off int64, siz int, rsym *LSym, roff int64, rtype objabi.RelocType) { + // Allow 4-byte addresses for DWARF. 
+ if siz != ctxt.Arch.PtrSize && siz != 4 { + ctxt.Diag("WriteAddr: bad address size %d in %s", siz, s.Name) + } + s.prepwrite(ctxt, off, siz) + r := Addrel(s) + r.Off = int32(off) + if int64(r.Off) != off { + ctxt.Diag("WriteAddr: off overflow %d in %s", off, s.Name) + } + r.Siz = uint8(siz) + r.Sym = rsym + r.Type = rtype + r.Add = roff +} + +// WriteAddr writes an address of size siz into s at offset off. +// rsym and roff specify the relocation for the address. +func (s *LSym) WriteAddr(ctxt *Link, off int64, siz int, rsym *LSym, roff int64) { + s.writeAddr(ctxt, off, siz, rsym, roff, objabi.R_ADDR) +} + +// WriteWeakAddr writes an address of size siz into s at offset off. +// rsym and roff specify the relocation for the address. +// This is a weak reference. +func (s *LSym) WriteWeakAddr(ctxt *Link, off int64, siz int, rsym *LSym, roff int64) { + s.writeAddr(ctxt, off, siz, rsym, roff, objabi.R_WEAKADDR) +} + +// WriteCURelativeAddr writes a pointer-sized address into s at offset off. +// rsym and roff specify the relocation for the address which will be +// resolved by the linker to an offset from the DW_AT_low_pc attribute of +// the DWARF Compile Unit of rsym. +func (s *LSym) WriteCURelativeAddr(ctxt *Link, off int64, rsym *LSym, roff int64) { + s.writeAddr(ctxt, off, ctxt.Arch.PtrSize, rsym, roff, objabi.R_ADDRCUOFF) +} + +// WriteOff writes a 4 byte offset to rsym+roff into s at offset off. +// After linking the 4 bytes stored at s+off will be +// rsym+roff-(start of section that s is in). +func (s *LSym) WriteOff(ctxt *Link, off int64, rsym *LSym, roff int64) { + s.prepwrite(ctxt, off, 4) + r := Addrel(s) + r.Off = int32(off) + if int64(r.Off) != off { + ctxt.Diag("WriteOff: off overflow %d in %s", off, s.Name) + } + r.Siz = 4 + r.Sym = rsym + r.Type = objabi.R_ADDROFF + r.Add = roff +} + +// WriteWeakOff writes a weak 4 byte offset to rsym+roff into s at offset off. 
+// After linking the 4 bytes stored at s+off will be +// rsym+roff-(start of section that s is in). +func (s *LSym) WriteWeakOff(ctxt *Link, off int64, rsym *LSym, roff int64) { + s.prepwrite(ctxt, off, 4) + r := Addrel(s) + r.Off = int32(off) + if int64(r.Off) != off { + ctxt.Diag("WriteOff: off overflow %d in %s", off, s.Name) + } + r.Siz = 4 + r.Sym = rsym + r.Type = objabi.R_WEAKADDROFF + r.Add = roff +} + +// WriteString writes a string of size siz into s at offset off. +func (s *LSym) WriteString(ctxt *Link, off int64, siz int, str string) { + if siz < len(str) { + ctxt.Diag("WriteString: bad string size: %d < %d", siz, len(str)) + } + s.prepwrite(ctxt, off, siz) + copy(s.P[off:off+int64(siz)], str) +} + +// WriteBytes writes a slice of bytes into s at offset off. +func (s *LSym) WriteBytes(ctxt *Link, off int64, b []byte) int64 { + s.prepwrite(ctxt, off, len(b)) + copy(s.P[off:], b) + return off + int64(len(b)) +} + +func Addrel(s *LSym) *Reloc { + if s.R == nil { + s.R = make([]Reloc, 0, 4) + } + s.R = append(s.R, Reloc{}) + return &s.R[len(s.R)-1] +} diff --git a/src/cmd/internal/obj/dwarf.go b/src/cmd/internal/obj/dwarf.go new file mode 100644 index 0000000..a9c13fd --- /dev/null +++ b/src/cmd/internal/obj/dwarf.go @@ -0,0 +1,709 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Writes dwarf information to object files. + +package obj + +import ( + "cmd/internal/dwarf" + "cmd/internal/objabi" + "cmd/internal/src" + "fmt" + "sort" + "sync" +) + +// Generate a sequence of opcodes that is as short as possible. +// See section 6.2.5 +const ( + LINE_BASE = -4 + LINE_RANGE = 10 + PC_RANGE = (255 - OPCODE_BASE) / LINE_RANGE + OPCODE_BASE = 11 +) + +// generateDebugLinesSymbol fills the debug lines symbol of a given function. 
//
// It's worth noting that this function doesn't generate the full debug_lines
// DWARF section, saving that for the linker. This function just generates the
// state machine part of debug_lines. The full table is generated by the
// linker.  Also, we use the file numbers from the full package (not just the
// function in question) when generating the state machine. We do this so we
// don't have to do a fixup on the indices when writing the full section.
//
// s is the function's text symbol and lines is the symbol that receives
// the encoded line-number program.
func (ctxt *Link) generateDebugLinesSymbol(s, lines *LSym) {
	dctxt := dwCtxt{ctxt}

	// Emit a LNE_set_address extended opcode, so as to establish the
	// starting text address of this function.
	dctxt.AddUint8(lines, 0)
	dwarf.Uleb128put(dctxt, lines, 1+int64(ctxt.Arch.PtrSize))
	dctxt.AddUint8(lines, dwarf.DW_LNE_set_address)
	dctxt.AddAddress(lines, s, 0)

	// Set up the debug_lines state machine to the default values
	// we expect at the start of a new sequence.
	stmt := true
	line := int64(1)
	pc := s.Func().Text.Pc
	var lastpc int64 // last PC written to line table, not last PC in func
	name := ""
	prologue, wrotePrologue := false, false
	// Walk the progs, generating the DWARF table.
	for p := s.Func().Text; p != nil; p = p.Link {
		// prologue latches to true once any prog is marked as prologue end.
		prologue = prologue || (p.Pos.Xlogue() == src.PosPrologueEnd)
		// If we're not at a real instruction, keep looping!
		// (No line number, or the next prog shares this prog's PC.)
		if p.Pos.Line() == 0 || (p.Link != nil && p.Link.Pc == p.Pc) {
			continue
		}
		newStmt := p.Pos.IsStmt() != src.PosNotStmt
		newName, newLine := ctxt.getFileSymbolAndLine(p.Pos)

		// Output debug info.
		// wrote records whether any state-changing opcode was emitted, in
		// which case a row is produced even if the line did not change.
		wrote := false
		if name != newName {
			newFile := ctxt.PosTable.FileIndex(newName) + 1 // 1 indexing for the table.
			dctxt.AddUint8(lines, dwarf.DW_LNS_set_file)
			dwarf.Uleb128put(dctxt, lines, int64(newFile))
			name = newName
			wrote = true
		}
		if prologue && !wrotePrologue {
			dctxt.AddUint8(lines, uint8(dwarf.DW_LNS_set_prologue_end))
			wrotePrologue = true
			wrote = true
		}
		if stmt != newStmt {
			dctxt.AddUint8(lines, uint8(dwarf.DW_LNS_negate_stmt))
			stmt = newStmt
			wrote = true
		}

		if line != int64(newLine) || wrote {
			pcdelta := p.Pc - pc
			lastpc = p.Pc
			putpclcdelta(ctxt, dctxt, lines, uint64(pcdelta), int64(newLine)-line)
			line, pc = int64(newLine), p.Pc
		}
	}

	// Because these symbols will be concatenated together by the
	// linker, we need to reset the state machine that controls the
	// debug symbols. Do this using an end-of-sequence operator.
	//
	// Note: at one point in time, Delve did not support multiple end
	// sequence ops within a compilation unit (bug for this:
	// https://github.com/go-delve/delve/issues/1694), however the bug
	// has since been fixed (Oct 2019).
	//
	// Issue 38192: the DWARF standard specifies that when you issue
	// an end-sequence op, the PC value should be one past the last
	// text address in the translation unit, so apply a delta to the
	// text address before the end sequence op. If this isn't done,
	// GDB will assign a line number of zero to the last row in the line
	// table, which we don't want.
	lastlen := uint64(s.Size - (lastpc - s.Func().Text.Pc))
	dctxt.AddUint8(lines, dwarf.DW_LNS_advance_pc)
	dwarf.Uleb128put(dctxt, lines, int64(lastlen))
	dctxt.AddUint8(lines, 0) // start extended opcode
	dwarf.Uleb128put(dctxt, lines, 1)
	dctxt.AddUint8(lines, dwarf.DW_LNE_end_sequence)
}

// putpclcdelta appends to s the line-program opcodes that advance the
// PC by deltaPC and the line by deltaLC. It picks a special opcode that
// absorbs part of both deltas, emits explicit advance-PC and
// advance-line opcodes for whatever remains, and finally emits the
// special opcode itself. It panics if the chosen special opcode falls
// outside [OPCODE_BASE, 255]. The linkctxt parameter is not referenced
// by the body.
func putpclcdelta(linkctxt *Link, dctxt dwCtxt, s *LSym, deltaPC uint64, deltaLC int64) {
	// Choose a special opcode that minimizes the number of bytes needed to
	// encode the remaining PC delta and LC delta.
	var opcode int64
	if deltaLC < LINE_BASE {
		if deltaPC >= PC_RANGE {
			opcode = OPCODE_BASE + (LINE_RANGE * PC_RANGE)
		} else {
			opcode = OPCODE_BASE + (LINE_RANGE * int64(deltaPC))
		}
	} else if deltaLC < LINE_BASE+LINE_RANGE {
		if deltaPC >= PC_RANGE {
			opcode = OPCODE_BASE + (deltaLC - LINE_BASE) + (LINE_RANGE * PC_RANGE)
			if opcode > 255 {
				opcode -= LINE_RANGE
			}
		} else {
			opcode = OPCODE_BASE + (deltaLC - LINE_BASE) + (LINE_RANGE * int64(deltaPC))
		}
	} else {
		if deltaPC <= PC_RANGE {
			opcode = OPCODE_BASE + (LINE_RANGE - 1) + (LINE_RANGE * int64(deltaPC))
			if opcode > 255 {
				opcode = 255
			}
		} else {
			// Use opcode 249 (pc+=23, lc+=5) or 255 (pc+=24, lc+=1).
			//
			// Let x=deltaPC-PC_RANGE.  If we use opcode 255, x will be the remaining
			// deltaPC that we need to encode separately before emitting 255.  If we
			// use opcode 249, we will need to encode x+1.  If x+1 takes one more
			// byte to encode than x, then we use opcode 255.
			//
			// In all other cases x and x+1 take the same number of bytes to encode,
			// so we use opcode 249, which may save us a byte in encoding deltaLC,
			// for similar reasons.
			switch deltaPC - PC_RANGE {
			// PC_RANGE is the largest deltaPC we can encode in one byte, using
			// DW_LNS_const_add_pc.
			//
			// (1<<16)-1 is the largest deltaPC we can encode in three bytes, using
			// DW_LNS_fixed_advance_pc.
			//
			// (1<<(7n))-1 is the largest deltaPC we can encode in n+1 bytes for
			// n=1,3,4,5,..., using DW_LNS_advance_pc.
			case PC_RANGE, (1 << 7) - 1, (1 << 16) - 1, (1 << 21) - 1, (1 << 28) - 1,
				(1 << 35) - 1, (1 << 42) - 1, (1 << 49) - 1, (1 << 56) - 1, (1 << 63) - 1:
				opcode = 255
			default:
				opcode = OPCODE_BASE + LINE_RANGE*PC_RANGE - 1 // 249
			}
		}
	}
	if opcode < OPCODE_BASE || opcode > 255 {
		panic(fmt.Sprintf("produced invalid special opcode %d", opcode))
	}

	// Subtract from deltaPC and deltaLC the amounts that the opcode will add.
	deltaPC -= uint64((opcode - OPCODE_BASE) / LINE_RANGE)
	deltaLC -= (opcode-OPCODE_BASE)%LINE_RANGE + LINE_BASE

	// Encode deltaPC.
	if deltaPC != 0 {
		if deltaPC <= PC_RANGE {
			// Adjust the opcode so that we can use the 1-byte DW_LNS_const_add_pc
			// instruction.
			opcode -= LINE_RANGE * int64(PC_RANGE-deltaPC)
			if opcode < OPCODE_BASE {
				panic(fmt.Sprintf("produced invalid special opcode %d", opcode))
			}
			dctxt.AddUint8(s, dwarf.DW_LNS_const_add_pc)
		} else if (1<<14) <= deltaPC && deltaPC < (1<<16) {
			dctxt.AddUint8(s, dwarf.DW_LNS_fixed_advance_pc)
			dctxt.AddUint16(s, uint16(deltaPC))
		} else {
			dctxt.AddUint8(s, dwarf.DW_LNS_advance_pc)
			dwarf.Uleb128put(dctxt, s, int64(deltaPC))
		}
	}

	// Encode deltaLC.
	if deltaLC != 0 {
		dctxt.AddUint8(s, dwarf.DW_LNS_advance_line)
		dwarf.Sleb128put(dctxt, s, deltaLC)
	}

	// Output the special opcode.
	dctxt.AddUint8(s, uint8(opcode))
}

// implement dwarf.Context
//
// Every Add* method below writes at the symbol's current Size, so each
// call appends to the symbol.
type dwCtxt struct{ *Link }

// PtrSize returns the pointer size of the target architecture.
func (c dwCtxt) PtrSize() int {
	return c.Arch.PtrSize
}

// AddInt appends the integer i, encoded in size bytes, to s.
func (c dwCtxt) AddInt(s dwarf.Sym, size int, i int64) {
	ls := s.(*LSym)
	ls.WriteInt(c.Link, ls.Size, size, i)
}

// AddUint16 appends i to s as a 2-byte integer.
func (c dwCtxt) AddUint16(s dwarf.Sym, i uint16) {
	c.AddInt(s, 2, int64(i))
}

// AddUint8 appends the single byte i to s.
func (c dwCtxt) AddUint8(s dwarf.Sym, i uint8) {
	b := []byte{byte(i)}
	c.AddBytes(s, b)
}

// AddBytes appends the bytes of b to s.
func (c dwCtxt) AddBytes(s dwarf.Sym, b []byte) {
	ls := s.(*LSym)
	ls.WriteBytes(c.Link, ls.Size, b)
}

// AddString appends v to s followed by a terminating zero byte.
func (c dwCtxt) AddString(s dwarf.Sym, v string) {
	ls := s.(*LSym)
	ls.WriteString(c.Link, ls.Size, len(v), v)
	ls.WriteInt(c.Link, ls.Size, 1, 0)
}

// AddAddress appends a pointer-sized value to s. When data is non-nil it
// must be an *LSym and a relocation against data+value is recorded;
// otherwise value itself is written as a plain integer.
func (c dwCtxt) AddAddress(s dwarf.Sym, data interface{}, value int64) {
	ls := s.(*LSym)
	size := c.PtrSize()
	if data != nil {
		rsym := data.(*LSym)
		ls.WriteAddr(c.Link, ls.Size, size, rsym, value)
	} else {
		ls.WriteInt(c.Link, ls.Size, size, value)
	}
}

// AddCURelativeAddress appends a compile-unit-relative address of
// data+value to s. data must be an *LSym.
func (c dwCtxt) AddCURelativeAddress(s dwarf.Sym, data interface{}, value int64) {
	ls := s.(*LSym)
	rsym := data.(*LSym)
	ls.WriteCURelativeAddr(c.Link, ls.Size, rsym, value)
}

// AddSectionOffset is not supported by this implementation and always
// panics.
func (c dwCtxt) AddSectionOffset(s dwarf.Sym, size int, t interface{}, ofs int64) {
	panic("should be used only in the linker")
}

// AddDWARFAddrSectionOffset appends a reference to t+ofs to s, 4 bytes
// wide, or 8 bytes when isDwarf64 reports true. The relocation created
// by WriteAddr is retagged as R_DWARFSECREF.
func (c dwCtxt) AddDWARFAddrSectionOffset(s dwarf.Sym, t interface{}, ofs int64) {
	size := 4
	if isDwarf64(c.Link) {
		size = 8
	}

	ls := s.(*LSym)
	rsym := t.(*LSym)
	ls.WriteAddr(c.Link, ls.Size, size, rsym, ofs)
	// WriteAddr appended the relocation last; change its type in place.
	r := &ls.R[len(ls.R)-1]
	r.Type = objabi.R_DWARFSECREF
}

// AddFileRef appends to s a 4-byte, 1-based file index for the file
// whose symbol is f.
func (c dwCtxt) AddFileRef(s dwarf.Sym, f interface{}) {
	ls := s.(*LSym)
	rsym := f.(*LSym)
	fidx := c.Link.PosTable.FileIndex(rsym.Name)
	// Note the +1 here -- the value we're writing is going to be an
	// index into the DWARF line table file section, whose entries
	// are numbered starting at 1, not 0.
	ls.WriteInt(c.Link, ls.Size, 4, int64(fidx+1))
}

// CurrentOffset returns the current size of s, which is the offset at
// which the next Add* call will write.
func (c dwCtxt) CurrentOffset(s dwarf.Sym) int64 {
	ls := s.(*LSym)
	return ls.Size
}

// Here "from" is a symbol corresponding to an inlined or concrete
// function, "to" is the symbol for the corresponding abstract
// function, and "dclIdx" is the index of the symbol of interest with
// respect to the Dcl slice of the original pre-optimization version
// of the inlined function.
//
// The fixup is recorded against the most recently added relocation
// of "from".
func (c dwCtxt) RecordDclReference(from dwarf.Sym, to dwarf.Sym, dclIdx int, inlIndex int) {
	ls := from.(*LSym)
	tls := to.(*LSym)
	ridx := len(ls.R) - 1
	c.Link.DwFixups.ReferenceChildDIE(ls, ridx, tls, dclIdx, inlIndex)
}

// RecordChildDieOffsets forwards the child DIE offsets of s to the
// DWARF fixup table.
func (c dwCtxt) RecordChildDieOffsets(s dwarf.Sym, vars []*dwarf.Var, offsets []int32) {
	ls := s.(*LSym)
	c.Link.DwFixups.RegisterChildDIEOffsets(ls, vars, offsets)
}

// Logf forwards to the Link's Logf.
func (c dwCtxt) Logf(format string, args ...interface{}) {
	c.Link.Logf(format, args...)
}

// isDwarf64 reports whether ctxt's head type is objabi.Haix; callers in
// this file use it to choose 8-byte instead of 4-byte section offsets.
func isDwarf64(ctxt *Link) bool {
	return ctxt.Headtype == objabi.Haix
}

// dwarfSym returns the DWARF symbols associated with TEXT symbol s,
// creating them on the first call. The location-list symbol is created
// only when ctxt.Flag_locationlists is set, and the abstract-function
// symbol is looked up only when s was inlined; otherwise those results
// are nil. A diagnostic is reported if s is not a TEXT symbol.
func (ctxt *Link) dwarfSym(s *LSym) (dwarfInfoSym, dwarfLocSym, dwarfRangesSym, dwarfAbsFnSym, dwarfDebugLines *LSym) {
	if s.Type != objabi.STEXT {
		ctxt.Diag("dwarfSym of non-TEXT %v", s)
	}
	fn := s.Func()
	// A nil info symbol means the symbols have not been created yet.
	if fn.dwarfInfoSym == nil {
		fn.dwarfInfoSym = &LSym{
			Type: objabi.SDWARFFCN,
		}
		if ctxt.Flag_locationlists {
			fn.dwarfLocSym = &LSym{
				Type: objabi.SDWARFLOC,
			}
		}
		fn.dwarfRangesSym = &LSym{
			Type: objabi.SDWARFRANGE,
		}
		fn.dwarfDebugLinesSym = &LSym{
			Type: objabi.SDWARFLINES,
		}
		if s.WasInlined() {
			fn.dwarfAbsFnSym = ctxt.DwFixups.AbsFuncDwarfSym(s)
		}
	}
	return fn.dwarfInfoSym, fn.dwarfLocSym, fn.dwarfRangesSym, fn.dwarfAbsFnSym, fn.dwarfDebugLinesSym
}

// Length returns the size of s. The dwarfContext argument is ignored.
func (s *LSym) Length(dwarfContext interface{}) int64 {
	return s.Size
}

// fileSymbol returns a symbol corresponding to the source file of the
// first instruction (prog) of the specified function. This will
// presumably be the file in which the function is defined.
// It returns nil when the function has no first prog.
func (ctxt *Link) fileSymbol(fn *LSym) *LSym {
	p := fn.Func().Text
	if p != nil {
		f, _ := ctxt.getFileSymbolAndLine(p.Pos)
		fsym := ctxt.Lookup(f)
		return fsym
	}
	return nil
}

// populateDWARF fills in the DWARF Debugging Information Entries for
// TEXT symbol 's'. The various DWARF symbols must already have been
// initialized in InitTextSym.
+func (ctxt *Link) populateDWARF(curfn interface{}, s *LSym, myimportpath string) { + info, loc, ranges, absfunc, lines := ctxt.dwarfSym(s) + if info.Size != 0 { + ctxt.Diag("makeFuncDebugEntry double process %v", s) + } + var scopes []dwarf.Scope + var inlcalls dwarf.InlCalls + if ctxt.DebugInfo != nil { + scopes, inlcalls = ctxt.DebugInfo(s, info, curfn) + } + var err error + dwctxt := dwCtxt{ctxt} + filesym := ctxt.fileSymbol(s) + fnstate := &dwarf.FnState{ + Name: s.Name, + Importpath: myimportpath, + Info: info, + Filesym: filesym, + Loc: loc, + Ranges: ranges, + Absfn: absfunc, + StartPC: s, + Size: s.Size, + External: !s.Static(), + Scopes: scopes, + InlCalls: inlcalls, + UseBASEntries: ctxt.UseBASEntries, + } + if absfunc != nil { + err = dwarf.PutAbstractFunc(dwctxt, fnstate) + if err != nil { + ctxt.Diag("emitting DWARF for %s failed: %v", s.Name, err) + } + err = dwarf.PutConcreteFunc(dwctxt, fnstate, s.Wrapper()) + } else { + err = dwarf.PutDefaultFunc(dwctxt, fnstate, s.Wrapper()) + } + if err != nil { + ctxt.Diag("emitting DWARF for %s failed: %v", s.Name, err) + } + // Fill in the debug lines symbol. + ctxt.generateDebugLinesSymbol(s, lines) +} + +// DwarfIntConst creates a link symbol for an integer constant with the +// given name, type and value. +func (ctxt *Link) DwarfIntConst(myimportpath, name, typename string, val int64) { + if myimportpath == "" { + return + } + s := ctxt.LookupInit(dwarf.ConstInfoPrefix+myimportpath, func(s *LSym) { + s.Type = objabi.SDWARFCONST + ctxt.Data = append(ctxt.Data, s) + }) + dwarf.PutIntConst(dwCtxt{ctxt}, s, ctxt.Lookup(dwarf.InfoPrefix+typename), myimportpath+"."+name, val) +} + +// DwarfGlobal creates a link symbol containing a DWARF entry for +// a global variable. 
+func (ctxt *Link) DwarfGlobal(myimportpath, typename string, varSym *LSym) { + if myimportpath == "" || varSym.Local() { + return + } + varname := varSym.Name + dieSymName := dwarf.InfoPrefix + varname + dieSym := ctxt.LookupInit(dieSymName, func(s *LSym) { + s.Type = objabi.SDWARFVAR + s.Set(AttrDuplicateOK, true) // needed for shared linkage + ctxt.Data = append(ctxt.Data, s) + }) + typeSym := ctxt.Lookup(dwarf.InfoPrefix + typename) + dwarf.PutGlobal(dwCtxt{ctxt}, dieSym, typeSym, varSym, varname) +} + +func (ctxt *Link) DwarfAbstractFunc(curfn interface{}, s *LSym, myimportpath string) { + absfn := ctxt.DwFixups.AbsFuncDwarfSym(s) + if absfn.Size != 0 { + ctxt.Diag("internal error: DwarfAbstractFunc double process %v", s) + } + if s.Func() == nil { + s.NewFuncInfo() + } + scopes, _ := ctxt.DebugInfo(s, absfn, curfn) + dwctxt := dwCtxt{ctxt} + filesym := ctxt.fileSymbol(s) + fnstate := dwarf.FnState{ + Name: s.Name, + Importpath: myimportpath, + Info: absfn, + Filesym: filesym, + Absfn: absfn, + External: !s.Static(), + Scopes: scopes, + UseBASEntries: ctxt.UseBASEntries, + } + if err := dwarf.PutAbstractFunc(dwctxt, &fnstate); err != nil { + ctxt.Diag("emitting DWARF for %s failed: %v", s.Name, err) + } +} + +// This table is designed to aid in the creation of references between +// DWARF subprogram DIEs. +// +// In most cases when one DWARF DIE has to refer to another DWARF DIE, +// the target of the reference has an LSym, which makes it easy to use +// the existing relocation mechanism. For DWARF inlined routine DIEs, +// however, the subprogram DIE has to refer to a child +// parameter/variable DIE of the abstract subprogram. This child DIE +// doesn't have an LSym, and also of interest is the fact that when +// DWARF generation is happening for inlined function F within caller +// G, it's possible that DWARF generation hasn't happened yet for F, +// so there is no way to know the offset of a child DIE within F's +// abstract function. 
Making matters more complex, each inlined +// instance of F may refer to a subset of the original F's variables +// (depending on what happens with optimization, some vars may be +// eliminated). +// +// The fixup table below helps overcome this hurdle. At the point +// where a parameter/variable reference is made (via a call to +// "ReferenceChildDIE"), a fixup record is generated that records +// the relocation that is targeting that child variable. At a later +// point when the abstract function DIE is emitted, there will be +// a call to "RegisterChildDIEOffsets", at which point the offsets +// needed to apply fixups are captured. Finally, once the parallel +// portion of the compilation is done, fixups can actually be applied +// during the "Finalize" method (this can't be done during the +// parallel portion of the compile due to the possibility of data +// races). +// +// This table is also used to record the "precursor" function node for +// each function that is the target of an inline -- child DIE references +// have to be made with respect to the original pre-optimization +// version of the function (to allow for the fact that each inlined +// body may be optimized differently).
+type DwarfFixupTable struct { + ctxt *Link + mu sync.Mutex + symtab map[*LSym]int // maps abstract fn LSYM to index in svec + svec []symFixups + precursor map[*LSym]fnState // maps fn Lsym to precursor Node, absfn sym +} + +type symFixups struct { + fixups []relFixup + doffsets []declOffset + inlIndex int32 + defseen bool +} + +type declOffset struct { + // Index of variable within DCL list of pre-optimization function + dclIdx int32 + // Offset of var's child DIE with respect to containing subprogram DIE + offset int32 +} + +type relFixup struct { + refsym *LSym + relidx int32 + dclidx int32 +} + +type fnState struct { + // precursor function (really *gc.Node) + precursor interface{} + // abstract function symbol + absfn *LSym +} + +func NewDwarfFixupTable(ctxt *Link) *DwarfFixupTable { + return &DwarfFixupTable{ + ctxt: ctxt, + symtab: make(map[*LSym]int), + precursor: make(map[*LSym]fnState), + } +} + +func (ft *DwarfFixupTable) GetPrecursorFunc(s *LSym) interface{} { + if fnstate, found := ft.precursor[s]; found { + return fnstate.precursor + } + return nil +} + +func (ft *DwarfFixupTable) SetPrecursorFunc(s *LSym, fn interface{}) { + if _, found := ft.precursor[s]; found { + ft.ctxt.Diag("internal error: DwarfFixupTable.SetPrecursorFunc double call on %v", s) + } + + // initialize abstract function symbol now. This is done here so + // as to avoid data races later on during the parallel portion of + // the back end. + absfn := ft.ctxt.LookupDerived(s, dwarf.InfoPrefix+s.Name+dwarf.AbstractFuncSuffix) + absfn.Set(AttrDuplicateOK, true) + absfn.Type = objabi.SDWARFABSFCN + ft.ctxt.Data = append(ft.ctxt.Data, absfn) + + // In the case of "late" inlining (inlines that happen during + // wrapper generation as opposed to the main inlining phase) it's + // possible that we didn't cache the abstract function sym for the + // text symbol -- do so now if needed. See issue 38068. 
+ if fn := s.Func(); fn != nil && fn.dwarfAbsFnSym == nil { + fn.dwarfAbsFnSym = absfn + } + + ft.precursor[s] = fnState{precursor: fn, absfn: absfn} +} + +// Make a note of a child DIE reference: relocation 'ridx' within symbol 's' +// is targeting child 'c' of DIE with symbol 'tgt'. +func (ft *DwarfFixupTable) ReferenceChildDIE(s *LSym, ridx int, tgt *LSym, dclidx int, inlIndex int) { + // Protect against concurrent access if multiple backend workers + ft.mu.Lock() + defer ft.mu.Unlock() + + // Create entry for symbol if not already present. + idx, found := ft.symtab[tgt] + if !found { + ft.svec = append(ft.svec, symFixups{inlIndex: int32(inlIndex)}) + idx = len(ft.svec) - 1 + ft.symtab[tgt] = idx + } + + // Do we have child DIE offsets available? If so, then apply them, + // otherwise create a fixup record. + sf := &ft.svec[idx] + if len(sf.doffsets) > 0 { + found := false + for _, do := range sf.doffsets { + if do.dclIdx == int32(dclidx) { + off := do.offset + s.R[ridx].Add += int64(off) + found = true + break + } + } + if !found { + ft.ctxt.Diag("internal error: DwarfFixupTable.ReferenceChildDIE unable to locate child DIE offset for dclIdx=%d src=%v tgt=%v", dclidx, s, tgt) + } + } else { + sf.fixups = append(sf.fixups, relFixup{s, int32(ridx), int32(dclidx)}) + } +} + +// Called once DWARF generation is complete for a given abstract function, +// whose children might have been referenced via a call above. Stores +// the offsets for any child DIEs (vars, params) so that they can be +// consumed later in on DwarfFixupTable.Finalize, which applies any +// outstanding fixups. 
+func (ft *DwarfFixupTable) RegisterChildDIEOffsets(s *LSym, vars []*dwarf.Var, coffsets []int32) { + // Length of these two slices should agree + if len(vars) != len(coffsets) { + ft.ctxt.Diag("internal error: RegisterChildDIEOffsets vars/offsets length mismatch") + return + } + + // Generate the slice of declOffset's based in vars/coffsets + doffsets := make([]declOffset, len(coffsets)) + for i := range coffsets { + doffsets[i].dclIdx = vars[i].ChildIndex + doffsets[i].offset = coffsets[i] + } + + ft.mu.Lock() + defer ft.mu.Unlock() + + // Store offsets for this symbol. + idx, found := ft.symtab[s] + if !found { + sf := symFixups{inlIndex: -1, defseen: true, doffsets: doffsets} + ft.svec = append(ft.svec, sf) + ft.symtab[s] = len(ft.svec) - 1 + } else { + sf := &ft.svec[idx] + sf.doffsets = doffsets + sf.defseen = true + } +} + +func (ft *DwarfFixupTable) processFixups(slot int, s *LSym) { + sf := &ft.svec[slot] + for _, f := range sf.fixups { + dfound := false + for _, doffset := range sf.doffsets { + if doffset.dclIdx == f.dclidx { + f.refsym.R[f.relidx].Add += int64(doffset.offset) + dfound = true + break + } + } + if !dfound { + ft.ctxt.Diag("internal error: DwarfFixupTable has orphaned fixup on %v targeting %v relidx=%d dclidx=%d", f.refsym, s, f.relidx, f.dclidx) + } + } +} + +// return the LSym corresponding to the 'abstract subprogram' DWARF +// info entry for a function. +func (ft *DwarfFixupTable) AbsFuncDwarfSym(fnsym *LSym) *LSym { + // Protect against concurrent access if multiple backend workers + ft.mu.Lock() + defer ft.mu.Unlock() + + if fnstate, found := ft.precursor[fnsym]; found { + return fnstate.absfn + } + ft.ctxt.Diag("internal error: AbsFuncDwarfSym requested for %v, not seen during inlining", fnsym) + return nil +} + +// Called after all functions have been compiled; the main job of this +// function is to identify cases where there are outstanding fixups. 
+// This scenario crops up when we have references to variables of an +// inlined routine, but that routine is defined in some other package. +// This helper walks through and locate these fixups, then invokes a +// helper to create an abstract subprogram DIE for each one. +func (ft *DwarfFixupTable) Finalize(myimportpath string, trace bool) { + if trace { + ft.ctxt.Logf("DwarfFixupTable.Finalize invoked for %s\n", myimportpath) + } + + // Collect up the keys from the precursor map, then sort the + // resulting list (don't want to rely on map ordering here). + fns := make([]*LSym, len(ft.precursor)) + idx := 0 + for fn := range ft.precursor { + fns[idx] = fn + idx++ + } + sort.Sort(BySymName(fns)) + + // Should not be called during parallel portion of compilation. + if ft.ctxt.InParallel { + ft.ctxt.Diag("internal error: DwarfFixupTable.Finalize call during parallel backend") + } + + // Generate any missing abstract functions. + for _, s := range fns { + absfn := ft.AbsFuncDwarfSym(s) + slot, found := ft.symtab[absfn] + if !found || !ft.svec[slot].defseen { + ft.ctxt.GenAbstractFunc(s) + } + } + + // Apply fixups. + for _, s := range fns { + absfn := ft.AbsFuncDwarfSym(s) + slot, found := ft.symtab[absfn] + if !found { + ft.ctxt.Diag("internal error: DwarfFixupTable.Finalize orphan abstract function for %v", s) + } else { + ft.processFixups(slot, s) + } + } +} + +type BySymName []*LSym + +func (s BySymName) Len() int { return len(s) } +func (s BySymName) Less(i, j int) bool { return s[i].Name < s[j].Name } +func (s BySymName) Swap(i, j int) { s[i], s[j] = s[j], s[i] } diff --git a/src/cmd/internal/obj/go.go b/src/cmd/internal/obj/go.go new file mode 100644 index 0000000..dbe9b40 --- /dev/null +++ b/src/cmd/internal/obj/go.go @@ -0,0 +1,16 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package obj + +// go-specific code shared across loaders (5l, 6l, 8l). + +func Nopout(p *Prog) { + p.As = ANOP + p.Scond = 0 + p.From = Addr{} + p.RestArgs = nil + p.Reg = 0 + p.To = Addr{} +} diff --git a/src/cmd/internal/obj/inl.go b/src/cmd/internal/obj/inl.go new file mode 100644 index 0000000..934f1c2 --- /dev/null +++ b/src/cmd/internal/obj/inl.go @@ -0,0 +1,132 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package obj + +import "cmd/internal/src" + +// InlTree is a collection of inlined calls. The Parent field of an +// InlinedCall is the index of another InlinedCall in InlTree. +// +// The compiler maintains a global inlining tree and adds a node to it +// every time a function is inlined. For example, suppose f() calls g() +// and g has two calls to h(), and that f, g, and h are inlineable: +// +// 1 func main() { +// 2 f() +// 3 } +// 4 func f() { +// 5 g() +// 6 } +// 7 func g() { +// 8 h() +// 9 h() +// 10 } +// 11 func h() { +// 12 println("H") +// 13 } +// +// Assuming the global tree starts empty, inlining will produce the +// following tree: +// +// []InlinedCall{ +// {Parent: -1, Func: "f", Pos: <line 2>}, +// {Parent: 0, Func: "g", Pos: <line 5>}, +// {Parent: 1, Func: "h", Pos: <line 8>}, +// {Parent: 1, Func: "h", Pos: <line 9>}, +// } +// +// The nodes of h inlined into main will have inlining indexes 2 and 3. +// +// Eventually, the compiler extracts a per-function inlining tree from +// the global inlining tree (see pcln.go). +type InlTree struct { + nodes []InlinedCall +} + +// InlinedCall is a node in an InlTree. +type InlinedCall struct { + Parent int // index of the parent in the InlTree or < 0 if outermost call + Pos src.XPos // position of the inlined call + Func *LSym // function that was inlined + ParentPC int32 // PC of instruction just before inlined body. Only valid in local trees. 
+} + +// Add adds a new call to the tree, returning its index. +func (tree *InlTree) Add(parent int, pos src.XPos, func_ *LSym) int { + r := len(tree.nodes) + call := InlinedCall{ + Parent: parent, + Pos: pos, + Func: func_, + } + tree.nodes = append(tree.nodes, call) + return r +} + +func (tree *InlTree) Parent(inlIndex int) int { + return tree.nodes[inlIndex].Parent +} + +func (tree *InlTree) InlinedFunction(inlIndex int) *LSym { + return tree.nodes[inlIndex].Func +} + +func (tree *InlTree) CallPos(inlIndex int) src.XPos { + return tree.nodes[inlIndex].Pos +} + +func (tree *InlTree) setParentPC(inlIndex int, pc int32) { + tree.nodes[inlIndex].ParentPC = pc +} + +// OutermostPos returns the outermost position corresponding to xpos, +// which is where xpos was ultimately inlined to. In the example for +// InlTree, main() contains inlined AST nodes from h(), but the +// outermost position for those nodes is line 2. +func (ctxt *Link) OutermostPos(xpos src.XPos) src.Pos { + pos := ctxt.InnermostPos(xpos) + + outerxpos := xpos + for ix := pos.Base().InliningIndex(); ix >= 0; { + call := ctxt.InlTree.nodes[ix] + ix = call.Parent + outerxpos = call.Pos + } + return ctxt.PosTable.Pos(outerxpos) +} + +// InnermostPos returns the innermost position corresponding to xpos, +// that is, the code that is inlined and that inlines nothing else. +// In the example for InlTree above, the code for println within h +// would have an innermost position with line number 12, whether +// h was not inlined, inlined into g, g-then-f, or g-then-f-then-main. +// This corresponds to what someone debugging main, f, g, or h might +// expect to see while single-stepping. +func (ctxt *Link) InnermostPos(xpos src.XPos) src.Pos { + return ctxt.PosTable.Pos(xpos) +} + +// AllPos returns a slice of the positions inlined at xpos, from +// innermost (index zero) to outermost. To avoid allocation +// the input slice is truncated, and used for the result, extended +// as necessary. 
+func (ctxt *Link) AllPos(xpos src.XPos, result []src.Pos) []src.Pos { + pos := ctxt.InnermostPos(xpos) + result = result[:0] + result = append(result, ctxt.PosTable.Pos(xpos)) + for ix := pos.Base().InliningIndex(); ix >= 0; { + call := ctxt.InlTree.nodes[ix] + ix = call.Parent + result = append(result, ctxt.PosTable.Pos(call.Pos)) + } + return result +} + +func dumpInlTree(ctxt *Link, tree InlTree) { + for i, call := range tree.nodes { + pos := ctxt.PosTable.Pos(call.Pos) + ctxt.Logf("%0d | %0d | %s (%s) pc=%d\n", i, call.Parent, call.Func, pos, call.ParentPC) + } +} diff --git a/src/cmd/internal/obj/ld.go b/src/cmd/internal/obj/ld.go new file mode 100644 index 0000000..5d6c000 --- /dev/null +++ b/src/cmd/internal/obj/ld.go @@ -0,0 +1,85 @@ +// Derived from Inferno utils/6l/obj.c and utils/6l/span.c +// https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/obj.c +// https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/span.c +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package obj + +/* + * add library to library list. + * srcref: src file referring to package + * objref: object file referring to package + * file: object file, e.g., /home/rsc/go/pkg/container/vector.a + * pkg: package import path, e.g. 
container/vector + */ + +const ( + LOG = 5 +) + +func mkfwd(sym *LSym) { + var dwn [LOG]int32 + var cnt [LOG]int32 + var lst [LOG]*Prog + + for i := 0; i < LOG; i++ { + if i == 0 { + cnt[i] = 1 + } else { + cnt[i] = LOG * cnt[i-1] + } + dwn[i] = 1 + lst[i] = nil + } + + i := 0 + for p := sym.Func().Text; p != nil && p.Link != nil; p = p.Link { + i-- + if i < 0 { + i = LOG - 1 + } + p.Forwd = nil + dwn[i]-- + if dwn[i] <= 0 { + dwn[i] = cnt[i] + if lst[i] != nil { + lst[i].Forwd = p + } + lst[i] = p + } + } +} + +func Appendp(q *Prog, newprog ProgAlloc) *Prog { + p := newprog() + p.Link = q.Link + q.Link = p + p.Pos = q.Pos + return p +} diff --git a/src/cmd/internal/obj/line.go b/src/cmd/internal/obj/line.go new file mode 100644 index 0000000..20f03d9 --- /dev/null +++ b/src/cmd/internal/obj/line.go @@ -0,0 +1,35 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package obj + +import ( + "cmd/internal/goobj" + "cmd/internal/src" +) + +// AddImport adds a package to the list of imported packages. +func (ctxt *Link) AddImport(pkg string, fingerprint goobj.FingerprintType) { + ctxt.Imports = append(ctxt.Imports, goobj.ImportedPkg{Pkg: pkg, Fingerprint: fingerprint}) +} + +// getFileSymbolAndLine returns the relative file symbol and relative line +// number for a position (i.e., as adjusted by a //line directive). This is the +// file/line visible in the final binary (pcfile, pcln, etc). +func (ctxt *Link) getFileSymbolAndLine(xpos src.XPos) (f string, l int32) { + pos := ctxt.InnermostPos(xpos) + if !pos.IsKnown() { + pos = src.Pos{} + } + return pos.SymFilename(), int32(pos.RelLine()) +} + +// getFileIndexAndLine returns the relative file index (local to the CU), and +// the relative line number for a position (i.e., as adjusted by a //line +// directive). This is the file/line visible in the final binary (pcfile, pcln, +// etc). 
+func (ctxt *Link) getFileIndexAndLine(xpos src.XPos) (int, int32) { + f, l := ctxt.getFileSymbolAndLine(xpos) + return ctxt.PosTable.FileIndex(f), l +} diff --git a/src/cmd/internal/obj/line_test.go b/src/cmd/internal/obj/line_test.go new file mode 100644 index 0000000..d3bb4e2 --- /dev/null +++ b/src/cmd/internal/obj/line_test.go @@ -0,0 +1,40 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package obj + +import ( + "cmd/internal/src" + "fmt" + "testing" +) + +func TestGetFileSymbolAndLine(t *testing.T) { + ctxt := new(Link) + ctxt.hash = make(map[string]*LSym) + ctxt.statichash = make(map[string]*LSym) + + afile := src.NewFileBase("a.go", "a.go") + bfile := src.NewFileBase("b.go", "/foo/bar/b.go") + lfile := src.NewLinePragmaBase(src.MakePos(afile, 8, 1), "linedir", "linedir", 100, 1) + + var tests = []struct { + pos src.Pos + want string + }{ + {src.NoPos, "??:0"}, + {src.MakePos(afile, 1, 0), "a.go:1"}, + {src.MakePos(afile, 2, 0), "a.go:2"}, + {src.MakePos(bfile, 10, 4), "/foo/bar/b.go:10"}, + {src.MakePos(lfile, 10, 0), "linedir:102"}, // 102 == 100 + (10 - (7+1)) + } + + for _, test := range tests { + f, l := ctxt.getFileSymbolAndLine(ctxt.PosTable.XPos(test.pos)) + got := fmt.Sprintf("%s:%d", f, l) + if got != src.FileSymPrefix+test.want { + t.Errorf("ctxt.getFileSymbolAndLine(%v) = %q, want %q", test.pos, got, test.want) + } + } +} diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go new file mode 100644 index 0000000..8037017 --- /dev/null +++ b/src/cmd/internal/obj/link.go @@ -0,0 +1,1000 @@ +// Derived from Inferno utils/6l/l.h and related files. +// https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/l.h +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. 
+// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package obj + +import ( + "bufio" + "cmd/internal/dwarf" + "cmd/internal/goobj" + "cmd/internal/objabi" + "cmd/internal/src" + "cmd/internal/sys" + "fmt" + "sync" + "sync/atomic" +) + +// An Addr is an argument to an instruction. +// The general forms and their encodings are: +// +// sym±offset(symkind)(reg)(index*scale) +// Memory reference at address &sym(symkind) + offset + reg + index*scale. 
+// Any of sym(symkind), ±offset, (reg), (index*scale), and *scale can be omitted. +// If (reg) and *scale are both omitted, the resulting expression (index) is parsed as (reg). +// To force a parsing as index*scale, write (index*1). +// Encoding: +// type = TYPE_MEM +// name = symkind (NAME_AUTO, ...) or 0 (NAME_NONE) +// sym = sym +// offset = ±offset +// reg = reg (REG_*) +// index = index (REG_*) +// scale = scale (1, 2, 4, 8) +// +// $<mem> +// Effective address of memory reference <mem>, defined above. +// Encoding: same as memory reference, but type = TYPE_ADDR. +// +// $<±integer value> +// This is a special case of $<mem>, in which only ±offset is present. +// It has a separate type for easy recognition. +// Encoding: +// type = TYPE_CONST +// offset = ±integer value +// +// *<mem> +// Indirect reference through memory reference <mem>, defined above. +// Only used on x86 for CALL/JMP *sym(SB), which calls/jumps to a function +// pointer stored in the data word sym(SB), not a function named sym(SB). +// Encoding: same as above, but type = TYPE_INDIR. +// +// $*$<mem> +// No longer used. +// On machines with actual SB registers, $*$<mem> forced the +// instruction encoding to use a full 32-bit constant, never a +// reference relative to SB. +// +// $<floating point literal> +// Floating point constant value. +// Encoding: +// type = TYPE_FCONST +// val = floating point value +// +// $<string literal, up to 8 chars> +// String literal value (raw bytes used for DATA instruction). +// Encoding: +// type = TYPE_SCONST +// val = string +// +// <symbolic constant name> +// Special symbolic constants for ARM64, such as conditional flags, tlbi_op and so on. +// Encoding: +// type = TYPE_SPECIAL +// offset = The constant value corresponding to this symbol +// +// <register name> +// Any register: integer, floating point, control, segment, and so on. +// If looking for specific register kind, must check type and reg value range. 
+// Encoding: +// type = TYPE_REG +// reg = reg (REG_*) +// +// x(PC) +// Encoding: +// type = TYPE_BRANCH +// val = Prog* reference OR ELSE offset = target pc (branch takes priority) +// +// $±x-±y +// Final argument to TEXT, specifying local frame size x and argument size y. +// In this form, x and y are integer literals only, not arbitrary expressions. +// This avoids parsing ambiguities due to the use of - as a separator. +// The ± are optional. +// If the final argument to TEXT omits the -±y, the encoding should still +// use TYPE_TEXTSIZE (not TYPE_CONST), with u.argsize = ArgsSizeUnknown. +// Encoding: +// type = TYPE_TEXTSIZE +// offset = x +// val = int32(y) +// +// reg<<shift, reg>>shift, reg->shift, reg@>shift +// Shifted register value, for ARM and ARM64. +// In this form, reg must be a register and shift can be a register or an integer constant. +// Encoding: +// type = TYPE_SHIFT +// On ARM: +// offset = (reg&15) | shifttype<<5 | count +// shifttype = 0, 1, 2, 3 for <<, >>, ->, @> +// count = (reg&15)<<8 | 1<<4 for a register shift count, (n&31)<<7 for an integer constant. +// On ARM64: +// offset = (reg&31)<<16 | shifttype<<22 | (count&63)<<10 +// shifttype = 0, 1, 2 for <<, >>, -> +// +// (reg, reg) +// A destination register pair. When used as the last argument of an instruction, +// this form makes clear that both registers are destinations. +// Encoding: +// type = TYPE_REGREG +// reg = first register +// offset = second register +// +// [reg, reg, reg-reg] +// Register list for ARM, ARM64, 386/AMD64. +// Encoding: +// type = TYPE_REGLIST +// On ARM: +// offset = bit mask of registers in list; R0 is low bit. +// On ARM64: +// offset = register count (Q:size) | arrangement (opcode) | first register +// On 386/AMD64: +// reg = range low register +// offset = 2 packed registers + kind tag (see x86.EncodeRegisterRange) +// +// reg, reg +// Register pair for ARM. +// TYPE_REGREG2 +// +// (reg+reg) +// Register pair for PPC64. 
+// Encoding: +// type = TYPE_MEM +// reg = first register +// index = second register +// scale = 1 +// +// reg.[US]XT[BHWX] +// Register extension for ARM64 +// Encoding: +// type = TYPE_REG +// reg = REG_[US]XT[BHWX] + register + shift amount +// offset = ((reg&31) << 16) | (exttype << 13) | (amount<<10) +// +// reg.<T> +// Register arrangement for ARM64 SIMD register +// e.g.: V1.S4, V2.S2, V7.D2, V2.H4, V6.B16 +// Encoding: +// type = TYPE_REG +// reg = REG_ARNG + register + arrangement +// +// reg.<T>[index] +// Register element for ARM64 +// Encoding: +// type = TYPE_REG +// reg = REG_ELEM + register + arrangement +// index = element index + +type Addr struct { + Reg int16 + Index int16 + Scale int16 // Sometimes holds a register. + Type AddrType + Name AddrName + Class int8 + Offset int64 + Sym *LSym + + // argument value: + // for TYPE_SCONST, a string + // for TYPE_FCONST, a float64 + // for TYPE_BRANCH, a *Prog (optional) + // for TYPE_TEXTSIZE, an int32 (optional) + Val interface{} +} + +type AddrName int8 + +const ( + NAME_NONE AddrName = iota + NAME_EXTERN + NAME_STATIC + NAME_AUTO + NAME_PARAM + // A reference to name@GOT(SB) is a reference to the entry in the global offset + // table for 'name'. + NAME_GOTREF + // Indicates that this is a reference to a TOC anchor. 
+ NAME_TOCREF +) + +//go:generate stringer -type AddrType + +type AddrType uint8 + +const ( + TYPE_NONE AddrType = iota + TYPE_BRANCH + TYPE_TEXTSIZE + TYPE_MEM + TYPE_CONST + TYPE_FCONST + TYPE_SCONST + TYPE_REG + TYPE_ADDR + TYPE_SHIFT + TYPE_REGREG + TYPE_REGREG2 + TYPE_INDIR + TYPE_REGLIST + TYPE_SPECIAL +) + +func (a *Addr) Target() *Prog { + if a.Type == TYPE_BRANCH && a.Val != nil { + return a.Val.(*Prog) + } + return nil +} +func (a *Addr) SetTarget(t *Prog) { + if a.Type != TYPE_BRANCH { + panic("setting branch target when type is not TYPE_BRANCH") + } + a.Val = t +} + +func (a *Addr) SetConst(v int64) { + a.Sym = nil + a.Type = TYPE_CONST + a.Offset = v +} + +// Prog describes a single machine instruction. +// +// The general instruction form is: +// +// (1) As.Scond From [, ...RestArgs], To +// (2) As.Scond From, Reg [, ...RestArgs], To, RegTo2 +// +// where As is an opcode and the others are arguments: +// From, Reg are sources, and To, RegTo2 are destinations. +// RestArgs can hold additional sources and destinations. +// Usually, not all arguments are present. +// For example, MOVL R1, R2 encodes using only As=MOVL, From=R1, To=R2. +// The Scond field holds additional condition bits for systems (like arm) +// that have generalized conditional execution. +// (2) form is present for compatibility with older code, +// to avoid too much changes in a single swing. +// (1) scheme is enough to express any kind of operand combination. +// +// Jump instructions use the To.Val field to point to the target *Prog, +// which must be in the same linked list as the jump instruction. +// +// The Progs for a given function are arranged in a list linked through the Link field. +// +// Each Prog is charged to a specific source line in the debug information, +// specified by Pos.Line(). +// Every Prog has a Ctxt field that defines its context. 
+// For performance reasons, Progs are usually bulk allocated, cached, and reused; +// those bulk allocators should always be used, rather than new(Prog). +// +// The other fields not yet mentioned are for use by the back ends and should +// be left zeroed by creators of Prog lists. +type Prog struct { + Ctxt *Link // linker context + Link *Prog // next Prog in linked list + From Addr // first source operand + RestArgs []AddrPos // can pack any operands that not fit into {Prog.From, Prog.To} + To Addr // destination operand (second is RegTo2 below) + Pool *Prog // constant pool entry, for arm,arm64 back ends + Forwd *Prog // for x86 back end + Rel *Prog // for x86, arm back ends + Pc int64 // for back ends or assembler: virtual or actual program counter, depending on phase + Pos src.XPos // source position of this instruction + Spadj int32 // effect of instruction on stack pointer (increment or decrement amount) + As As // assembler opcode + Reg int16 // 2nd source operand + RegTo2 int16 // 2nd destination operand + Mark uint16 // bitmask of arch-specific items + Optab uint16 // arch-specific opcode index + Scond uint8 // bits that describe instruction suffixes (e.g. ARM conditions) + Back uint8 // for x86 back end: backwards branch state + Ft uint8 // for x86 back end: type index of Prog.From + Tt uint8 // for x86 back end: type index of Prog.To + Isize uint8 // for x86 back end: size of the instruction in bytes +} + +// AddrPos indicates whether the operand is the source or the destination. +type AddrPos struct { + Addr + Pos OperandPos +} + +type OperandPos int8 + +const ( + Source OperandPos = iota + Destination +) + +// From3Type returns p.GetFrom3().Type, or TYPE_NONE when +// p.GetFrom3() returns nil. +// +// Deprecated: for the same reasons as Prog.GetFrom3. +func (p *Prog) From3Type() AddrType { + if p.RestArgs == nil { + return TYPE_NONE + } + return p.RestArgs[0].Type +} + +// GetFrom3 returns second source operand (the first is Prog.From). 
+// In combination with Prog.From and Prog.To it makes common 3 operand +// case easier to use. +// +// Should be used only when RestArgs is set with SetFrom3. +// +// Deprecated: better use RestArgs directly or define backend-specific getters. +// Introduced to simplify transition to []Addr. +// Usage of this is discouraged due to fragility and lack of guarantees. +func (p *Prog) GetFrom3() *Addr { + if p.RestArgs == nil { + return nil + } + return &p.RestArgs[0].Addr +} + +// SetFrom3 assigns []Args{{a, 0}} to p.RestArgs. +// In pair with Prog.GetFrom3 it can help in emulation of Prog.From3. +// +// Deprecated: for the same reasons as Prog.GetFrom3. +func (p *Prog) SetFrom3(a Addr) { + p.RestArgs = []AddrPos{{a, Source}} +} + +// SetFrom3Reg calls p.SetFrom3 with a register Addr containing reg. +// +// Deprecated: for the same reasons as Prog.GetFrom3. +func (p *Prog) SetFrom3Reg(reg int16) { + p.SetFrom3(Addr{Type: TYPE_REG, Reg: reg}) +} + +// SetFrom3Const calls p.SetFrom3 with a const Addr containing x. +// +// Deprecated: for the same reasons as Prog.GetFrom3. +func (p *Prog) SetFrom3Const(off int64) { + p.SetFrom3(Addr{Type: TYPE_CONST, Offset: off}) +} + +// SetTo2 assigns []Args{{a, 1}} to p.RestArgs when the second destination +// operand does not fit into prog.RegTo2. +func (p *Prog) SetTo2(a Addr) { + p.RestArgs = []AddrPos{{a, Destination}} +} + +// GetTo2 returns the second destination operand. +func (p *Prog) GetTo2() *Addr { + if p.RestArgs == nil { + return nil + } + return &p.RestArgs[0].Addr +} + +// SetRestArgs assigns more than one source operands to p.RestArgs. +func (p *Prog) SetRestArgs(args []Addr) { + for i := range args { + p.RestArgs = append(p.RestArgs, AddrPos{args[i], Source}) + } +} + +// An As denotes an assembler opcode. +// There are some portable opcodes, declared here in package obj, +// that are common to all architectures. 
// However, the majority of opcodes are arch-specific
// and are declared in their respective architecture's subpackage.
type As int16

// These are the portable opcodes.
// Values are assigned by iota, so the order of this list fixes each
// opcode's numeric value; AXXX is 0.
const (
	AXXX As = iota
	ACALL
	ADUFFCOPY
	ADUFFZERO
	AEND
	AFUNCDATA
	AJMP
	ANOP
	APCALIGN
	APCDATA
	ARET
	AGETCALLERPC
	ATEXT
	AUNDEF
	A_ARCHSPECIFIC // one past the last portable opcode; see the comment below
)

// Each architecture is allotted a distinct subspace of opcode values
// for declaring its arch-specific opcodes.
// Within this subspace, the first arch-specific opcode should be
// at offset A_ARCHSPECIFIC.
//
// Subspaces are aligned to a power of two so opcodes can be masked
// with AMask and used as compact array indices.
//
// The bases are successive multiples of 1<<11: ABase386 is 1<<11,
// ABaseARM is 2<<11, and so on in declaration order.
const (
	ABase386 = (1 + iota) << 11
	ABaseARM
	ABaseAMD64
	ABasePPC64
	ABaseARM64
	ABaseMIPS
	ABaseLoong64
	ABaseRISCV
	ABaseS390X
	ABaseWasm

	AllowedOpCodes = 1 << 11            // The number of opcodes available for any given architecture.
	AMask          = AllowedOpCodes - 1 // AND with this to use the opcode as an array index.
)

// An LSym is the sort of symbol that is written to an object file.
// It represents Go symbols in a flat pkg+"."+name namespace.
type LSym struct {
	Name string
	Type objabi.SymKind
	Attribute // embedded, so Attribute's methods are callable on an LSym

	Size   int64
	Gotype *LSym
	P      []byte  // NOTE(review): presumably the symbol's data bytes — confirm against writers of P
	R      []Reloc // NOTE(review): presumably relocations applying to this symbol — confirm

	Extra *interface{} // *FuncInfo or *FileInfo, if present

	Pkg    string
	PkgIdx int32
	SymIdx int32
}

// A FuncInfo contains extra fields for STEXT symbols.
+type FuncInfo struct { + Args int32 + Locals int32 + Align int32 + FuncID objabi.FuncID + FuncFlag objabi.FuncFlag + StartLine int32 + Text *Prog + Autot map[*LSym]struct{} + Pcln Pcln + InlMarks []InlMark + spills []RegSpill + + dwarfInfoSym *LSym + dwarfLocSym *LSym + dwarfRangesSym *LSym + dwarfAbsFnSym *LSym + dwarfDebugLinesSym *LSym + + GCArgs *LSym + GCLocals *LSym + StackObjects *LSym + OpenCodedDeferInfo *LSym + ArgInfo *LSym // argument info for traceback + ArgLiveInfo *LSym // argument liveness info for traceback + WrapInfo *LSym // for wrapper, info of wrapped function + JumpTables []JumpTable + + FuncInfoSym *LSym +} + +// JumpTable represents a table used for implementing multi-way +// computed branching, used typically for implementing switches. +// Sym is the table itself, and Targets is a list of target +// instructions to go to for the computed branch index. +type JumpTable struct { + Sym *LSym + Targets []*Prog +} + +// NewFuncInfo allocates and returns a FuncInfo for LSym. +func (s *LSym) NewFuncInfo() *FuncInfo { + if s.Extra != nil { + panic(fmt.Sprintf("invalid use of LSym - NewFuncInfo with Extra of type %T", *s.Extra)) + } + f := new(FuncInfo) + s.Extra = new(interface{}) + *s.Extra = f + return f +} + +// Func returns the *FuncInfo associated with s, or else nil. +func (s *LSym) Func() *FuncInfo { + if s.Extra == nil { + return nil + } + f, _ := (*s.Extra).(*FuncInfo) + return f +} + +// A FileInfo contains extra fields for SDATA symbols backed by files. +// (If LSym.Extra is a *FileInfo, LSym.P == nil.) +type FileInfo struct { + Name string // name of file to read into object file + Size int64 // length of file +} + +// NewFileInfo allocates and returns a FileInfo for LSym. 
+func (s *LSym) NewFileInfo() *FileInfo { + if s.Extra != nil { + panic(fmt.Sprintf("invalid use of LSym - NewFileInfo with Extra of type %T", *s.Extra)) + } + f := new(FileInfo) + s.Extra = new(interface{}) + *s.Extra = f + return f +} + +// File returns the *FileInfo associated with s, or else nil. +func (s *LSym) File() *FileInfo { + if s.Extra == nil { + return nil + } + f, _ := (*s.Extra).(*FileInfo) + return f +} + +type InlMark struct { + // When unwinding from an instruction in an inlined body, mark + // where we should unwind to. + // id records the global inlining id of the inlined body. + // p records the location of an instruction in the parent (inliner) frame. + p *Prog + id int32 +} + +// Mark p as the instruction to set as the pc when +// "unwinding" the inlining global frame id. Usually it should be +// instruction with a file:line at the callsite, and occur +// just before the body of the inlined function. +func (fi *FuncInfo) AddInlMark(p *Prog, id int32) { + fi.InlMarks = append(fi.InlMarks, InlMark{p: p, id: id}) +} + +// AddSpill appends a spill record to the list for FuncInfo fi +func (fi *FuncInfo) AddSpill(s RegSpill) { + fi.spills = append(fi.spills, s) +} + +// Record the type symbol for an auto variable so that the linker +// an emit DWARF type information for the type. +func (fi *FuncInfo) RecordAutoType(gotype *LSym) { + if fi.Autot == nil { + fi.Autot = make(map[*LSym]struct{}) + } + fi.Autot[gotype] = struct{}{} +} + +//go:generate stringer -type ABI + +// ABI is the calling convention of a text symbol. +type ABI uint8 + +const ( + // ABI0 is the stable stack-based ABI. It's important that the + // value of this is "0": we can't distinguish between + // references to data and ABI0 text symbols in assembly code, + // and hence this doesn't distinguish between symbols without + // an ABI and text symbols with ABI0. + ABI0 ABI = iota + + // ABIInternal is the internal ABI that may change between Go + // versions. 
All Go functions use the internal ABI and the + // compiler generates wrappers for calls to and from other + // ABIs. + ABIInternal + + ABICount +) + +// ParseABI converts from a string representation in 'abistr' to the +// corresponding ABI value. Second return value is TRUE if the +// abi string is recognized, FALSE otherwise. +func ParseABI(abistr string) (ABI, bool) { + switch abistr { + default: + return ABI0, false + case "ABI0": + return ABI0, true + case "ABIInternal": + return ABIInternal, true + } +} + +// ABISet is a bit set of ABI values. +type ABISet uint8 + +const ( + // ABISetCallable is the set of all ABIs any function could + // potentially be called using. + ABISetCallable ABISet = (1 << ABI0) | (1 << ABIInternal) +) + +// Ensure ABISet is big enough to hold all ABIs. +var _ ABISet = 1 << (ABICount - 1) + +func ABISetOf(abi ABI) ABISet { + return 1 << abi +} + +func (a *ABISet) Set(abi ABI, value bool) { + if value { + *a |= 1 << abi + } else { + *a &^= 1 << abi + } +} + +func (a *ABISet) Get(abi ABI) bool { + return (*a>>abi)&1 != 0 +} + +func (a ABISet) String() string { + s := "{" + for i := ABI(0); a != 0; i++ { + if a&(1<<i) != 0 { + if s != "{" { + s += "," + } + s += i.String() + a &^= 1 << i + } + } + return s + "}" +} + +// Attribute is a set of symbol attributes. +type Attribute uint32 + +const ( + AttrDuplicateOK Attribute = 1 << iota + AttrCFunc + AttrNoSplit + AttrLeaf + AttrWrapper + AttrNeedCtxt + AttrNoFrame + AttrOnList + AttrStatic + + // MakeTypelink means that the type should have an entry in the typelink table. + AttrMakeTypelink + + // ReflectMethod means the function may call reflect.Type.Method or + // reflect.Type.MethodByName. Matching is imprecise (as reflect.Type + // can be used through a custom interface), so ReflectMethod may be + // set in some cases when the reflect package is not called. + // + // Used by the linker to determine what methods can be pruned. 
+ AttrReflectMethod + + // Local means make the symbol local even when compiling Go code to reference Go + // symbols in other shared libraries, as in this mode symbols are global by + // default. "local" here means in the sense of the dynamic linker, i.e. not + // visible outside of the module (shared library or executable) that contains its + // definition. (When not compiling to support Go shared libraries, all symbols are + // local in this sense unless there is a cgo_export_* directive). + AttrLocal + + // For function symbols; indicates that the specified function was the + // target of an inline during compilation + AttrWasInlined + + // Indexed indicates this symbol has been assigned with an index (when using the + // new object file format). + AttrIndexed + + // Only applied on type descriptor symbols, UsedInIface indicates this type is + // converted to an interface. + // + // Used by the linker to determine what methods can be pruned. + AttrUsedInIface + + // ContentAddressable indicates this is a content-addressable symbol. + AttrContentAddressable + + // ABI wrapper is set for compiler-generated text symbols that + // convert between ABI0 and ABIInternal calling conventions. + AttrABIWrapper + + // IsPcdata indicates this is a pcdata symbol. + AttrPcdata + + // attrABIBase is the value at which the ABI is encoded in + // Attribute. This must be last; all bits after this are + // assumed to be an ABI value. + // + // MUST BE LAST since all bits above this comprise the ABI. 
+ attrABIBase +) + +func (a *Attribute) load() Attribute { return Attribute(atomic.LoadUint32((*uint32)(a))) } + +func (a *Attribute) DuplicateOK() bool { return a.load()&AttrDuplicateOK != 0 } +func (a *Attribute) MakeTypelink() bool { return a.load()&AttrMakeTypelink != 0 } +func (a *Attribute) CFunc() bool { return a.load()&AttrCFunc != 0 } +func (a *Attribute) NoSplit() bool { return a.load()&AttrNoSplit != 0 } +func (a *Attribute) Leaf() bool { return a.load()&AttrLeaf != 0 } +func (a *Attribute) OnList() bool { return a.load()&AttrOnList != 0 } +func (a *Attribute) ReflectMethod() bool { return a.load()&AttrReflectMethod != 0 } +func (a *Attribute) Local() bool { return a.load()&AttrLocal != 0 } +func (a *Attribute) Wrapper() bool { return a.load()&AttrWrapper != 0 } +func (a *Attribute) NeedCtxt() bool { return a.load()&AttrNeedCtxt != 0 } +func (a *Attribute) NoFrame() bool { return a.load()&AttrNoFrame != 0 } +func (a *Attribute) Static() bool { return a.load()&AttrStatic != 0 } +func (a *Attribute) WasInlined() bool { return a.load()&AttrWasInlined != 0 } +func (a *Attribute) Indexed() bool { return a.load()&AttrIndexed != 0 } +func (a *Attribute) UsedInIface() bool { return a.load()&AttrUsedInIface != 0 } +func (a *Attribute) ContentAddressable() bool { return a.load()&AttrContentAddressable != 0 } +func (a *Attribute) ABIWrapper() bool { return a.load()&AttrABIWrapper != 0 } +func (a *Attribute) IsPcdata() bool { return a.load()&AttrPcdata != 0 } + +func (a *Attribute) Set(flag Attribute, value bool) { + for { + v0 := a.load() + v := v0 + if value { + v |= flag + } else { + v &^= flag + } + if atomic.CompareAndSwapUint32((*uint32)(a), uint32(v0), uint32(v)) { + break + } + } +} + +func (a *Attribute) ABI() ABI { return ABI(a.load() / attrABIBase) } +func (a *Attribute) SetABI(abi ABI) { + const mask = 1 // Only one ABI bit for now. 
+ for { + v0 := a.load() + v := (v0 &^ (mask * attrABIBase)) | Attribute(abi)*attrABIBase + if atomic.CompareAndSwapUint32((*uint32)(a), uint32(v0), uint32(v)) { + break + } + } +} + +var textAttrStrings = [...]struct { + bit Attribute + s string +}{ + {bit: AttrDuplicateOK, s: "DUPOK"}, + {bit: AttrMakeTypelink, s: ""}, + {bit: AttrCFunc, s: "CFUNC"}, + {bit: AttrNoSplit, s: "NOSPLIT"}, + {bit: AttrLeaf, s: "LEAF"}, + {bit: AttrOnList, s: ""}, + {bit: AttrReflectMethod, s: "REFLECTMETHOD"}, + {bit: AttrLocal, s: "LOCAL"}, + {bit: AttrWrapper, s: "WRAPPER"}, + {bit: AttrNeedCtxt, s: "NEEDCTXT"}, + {bit: AttrNoFrame, s: "NOFRAME"}, + {bit: AttrStatic, s: "STATIC"}, + {bit: AttrWasInlined, s: ""}, + {bit: AttrIndexed, s: ""}, + {bit: AttrContentAddressable, s: ""}, + {bit: AttrABIWrapper, s: "ABIWRAPPER"}, +} + +// String formats a for printing in as part of a TEXT prog. +func (a Attribute) String() string { + var s string + for _, x := range textAttrStrings { + if a&x.bit != 0 { + if x.s != "" { + s += x.s + "|" + } + a &^= x.bit + } + } + switch a.ABI() { + case ABI0: + case ABIInternal: + s += "ABIInternal|" + a.SetABI(0) // Clear ABI so we don't print below. + } + if a != 0 { + s += fmt.Sprintf("UnknownAttribute(%d)|", a) + } + // Chop off trailing |, if present. + if len(s) > 0 { + s = s[:len(s)-1] + } + return s +} + +// TextAttrString formats the symbol attributes for printing in as part of a TEXT prog. +func (s *LSym) TextAttrString() string { + attr := s.Attribute.String() + if s.Func().FuncFlag&objabi.FuncFlag_TOPFRAME != 0 { + if attr != "" { + attr += "|" + } + attr += "TOPFRAME" + } + return attr +} + +func (s *LSym) String() string { + return s.Name +} + +// The compiler needs *LSym to be assignable to cmd/compile/internal/ssa.Sym. 
+func (*LSym) CanBeAnSSASym() {} +func (*LSym) CanBeAnSSAAux() {} + +type Pcln struct { + // Aux symbols for pcln + Pcsp *LSym + Pcfile *LSym + Pcline *LSym + Pcinline *LSym + Pcdata []*LSym + Funcdata []*LSym + UsedFiles map[goobj.CUFileIndex]struct{} // file indices used while generating pcfile + InlTree InlTree // per-function inlining tree extracted from the global tree +} + +type Reloc struct { + Off int32 + Siz uint8 + Type objabi.RelocType + Add int64 + Sym *LSym +} + +type Auto struct { + Asym *LSym + Aoffset int32 + Name AddrName + Gotype *LSym +} + +// RegSpill provides spill/fill information for a register-resident argument +// to a function. These need spilling/filling in the safepoint/stackgrowth case. +// At the time of fill/spill, the offset must be adjusted by the architecture-dependent +// adjustment to hardware SP that occurs in a call instruction. E.g., for AMD64, +// at Offset+8 because the return address was pushed. +type RegSpill struct { + Addr Addr + Reg int16 + Spill, Unspill As +} + +// Link holds the context for writing object code from a compiler +// to be linker input or for reading that input into the linker. 
+type Link struct { + Headtype objabi.HeadType + Arch *LinkArch + Debugasm int + Debugvlog bool + Debugpcln string + Flag_shared bool + Flag_dynlink bool + Flag_linkshared bool + Flag_optimize bool + Flag_locationlists bool + Flag_noRefName bool // do not include referenced symbol names in object file + Retpoline bool // emit use of retpoline stubs for indirect jmp/call + Flag_maymorestack string // If not "", call this function before stack checks + Bso *bufio.Writer + Pathname string + Pkgpath string // the current package's import path + hashmu sync.Mutex // protects hash, funchash + hash map[string]*LSym // name -> sym mapping + funchash map[string]*LSym // name -> sym mapping for ABIInternal syms + statichash map[string]*LSym // name -> sym mapping for static syms + PosTable src.PosTable + InlTree InlTree // global inlining tree used by gc/inl.go + DwFixups *DwarfFixupTable + Imports []goobj.ImportedPkg + DiagFunc func(string, ...interface{}) + DiagFlush func() + DebugInfo func(fn *LSym, info *LSym, curfn interface{}) ([]dwarf.Scope, dwarf.InlCalls) // if non-nil, curfn is a *gc.Node + GenAbstractFunc func(fn *LSym) + Errors int + + InParallel bool // parallel backend phase in effect + UseBASEntries bool // use Base Address Selection Entries in location lists and PC ranges + IsAsm bool // is the source assembly language, which may contain surprising idioms (e.g., call tables) + + // state for writing objects + Text []*LSym + Data []*LSym + + // Constant symbols (e.g. $i64.*) are data symbols created late + // in the concurrent phase. To ensure a deterministic order, we + // add them to a separate list, sort at the end, and append it + // to Data. + constSyms []*LSym + + // pkgIdx maps package path to index. The index is used for + // symbol reference in the object file. 
+ pkgIdx map[string]int32 + + defs []*LSym // list of defined symbols in the current package + hashed64defs []*LSym // list of defined short (64-bit or less) hashed (content-addressable) symbols + hasheddefs []*LSym // list of defined hashed (content-addressable) symbols + nonpkgdefs []*LSym // list of defined non-package symbols + nonpkgrefs []*LSym // list of referenced non-package symbols + + Fingerprint goobj.FingerprintType // fingerprint of symbol indices, to catch index mismatch +} + +func (ctxt *Link) Diag(format string, args ...interface{}) { + ctxt.Errors++ + ctxt.DiagFunc(format, args...) +} + +func (ctxt *Link) Logf(format string, args ...interface{}) { + fmt.Fprintf(ctxt.Bso, format, args...) + ctxt.Bso.Flush() +} + +// SpillRegisterArgs emits the code to spill register args into whatever +// locations the spill records specify. +func (fi *FuncInfo) SpillRegisterArgs(last *Prog, pa ProgAlloc) *Prog { + // Spill register args. + for _, ra := range fi.spills { + spill := Appendp(last, pa) + spill.As = ra.Spill + spill.From.Type = TYPE_REG + spill.From.Reg = ra.Reg + spill.To = ra.Addr + last = spill + } + return last +} + +// UnspillRegisterArgs emits the code to restore register args from whatever +// locations the spill records specify. +func (fi *FuncInfo) UnspillRegisterArgs(last *Prog, pa ProgAlloc) *Prog { + // Unspill any spilled register args + for _, ra := range fi.spills { + unspill := Appendp(last, pa) + unspill.As = ra.Unspill + unspill.From = ra.Addr + unspill.To.Type = TYPE_REG + unspill.To.Reg = ra.Reg + last = unspill + } + return last +} + +// LinkArch is the definition of a single architecture. +type LinkArch struct { + *sys.Arch + Init func(*Link) + ErrorCheck func(*Link, *LSym) + Preprocess func(*Link, *LSym, ProgAlloc) + Assemble func(*Link, *LSym, ProgAlloc) + Progedit func(*Link, *Prog, ProgAlloc) + UnaryDst map[As]bool // Instruction takes one operand, a destination. 
+ DWARFRegisters map[int16]int16 +} diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go new file mode 100644 index 0000000..88bf714 --- /dev/null +++ b/src/cmd/internal/obj/loong64/a.out.go @@ -0,0 +1,419 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package loong64 + +import ( + "cmd/internal/obj" +) + +//go:generate go run ../stringer.go -i $GOFILE -o anames.go -p loong64 + +const ( + NSNAME = 8 + NSYM = 50 + NREG = 32 // number of general registers + NFREG = 32 // number of floating point registers +) + +const ( + REG_R0 = obj.RBaseLOONG64 + iota // must be a multiple of 32 + REG_R1 + REG_R2 + REG_R3 + REG_R4 + REG_R5 + REG_R6 + REG_R7 + REG_R8 + REG_R9 + REG_R10 + REG_R11 + REG_R12 + REG_R13 + REG_R14 + REG_R15 + REG_R16 + REG_R17 + REG_R18 + REG_R19 + REG_R20 + REG_R21 + REG_R22 + REG_R23 + REG_R24 + REG_R25 + REG_R26 + REG_R27 + REG_R28 + REG_R29 + REG_R30 + REG_R31 + + REG_F0 // must be a multiple of 32 + REG_F1 + REG_F2 + REG_F3 + REG_F4 + REG_F5 + REG_F6 + REG_F7 + REG_F8 + REG_F9 + REG_F10 + REG_F11 + REG_F12 + REG_F13 + REG_F14 + REG_F15 + REG_F16 + REG_F17 + REG_F18 + REG_F19 + REG_F20 + REG_F21 + REG_F22 + REG_F23 + REG_F24 + REG_F25 + REG_F26 + REG_F27 + REG_F28 + REG_F29 + REG_F30 + REG_F31 + + REG_FCSR0 // must be a multiple of 32 + REG_FCSR1 + REG_FCSR2 + REG_FCSR3 // only four registers are needed + REG_FCSR4 + REG_FCSR5 + REG_FCSR6 + REG_FCSR7 + REG_FCSR8 + REG_FCSR9 + REG_FCSR10 + REG_FCSR11 + REG_FCSR12 + REG_FCSR13 + REG_FCSR14 + REG_FCSR15 + REG_FCSR16 + REG_FCSR17 + REG_FCSR18 + REG_FCSR19 + REG_FCSR20 + REG_FCSR21 + REG_FCSR22 + REG_FCSR23 + REG_FCSR24 + REG_FCSR25 + REG_FCSR26 + REG_FCSR27 + REG_FCSR28 + REG_FCSR29 + REG_FCSR30 + REG_FCSR31 + + REG_FCC0 // must be a multiple of 32 + REG_FCC1 + REG_FCC2 + REG_FCC3 + REG_FCC4 + REG_FCC5 + REG_FCC6 + REG_FCC7 // only eight registers 
are needed + REG_FCC8 + REG_FCC9 + REG_FCC10 + REG_FCC11 + REG_FCC12 + REG_FCC13 + REG_FCC14 + REG_FCC15 + REG_FCC16 + REG_FCC17 + REG_FCC18 + REG_FCC19 + REG_FCC20 + REG_FCC21 + REG_FCC22 + REG_FCC23 + REG_FCC24 + REG_FCC25 + REG_FCC26 + REG_FCC27 + REG_FCC28 + REG_FCC29 + REG_FCC30 + REG_FCC31 + + REG_LAST = REG_FCC31 // the last defined register + + REG_SPECIAL = REG_FCSR0 + + REGZERO = REG_R0 // set to zero + REGLINK = REG_R1 + REGSP = REG_R3 + REGRET = REG_R19 + REGARG = -1 // -1 disables passing the first argument in register + REGRT1 = REG_R19 // reserved for runtime, duffzero and duffcopy + REGRT2 = REG_R20 // reserved for runtime, duffcopy + REGCTXT = REG_R29 // context for closures + REGG = REG_R22 // G in loong64 + REGTMP = REG_R30 // used by the assembler + FREGRET = REG_F0 +) + +var LOONG64DWARFRegisters = map[int16]int16{} + +func init() { + // f assigns dwarfregisters[from:to] = (base):(to-from+base) + f := func(from, to, base int16) { + for r := int16(from); r <= to; r++ { + LOONG64DWARFRegisters[r] = (r - from) + base + } + } + f(REG_R0, REG_R31, 0) + f(REG_F0, REG_F31, 32) + +} + +const ( + BIG = 2046 +) + +const ( + // mark flags + LABEL = 1 << 0 + LEAF = 1 << 1 + SYNC = 1 << 2 + BRANCH = 1 << 3 +) + +const ( + C_NONE = iota + C_REG + C_FREG + C_FCSRREG + C_FCCREG + C_ZCON + C_SCON // 12 bit signed + C_UCON // 32 bit signed, low 12 bits 0 + C_ADD0CON + C_AND0CON + C_ADDCON // -0x800 <= v < 0 + C_ANDCON // 0 < v <= 0xFFF + C_LCON // other 32 + C_DCON // other 64 (could subdivide further) + C_SACON // $n(REG) where n <= int12 + C_SECON + C_LACON // $n(REG) where int12 < n <= int32 + C_LECON + C_DACON // $n(REG) where int32 < n + C_STCON // $tlsvar + C_SBRA + C_LBRA + C_SAUTO + C_LAUTO + C_SEXT + C_LEXT + C_ZOREG + C_SOREG + C_LOREG + C_GOK + C_ADDR + C_TLS + C_TEXTSIZE + + C_NCLASS // must be the last +) + +const ( + AABSD = obj.ABaseLoong64 + obj.A_ARCHSPECIFIC + iota + AABSF + AADD + AADDD + AADDF + AADDU + + AADDW + AAND + ABEQ + ABGEZ + ABLEZ + 
ABGTZ + ABLTZ + ABFPF + ABFPT + + ABNE + ABREAK + ACLO + ACLZ + + ACMPEQD + ACMPEQF + + ACMPGED // ACMPGED -> fcmp.sle.d + ACMPGEF // ACMPGEF -> fcmp.sle.s + ACMPGTD // ACMPGTD -> fcmp.slt.d + ACMPGTF // ACMPGTF -> fcmp.slt.s + + ALU12IW + ALU32ID + ALU52ID + APCADDU12I + AJIRL + ABGE + ABLT + ABLTU + ABGEU + + ADIV + ADIVD + ADIVF + ADIVU + ADIVW + + ALL + ALLV + + ALUI + + AMOVB + AMOVBU + + AMOVD + AMOVDF + AMOVDW + AMOVF + AMOVFD + AMOVFW + + AMOVH + AMOVHU + AMOVW + + AMOVWD + AMOVWF + + AMOVWL + AMOVWR + + AMUL + AMULD + AMULF + AMULU + AMULH + AMULHU + AMULW + ANEGD + ANEGF + + ANEGW + ANEGV + + ANOOP // hardware nop + ANOR + AOR + AREM + AREMU + + ARFE + + ASC + ASCV + + ASGT + ASGTU + + ASLL + ASQRTD + ASQRTF + ASRA + ASRL + AROTR + ASUB + ASUBD + ASUBF + + ASUBU + ASUBW + ADBAR + ASYSCALL + + ATEQ + ATNE + + AWORD + + AXOR + + AMASKEQZ + AMASKNEZ + + // 64-bit + AMOVV + AMOVVL + AMOVVR + + ASLLV + ASRAV + ASRLV + AROTRV + ADIVV + ADIVVU + + AREMV + AREMVU + + AMULV + AMULVU + AMULHV + AMULHVU + AADDV + AADDVU + ASUBV + ASUBVU + + // 64-bit FP + ATRUNCFV + ATRUNCDV + ATRUNCFW + ATRUNCDW + + AMOVWU + AMOVFV + AMOVDV + AMOVVF + AMOVVD + + ALAST + + // aliases + AJMP = obj.AJMP + AJAL = obj.ACALL + ARET = obj.ARET +) + +func init() { + // The asm encoder generally assumes that the lowest 5 bits of the + // REG_XX constants match the machine instruction encoding, i.e. + // the lowest 5 bits is the register number. + // Check this here. 
+ if REG_R0%32 != 0 { + panic("REG_R0 is not a multiple of 32") + } + if REG_F0%32 != 0 { + panic("REG_F0 is not a multiple of 32") + } + if REG_FCSR0%32 != 0 { + panic("REG_FCSR0 is not a multiple of 32") + } + if REG_FCC0%32 != 0 { + panic("REG_FCC0 is not a multiple of 32") + } +} diff --git a/src/cmd/internal/obj/loong64/anames.go b/src/cmd/internal/obj/loong64/anames.go new file mode 100644 index 0000000..20e7465 --- /dev/null +++ b/src/cmd/internal/obj/loong64/anames.go @@ -0,0 +1,134 @@ +// Code generated by stringer -i a.out.go -o anames.go -p loong64; DO NOT EDIT. + +package loong64 + +import "cmd/internal/obj" + +var Anames = []string{ + obj.A_ARCHSPECIFIC: "ABSD", + "ABSF", + "ADD", + "ADDD", + "ADDF", + "ADDU", + "ADDW", + "AND", + "BEQ", + "BGEZ", + "BLEZ", + "BGTZ", + "BLTZ", + "BFPF", + "BFPT", + "BNE", + "BREAK", + "CLO", + "CLZ", + "CMPEQD", + "CMPEQF", + "CMPGED", + "CMPGEF", + "CMPGTD", + "CMPGTF", + "LU12IW", + "LU32ID", + "LU52ID", + "PCADDU12I", + "JIRL", + "BGE", + "BLT", + "BLTU", + "BGEU", + "DIV", + "DIVD", + "DIVF", + "DIVU", + "DIVW", + "LL", + "LLV", + "LUI", + "MOVB", + "MOVBU", + "MOVD", + "MOVDF", + "MOVDW", + "MOVF", + "MOVFD", + "MOVFW", + "MOVH", + "MOVHU", + "MOVW", + "MOVWD", + "MOVWF", + "MOVWL", + "MOVWR", + "MUL", + "MULD", + "MULF", + "MULU", + "MULH", + "MULHU", + "MULW", + "NEGD", + "NEGF", + "NEGW", + "NEGV", + "NOOP", + "NOR", + "OR", + "REM", + "REMU", + "RFE", + "SC", + "SCV", + "SGT", + "SGTU", + "SLL", + "SQRTD", + "SQRTF", + "SRA", + "SRL", + "ROTR", + "SUB", + "SUBD", + "SUBF", + "SUBU", + "SUBW", + "DBAR", + "SYSCALL", + "TEQ", + "TNE", + "WORD", + "XOR", + "MASKEQZ", + "MASKNEZ", + "MOVV", + "MOVVL", + "MOVVR", + "SLLV", + "SRAV", + "SRLV", + "ROTRV", + "DIVV", + "DIVVU", + "REMV", + "REMVU", + "MULV", + "MULVU", + "MULHV", + "MULHVU", + "ADDV", + "ADDVU", + "SUBV", + "SUBVU", + "TRUNCFV", + "TRUNCDV", + "TRUNCFW", + "TRUNCDW", + "MOVWU", + "MOVFV", + "MOVDV", + "MOVVF", + "MOVVD", + "LAST", +} diff --git 
a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go new file mode 100644 index 0000000..df3e9bf --- /dev/null +++ b/src/cmd/internal/obj/loong64/asm.go @@ -0,0 +1,1980 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package loong64 + +import ( + "cmd/internal/obj" + "cmd/internal/objabi" + "cmd/internal/sys" + "fmt" + "log" + "sort" +) + +// ctxt0 holds state while assembling a single function. +// Each function gets a fresh ctxt0. +// This allows for multiple functions to be safely concurrently assembled. +type ctxt0 struct { + ctxt *obj.Link + newprog obj.ProgAlloc + cursym *obj.LSym + autosize int32 + instoffset int64 + pc int64 +} + +// Instruction layout. + +const ( + FuncAlign = 4 +) + +type Optab struct { + as obj.As + a1 uint8 + a2 uint8 + a3 uint8 + type_ int8 + size int8 + param int16 + family sys.ArchFamily + flag uint8 +} + +const ( + NOTUSETMP = 1 << iota // p expands to multiple instructions, but does NOT use REGTMP +) + +var optab = []Optab{ + {obj.ATEXT, C_ADDR, C_NONE, C_TEXTSIZE, 0, 0, 0, 0, 0}, + + {AMOVW, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0}, + {AMOVV, C_REG, C_NONE, C_REG, 1, 4, 0, sys.Loong64, 0}, + {AMOVB, C_REG, C_NONE, C_REG, 12, 8, 0, 0, NOTUSETMP}, + {AMOVBU, C_REG, C_NONE, C_REG, 13, 4, 0, 0, 0}, + {AMOVWU, C_REG, C_NONE, C_REG, 14, 8, 0, sys.Loong64, NOTUSETMP}, + + {ASUB, C_REG, C_REG, C_REG, 2, 4, 0, 0, 0}, + {ASUBV, C_REG, C_REG, C_REG, 2, 4, 0, sys.Loong64, 0}, + {AADD, C_REG, C_REG, C_REG, 2, 4, 0, 0, 0}, + {AADDV, C_REG, C_REG, C_REG, 2, 4, 0, sys.Loong64, 0}, + {AAND, C_REG, C_REG, C_REG, 2, 4, 0, 0, 0}, + {ASUB, C_REG, C_NONE, C_REG, 2, 4, 0, 0, 0}, + {ASUBV, C_REG, C_NONE, C_REG, 2, 4, 0, sys.Loong64, 0}, + {AADD, C_REG, C_NONE, C_REG, 2, 4, 0, 0, 0}, + {AADDV, C_REG, C_NONE, C_REG, 2, 4, 0, sys.Loong64, 0}, + {AAND, C_REG, C_NONE, C_REG, 2, 4, 0, 0, 0}, + {ANEGW, C_REG, C_NONE, C_REG, 
2, 4, 0, 0, 0}, + {ANEGV, C_REG, C_NONE, C_REG, 2, 4, 0, sys.Loong64, 0}, + {AMASKEQZ, C_REG, C_REG, C_REG, 2, 4, 0, 0, 0}, + + {ASLL, C_REG, C_NONE, C_REG, 9, 4, 0, 0, 0}, + {ASLL, C_REG, C_REG, C_REG, 9, 4, 0, 0, 0}, + {ASLLV, C_REG, C_NONE, C_REG, 9, 4, 0, sys.Loong64, 0}, + {ASLLV, C_REG, C_REG, C_REG, 9, 4, 0, sys.Loong64, 0}, + {ACLO, C_REG, C_NONE, C_REG, 9, 4, 0, 0, 0}, + + {AADDF, C_FREG, C_NONE, C_FREG, 32, 4, 0, 0, 0}, + {AADDF, C_FREG, C_REG, C_FREG, 32, 4, 0, 0, 0}, + {ACMPEQF, C_FREG, C_REG, C_NONE, 32, 4, 0, 0, 0}, + {AABSF, C_FREG, C_NONE, C_FREG, 33, 4, 0, 0, 0}, + {AMOVVF, C_FREG, C_NONE, C_FREG, 33, 4, 0, sys.Loong64, 0}, + {AMOVF, C_FREG, C_NONE, C_FREG, 33, 4, 0, 0, 0}, + {AMOVD, C_FREG, C_NONE, C_FREG, 33, 4, 0, 0, 0}, + + {AMOVW, C_REG, C_NONE, C_SEXT, 7, 4, 0, sys.Loong64, 0}, + {AMOVWU, C_REG, C_NONE, C_SEXT, 7, 4, 0, sys.Loong64, 0}, + {AMOVV, C_REG, C_NONE, C_SEXT, 7, 4, 0, sys.Loong64, 0}, + {AMOVB, C_REG, C_NONE, C_SEXT, 7, 4, 0, sys.Loong64, 0}, + {AMOVBU, C_REG, C_NONE, C_SEXT, 7, 4, 0, sys.Loong64, 0}, + {AMOVWL, C_REG, C_NONE, C_SEXT, 7, 4, 0, sys.Loong64, 0}, + {AMOVVL, C_REG, C_NONE, C_SEXT, 7, 4, 0, sys.Loong64, 0}, + {AMOVW, C_REG, C_NONE, C_SAUTO, 7, 4, REGSP, 0, 0}, + {AMOVWU, C_REG, C_NONE, C_SAUTO, 7, 4, REGSP, sys.Loong64, 0}, + {AMOVV, C_REG, C_NONE, C_SAUTO, 7, 4, REGSP, sys.Loong64, 0}, + {AMOVB, C_REG, C_NONE, C_SAUTO, 7, 4, REGSP, 0, 0}, + {AMOVBU, C_REG, C_NONE, C_SAUTO, 7, 4, REGSP, 0, 0}, + {AMOVWL, C_REG, C_NONE, C_SAUTO, 7, 4, REGSP, 0, 0}, + {AMOVVL, C_REG, C_NONE, C_SAUTO, 7, 4, REGSP, sys.Loong64, 0}, + {AMOVW, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, 0, 0}, + {AMOVWU, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, sys.Loong64, 0}, + {AMOVV, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, sys.Loong64, 0}, + {AMOVB, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, 0, 0}, + {AMOVBU, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, 0, 0}, + {AMOVWL, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, 0, 0}, + {AMOVVL, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, 
sys.Loong64, 0}, + {ASC, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, 0, 0}, + {ASCV, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, sys.Loong64, 0}, + + {AMOVW, C_SEXT, C_NONE, C_REG, 8, 4, 0, sys.Loong64, 0}, + {AMOVWU, C_SEXT, C_NONE, C_REG, 8, 4, 0, sys.Loong64, 0}, + {AMOVV, C_SEXT, C_NONE, C_REG, 8, 4, 0, sys.Loong64, 0}, + {AMOVB, C_SEXT, C_NONE, C_REG, 8, 4, 0, sys.Loong64, 0}, + {AMOVBU, C_SEXT, C_NONE, C_REG, 8, 4, 0, sys.Loong64, 0}, + {AMOVWL, C_SEXT, C_NONE, C_REG, 8, 4, 0, sys.Loong64, 0}, + {AMOVVL, C_SEXT, C_NONE, C_REG, 8, 4, 0, sys.Loong64, 0}, + {AMOVW, C_SAUTO, C_NONE, C_REG, 8, 4, REGSP, 0, 0}, + {AMOVWU, C_SAUTO, C_NONE, C_REG, 8, 4, REGSP, sys.Loong64, 0}, + {AMOVV, C_SAUTO, C_NONE, C_REG, 8, 4, REGSP, sys.Loong64, 0}, + {AMOVB, C_SAUTO, C_NONE, C_REG, 8, 4, REGSP, 0, 0}, + {AMOVBU, C_SAUTO, C_NONE, C_REG, 8, 4, REGSP, 0, 0}, + {AMOVWL, C_SAUTO, C_NONE, C_REG, 8, 4, REGSP, 0, 0}, + {AMOVVL, C_SAUTO, C_NONE, C_REG, 8, 4, REGSP, sys.Loong64, 0}, + {AMOVW, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, 0, 0}, + {AMOVWU, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, sys.Loong64, 0}, + {AMOVV, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, sys.Loong64, 0}, + {AMOVB, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, 0, 0}, + {AMOVBU, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, 0, 0}, + {AMOVWL, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, 0, 0}, + {AMOVVL, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, sys.Loong64, 0}, + {ALL, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, 0, 0}, + {ALLV, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, sys.Loong64, 0}, + + {AMOVW, C_REG, C_NONE, C_LEXT, 35, 12, 0, sys.Loong64, 0}, + {AMOVWU, C_REG, C_NONE, C_LEXT, 35, 12, 0, sys.Loong64, 0}, + {AMOVV, C_REG, C_NONE, C_LEXT, 35, 12, 0, sys.Loong64, 0}, + {AMOVB, C_REG, C_NONE, C_LEXT, 35, 12, 0, sys.Loong64, 0}, + {AMOVBU, C_REG, C_NONE, C_LEXT, 35, 12, 0, sys.Loong64, 0}, + {AMOVW, C_REG, C_NONE, C_LAUTO, 35, 12, REGSP, 0, 0}, + {AMOVWU, C_REG, C_NONE, C_LAUTO, 35, 12, REGSP, sys.Loong64, 0}, + {AMOVV, C_REG, C_NONE, C_LAUTO, 35, 12, REGSP, 
sys.Loong64, 0}, + {AMOVB, C_REG, C_NONE, C_LAUTO, 35, 12, REGSP, 0, 0}, + {AMOVBU, C_REG, C_NONE, C_LAUTO, 35, 12, REGSP, 0, 0}, + {AMOVW, C_REG, C_NONE, C_LOREG, 35, 12, REGZERO, 0, 0}, + {AMOVWU, C_REG, C_NONE, C_LOREG, 35, 12, REGZERO, sys.Loong64, 0}, + {AMOVV, C_REG, C_NONE, C_LOREG, 35, 12, REGZERO, sys.Loong64, 0}, + {AMOVB, C_REG, C_NONE, C_LOREG, 35, 12, REGZERO, 0, 0}, + {AMOVBU, C_REG, C_NONE, C_LOREG, 35, 12, REGZERO, 0, 0}, + {ASC, C_REG, C_NONE, C_LOREG, 35, 12, REGZERO, 0, 0}, + {AMOVW, C_REG, C_NONE, C_ADDR, 50, 8, 0, 0, 0}, + {AMOVW, C_REG, C_NONE, C_ADDR, 50, 8, 0, sys.Loong64, 0}, + {AMOVWU, C_REG, C_NONE, C_ADDR, 50, 8, 0, sys.Loong64, 0}, + {AMOVV, C_REG, C_NONE, C_ADDR, 50, 8, 0, sys.Loong64, 0}, + {AMOVB, C_REG, C_NONE, C_ADDR, 50, 8, 0, 0, 0}, + {AMOVB, C_REG, C_NONE, C_ADDR, 50, 8, 0, sys.Loong64, 0}, + {AMOVBU, C_REG, C_NONE, C_ADDR, 50, 8, 0, 0, 0}, + {AMOVBU, C_REG, C_NONE, C_ADDR, 50, 8, 0, sys.Loong64, 0}, + {AMOVW, C_REG, C_NONE, C_TLS, 53, 16, 0, 0, 0}, + {AMOVWU, C_REG, C_NONE, C_TLS, 53, 16, 0, sys.Loong64, 0}, + {AMOVV, C_REG, C_NONE, C_TLS, 53, 16, 0, sys.Loong64, 0}, + {AMOVB, C_REG, C_NONE, C_TLS, 53, 16, 0, 0, 0}, + {AMOVBU, C_REG, C_NONE, C_TLS, 53, 16, 0, 0, 0}, + + {AMOVW, C_LEXT, C_NONE, C_REG, 36, 12, 0, sys.Loong64, 0}, + {AMOVWU, C_LEXT, C_NONE, C_REG, 36, 12, 0, sys.Loong64, 0}, + {AMOVV, C_LEXT, C_NONE, C_REG, 36, 12, 0, sys.Loong64, 0}, + {AMOVB, C_LEXT, C_NONE, C_REG, 36, 12, 0, sys.Loong64, 0}, + {AMOVBU, C_LEXT, C_NONE, C_REG, 36, 12, 0, sys.Loong64, 0}, + {AMOVW, C_LAUTO, C_NONE, C_REG, 36, 12, REGSP, 0, 0}, + {AMOVWU, C_LAUTO, C_NONE, C_REG, 36, 12, REGSP, sys.Loong64, 0}, + {AMOVV, C_LAUTO, C_NONE, C_REG, 36, 12, REGSP, sys.Loong64, 0}, + {AMOVB, C_LAUTO, C_NONE, C_REG, 36, 12, REGSP, 0, 0}, + {AMOVBU, C_LAUTO, C_NONE, C_REG, 36, 12, REGSP, 0, 0}, + {AMOVW, C_LOREG, C_NONE, C_REG, 36, 12, REGZERO, 0, 0}, + {AMOVWU, C_LOREG, C_NONE, C_REG, 36, 12, REGZERO, sys.Loong64, 0}, + {AMOVV, C_LOREG, C_NONE, C_REG, 36, 
12, REGZERO, sys.Loong64, 0}, + {AMOVB, C_LOREG, C_NONE, C_REG, 36, 12, REGZERO, 0, 0}, + {AMOVBU, C_LOREG, C_NONE, C_REG, 36, 12, REGZERO, 0, 0}, + {AMOVW, C_ADDR, C_NONE, C_REG, 51, 8, 0, 0, 0}, + {AMOVW, C_ADDR, C_NONE, C_REG, 51, 8, 0, sys.Loong64, 0}, + {AMOVWU, C_ADDR, C_NONE, C_REG, 51, 8, 0, sys.Loong64, 0}, + {AMOVV, C_ADDR, C_NONE, C_REG, 51, 8, 0, sys.Loong64, 0}, + {AMOVB, C_ADDR, C_NONE, C_REG, 51, 8, 0, 0, 0}, + {AMOVB, C_ADDR, C_NONE, C_REG, 51, 8, 0, sys.Loong64, 0}, + {AMOVBU, C_ADDR, C_NONE, C_REG, 51, 8, 0, 0, 0}, + {AMOVBU, C_ADDR, C_NONE, C_REG, 51, 8, 0, sys.Loong64, 0}, + {AMOVW, C_TLS, C_NONE, C_REG, 54, 16, 0, 0, 0}, + {AMOVWU, C_TLS, C_NONE, C_REG, 54, 16, 0, sys.Loong64, 0}, + {AMOVV, C_TLS, C_NONE, C_REG, 54, 16, 0, sys.Loong64, 0}, + {AMOVB, C_TLS, C_NONE, C_REG, 54, 16, 0, 0, 0}, + {AMOVBU, C_TLS, C_NONE, C_REG, 54, 16, 0, 0, 0}, + + {AMOVW, C_SECON, C_NONE, C_REG, 3, 4, 0, sys.Loong64, 0}, + {AMOVV, C_SECON, C_NONE, C_REG, 3, 4, 0, sys.Loong64, 0}, + {AMOVW, C_SACON, C_NONE, C_REG, 3, 4, REGSP, 0, 0}, + {AMOVV, C_SACON, C_NONE, C_REG, 3, 4, REGSP, sys.Loong64, 0}, + {AMOVW, C_LECON, C_NONE, C_REG, 52, 8, 0, 0, NOTUSETMP}, + {AMOVW, C_LECON, C_NONE, C_REG, 52, 8, 0, sys.Loong64, NOTUSETMP}, + {AMOVV, C_LECON, C_NONE, C_REG, 52, 8, 0, sys.Loong64, NOTUSETMP}, + + {AMOVW, C_LACON, C_NONE, C_REG, 26, 12, REGSP, 0, 0}, + {AMOVV, C_LACON, C_NONE, C_REG, 26, 12, REGSP, sys.Loong64, 0}, + {AMOVW, C_ADDCON, C_NONE, C_REG, 3, 4, REGZERO, 0, 0}, + {AMOVV, C_ADDCON, C_NONE, C_REG, 3, 4, REGZERO, sys.Loong64, 0}, + {AMOVW, C_ANDCON, C_NONE, C_REG, 3, 4, REGZERO, 0, 0}, + {AMOVV, C_ANDCON, C_NONE, C_REG, 3, 4, REGZERO, sys.Loong64, 0}, + {AMOVW, C_STCON, C_NONE, C_REG, 55, 12, 0, 0, 0}, + {AMOVV, C_STCON, C_NONE, C_REG, 55, 12, 0, sys.Loong64, 0}, + + {AMOVW, C_UCON, C_NONE, C_REG, 24, 4, 0, 0, 0}, + {AMOVV, C_UCON, C_NONE, C_REG, 24, 4, 0, sys.Loong64, 0}, + {AMOVW, C_LCON, C_NONE, C_REG, 19, 8, 0, 0, NOTUSETMP}, + {AMOVV, C_LCON, C_NONE, C_REG, 
19, 8, 0, sys.Loong64, NOTUSETMP}, + {AMOVV, C_DCON, C_NONE, C_REG, 59, 16, 0, sys.Loong64, NOTUSETMP}, + + {AMUL, C_REG, C_NONE, C_REG, 2, 4, 0, 0, 0}, + {AMUL, C_REG, C_REG, C_REG, 2, 4, 0, 0, 0}, + {AMULV, C_REG, C_NONE, C_REG, 2, 4, 0, sys.Loong64, 0}, + {AMULV, C_REG, C_REG, C_REG, 2, 4, 0, sys.Loong64, 0}, + + {AADD, C_ADD0CON, C_REG, C_REG, 4, 4, 0, 0, 0}, + {AADD, C_ADD0CON, C_NONE, C_REG, 4, 4, 0, 0, 0}, + {AADD, C_ANDCON, C_REG, C_REG, 10, 8, 0, 0, 0}, + {AADD, C_ANDCON, C_NONE, C_REG, 10, 8, 0, 0, 0}, + + {AADDV, C_ADD0CON, C_REG, C_REG, 4, 4, 0, sys.Loong64, 0}, + {AADDV, C_ADD0CON, C_NONE, C_REG, 4, 4, 0, sys.Loong64, 0}, + {AADDV, C_ANDCON, C_REG, C_REG, 10, 8, 0, sys.Loong64, 0}, + {AADDV, C_ANDCON, C_NONE, C_REG, 10, 8, 0, sys.Loong64, 0}, + + {AAND, C_AND0CON, C_REG, C_REG, 4, 4, 0, 0, 0}, + {AAND, C_AND0CON, C_NONE, C_REG, 4, 4, 0, 0, 0}, + {AAND, C_ADDCON, C_REG, C_REG, 10, 8, 0, 0, 0}, + {AAND, C_ADDCON, C_NONE, C_REG, 10, 8, 0, 0, 0}, + + {AADD, C_UCON, C_REG, C_REG, 25, 8, 0, 0, 0}, + {AADD, C_UCON, C_NONE, C_REG, 25, 8, 0, 0, 0}, + {AADDV, C_UCON, C_REG, C_REG, 25, 8, 0, sys.Loong64, 0}, + {AADDV, C_UCON, C_NONE, C_REG, 25, 8, 0, sys.Loong64, 0}, + {AAND, C_UCON, C_REG, C_REG, 25, 8, 0, 0, 0}, + {AAND, C_UCON, C_NONE, C_REG, 25, 8, 0, 0, 0}, + + {AADD, C_LCON, C_NONE, C_REG, 23, 12, 0, 0, 0}, + {AADDV, C_LCON, C_NONE, C_REG, 23, 12, 0, sys.Loong64, 0}, + {AAND, C_LCON, C_NONE, C_REG, 23, 12, 0, 0, 0}, + {AADD, C_LCON, C_REG, C_REG, 23, 12, 0, 0, 0}, + {AADDV, C_LCON, C_REG, C_REG, 23, 12, 0, sys.Loong64, 0}, + {AAND, C_LCON, C_REG, C_REG, 23, 12, 0, 0, 0}, + + {AADDV, C_DCON, C_NONE, C_REG, 60, 20, 0, sys.Loong64, 0}, + {AADDV, C_DCON, C_REG, C_REG, 60, 20, 0, sys.Loong64, 0}, + + {ASLL, C_SCON, C_REG, C_REG, 16, 4, 0, 0, 0}, + {ASLL, C_SCON, C_NONE, C_REG, 16, 4, 0, 0, 0}, + + {ASLLV, C_SCON, C_REG, C_REG, 16, 4, 0, sys.Loong64, 0}, + {ASLLV, C_SCON, C_NONE, C_REG, 16, 4, 0, sys.Loong64, 0}, + + {ASYSCALL, C_NONE, C_NONE, C_NONE, 5, 4, 0, 0, 
0}, + + {ABEQ, C_REG, C_REG, C_SBRA, 6, 4, 0, 0, 0}, + {ABEQ, C_REG, C_NONE, C_SBRA, 6, 4, 0, 0, 0}, + {ABLEZ, C_REG, C_NONE, C_SBRA, 6, 4, 0, 0, 0}, + {ABFPT, C_NONE, C_NONE, C_SBRA, 6, 4, 0, 0, NOTUSETMP}, + + {AJMP, C_NONE, C_NONE, C_LBRA, 11, 4, 0, 0, 0}, // b + {AJAL, C_NONE, C_NONE, C_LBRA, 11, 4, 0, 0, 0}, // bl + + {AJMP, C_NONE, C_NONE, C_ZOREG, 18, 4, REGZERO, 0, 0}, // jirl r0, rj, 0 + {AJAL, C_NONE, C_NONE, C_ZOREG, 18, 4, REGLINK, 0, 0}, // jirl r1, rj, 0 + + {AMOVW, C_SEXT, C_NONE, C_FREG, 27, 4, 0, sys.Loong64, 0}, + {AMOVF, C_SEXT, C_NONE, C_FREG, 27, 4, 0, sys.Loong64, 0}, + {AMOVD, C_SEXT, C_NONE, C_FREG, 27, 4, 0, sys.Loong64, 0}, + {AMOVW, C_SAUTO, C_NONE, C_FREG, 27, 4, REGSP, sys.Loong64, 0}, + {AMOVF, C_SAUTO, C_NONE, C_FREG, 27, 4, REGSP, 0, 0}, + {AMOVD, C_SAUTO, C_NONE, C_FREG, 27, 4, REGSP, 0, 0}, + {AMOVW, C_SOREG, C_NONE, C_FREG, 27, 4, REGZERO, sys.Loong64, 0}, + {AMOVF, C_SOREG, C_NONE, C_FREG, 27, 4, REGZERO, 0, 0}, + {AMOVD, C_SOREG, C_NONE, C_FREG, 27, 4, REGZERO, 0, 0}, + + {AMOVW, C_LEXT, C_NONE, C_FREG, 27, 12, 0, sys.Loong64, 0}, + {AMOVF, C_LEXT, C_NONE, C_FREG, 27, 12, 0, sys.Loong64, 0}, + {AMOVD, C_LEXT, C_NONE, C_FREG, 27, 12, 0, sys.Loong64, 0}, + {AMOVW, C_LAUTO, C_NONE, C_FREG, 27, 12, REGSP, sys.Loong64, 0}, + {AMOVF, C_LAUTO, C_NONE, C_FREG, 27, 12, REGSP, 0, 0}, + {AMOVD, C_LAUTO, C_NONE, C_FREG, 27, 12, REGSP, 0, 0}, + {AMOVW, C_LOREG, C_NONE, C_FREG, 27, 12, REGZERO, sys.Loong64, 0}, + {AMOVF, C_LOREG, C_NONE, C_FREG, 27, 12, REGZERO, 0, 0}, + {AMOVD, C_LOREG, C_NONE, C_FREG, 27, 12, REGZERO, 0, 0}, + {AMOVF, C_ADDR, C_NONE, C_FREG, 51, 8, 0, 0, 0}, + {AMOVF, C_ADDR, C_NONE, C_FREG, 51, 8, 0, sys.Loong64, 0}, + {AMOVD, C_ADDR, C_NONE, C_FREG, 51, 8, 0, 0, 0}, + {AMOVD, C_ADDR, C_NONE, C_FREG, 51, 8, 0, sys.Loong64, 0}, + + {AMOVW, C_FREG, C_NONE, C_SEXT, 28, 4, 0, sys.Loong64, 0}, + {AMOVF, C_FREG, C_NONE, C_SEXT, 28, 4, 0, sys.Loong64, 0}, + {AMOVD, C_FREG, C_NONE, C_SEXT, 28, 4, 0, sys.Loong64, 0}, + {AMOVW, 
C_FREG, C_NONE, C_SAUTO, 28, 4, REGSP, sys.Loong64, 0}, + {AMOVF, C_FREG, C_NONE, C_SAUTO, 28, 4, REGSP, 0, 0}, + {AMOVD, C_FREG, C_NONE, C_SAUTO, 28, 4, REGSP, 0, 0}, + {AMOVW, C_FREG, C_NONE, C_SOREG, 28, 4, REGZERO, sys.Loong64, 0}, + {AMOVF, C_FREG, C_NONE, C_SOREG, 28, 4, REGZERO, 0, 0}, + {AMOVD, C_FREG, C_NONE, C_SOREG, 28, 4, REGZERO, 0, 0}, + + {AMOVW, C_FREG, C_NONE, C_LEXT, 28, 12, 0, sys.Loong64, 0}, + {AMOVF, C_FREG, C_NONE, C_LEXT, 28, 12, 0, sys.Loong64, 0}, + {AMOVD, C_FREG, C_NONE, C_LEXT, 28, 12, 0, sys.Loong64, 0}, + {AMOVW, C_FREG, C_NONE, C_LAUTO, 28, 12, REGSP, sys.Loong64, 0}, + {AMOVF, C_FREG, C_NONE, C_LAUTO, 28, 12, REGSP, 0, 0}, + {AMOVD, C_FREG, C_NONE, C_LAUTO, 28, 12, REGSP, 0, 0}, + {AMOVW, C_FREG, C_NONE, C_LOREG, 28, 12, REGZERO, sys.Loong64, 0}, + {AMOVF, C_FREG, C_NONE, C_LOREG, 28, 12, REGZERO, 0, 0}, + {AMOVD, C_FREG, C_NONE, C_LOREG, 28, 12, REGZERO, 0, 0}, + {AMOVF, C_FREG, C_NONE, C_ADDR, 50, 8, 0, 0, 0}, + {AMOVF, C_FREG, C_NONE, C_ADDR, 50, 8, 0, sys.Loong64, 0}, + {AMOVD, C_FREG, C_NONE, C_ADDR, 50, 8, 0, 0, 0}, + {AMOVD, C_FREG, C_NONE, C_ADDR, 50, 8, 0, sys.Loong64, 0}, + + {AMOVW, C_REG, C_NONE, C_FREG, 30, 4, 0, 0, 0}, + {AMOVW, C_FREG, C_NONE, C_REG, 31, 4, 0, 0, 0}, + {AMOVV, C_REG, C_NONE, C_FREG, 47, 4, 0, sys.Loong64, 0}, + {AMOVV, C_FREG, C_NONE, C_REG, 48, 4, 0, sys.Loong64, 0}, + + {AMOVW, C_ADDCON, C_NONE, C_FREG, 34, 8, 0, sys.Loong64, 0}, + {AMOVW, C_ANDCON, C_NONE, C_FREG, 34, 8, 0, sys.Loong64, 0}, + + {AWORD, C_LCON, C_NONE, C_NONE, 40, 4, 0, 0, 0}, + {AWORD, C_DCON, C_NONE, C_NONE, 61, 4, 0, 0, 0}, + + {ATEQ, C_SCON, C_REG, C_REG, 15, 8, 0, 0, 0}, + {ATEQ, C_SCON, C_NONE, C_REG, 15, 8, 0, 0, 0}, + + {ABREAK, C_REG, C_NONE, C_SEXT, 7, 4, 0, sys.Loong64, 0}, // really CACHE instruction + {ABREAK, C_REG, C_NONE, C_SAUTO, 7, 4, REGSP, sys.Loong64, 0}, + {ABREAK, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, sys.Loong64, 0}, + {ABREAK, C_NONE, C_NONE, C_NONE, 5, 4, 0, 0, 0}, + + {obj.AUNDEF, C_NONE, C_NONE, C_NONE, 
// span0 lays out and encodes the function held in cursym.
//
// It works in three phases:
//  1. assign a PC to every Prog from the size recorded in its optab entry;
//  2. repeatedly rewrite conditional branches (optab type 6) whose target
//     is too far away, re-assigning PCs until no more rewrites are needed;
//  3. grow cursym.P to the final size and emit the machine words produced
//     by asmout, using the architecture's byte order.
//
// Finally it asks obj.MarkUnsafePoints to mark the instruction sequences
// that must not be asynchronously preempted.
//
// Functions without a body (p == nil or p.Link == nil) are left untouched.
func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
	if ctxt.Retpoline {
		ctxt.Diag("-spectre=ret not supported on loong64")
		ctxt.Retpoline = false // don't keep printing
	}

	p := cursym.Func().Text
	if p == nil || p.Link == nil { // handle external functions and ELF section symbols
		return
	}

	// autosize is the frame size declared on the TEXT Prog plus the
	// architecture's fixed frame size.
	c := ctxt0{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset + ctxt.Arch.FixedFrameSize)}

	// oprange is filled in by buildop; an empty AOR slot means it never ran.
	if oprange[AOR&obj.AMask] == nil {
		c.ctxt.Diag("loong64 ops not initialized, call loong64.buildop first")
	}

	// Phase 1: first PC assignment. The TEXT Prog itself sits at PC 0.
	pc := int64(0)
	p.Pc = pc

	var m int
	var o *Optab
	for p = p.Link; p != nil; p = p.Link {
		p.Pc = pc
		o = c.oplook(p)
		m = int(o.size)
		if m == 0 {
			// Only NOP, FUNCDATA and PCDATA may legitimately occupy no space.
			if p.As != obj.ANOP && p.As != obj.AFUNCDATA && p.As != obj.APCDATA {
				c.ctxt.Diag("zero-width instruction\n%v", p)
			}
			continue
		}

		pc += int64(m)
	}

	c.cursym.Size = pc

	/*
	 * if any procedure is large enough to
	 * generate a large SBRA branch, then
	 * generate extra passes putting branches
	 * around jmps to fix. this is rare.
	 */
	bflag := 1

	var otxt int64
	var q *obj.Prog
	// Phase 2: iterate until a pass makes no change (bflag stays 0).
	for bflag != 0 {
		bflag = 0
		pc = 0
		for p = c.cursym.Func().Text.Link; p != nil; p = p.Link {
			p.Pc = pc
			o = c.oplook(p)

			// very large conditional branches
			if o.type_ == 6 && p.To.Target() != nil {
				// otxt is the byte distance from p to its target; anything
				// within 10 bytes of +/-(1<<17) is treated as out of range.
				otxt = p.To.Target().Pc - pc
				if otxt < -(1<<17)+10 || otxt >= (1<<17)-10 {
					// First new JMP: goes to the original far target, and
					// p is retargeted to branch to this JMP instead.
					q = c.newprog()
					q.Link = p.Link
					p.Link = q
					q.As = AJMP
					q.Pos = p.Pos
					q.To.Type = obj.TYPE_BRANCH
					q.To.SetTarget(p.To.Target())
					p.To.SetTarget(q)
					// Second new JMP: inserted directly after p, it skips
					// over the first JMP to the instruction that originally
					// followed p (q.Link.Link).
					q = c.newprog()
					q.Link = p.Link
					p.Link = q
					q.As = AJMP
					q.Pos = p.Pos
					q.To.Type = obj.TYPE_BRANCH
					q.To.SetTarget(q.Link.Link)

					// addnop is defined elsewhere; presumably it inserts a
					// NOP after the given Prog — confirm against its definition.
					c.addnop(p.Link)
					c.addnop(p)
					bflag = 1
				}
			}

			m = int(o.size)
			if m == 0 {
				if p.As != obj.ANOP && p.As != obj.AFUNCDATA && p.As != obj.APCDATA {
					c.ctxt.Diag("zero-width instruction\n%v", p)
				}
				continue
			}

			pc += int64(m)
		}

		c.cursym.Size = pc
	}
	// Round the function size up to a multiple of FuncAlign.
	pc += -pc & (FuncAlign - 1)
	c.cursym.Size = pc

	// lay out the code, emitting code and data relocations.

	c.cursym.Grow(c.cursym.Size)

	// Phase 3: encode each Prog into out and copy size/4 words into the
	// symbol's data, advancing bp as we go.
	bp := c.cursym.P
	var i int32
	var out [5]uint32
	for p := c.cursym.Func().Text.Link; p != nil; p = p.Link {
		c.pc = p.Pc
		o = c.oplook(p)
		// out holds at most len(out) 4-byte words; a larger optab size is a
		// table bug, not a user error.
		if int(o.size) > 4*len(out) {
			log.Fatalf("out array in span0 is too small, need at least %d for %v", o.size/4, p)
		}
		c.asmout(p, o, out[:])
		for i = 0; i < int32(o.size/4); i++ {
			c.ctxt.Arch.ByteOrder.PutUint32(bp, out[i])
			bp = bp[4:]
		}
	}

	// Mark nonpreemptible instruction sequences.
	// We use REGTMP as a scratch register during call injection,
	// so instruction sequences that use REGTMP are unsafe to
	// preempt asynchronously.
	obj.MarkUnsafePoints(c.ctxt, c.cursym.Func().Text, c.newprog, c.isUnsafePoint, c.isRestartable)
}
+ return p.From.Reg == REGTMP || p.To.Reg == REGTMP || p.Reg == REGTMP +} + +// isRestartable returns whether p is a multi-instruction sequence that, +// if preempted, can be restarted. +func (c *ctxt0) isRestartable(p *obj.Prog) bool { + if c.isUnsafePoint(p) { + return false + } + // If p is a multi-instruction sequence with uses REGTMP inserted by + // the assembler in order to materialize a large constant/offset, we + // can restart p (at the start of the instruction sequence), recompute + // the content of REGTMP, upon async preemption. Currently, all cases + // of assembler-inserted REGTMP fall into this category. + // If p doesn't use REGTMP, it can be simply preempted, so we don't + // mark it. + o := c.oplook(p) + return o.size > 4 && o.flag&NOTUSETMP == 0 +} + +func isint32(v int64) bool { + return int64(int32(v)) == v +} + +func isuint32(v uint64) bool { + return uint64(uint32(v)) == v +} + +func (c *ctxt0) aclass(a *obj.Addr) int { + switch a.Type { + case obj.TYPE_NONE: + return C_NONE + + case obj.TYPE_REG: + if REG_R0 <= a.Reg && a.Reg <= REG_R31 { + return C_REG + } + if REG_F0 <= a.Reg && a.Reg <= REG_F31 { + return C_FREG + } + if REG_FCSR0 <= a.Reg && a.Reg <= REG_FCSR31 { + return C_FCSRREG + } + if REG_FCC0 <= a.Reg && a.Reg <= REG_FCC31 { + return C_FCCREG + } + return C_GOK + + case obj.TYPE_MEM: + switch a.Name { + case obj.NAME_EXTERN, + obj.NAME_STATIC: + if a.Sym == nil { + break + } + c.instoffset = a.Offset + if a.Sym != nil { // use relocation + if a.Sym.Type == objabi.STLSBSS { + return C_TLS + } + return C_ADDR + } + return C_LEXT + + case obj.NAME_AUTO: + if a.Reg == REGSP { + // unset base register for better printing, since + // a.Offset is still relative to pseudo-SP. 
+ a.Reg = obj.REG_NONE + } + c.instoffset = int64(c.autosize) + a.Offset + if c.instoffset >= -BIG && c.instoffset < BIG { + return C_SAUTO + } + return C_LAUTO + + case obj.NAME_PARAM: + if a.Reg == REGSP { + // unset base register for better printing, since + // a.Offset is still relative to pseudo-FP. + a.Reg = obj.REG_NONE + } + c.instoffset = int64(c.autosize) + a.Offset + c.ctxt.Arch.FixedFrameSize + if c.instoffset >= -BIG && c.instoffset < BIG { + return C_SAUTO + } + return C_LAUTO + + case obj.NAME_NONE: + c.instoffset = a.Offset + if c.instoffset == 0 { + return C_ZOREG + } + if c.instoffset >= -BIG && c.instoffset < BIG { + return C_SOREG + } + return C_LOREG + } + + return C_GOK + + case obj.TYPE_TEXTSIZE: + return C_TEXTSIZE + + case obj.TYPE_CONST, + obj.TYPE_ADDR: + switch a.Name { + case obj.NAME_NONE: + c.instoffset = a.Offset + if a.Reg != 0 { + if -BIG <= c.instoffset && c.instoffset <= BIG { + return C_SACON + } + if isint32(c.instoffset) { + return C_LACON + } + return C_DACON + } + + case obj.NAME_EXTERN, + obj.NAME_STATIC: + s := a.Sym + if s == nil { + return C_GOK + } + + c.instoffset = a.Offset + if s.Type == objabi.STLSBSS { + return C_STCON // address of TLS variable + } + return C_LECON + + case obj.NAME_AUTO: + if a.Reg == REGSP { + // unset base register for better printing, since + // a.Offset is still relative to pseudo-SP. + a.Reg = obj.REG_NONE + } + c.instoffset = int64(c.autosize) + a.Offset + if c.instoffset >= -BIG && c.instoffset < BIG { + return C_SACON + } + return C_LACON + + case obj.NAME_PARAM: + if a.Reg == REGSP { + // unset base register for better printing, since + // a.Offset is still relative to pseudo-FP. 
+ a.Reg = obj.REG_NONE + } + c.instoffset = int64(c.autosize) + a.Offset + c.ctxt.Arch.FixedFrameSize + if c.instoffset >= -BIG && c.instoffset < BIG { + return C_SACON + } + return C_LACON + + default: + return C_GOK + } + + if c.instoffset != int64(int32(c.instoffset)) { + return C_DCON + } + + if c.instoffset >= 0 { + if c.instoffset == 0 { + return C_ZCON + } + if c.instoffset <= 0x7ff { + return C_SCON + } + if c.instoffset <= 0xfff { + return C_ANDCON + } + if c.instoffset&0xfff == 0 && isuint32(uint64(c.instoffset)) { // && (instoffset & (1<<31)) == 0) + return C_UCON + } + if isint32(c.instoffset) || isuint32(uint64(c.instoffset)) { + return C_LCON + } + return C_LCON + } + + if c.instoffset >= -0x800 { + return C_ADDCON + } + if c.instoffset&0xfff == 0 && isint32(c.instoffset) { + return C_UCON + } + if isint32(c.instoffset) { + return C_LCON + } + return C_LCON + + case obj.TYPE_BRANCH: + return C_SBRA + } + + return C_GOK +} + +func prasm(p *obj.Prog) { + fmt.Printf("%v\n", p) +} + +func (c *ctxt0) oplook(p *obj.Prog) *Optab { + if oprange[AOR&obj.AMask] == nil { + c.ctxt.Diag("loong64 ops not initialized, call loong64.buildop first") + } + + a1 := int(p.Optab) + if a1 != 0 { + return &optab[a1-1] + } + a1 = int(p.From.Class) + if a1 == 0 { + a1 = c.aclass(&p.From) + 1 + p.From.Class = int8(a1) + } + + a1-- + a3 := int(p.To.Class) + if a3 == 0 { + a3 = c.aclass(&p.To) + 1 + p.To.Class = int8(a3) + } + + a3-- + a2 := C_NONE + if p.Reg != 0 { + a2 = C_REG + } + + ops := oprange[p.As&obj.AMask] + c1 := &xcmp[a1] + c3 := &xcmp[a3] + for i := range ops { + op := &ops[i] + if int(op.a2) == a2 && c1[op.a1] && c3[op.a3] && (op.family == 0 || c.ctxt.Arch.Family == op.family) { + p.Optab = uint16(cap(optab) - cap(ops) + i + 1) + return op + } + } + + c.ctxt.Diag("illegal combination %v %v %v %v", p.As, DRconv(a1), DRconv(a2), DRconv(a3)) + prasm(p) + // Turn illegal instruction into an UNDEF, avoid crashing in asmout. 
+ return &Optab{obj.AUNDEF, C_NONE, C_NONE, C_NONE, 49, 4, 0, 0, 0} +} + +func cmp(a int, b int) bool { + if a == b { + return true + } + switch a { + case C_DCON: + if b == C_LCON { + return true + } + fallthrough + case C_LCON: + if b == C_ZCON || b == C_SCON || b == C_UCON || b == C_ADDCON || b == C_ANDCON { + return true + } + + case C_ADD0CON: + if b == C_ADDCON { + return true + } + fallthrough + + case C_ADDCON: + if b == C_ZCON || b == C_SCON { + return true + } + + case C_AND0CON: + if b == C_ANDCON { + return true + } + fallthrough + + case C_ANDCON: + if b == C_ZCON || b == C_SCON { + return true + } + + case C_UCON: + if b == C_ZCON { + return true + } + + case C_SCON: + if b == C_ZCON { + return true + } + + case C_LACON: + if b == C_SACON { + return true + } + + case C_LBRA: + if b == C_SBRA { + return true + } + + case C_LEXT: + if b == C_SEXT { + return true + } + + case C_LAUTO: + if b == C_SAUTO { + return true + } + + case C_REG: + if b == C_ZCON { + return true + } + + case C_LOREG: + if b == C_ZOREG || b == C_SOREG { + return true + } + + case C_SOREG: + if b == C_ZOREG { + return true + } + } + + return false +} + +type ocmp []Optab + +func (x ocmp) Len() int { + return len(x) +} + +func (x ocmp) Swap(i, j int) { + x[i], x[j] = x[j], x[i] +} + +func (x ocmp) Less(i, j int) bool { + p1 := &x[i] + p2 := &x[j] + n := int(p1.as) - int(p2.as) + if n != 0 { + return n < 0 + } + n = int(p1.a1) - int(p2.a1) + if n != 0 { + return n < 0 + } + n = int(p1.a2) - int(p2.a2) + if n != 0 { + return n < 0 + } + n = int(p1.a3) - int(p2.a3) + if n != 0 { + return n < 0 + } + return false +} + +func opset(a, b0 obj.As) { + oprange[a&obj.AMask] = oprange[b0] +} + +func buildop(ctxt *obj.Link) { + if ctxt.DiagFunc == nil { + ctxt.DiagFunc = func(format string, args ...interface{}) { + log.Printf(format, args...) + } + } + + if oprange[AOR&obj.AMask] != nil { + // Already initialized; stop now. 
+ // This happens in the cmd/asm tests, + // each of which re-initializes the arch. + return + } + + var n int + + for i := 0; i < C_NCLASS; i++ { + for n = 0; n < C_NCLASS; n++ { + if cmp(n, i) { + xcmp[i][n] = true + } + } + } + for n = 0; optab[n].as != obj.AXXX; n++ { + } + sort.Sort(ocmp(optab[:n])) + for i := 0; i < n; i++ { + r := optab[i].as + r0 := r & obj.AMask + start := i + for optab[i].as == r { + i++ + } + oprange[r0] = optab[start:i] + i-- + + switch r { + default: + ctxt.Diag("unknown op in build: %v", r) + ctxt.DiagFlush() + log.Fatalf("bad code") + + case AABSF: + opset(AMOVFD, r0) + opset(AMOVDF, r0) + opset(AMOVWF, r0) + opset(AMOVFW, r0) + opset(AMOVWD, r0) + opset(AMOVDW, r0) + opset(ANEGF, r0) + opset(ANEGD, r0) + opset(AABSD, r0) + opset(ATRUNCDW, r0) + opset(ATRUNCFW, r0) + opset(ASQRTF, r0) + opset(ASQRTD, r0) + + case AMOVVF: + opset(AMOVVD, r0) + opset(AMOVFV, r0) + opset(AMOVDV, r0) + opset(ATRUNCDV, r0) + opset(ATRUNCFV, r0) + + case AADD: + opset(ASGT, r0) + opset(ASGTU, r0) + opset(AADDU, r0) + + case AADDV: + opset(AADDVU, r0) + + case AADDF: + opset(ADIVF, r0) + opset(ADIVD, r0) + opset(AMULF, r0) + opset(AMULD, r0) + opset(ASUBF, r0) + opset(ASUBD, r0) + opset(AADDD, r0) + + case AAND: + opset(AOR, r0) + opset(AXOR, r0) + + case ABEQ: + opset(ABNE, r0) + opset(ABLT, r0) + opset(ABGE, r0) + opset(ABGEU, r0) + opset(ABLTU, r0) + + case ABLEZ: + opset(ABGEZ, r0) + opset(ABLTZ, r0) + opset(ABGTZ, r0) + + case AMOVB: + opset(AMOVH, r0) + + case AMOVBU: + opset(AMOVHU, r0) + + case AMUL: + opset(AMULU, r0) + opset(AMULH, r0) + opset(AMULHU, r0) + opset(AREM, r0) + opset(AREMU, r0) + opset(ADIV, r0) + opset(ADIVU, r0) + + case AMULV: + opset(AMULVU, r0) + opset(AMULHV, r0) + opset(AMULHVU, r0) + opset(AREMV, r0) + opset(AREMVU, r0) + opset(ADIVV, r0) + opset(ADIVVU, r0) + + case ASLL: + opset(ASRL, r0) + opset(ASRA, r0) + opset(AROTR, r0) + + case ASLLV: + opset(ASRAV, r0) + opset(ASRLV, r0) + opset(AROTRV, r0) + + case ASUB: + 
// OP combines x, shifted left by 3 bits, with y.
func OP(x uint32, y uint32) uint32 {
	return y | x<<3
}

// SP combines x, shifted left by 29 bits, with y, shifted left by 26 bits.
func SP(x uint32, y uint32) uint32 {
	hi := x << 29
	lo := y << 26
	return hi | lo
}

// OP_TEN combines x, shifted left by 21 bits, with y, shifted left by
// 10 bits.
func OP_TEN(x uint32, y uint32) uint32 {
	hi := x << 21
	lo := y << 10
	return hi | lo
}

// OP_RRR merges three 5-bit register numbers into op.
//
//	r1 -> rk (bits 10 and up)
//	r2 -> rj (bits 5 and up)
//	r3 -> rd (bits 0 and up)
func OP_RRR(op uint32, r1 uint32, r2 uint32, r3 uint32) uint32 {
	rk := (r1 & 0x1F) << 10
	rj := (r2 & 0x1F) << 5
	rd := r3 & 0x1F
	return op | rk | rj | rd
}

// OP_RR merges two 5-bit register numbers into op.
//
//	r2 -> rj (bits 5 and up)
//	r3 -> rd (bits 0 and up)
func OP_RR(op uint32, r2 uint32, r3 uint32) uint32 {
	rj := (r2 & 0x1F) << 5
	rd := r3 & 0x1F
	return op | rj | rd
}

// OP_16IR_5I merges a 21-bit immediate and a 3-bit register number into
// op: the low 16 bits of i go at bit 10, bits 16..20 of i go at bit 0,
// and the low 3 bits of r2 go at bit 5.
func OP_16IR_5I(op uint32, i uint32, r2 uint32) uint32 {
	immLo := (i & 0xFFFF) << 10
	immHi := (i >> 16) & 0x1F
	reg := (r2 & 0x7) << 5
	return op | immLo | reg | immHi
}

// OP_16IRR merges a 16-bit immediate (at bit 10) and two 5-bit register
// numbers (r2 at bit 5, r3 at bit 0) into op.
func OP_16IRR(op uint32, i uint32, r2 uint32, r3 uint32) uint32 {
	imm := (i & 0xFFFF) << 10
	rj := (r2 & 0x1F) << 5
	rd := r3 & 0x1F
	return op | imm | rj | rd
}

// OP_12IRR merges a 12-bit immediate (at bit 10) and two 5-bit register
// numbers (r2 at bit 5, r3 at bit 0) into op.
func OP_12IRR(op uint32, i uint32, r2 uint32, r3 uint32) uint32 {
	imm := (i & 0xFFF) << 10
	rj := (r2 & 0x1F) << 5
	rd := r3 & 0x1F
	return op | imm | rj | rd
}

// OP_IR merges a 20-bit immediate (at bit 5) and a 5-bit register number
// (at bit 0) into op.
func OP_IR(op uint32, i uint32, r2 uint32) uint32 {
	imm := (i & 0xFFFFF) << 5 // ui20
	rd := r2 & 0x1F           // rd5
	return op | imm | rd
}
+func OP_B_BL(op uint32, i uint32) uint32 { + return op | ((i & 0xFFFF) << 10) | ((i >> 16) & 0x3FF) +} + +func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + o1 := uint32(0) + o2 := uint32(0) + o3 := uint32(0) + o4 := uint32(0) + o5 := uint32(0) + + add := AADDU + add = AADDVU + + switch o.type_ { + default: + c.ctxt.Diag("unknown type %d %v", o.type_) + prasm(p) + + case 0: // pseudo ops + break + + case 1: // mov r1,r2 ==> OR r1,r0,r2 + a := AOR + if p.As == AMOVW { + a = ASLL + } + o1 = OP_RRR(c.oprrr(a), uint32(REGZERO), uint32(p.From.Reg), uint32(p.To.Reg)) + + case 2: // add/sub r1,[r2],r3 + r := int(p.Reg) + if p.As == ANEGW || p.As == ANEGV { + r = REGZERO + } + if r == 0 { + r = int(p.To.Reg) + } + o1 = OP_RRR(c.oprrr(p.As), uint32(p.From.Reg), uint32(r), uint32(p.To.Reg)) + + case 3: // mov $soreg, r ==> or/add $i,o,r + v := c.regoff(&p.From) + + r := int(p.From.Reg) + if r == 0 { + r = int(o.param) + } + a := add + if o.a1 == C_ANDCON { + a = AOR + } + + o1 = OP_12IRR(c.opirr(a), uint32(v), uint32(r), uint32(p.To.Reg)) + + case 4: // add $scon,[r1],r2 + v := c.regoff(&p.From) + + r := int(p.Reg) + if r == 0 { + r = int(p.To.Reg) + } + + o1 = OP_12IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.To.Reg)) + + case 5: // syscall + o1 = c.oprrr(p.As) + + case 6: // beq r1,[r2],sbra + v := int32(0) + vcmp := int32(0) + if p.To.Target() != nil { + v = int32(p.To.Target().Pc-p.Pc) >> 2 + } + if v < 0 { + vcmp = -v + } + if (p.As == ABFPT || p.As == ABFPF) && ((uint32(vcmp))>>21)&0x7FF != 0 { + c.ctxt.Diag("21 bit-width, short branch too far\n%v", p) + } else if p.As != ABFPT && p.As != ABFPF && (v<<16)>>16 != v { + c.ctxt.Diag("16 bit-width, short branch too far\n%v", p) + } + if p.As == ABGTZ || p.As == ABLEZ { + o1 = OP_16IRR(c.opirr(p.As), uint32(v), uint32(p.Reg), uint32(p.From.Reg)) + } else if p.As == ABFPT || p.As == ABFPF { + // BCNEZ cj offset21 ,cj = fcc0 + // BCEQZ cj offset21 ,cj = fcc0 + o1 = OP_16IR_5I(c.opirr(p.As), uint32(v), 
uint32(REG_FCC0)) + } else { + o1 = OP_16IRR(c.opirr(p.As), uint32(v), uint32(p.From.Reg), uint32(p.Reg)) + } + + case 7: // mov r, soreg + r := int(p.To.Reg) + if r == 0 { + r = int(o.param) + } + v := c.regoff(&p.To) + o1 = OP_12IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.From.Reg)) + + case 8: // mov soreg, r + r := int(p.From.Reg) + if r == 0 { + r = int(o.param) + } + v := c.regoff(&p.From) + o1 = OP_12IRR(c.opirr(-p.As), uint32(v), uint32(r), uint32(p.To.Reg)) + + case 9: // sll r1,[r2],r3 + if p.As != ACLO && p.As != ACLZ { + r := int(p.Reg) + if r == 0 { + r = int(p.To.Reg) + } + o1 = OP_RRR(c.oprrr(p.As), uint32(p.From.Reg), uint32(r), uint32(p.To.Reg)) + } else { // clo r1,r2 + o1 = OP_RR(c.oprr(p.As), uint32(p.From.Reg), uint32(p.To.Reg)) + } + + case 10: // add $con,[r1],r2 ==> mov $con, t; add t,[r1],r2 + v := c.regoff(&p.From) + a := AOR + if v < 0 { + a = AADDU + } + o1 = OP_12IRR(c.opirr(a), uint32(v), uint32(0), uint32(REGTMP)) + r := int(p.Reg) + if r == 0 { + r = int(p.To.Reg) + } + o2 = OP_RRR(c.oprrr(p.As), uint32(REGTMP), uint32(r), uint32(p.To.Reg)) + + case 11: // jmp lbra + v := int32(0) + if c.aclass(&p.To) == C_SBRA && p.To.Sym == nil && p.As == AJMP { + // use PC-relative branch for short branches + // BEQ R0, R0, sbra + if p.To.Target() != nil { + v = int32(p.To.Target().Pc-p.Pc) >> 2 + } + if (v<<16)>>16 == v { + o1 = OP_16IRR(c.opirr(ABEQ), uint32(v), uint32(REGZERO), uint32(REGZERO)) + break + } + } + if p.To.Target() == nil { + v = int32(p.Pc) >> 2 + } else { + v = int32(p.To.Target().Pc) >> 2 + } + o1 = OP_B_BL(c.opirr(p.As), uint32(v)) + if p.To.Sym == nil { + p.To.Sym = c.cursym.Func().Text.From.Sym + p.To.Offset = p.To.Target().Pc + } + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 4 + rel.Sym = p.To.Sym + rel.Add = p.To.Offset + rel.Type = objabi.R_CALLLOONG64 + + case 12: // movbs r,r + // NOTE: this case does not use REGTMP. If it ever does, + // remove the NOTUSETMP flag in optab. 
+ v := 16 + if p.As == AMOVB { + v = 24 + } + o1 = OP_16IRR(c.opirr(ASLL), uint32(v), uint32(p.From.Reg), uint32(p.To.Reg)) + o2 = OP_16IRR(c.opirr(ASRA), uint32(v), uint32(p.To.Reg), uint32(p.To.Reg)) + + case 13: // movbu r,r + if p.As == AMOVBU { + o1 = OP_12IRR(c.opirr(AAND), uint32(0xff), uint32(p.From.Reg), uint32(p.To.Reg)) + } else { + // bstrpick.d (msbd=15, lsbd=0) + o1 = (0x33c0 << 10) | ((uint32(p.From.Reg) & 0x1f) << 5) | (uint32(p.To.Reg) & 0x1F) + } + + case 14: // movwu r,r + // NOTE: this case does not use REGTMP. If it ever does, + // remove the NOTUSETMP flag in optab. + o1 = OP_16IRR(c.opirr(-ASLLV), uint32(32)&0x3f, uint32(p.From.Reg), uint32(p.To.Reg)) + o2 = OP_16IRR(c.opirr(-ASRLV), uint32(32)&0x3f, uint32(p.To.Reg), uint32(p.To.Reg)) + + case 15: // teq $c r,r + v := c.regoff(&p.From) + r := int(p.Reg) + if r == 0 { + r = REGZERO + } + /* + teq c, r1, r2 + fallthrough + ==> + bne r1, r2, 2 + break c + fallthrough + */ + if p.As == ATEQ { + o1 = OP_16IRR(c.opirr(ABNE), uint32(2), uint32(r), uint32(p.To.Reg)) + } else { // ATNE + o1 = OP_16IRR(c.opirr(ABEQ), uint32(2), uint32(r), uint32(p.To.Reg)) + } + o2 = c.oprrr(ABREAK) | (uint32(v) & 0x7FFF) + + case 16: // sll $c,[r1],r2 + v := c.regoff(&p.From) + r := int(p.Reg) + if r == 0 { + r = int(p.To.Reg) + } + + // instruction ending with V:6-digit immediate, others:5-digit immediate + if v >= 32 && vshift(p.As) { + o1 = OP_16IRR(c.opirr(p.As), uint32(v)&0x3f, uint32(r), uint32(p.To.Reg)) + } else { + o1 = OP_16IRR(c.opirr(p.As), uint32(v)&0x1f, uint32(r), uint32(p.To.Reg)) + } + + case 17: + o1 = OP_RRR(c.oprrr(p.As), uint32(REGZERO), uint32(p.From.Reg), uint32(p.To.Reg)) + + case 18: // jmp [r1],0(r2) + r := int(p.Reg) + if r == 0 { + r = int(o.param) + } + o1 = OP_RRR(c.oprrr(p.As), uint32(0), uint32(p.To.Reg), uint32(r)) + if p.As == obj.ACALL { + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 0 + rel.Type = objabi.R_CALLIND + } + + case 19: // mov $lcon,r + // NOTE: this 
case does not use REGTMP. If it ever does, + // remove the NOTUSETMP flag in optab. + v := c.regoff(&p.From) + o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(p.To.Reg)) + o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(p.To.Reg), uint32(p.To.Reg)) + + case 23: // add $lcon,r1,r2 + v := c.regoff(&p.From) + o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP)) + o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(REGTMP), uint32(REGTMP)) + r := int(p.Reg) + if r == 0 { + r = int(p.To.Reg) + } + o3 = OP_RRR(c.oprrr(p.As), uint32(REGTMP), uint32(r), uint32(p.To.Reg)) + + case 24: // mov $ucon,r + v := c.regoff(&p.From) + o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(p.To.Reg)) + + case 25: // add/and $ucon,[r1],r2 + v := c.regoff(&p.From) + o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP)) + r := int(p.Reg) + if r == 0 { + r = int(p.To.Reg) + } + o2 = OP_RRR(c.oprrr(p.As), uint32(REGTMP), uint32(r), uint32(p.To.Reg)) + + case 26: // mov $lsext/auto/oreg,r + v := c.regoff(&p.From) + o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP)) + o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(REGTMP), uint32(REGTMP)) + r := int(p.From.Reg) + if r == 0 { + r = int(o.param) + } + o3 = OP_RRR(c.oprrr(add), uint32(REGTMP), uint32(r), uint32(p.To.Reg)) + + case 27: // mov [sl]ext/auto/oreg,fr + v := c.regoff(&p.From) + r := int(p.From.Reg) + if r == 0 { + r = int(o.param) + } + a := -AMOVF + if p.As == AMOVD { + a = -AMOVD + } + switch o.size { + case 12: + o1 = OP_IR(c.opir(ALU12IW), uint32((v+1<<11)>>12), uint32(REGTMP)) + o2 = OP_RRR(c.oprrr(add), uint32(r), uint32(REGTMP), uint32(REGTMP)) + o3 = OP_12IRR(c.opirr(a), uint32(v), uint32(REGTMP), uint32(p.To.Reg)) + + case 4: + o1 = OP_12IRR(c.opirr(a), uint32(v), uint32(r), uint32(p.To.Reg)) + } + + case 28: // mov fr,[sl]ext/auto/oreg + v := c.regoff(&p.To) + r := int(p.To.Reg) + if r == 0 { + r = int(o.param) + } + a := AMOVF + if p.As == AMOVD { + a = AMOVD + } + switch o.size { + case 12: + o1 = 
OP_IR(c.opir(ALU12IW), uint32((v+1<<11)>>12), uint32(REGTMP)) + o2 = OP_RRR(c.oprrr(add), uint32(r), uint32(REGTMP), uint32(REGTMP)) + o3 = OP_12IRR(c.opirr(a), uint32(v), uint32(REGTMP), uint32(p.From.Reg)) + + case 4: + o1 = OP_12IRR(c.opirr(a), uint32(v), uint32(r), uint32(p.From.Reg)) + } + + case 30: // movw r,fr + a := OP_TEN(8, 1321) // movgr2fr.w + o1 = OP_RR(a, uint32(p.From.Reg), uint32(p.To.Reg)) + + case 31: // movw fr,r + a := OP_TEN(8, 1325) // movfr2gr.s + o1 = OP_RR(a, uint32(p.From.Reg), uint32(p.To.Reg)) + + case 32: // fadd fr1,[fr2],fr3 + r := int(p.Reg) + if r == 0 { + r = int(p.To.Reg) + } + o1 = OP_RRR(c.oprrr(p.As), uint32(p.From.Reg), uint32(r), uint32(p.To.Reg)) + + case 33: // fabs fr1, fr3 + o1 = OP_RRR(c.oprrr(p.As), uint32(0), uint32(p.From.Reg), uint32(p.To.Reg)) + + case 34: // mov $con,fr + v := c.regoff(&p.From) + a := AADDU + if o.a1 == C_ANDCON { + a = AOR + } + o1 = OP_12IRR(c.opirr(a), uint32(v), uint32(0), uint32(REGTMP)) + o2 = OP_RR(OP_TEN(8, 1321), uint32(REGTMP), uint32(p.To.Reg)) // movgr2fr.w + + case 35: // mov r,lext/auto/oreg + v := c.regoff(&p.To) + r := int(p.To.Reg) + if r == 0 { + r = int(o.param) + } + o1 = OP_IR(c.opir(ALU12IW), uint32((v+1<<11)>>12), uint32(REGTMP)) + o2 = OP_RRR(c.oprrr(add), uint32(r), uint32(REGTMP), uint32(REGTMP)) + o3 = OP_12IRR(c.opirr(p.As), uint32(v), uint32(REGTMP), uint32(p.From.Reg)) + + case 36: // mov lext/auto/oreg,r + v := c.regoff(&p.From) + r := int(p.From.Reg) + if r == 0 { + r = int(o.param) + } + o1 = OP_IR(c.opir(ALU12IW), uint32((v+1<<11)>>12), uint32(REGTMP)) + o2 = OP_RRR(c.oprrr(add), uint32(r), uint32(REGTMP), uint32(REGTMP)) + o3 = OP_12IRR(c.opirr(-p.As), uint32(v), uint32(REGTMP), uint32(p.To.Reg)) + + case 40: // word + o1 = uint32(c.regoff(&p.From)) + + case 47: // movv r,fr + a := OP_TEN(8, 1322) // movgr2fr.d + o1 = OP_RR(a, uint32(p.From.Reg), uint32(p.To.Reg)) + + case 48: // movv fr,r + a := OP_TEN(8, 1326) // movfr2gr.d + o1 = OP_RR(a, uint32(p.From.Reg), 
uint32(p.To.Reg)) + + case 49: // undef + o1 = c.oprrr(ABREAK) + + // relocation operations + case 50: // mov r,addr ==> pcaddu12i + sw + o1 = OP_IR(c.opir(APCADDU12I), uint32(0), uint32(REGTMP)) + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 4 + rel.Sym = p.To.Sym + rel.Add = p.To.Offset + rel.Type = objabi.R_ADDRLOONG64U + + o2 = OP_12IRR(c.opirr(p.As), uint32(0), uint32(REGTMP), uint32(p.From.Reg)) + rel2 := obj.Addrel(c.cursym) + rel2.Off = int32(c.pc + 4) + rel2.Siz = 4 + rel2.Sym = p.To.Sym + rel2.Add = p.To.Offset + rel2.Type = objabi.R_ADDRLOONG64 + + case 51: // mov addr,r ==> pcaddu12i + lw + o1 = OP_IR(c.opir(APCADDU12I), uint32(0), uint32(REGTMP)) + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 4 + rel.Sym = p.From.Sym + rel.Add = p.From.Offset + rel.Type = objabi.R_ADDRLOONG64U + o2 = OP_12IRR(c.opirr(-p.As), uint32(0), uint32(REGTMP), uint32(p.To.Reg)) + rel2 := obj.Addrel(c.cursym) + rel2.Off = int32(c.pc + 4) + rel2.Siz = 4 + rel2.Sym = p.From.Sym + rel2.Add = p.From.Offset + rel2.Type = objabi.R_ADDRLOONG64 + + case 52: // mov $lext, r + // NOTE: this case does not use REGTMP. If it ever does, + // remove the NOTUSETMP flag in optab. + o1 = OP_IR(c.opir(APCADDU12I), uint32(0), uint32(p.To.Reg)) + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 4 + rel.Sym = p.From.Sym + rel.Add = p.From.Offset + rel.Type = objabi.R_ADDRLOONG64U + o2 = OP_12IRR(c.opirr(add), uint32(0), uint32(p.To.Reg), uint32(p.To.Reg)) + rel2 := obj.Addrel(c.cursym) + rel2.Off = int32(c.pc + 4) + rel2.Siz = 4 + rel2.Sym = p.From.Sym + rel2.Add = p.From.Offset + rel2.Type = objabi.R_ADDRLOONG64 + + case 53: // mov r, tlsvar ==> lu12i.w + ori + add r2, regtmp + sw o(regtmp) + // NOTE: this case does not use REGTMP. If it ever does, + // remove the NOTUSETMP flag in optab. 
+ o1 = OP_IR(c.opir(ALU12IW), uint32(0), uint32(REGTMP)) + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 4 + rel.Sym = p.To.Sym + rel.Add = p.To.Offset + rel.Type = objabi.R_ADDRLOONG64TLSU + o2 = OP_12IRR(c.opirr(AOR), uint32(0), uint32(REGTMP), uint32(REGTMP)) + rel2 := obj.Addrel(c.cursym) + rel2.Off = int32(c.pc + 4) + rel2.Siz = 4 + rel2.Sym = p.To.Sym + rel2.Add = p.To.Offset + rel2.Type = objabi.R_ADDRLOONG64TLS + o3 = OP_RRR(c.oprrr(AADDV), uint32(REG_R2), uint32(REGTMP), uint32(REGTMP)) + o4 = OP_12IRR(c.opirr(p.As), uint32(0), uint32(REGTMP), uint32(p.From.Reg)) + + case 54: // lu12i.w + ori + add r2, regtmp + lw o(regtmp) + // NOTE: this case does not use REGTMP. If it ever does, + // remove the NOTUSETMP flag in optab. + o1 = OP_IR(c.opir(ALU12IW), uint32(0), uint32(REGTMP)) + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 4 + rel.Sym = p.From.Sym + rel.Add = p.From.Offset + rel.Type = objabi.R_ADDRLOONG64TLSU + o2 = OP_12IRR(c.opirr(AOR), uint32(0), uint32(REGTMP), uint32(REGTMP)) + rel2 := obj.Addrel(c.cursym) + rel2.Off = int32(c.pc + 4) + rel2.Siz = 4 + rel2.Sym = p.From.Sym + rel2.Add = p.From.Offset + rel2.Type = objabi.R_ADDRLOONG64TLS + o3 = OP_RRR(c.oprrr(AADDV), uint32(REG_R2), uint32(REGTMP), uint32(REGTMP)) + o4 = OP_12IRR(c.opirr(-p.As), uint32(0), uint32(REGTMP), uint32(p.To.Reg)) + + case 55: // lu12i.w + ori + add r2, regtmp + // NOTE: this case does not use REGTMP. If it ever does, + // remove the NOTUSETMP flag in optab. 
+ o1 = OP_IR(c.opir(ALU12IW), uint32(0), uint32(REGTMP)) + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 4 + rel.Sym = p.From.Sym + rel.Add = p.From.Offset + rel.Type = objabi.R_ADDRLOONG64TLSU + o2 = OP_12IRR(c.opirr(AOR), uint32(0), uint32(REGTMP), uint32(REGTMP)) + rel2 := obj.Addrel(c.cursym) + rel2.Off = int32(c.pc + 4) + rel2.Siz = 4 + rel2.Sym = p.From.Sym + rel2.Add = p.From.Offset + rel2.Type = objabi.R_ADDRLOONG64TLS + o3 = OP_RRR(c.oprrr(AADDV), uint32(REG_R2), uint32(REGTMP), uint32(p.To.Reg)) + + case 59: // mov $dcon,r + // NOTE: this case does not use REGTMP. If it ever does, + // remove the NOTUSETMP flag in optab. + v := c.vregoff(&p.From) + o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(p.To.Reg)) + o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(p.To.Reg), uint32(p.To.Reg)) + o3 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(p.To.Reg)) + o4 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(p.To.Reg), uint32(p.To.Reg)) + + case 60: // add $dcon,r1,r2 + v := c.vregoff(&p.From) + o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP)) + o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(REGTMP), uint32(REGTMP)) + o3 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(REGTMP)) + o4 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(REGTMP), uint32(REGTMP)) + r := int(p.Reg) + if r == 0 { + r = int(p.To.Reg) + } + o5 = OP_RRR(c.oprrr(p.As), uint32(REGTMP), uint32(r), uint32(p.To.Reg)) + + case 61: // word C_DCON + o1 = uint32(c.vregoff(&p.From)) + o2 = uint32(c.vregoff(&p.From) >> 32) + } + + out[0] = o1 + out[1] = o2 + out[2] = o3 + out[3] = o4 + out[4] = o5 +} + +func (c *ctxt0) vregoff(a *obj.Addr) int64 { + c.instoffset = 0 + c.aclass(a) + return c.instoffset +} + +func (c *ctxt0) regoff(a *obj.Addr) int32 { + return int32(c.vregoff(a)) +} + +func (c *ctxt0) oprrr(a obj.As) uint32 { + switch a { + case AADD: + return 0x20 << 15 + case AADDU: + return 0x20 << 15 + case ASGT: + return 0x24 << 15 // SLT + case ASGTU: + return 0x25 << 15 
// SLTU + case AMASKEQZ: + return 0x26 << 15 + case AMASKNEZ: + return 0x27 << 15 + case AAND: + return 0x29 << 15 + case AOR: + return 0x2a << 15 + case AXOR: + return 0x2b << 15 + case ASUB: + return 0x22 << 15 + case ASUBU, ANEGW: + return 0x22 << 15 + case ANOR: + return 0x28 << 15 + case ASLL: + return 0x2e << 15 + case ASRL: + return 0x2f << 15 + case ASRA: + return 0x30 << 15 + case AROTR: + return 0x36 << 15 + case ASLLV: + return 0x31 << 15 + case ASRLV: + return 0x32 << 15 + case ASRAV: + return 0x33 << 15 + case AROTRV: + return 0x37 << 15 + case AADDV: + return 0x21 << 15 + case AADDVU: + return 0x21 << 15 + case ASUBV: + return 0x23 << 15 + case ASUBVU, ANEGV: + return 0x23 << 15 + + case AMUL: + return 0x38 << 15 // mul.w + case AMULU: + return 0x38 << 15 // mul.w + case AMULH: + return 0x39 << 15 // mulh.w + case AMULHU: + return 0x3a << 15 // mulhu.w + case AMULV: + return 0x3b << 15 // mul.d + case AMULVU: + return 0x3b << 15 // mul.d + case AMULHV: + return 0x3c << 15 // mulh.d + case AMULHVU: + return 0x3d << 15 // mulhu.d + case ADIV: + return 0x40 << 15 // div.w + case ADIVU: + return 0x42 << 15 // div.wu + case ADIVV: + return 0x44 << 15 // div.d + case ADIVVU: + return 0x46 << 15 // div.du + case AREM: + return 0x41 << 15 // mod.w + case AREMU: + return 0x43 << 15 // mod.wu + case AREMV: + return 0x45 << 15 // mod.d + case AREMVU: + return 0x47 << 15 // mod.du + + case AJMP: + return 0x13 << 26 // jirl r0, rj, 0 + case AJAL: + return (0x13 << 26) | 1 // jirl r1, rj, 0 + + case ABREAK: + return 0x54 << 15 + case ASYSCALL: + return 0x56 << 15 + case ADIVF: + return 0x20d << 15 + case ADIVD: + return 0x20e << 15 + case AMULF: + return 0x209 << 15 + case AMULD: + return 0x20a << 15 + case ASUBF: + return 0x205 << 15 + case ASUBD: + return 0x206 << 15 + case AADDF: + return 0x201 << 15 + case AADDD: + return 0x202 << 15 + case ATRUNCFV: + return 0x46a9 << 10 + case ATRUNCDV: + return 0x46aa << 10 + case ATRUNCFW: + return 0x46a1 << 10 + case 
ATRUNCDW: + return 0x46a2 << 10 + case AMOVFV: + return 0x46c9 << 10 + case AMOVDV: + return 0x46ca << 10 + case AMOVVF: + return 0x4746 << 10 + case AMOVVD: + return 0x474a << 10 + case AMOVFW: + return 0x46c1 << 10 + case AMOVDW: + return 0x46c2 << 10 + case AMOVWF: + return 0x4744 << 10 + case AMOVDF: + return 0x4646 << 10 + case AMOVWD: + return 0x4748 << 10 + case AMOVFD: + return 0x4649 << 10 + case AABSF: + return 0x4501 << 10 + case AABSD: + return 0x4502 << 10 + case AMOVF: + return 0x4525 << 10 + case AMOVD: + return 0x4526 << 10 + case ANEGF: + return 0x4505 << 10 + case ANEGD: + return 0x4506 << 10 + case ACMPEQF: + return 0x0c1<<20 | 0x4<<15 // FCMP.CEQ.S + case ACMPEQD: + return 0x0c2<<20 | 0x4<<15 // FCMP.CEQ.D + case ACMPGED: + return 0x0c2<<20 | 0x7<<15 // FCMP.SLE.D + case ACMPGEF: + return 0x0c1<<20 | 0x7<<15 // FCMP.SLE.S + case ACMPGTD: + return 0x0c2<<20 | 0x3<<15 // FCMP.SLT.D + case ACMPGTF: + return 0x0c1<<20 | 0x3<<15 // FCMP.SLT.S + + case ASQRTF: + return 0x4511 << 10 + case ASQRTD: + return 0x4512 << 10 + + case ADBAR: + return 0x70e4 << 15 + case ANOOP: + // andi r0, r0, 0 + return 0x03400000 + } + + if a < 0 { + c.ctxt.Diag("bad rrr opcode -%v", -a) + } else { + c.ctxt.Diag("bad rrr opcode %v", a) + } + return 0 +} + +func (c *ctxt0) oprr(a obj.As) uint32 { + switch a { + case ACLO: + return 0x4 << 10 + case ACLZ: + return 0x5 << 10 + } + + c.ctxt.Diag("bad rr opcode %v", a) + return 0 +} + +func (c *ctxt0) opir(a obj.As) uint32 { + switch a { + case ALU12IW: + return 0x0a << 25 + case ALU32ID: + return 0x0b << 25 + case APCADDU12I: + return 0x0e << 25 + } + return 0 +} + +func (c *ctxt0) opirr(a obj.As) uint32 { + switch a { + case AADD, AADDU: + return 0x00a << 22 + case ASGT: + return 0x008 << 22 + case ASGTU: + return 0x009 << 22 + case AAND: + return 0x00d << 22 + case AOR: + return 0x00e << 22 + case ALU52ID: + return 0x00c << 22 + case AXOR: + return 0x00f << 22 + case ASLL: + return 0x00081 << 15 + case ASRL: + return 0x00089 
<< 15 + case ASRA: + return 0x00091 << 15 + case AROTR: + return 0x00099 << 15 + case AADDV: + return 0x00b << 22 + case AADDVU: + return 0x00b << 22 + + case AJMP: + return 0x14 << 26 + case AJAL, + obj.ADUFFZERO, + obj.ADUFFCOPY: + return 0x15 << 26 + + case AJIRL: + return 0x13 << 26 + case ABLTU: + return 0x1a << 26 + case ABLT, ABLTZ, ABGTZ: + return 0x18 << 26 + case ABGEU: + return 0x1b << 26 + case ABGE, ABGEZ, ABLEZ: + return 0x19 << 26 + case ABEQ: + return 0x16 << 26 + case ABNE: + return 0x17 << 26 + case ABFPT: + return 0x12<<26 | 0x1<<8 + case ABFPF: + return 0x12<<26 | 0x0<<8 + + case AMOVB, + AMOVBU: + return 0x0a4 << 22 + case AMOVH, + AMOVHU: + return 0x0a5 << 22 + case AMOVW, + AMOVWU: + return 0x0a6 << 22 + case AMOVV: + return 0x0a7 << 22 + case AMOVF: + return 0x0ad << 22 + case AMOVD: + return 0x0af << 22 + case AMOVWL: + return 0x0bc << 22 + case AMOVWR: + return 0x0bd << 22 + case AMOVVL: + return 0x0be << 22 + case AMOVVR: + return 0x0bf << 22 + + case ABREAK: + return 0x018 << 22 + + case -AMOVWL: + return 0x0b8 << 22 + case -AMOVWR: + return 0x0b9 << 22 + case -AMOVVL: + return 0x0ba << 22 + case -AMOVVR: + return 0x0bb << 22 + case -AMOVB: + return 0x0a0 << 22 + case -AMOVBU: + return 0x0a8 << 22 + case -AMOVH: + return 0x0a1 << 22 + case -AMOVHU: + return 0x0a9 << 22 + case -AMOVW: + return 0x0a2 << 22 + case -AMOVWU: + return 0x0aa << 22 + case -AMOVV: + return 0x0a3 << 22 + case -AMOVF: + return 0x0ac << 22 + case -AMOVD: + return 0x0ae << 22 + + case ASLLV, + -ASLLV: + return 0x0041 << 16 + case ASRLV, + -ASRLV: + return 0x0045 << 16 + case ASRAV, + -ASRAV: + return 0x0049 << 16 + case AROTRV, + -AROTRV: + return 0x004d << 16 + case -ALL: + return 0x020 << 24 + case -ALLV: + return 0x022 << 24 + case ASC: + return 0x021 << 24 + case ASCV: + return 0x023 << 24 + } + + if a < 0 { + c.ctxt.Diag("bad irr opcode -%v", -a) + } else { + c.ctxt.Diag("bad irr opcode %v", a) + } + return 0 +} + +func vshift(a obj.As) bool { + switch a { + 
case ASLLV, + ASRLV, + ASRAV, + AROTRV: + return true + } + return false +} diff --git a/src/cmd/internal/obj/loong64/cnames.go b/src/cmd/internal/obj/loong64/cnames.go new file mode 100644 index 0000000..f397077 --- /dev/null +++ b/src/cmd/internal/obj/loong64/cnames.go @@ -0,0 +1,43 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package loong64 + +var cnames0 = []string{ + "NONE", + "REG", + "FREG", + "FCREG", + "FCSRREG", + "FCCREG", + "ZCON", + "SCON", + "UCON", + "ADD0CON", + "AND0CON", + "ADDCON", + "ANDCON", + "LCON", + "DCON", + "SACON", + "SECON", + "LACON", + "LECON", + "DACON", + "STCON", + "SBRA", + "LBRA", + "SAUTO", + "LAUTO", + "SEXT", + "LEXT", + "ZOREG", + "SOREG", + "LOREG", + "GOK", + "ADDR", + "TLS", + "TEXTSIZE", + "NCLASS", +} diff --git a/src/cmd/internal/obj/loong64/list.go b/src/cmd/internal/obj/loong64/list.go new file mode 100644 index 0000000..4890430 --- /dev/null +++ b/src/cmd/internal/obj/loong64/list.go @@ -0,0 +1,46 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package loong64 + +import ( + "cmd/internal/obj" + "fmt" +) + +func init() { + obj.RegisterRegister(obj.RBaseLOONG64, REG_LAST+1, rconv) + obj.RegisterOpcode(obj.ABaseLoong64, Anames) +} + +func rconv(r int) string { + if r == 0 { + return "NONE" + } + if r == REGG { + // Special case. + return "g" + } + if REG_R0 <= r && r <= REG_R31 { + return fmt.Sprintf("R%d", r-REG_R0) + } + if REG_F0 <= r && r <= REG_F31 { + return fmt.Sprintf("F%d", r-REG_F0) + } + if REG_FCSR0 <= r && r <= REG_FCSR31 { + return fmt.Sprintf("FCSR%d", r-REG_FCSR0) + } + if REG_FCC0 <= r && r <= REG_FCC31 { + return fmt.Sprintf("FCC%d", r-REG_FCC0) + } + return fmt.Sprintf("Rgok(%d)", r-obj.RBaseLOONG64) +} + +func DRconv(a int) string { + s := "C_??" 
+ if a >= C_NONE && a <= C_NCLASS { + s = cnames0[a] + } + return s +} diff --git a/src/cmd/internal/obj/loong64/obj.go b/src/cmd/internal/obj/loong64/obj.go new file mode 100644 index 0000000..dc05e18 --- /dev/null +++ b/src/cmd/internal/obj/loong64/obj.go @@ -0,0 +1,716 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package loong64 + +import ( + "cmd/internal/obj" + "cmd/internal/objabi" + "cmd/internal/sys" + "log" + "math" +) + +func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { + // Rewrite JMP/JAL to symbol as TYPE_BRANCH. + switch p.As { + case AJMP, + AJAL, + ARET, + obj.ADUFFZERO, + obj.ADUFFCOPY: + if p.To.Sym != nil { + p.To.Type = obj.TYPE_BRANCH + } + } + + // Rewrite float constants to values stored in memory. + switch p.As { + case AMOVF: + if p.From.Type == obj.TYPE_FCONST { + f32 := float32(p.From.Val.(float64)) + if math.Float32bits(f32) == 0 { + p.As = AMOVW + p.From.Type = obj.TYPE_REG + p.From.Reg = REGZERO + break + } + p.From.Type = obj.TYPE_MEM + p.From.Sym = ctxt.Float32Sym(f32) + p.From.Name = obj.NAME_EXTERN + p.From.Offset = 0 + } + + case AMOVD: + if p.From.Type == obj.TYPE_FCONST { + f64 := p.From.Val.(float64) + if math.Float64bits(f64) == 0 { + p.As = AMOVV + p.From.Type = obj.TYPE_REG + p.From.Reg = REGZERO + break + } + p.From.Type = obj.TYPE_MEM + p.From.Sym = ctxt.Float64Sym(f64) + p.From.Name = obj.NAME_EXTERN + p.From.Offset = 0 + } + } + + // Rewrite SUB constants into ADD. 
+ switch p.As { + case ASUB: + if p.From.Type == obj.TYPE_CONST { + p.From.Offset = -p.From.Offset + p.As = AADD + } + + case ASUBU: + if p.From.Type == obj.TYPE_CONST { + p.From.Offset = -p.From.Offset + p.As = AADDU + } + + case ASUBV: + if p.From.Type == obj.TYPE_CONST { + p.From.Offset = -p.From.Offset + p.As = AADDV + } + + case ASUBVU: + if p.From.Type == obj.TYPE_CONST { + p.From.Offset = -p.From.Offset + p.As = AADDVU + } + } +} + +func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { + c := ctxt0{ctxt: ctxt, newprog: newprog, cursym: cursym} + + p := c.cursym.Func().Text + textstksiz := p.To.Offset + + if textstksiz < 0 { + c.ctxt.Diag("negative frame size %d - did you mean NOFRAME?", textstksiz) + } + if p.From.Sym.NoFrame() { + if textstksiz != 0 { + c.ctxt.Diag("NOFRAME functions must have a frame size of 0, not %d", textstksiz) + } + } + + c.cursym.Func().Args = p.To.Val.(int32) + c.cursym.Func().Locals = int32(textstksiz) + + /* + * find leaf subroutines + * expand RET + */ + + for p := c.cursym.Func().Text; p != nil; p = p.Link { + switch p.As { + case obj.ATEXT: + p.Mark |= LABEL | LEAF | SYNC + if p.Link != nil { + p.Link.Mark |= LABEL + } + + case AMOVW, + AMOVV: + if p.To.Type == obj.TYPE_REG && p.To.Reg >= REG_SPECIAL { + p.Mark |= LABEL | SYNC + break + } + if p.From.Type == obj.TYPE_REG && p.From.Reg >= REG_SPECIAL { + p.Mark |= LABEL | SYNC + } + + case ASYSCALL, + AWORD: + p.Mark |= LABEL | SYNC + + case ANOR: + if p.To.Type == obj.TYPE_REG { + if p.To.Reg == REGZERO { + p.Mark |= LABEL | SYNC + } + } + + case AJAL, + obj.ADUFFZERO, + obj.ADUFFCOPY: + c.cursym.Func().Text.Mark &^= LEAF + fallthrough + + case AJMP, + ABEQ, + ABGEU, + ABLTU, + ABLTZ, + ABNE, + ABFPT, ABFPF: + p.Mark |= BRANCH + q1 := p.To.Target() + if q1 != nil { + for q1.As == obj.ANOP { + q1 = q1.Link + p.To.SetTarget(q1) + } + + if q1.Mark&LEAF == 0 { + q1.Mark |= LABEL + } + } + q1 = p.Link + if q1 != nil { + q1.Mark |= LABEL + } + + case ARET: + if 
p.Link != nil { + p.Link.Mark |= LABEL + } + } + } + + var mov, add obj.As + + add = AADDV + mov = AMOVV + + var q *obj.Prog + var q1 *obj.Prog + autosize := int32(0) + var p1 *obj.Prog + var p2 *obj.Prog + for p := c.cursym.Func().Text; p != nil; p = p.Link { + o := p.As + switch o { + case obj.ATEXT: + autosize = int32(textstksiz) + + if p.Mark&LEAF != 0 && autosize == 0 { + // A leaf function with no locals has no frame. + p.From.Sym.Set(obj.AttrNoFrame, true) + } + + if !p.From.Sym.NoFrame() { + // If there is a stack frame at all, it includes + // space to save the LR. + autosize += int32(c.ctxt.Arch.FixedFrameSize) + } + + if autosize&4 != 0 { + autosize += 4 + } + + if autosize == 0 && c.cursym.Func().Text.Mark&LEAF == 0 { + if c.cursym.Func().Text.From.Sym.NoSplit() { + if ctxt.Debugvlog { + ctxt.Logf("save suppressed in: %s\n", c.cursym.Name) + } + + c.cursym.Func().Text.Mark |= LEAF + } + } + + p.To.Offset = int64(autosize) - ctxt.Arch.FixedFrameSize + + if c.cursym.Func().Text.Mark&LEAF != 0 { + c.cursym.Set(obj.AttrLeaf, true) + if p.From.Sym.NoFrame() { + break + } + } + + if !p.From.Sym.NoSplit() { + p = c.stacksplit(p, autosize) // emit split check + } + + q = p + + if autosize != 0 { + // Make sure to save link register for non-empty frame, even if + // it is a leaf function, so that traceback works. + // Store link register before decrement SP, so if a signal comes + // during the execution of the function prologue, the traceback + // code will not see a half-updated stack frame. + // This sequence is not async preemptible, as if we open a frame + // at the current SP, it will clobber the saved LR. 
+ q = c.ctxt.StartUnsafePoint(q, c.newprog) + + q = obj.Appendp(q, newprog) + q.As = mov + q.Pos = p.Pos + q.From.Type = obj.TYPE_REG + q.From.Reg = REGLINK + q.To.Type = obj.TYPE_MEM + q.To.Offset = int64(-autosize) + q.To.Reg = REGSP + + q = obj.Appendp(q, newprog) + q.As = add + q.Pos = p.Pos + q.From.Type = obj.TYPE_CONST + q.From.Offset = int64(-autosize) + q.To.Type = obj.TYPE_REG + q.To.Reg = REGSP + q.Spadj = +autosize + + q = c.ctxt.EndUnsafePoint(q, c.newprog, -1) + } + + if c.cursym.Func().Text.From.Sym.Wrapper() && c.cursym.Func().Text.Mark&LEAF == 0 { + // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame + // + // MOV g_panic(g), R1 + // BEQ R1, end + // MOV panic_argp(R1), R2 + // ADD $(autosize+FIXED_FRAME), R29, R3 + // BNE R2, R3, end + // ADD $FIXED_FRAME, R29, R2 + // MOV R2, panic_argp(R1) + // end: + // NOP + // + // The NOP is needed to give the jumps somewhere to land. + // It is a liblink NOP, not an hardware NOP: it encodes to 0 instruction bytes. + // + // We don't generate this for leafs because that means the wrapped + // function was inlined into the wrapper. 
+ + q = obj.Appendp(q, newprog) + + q.As = mov + q.From.Type = obj.TYPE_MEM + q.From.Reg = REGG + q.From.Offset = 4 * int64(c.ctxt.Arch.PtrSize) // G.panic + q.To.Type = obj.TYPE_REG + q.To.Reg = REG_R19 + + q = obj.Appendp(q, newprog) + q.As = ABEQ + q.From.Type = obj.TYPE_REG + q.From.Reg = REG_R19 + q.To.Type = obj.TYPE_BRANCH + q.Mark |= BRANCH + p1 = q + + q = obj.Appendp(q, newprog) + q.As = mov + q.From.Type = obj.TYPE_MEM + q.From.Reg = REG_R19 + q.From.Offset = 0 // Panic.argp + q.To.Type = obj.TYPE_REG + q.To.Reg = REG_R4 + + q = obj.Appendp(q, newprog) + q.As = add + q.From.Type = obj.TYPE_CONST + q.From.Offset = int64(autosize) + ctxt.Arch.FixedFrameSize + q.Reg = REGSP + q.To.Type = obj.TYPE_REG + q.To.Reg = REG_R5 + + q = obj.Appendp(q, newprog) + q.As = ABNE + q.From.Type = obj.TYPE_REG + q.From.Reg = REG_R4 + q.Reg = REG_R5 + q.To.Type = obj.TYPE_BRANCH + q.Mark |= BRANCH + p2 = q + + q = obj.Appendp(q, newprog) + q.As = add + q.From.Type = obj.TYPE_CONST + q.From.Offset = ctxt.Arch.FixedFrameSize + q.Reg = REGSP + q.To.Type = obj.TYPE_REG + q.To.Reg = REG_R4 + + q = obj.Appendp(q, newprog) + q.As = mov + q.From.Type = obj.TYPE_REG + q.From.Reg = REG_R4 + q.To.Type = obj.TYPE_MEM + q.To.Reg = REG_R19 + q.To.Offset = 0 // Panic.argp + + q = obj.Appendp(q, newprog) + + q.As = obj.ANOP + p1.To.SetTarget(q) + p2.To.SetTarget(q) + } + + case ARET: + if p.From.Type == obj.TYPE_CONST { + ctxt.Diag("using BECOME (%v) is not supported!", p) + break + } + + retSym := p.To.Sym + p.To.Name = obj.NAME_NONE // clear fields as we may modify p to other instruction + p.To.Sym = nil + + if c.cursym.Func().Text.Mark&LEAF != 0 { + if autosize == 0 { + p.As = AJMP + p.From = obj.Addr{} + if retSym != nil { // retjmp + p.To.Type = obj.TYPE_BRANCH + p.To.Name = obj.NAME_EXTERN + p.To.Sym = retSym + } else { + p.To.Type = obj.TYPE_MEM + p.To.Reg = REGLINK + p.To.Offset = 0 + } + p.Mark |= BRANCH + break + } + + p.As = add + p.From.Type = obj.TYPE_CONST + p.From.Offset = 
// stacksplit appends the stack-split check to the instruction list
// after p and returns the last instruction it appended (a zero-width
// NOP that the "stack is fine" branch targets).
//
// The emitted sequence is, in order:
//   - when c.ctxt.Flag_maymorestack is set, a call to that function
//     bracketed by a save and restore of REGLINK and REGCTXT;
//   - a load of the stack guard from g into R19;
//   - a comparison against SP whose form depends on framesize
//     (<= StackSmall, > StackSmall, > StackBig);
//   - a call to the appropriate runtime.morestack variant followed by
//     a jump back to the stack guard load.
//
// framesize is the frame size used to pick the comparison form.
func (c *ctxt0) stacksplit(p *obj.Prog, framesize int32) *obj.Prog {
	var mov, add obj.As

	add = AADDV
	mov = AMOVV
	if c.ctxt.Flag_maymorestack != "" {
		// Save LR and REGCTXT.
		frameSize := 2 * c.ctxt.Arch.PtrSize

		p = c.ctxt.StartUnsafePoint(p, c.newprog)

		// MOV REGLINK, -8/-16(SP)
		p = obj.Appendp(p, c.newprog)
		p.As = mov
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REGLINK
		p.To.Type = obj.TYPE_MEM
		p.To.Offset = int64(-frameSize)
		p.To.Reg = REGSP

		// MOV REGCTXT, -4/-8(SP)
		p = obj.Appendp(p, c.newprog)
		p.As = mov
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REGCTXT
		p.To.Type = obj.TYPE_MEM
		p.To.Offset = -int64(c.ctxt.Arch.PtrSize)
		p.To.Reg = REGSP

		// ADD $-8/$-16, SP
		p = obj.Appendp(p, c.newprog)
		p.As = add
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(-frameSize)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REGSP
		p.Spadj = int32(frameSize)

		// JAL maymorestack
		p = obj.Appendp(p, c.newprog)
		p.As = AJAL
		p.To.Type = obj.TYPE_BRANCH
		// See ../x86/obj6.go
		p.To.Sym = c.ctxt.LookupABI(c.ctxt.Flag_maymorestack, c.cursym.ABI())
		p.Mark |= BRANCH

		// Restore LR and REGCTXT.

		// MOV 0(SP), REGLINK
		p = obj.Appendp(p, c.newprog)
		p.As = mov
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = 0
		p.From.Reg = REGSP
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REGLINK

		// MOV 4/8(SP), REGCTXT
		p = obj.Appendp(p, c.newprog)
		p.As = mov
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = int64(c.ctxt.Arch.PtrSize)
		p.From.Reg = REGSP
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REGCTXT

		// ADD $8/$16, SP
		p = obj.Appendp(p, c.newprog)
		p.As = add
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(frameSize)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REGSP
		p.Spadj = int32(-frameSize)

		p = c.ctxt.EndUnsafePoint(p, c.newprog, -1)
	}

	// Jump back to here after morestack returns.
	// startPred.Link (the stack guard load appended next) is the
	// target of the final JMP below.
	startPred := p

	// MOV g_stackguard(g), R19
	p = obj.Appendp(p, c.newprog)

	p.As = mov
	p.From.Type = obj.TYPE_MEM
	p.From.Reg = REGG
	p.From.Offset = 2 * int64(c.ctxt.Arch.PtrSize) // G.stackguard0
	if c.cursym.CFunc() {
		p.From.Offset = 3 * int64(c.ctxt.Arch.PtrSize) // G.stackguard1
	}
	p.To.Type = obj.TYPE_REG
	p.To.Reg = REG_R19

	// Mark the stack bound check and morestack call async nonpreemptible.
	// If we get preempted here, when resumed the preemption request is
	// cleared, but we'll still call morestack, which will double the stack
	// unnecessarily. See issue #35470.
	p = c.ctxt.StartUnsafePoint(p, c.newprog)

	// q is the underflow-guard branch; it stays nil unless
	// framesize > StackBig.
	var q *obj.Prog
	if framesize <= objabi.StackSmall {
		// small stack: SP < stackguard
		//	SGTU	SP, stackguard, R19
		p = obj.Appendp(p, c.newprog)

		p.As = ASGTU
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REGSP
		p.Reg = REG_R19
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_R19
	} else {
		// large stack: SP-framesize < stackguard-StackSmall
		offset := int64(framesize) - objabi.StackSmall
		if framesize > objabi.StackBig {
			// Such a large stack we need to protect against underflow.
			// The runtime guarantees SP > objabi.StackBig, but
			// framesize is large enough that SP-framesize may
			// underflow, causing a direct comparison with the
			// stack guard to incorrectly succeed. We explicitly
			// guard against underflow.
			//
			//	SGTU	$(framesize-StackSmall), SP, R4
			//	BNE	R4, label-of-call-to-morestack

			p = obj.Appendp(p, c.newprog)
			p.As = ASGTU
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = offset
			p.Reg = REGSP
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_R4

			p = obj.Appendp(p, c.newprog)
			q = p
			p.As = ABNE
			p.From.Type = obj.TYPE_REG
			p.From.Reg = REG_R4
			p.To.Type = obj.TYPE_BRANCH
			p.Mark |= BRANCH
		}

		// ADD $-(framesize-StackSmall), SP, R4
		p = obj.Appendp(p, c.newprog)

		p.As = add
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = -offset
		p.Reg = REGSP
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_R4

		// SGTU R4, stackguard, R19
		p = obj.Appendp(p, c.newprog)
		p.As = ASGTU
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_R4
		p.Reg = REG_R19
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_R19
	}

	// q1: BNE R19, done
	p = obj.Appendp(p, c.newprog)
	q1 := p

	p.As = ABNE
	p.From.Type = obj.TYPE_REG
	p.From.Reg = REG_R19
	p.To.Type = obj.TYPE_BRANCH
	p.Mark |= BRANCH

	// MOV LINK, R5
	p = obj.Appendp(p, c.newprog)

	p.As = mov
	p.From.Type = obj.TYPE_REG
	p.From.Reg = REGLINK
	p.To.Type = obj.TYPE_REG
	p.To.Reg = REG_R5
	if q != nil {
		// The underflow guard branches straight to the morestack call.
		q.To.SetTarget(p)
		p.Mark |= LABEL
	}

	p = c.ctxt.EmitEntryStackMap(c.cursym, p, c.newprog)

	// JAL runtime.morestack(SB)
	p = obj.Appendp(p, c.newprog)

	p.As = AJAL
	p.To.Type = obj.TYPE_BRANCH
	if c.cursym.CFunc() {
		p.To.Sym = c.ctxt.Lookup("runtime.morestackc")
	} else if !c.cursym.Func().Text.From.Sym.NeedCtxt() {
		p.To.Sym = c.ctxt.Lookup("runtime.morestack_noctxt")
	} else {
		p.To.Sym = c.ctxt.Lookup("runtime.morestack")
	}
	p.Mark |= BRANCH

	p = c.ctxt.EndUnsafePoint(p, c.newprog, -1)

	// JMP start
	p = obj.Appendp(p, c.newprog)

	p.As = AJMP
	p.To.Type = obj.TYPE_BRANCH
	p.To.SetTarget(startPred.Link)
	startPred.Link.Mark |= LABEL
	p.Mark |= BRANCH

	// placeholder for q1's jump target
	p = obj.Appendp(p, c.newprog)

	p.As = obj.ANOP // zero-width place holder
	q1.To.SetTarget(p)

	return p
}
// Linkloong64 is the obj.LinkArch value for sys.ArchLoong64. It wires
// together this package's entry points: buildop as Init, preprocess as
// Preprocess, span0 as Assemble and progedit as Progedit, and supplies
// LOONG64DWARFRegisters as the DWARF register table.
var Linkloong64 = obj.LinkArch{
	Arch:           sys.ArchLoong64,
	Init:           buildop,
	Preprocess:     preprocess,
	Assemble:       span0,
	Progedit:       progedit,
	DWARFRegisters: LOONG64DWARFRegisters,
}
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package mips + +import ( + "cmd/internal/obj" +) + +//go:generate go run ../stringer.go -i $GOFILE -o anames.go -p mips + +/* + * mips 64 + */ +const ( + NSNAME = 8 + NSYM = 50 + NREG = 32 /* number of general registers */ + NFREG = 32 /* number of floating point registers */ + NWREG = 32 /* number of MSA registers */ +) + +const ( + REG_R0 = obj.RBaseMIPS + iota // must be a multiple of 32 + REG_R1 + REG_R2 + REG_R3 + REG_R4 + REG_R5 + REG_R6 + REG_R7 + REG_R8 + REG_R9 + REG_R10 + REG_R11 + REG_R12 + REG_R13 + REG_R14 + REG_R15 + REG_R16 + REG_R17 + REG_R18 + REG_R19 + REG_R20 + REG_R21 + REG_R22 + REG_R23 + REG_R24 + REG_R25 + REG_R26 + REG_R27 + REG_R28 + REG_R29 + REG_R30 + REG_R31 + + REG_F0 // must be a multiple of 32 + REG_F1 + REG_F2 + REG_F3 + REG_F4 + REG_F5 + REG_F6 + REG_F7 + REG_F8 + REG_F9 + REG_F10 + REG_F11 + REG_F12 + REG_F13 + REG_F14 + REG_F15 + REG_F16 + REG_F17 + REG_F18 + REG_F19 + REG_F20 + REG_F21 + REG_F22 + REG_F23 + REG_F24 + REG_F25 + REG_F26 + REG_F27 + REG_F28 + REG_F29 + REG_F30 + REG_F31 + + // co-processor 0 control registers + REG_M0 // must be a multiple of 32 + REG_M1 + REG_M2 + REG_M3 + REG_M4 + REG_M5 + REG_M6 + REG_M7 + REG_M8 + REG_M9 + REG_M10 + REG_M11 + REG_M12 + REG_M13 + REG_M14 + REG_M15 + REG_M16 + REG_M17 + REG_M18 + REG_M19 + REG_M20 + REG_M21 + REG_M22 + REG_M23 + REG_M24 + REG_M25 + REG_M26 + REG_M27 + REG_M28 + REG_M29 + REG_M30 + REG_M31 + + // FPU control registers + REG_FCR0 // must be a multiple of 32 + REG_FCR1 + REG_FCR2 + REG_FCR3 + REG_FCR4 + REG_FCR5 + REG_FCR6 + REG_FCR7 + REG_FCR8 + REG_FCR9 + REG_FCR10 + REG_FCR11 + REG_FCR12 + REG_FCR13 + REG_FCR14 + REG_FCR15 + REG_FCR16 + REG_FCR17 + REG_FCR18 + REG_FCR19 + REG_FCR20 + 
REG_FCR21 + REG_FCR22 + REG_FCR23 + REG_FCR24 + REG_FCR25 + REG_FCR26 + REG_FCR27 + REG_FCR28 + REG_FCR29 + REG_FCR30 + REG_FCR31 + + // MSA registers + // The lower bits of W registers are alias to F registers + REG_W0 // must be a multiple of 32 + REG_W1 + REG_W2 + REG_W3 + REG_W4 + REG_W5 + REG_W6 + REG_W7 + REG_W8 + REG_W9 + REG_W10 + REG_W11 + REG_W12 + REG_W13 + REG_W14 + REG_W15 + REG_W16 + REG_W17 + REG_W18 + REG_W19 + REG_W20 + REG_W21 + REG_W22 + REG_W23 + REG_W24 + REG_W25 + REG_W26 + REG_W27 + REG_W28 + REG_W29 + REG_W30 + REG_W31 + + REG_HI + REG_LO + + REG_LAST = REG_LO // the last defined register + + REG_SPECIAL = REG_M0 + + REGZERO = REG_R0 /* set to zero */ + REGSP = REG_R29 + REGSB = REG_R28 + REGLINK = REG_R31 + REGRET = REG_R1 + REGARG = -1 /* -1 disables passing the first argument in register */ + REGRT1 = REG_R1 /* reserved for runtime, duffzero and duffcopy */ + REGRT2 = REG_R2 /* reserved for runtime, duffcopy */ + REGCTXT = REG_R22 /* context for closures */ + REGG = REG_R30 /* G */ + REGTMP = REG_R23 /* used by the linker */ + FREGRET = REG_F0 +) + +// https://llvm.org/svn/llvm-project/llvm/trunk/lib/Target/Mips/MipsRegisterInfo.td search for DwarfRegNum +// https://gcc.gnu.org/viewcvs/gcc/trunk/gcc/config/mips/mips.c?view=co&revision=258099&content-type=text%2Fplain search for mips_dwarf_regno +// For now, this is adequate for both 32 and 64 bit. 
+var MIPSDWARFRegisters = map[int16]int16{} + +func init() { + // f assigns dwarfregisters[from:to] = (base):(to-from+base) + f := func(from, to, base int16) { + for r := int16(from); r <= to; r++ { + MIPSDWARFRegisters[r] = (r - from) + base + } + } + f(REG_R0, REG_R31, 0) + f(REG_F0, REG_F31, 32) // For 32-bit MIPS, compiler only uses even numbered registers -- see cmd/compile/internal/ssa/gen/MIPSOps.go + MIPSDWARFRegisters[REG_HI] = 64 + MIPSDWARFRegisters[REG_LO] = 65 + // The lower bits of W registers are alias to F registers + f(REG_W0, REG_W31, 32) +} + +const ( + BIG = 32766 +) + +const ( + /* mark flags */ + FOLL = 1 << 0 + LABEL = 1 << 1 + LEAF = 1 << 2 + SYNC = 1 << 3 + BRANCH = 1 << 4 + LOAD = 1 << 5 + FCMP = 1 << 6 + NOSCHED = 1 << 7 + + NSCHED = 20 +) + +const ( + C_NONE = iota + C_REG + C_FREG + C_FCREG + C_MREG /* special processor register */ + C_WREG /* MSA registers */ + C_HI + C_LO + C_ZCON + C_SCON /* 16 bit signed */ + C_UCON /* 32 bit signed, low 16 bits 0 */ + C_ADD0CON + C_AND0CON + C_ADDCON /* -0x8000 <= v < 0 */ + C_ANDCON /* 0 < v <= 0xFFFF */ + C_LCON /* other 32 */ + C_DCON /* other 64 (could subdivide further) */ + C_SACON /* $n(REG) where n <= int16 */ + C_SECON + C_LACON /* $n(REG) where int16 < n <= int32 */ + C_LECON + C_DACON /* $n(REG) where int32 < n */ + C_STCON /* $tlsvar */ + C_SBRA + C_LBRA + C_SAUTO + C_LAUTO + C_SEXT + C_LEXT + C_ZOREG + C_SOREG + C_LOREG + C_GOK + C_ADDR + C_TLS + C_TEXTSIZE + + C_NCLASS /* must be the last */ +) + +const ( + AABSD = obj.ABaseMIPS + obj.A_ARCHSPECIFIC + iota + AABSF + AABSW + AADD + AADDD + AADDF + AADDU + AADDW + AAND + ABEQ + ABFPF + ABFPT + ABGEZ + ABGEZAL + ABGTZ + ABLEZ + ABLTZ + ABLTZAL + ABNE + ABREAK + ACLO + ACLZ + ACMOVF + ACMOVN + ACMOVT + ACMOVZ + ACMPEQD + ACMPEQF + ACMPGED + ACMPGEF + ACMPGTD + ACMPGTF + ADIV + ADIVD + ADIVF + ADIVU + ADIVW + AGOK + ALL + ALLV + ALUI + AMADD + AMOVB + AMOVBU + AMOVD + AMOVDF + AMOVDW + AMOVF + AMOVFD + AMOVFW + AMOVH + AMOVHU + AMOVW + 
AMOVWD + AMOVWF + AMOVWL + AMOVWR + AMSUB + AMUL + AMULD + AMULF + AMULU + AMULW + ANEGD + ANEGF + ANEGW + ANEGV + ANOOP // hardware nop + ANOR + AOR + AREM + AREMU + ARFE + AROTR + AROTRV + ASC + ASCV + ASGT + ASGTU + ASLL + ASQRTD + ASQRTF + ASRA + ASRL + ASUB + ASUBD + ASUBF + ASUBU + ASUBW + ASYNC + ASYSCALL + ATEQ + ATLBP + ATLBR + ATLBWI + ATLBWR + ATNE + AWORD + AXOR + + /* 64-bit */ + AMOVV + AMOVVL + AMOVVR + ASLLV + ASRAV + ASRLV + ADIVV + ADIVVU + AREMV + AREMVU + AMULV + AMULVU + AADDV + AADDVU + ASUBV + ASUBVU + + /* 64-bit FP */ + ATRUNCFV + ATRUNCDV + ATRUNCFW + ATRUNCDW + AMOVWU + AMOVFV + AMOVDV + AMOVVF + AMOVVD + + /* MSA */ + AVMOVB + AVMOVH + AVMOVW + AVMOVD + + ALAST + + // aliases + AJMP = obj.AJMP + AJAL = obj.ACALL + ARET = obj.ARET +) + +func init() { + // The asm encoder generally assumes that the lowest 5 bits of the + // REG_XX constants match the machine instruction encoding, i.e. + // the lowest 5 bits is the register number. + // Check this here. + if REG_R0%32 != 0 { + panic("REG_R0 is not a multiple of 32") + } + if REG_F0%32 != 0 { + panic("REG_F0 is not a multiple of 32") + } + if REG_M0%32 != 0 { + panic("REG_M0 is not a multiple of 32") + } + if REG_FCR0%32 != 0 { + panic("REG_FCR0 is not a multiple of 32") + } + if REG_W0%32 != 0 { + panic("REG_W0 is not a multiple of 32") + } +} diff --git a/src/cmd/internal/obj/mips/anames.go b/src/cmd/internal/obj/mips/anames.go new file mode 100644 index 0000000..ca2ad5a --- /dev/null +++ b/src/cmd/internal/obj/mips/anames.go @@ -0,0 +1,137 @@ +// Code generated by stringer -i a.out.go -o anames.go -p mips; DO NOT EDIT. 
+ +package mips + +import "cmd/internal/obj" + +var Anames = []string{ + obj.A_ARCHSPECIFIC: "ABSD", + "ABSF", + "ABSW", + "ADD", + "ADDD", + "ADDF", + "ADDU", + "ADDW", + "AND", + "BEQ", + "BFPF", + "BFPT", + "BGEZ", + "BGEZAL", + "BGTZ", + "BLEZ", + "BLTZ", + "BLTZAL", + "BNE", + "BREAK", + "CLO", + "CLZ", + "CMOVF", + "CMOVN", + "CMOVT", + "CMOVZ", + "CMPEQD", + "CMPEQF", + "CMPGED", + "CMPGEF", + "CMPGTD", + "CMPGTF", + "DIV", + "DIVD", + "DIVF", + "DIVU", + "DIVW", + "GOK", + "LL", + "LLV", + "LUI", + "MADD", + "MOVB", + "MOVBU", + "MOVD", + "MOVDF", + "MOVDW", + "MOVF", + "MOVFD", + "MOVFW", + "MOVH", + "MOVHU", + "MOVW", + "MOVWD", + "MOVWF", + "MOVWL", + "MOVWR", + "MSUB", + "MUL", + "MULD", + "MULF", + "MULU", + "MULW", + "NEGD", + "NEGF", + "NEGW", + "NEGV", + "NOOP", + "NOR", + "OR", + "REM", + "REMU", + "RFE", + "ROTR", + "ROTRV", + "SC", + "SCV", + "SGT", + "SGTU", + "SLL", + "SQRTD", + "SQRTF", + "SRA", + "SRL", + "SUB", + "SUBD", + "SUBF", + "SUBU", + "SUBW", + "SYNC", + "SYSCALL", + "TEQ", + "TLBP", + "TLBR", + "TLBWI", + "TLBWR", + "TNE", + "WORD", + "XOR", + "MOVV", + "MOVVL", + "MOVVR", + "SLLV", + "SRAV", + "SRLV", + "DIVV", + "DIVVU", + "REMV", + "REMVU", + "MULV", + "MULVU", + "ADDV", + "ADDVU", + "SUBV", + "SUBVU", + "TRUNCFV", + "TRUNCDV", + "TRUNCFW", + "TRUNCDW", + "MOVWU", + "MOVFV", + "MOVDV", + "MOVVF", + "MOVVD", + "VMOVB", + "VMOVH", + "VMOVW", + "VMOVD", + "LAST", +} diff --git a/src/cmd/internal/obj/mips/anames0.go b/src/cmd/internal/obj/mips/anames0.go new file mode 100644 index 0000000..c300696 --- /dev/null +++ b/src/cmd/internal/obj/mips/anames0.go @@ -0,0 +1,45 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 

package mips

// cnames0 lists the operand class names in the same order as the C_*
// constants declared in a.out.go (C_NONE first, C_NCLASS last), so entry i
// is the name of class i with the "C_" prefix removed. Keep the two lists
// in step when a class is added or moved.
// NOTE(review): presumably consumed by DRconv when printing operand
// classes -- confirm against the caller.
var cnames0 = []string{
	"NONE",
	"REG",
	"FREG",
	"FCREG",
	"MREG",
	"WREG",
	"HI",
	"LO",
	"ZCON",
	"SCON",
	"UCON",
	"ADD0CON",
	"AND0CON",
	"ADDCON",
	"ANDCON",
	"LCON",
	"DCON",
	"SACON",
	"SECON",
	"LACON",
	"LECON",
	"DACON",
	"STCON",
	"SBRA",
	"LBRA",
	"SAUTO",
	"LAUTO",
	"SEXT",
	"LEXT",
	"ZOREG",
	"SOREG",
	"LOREG",
	"GOK",
	"ADDR",
	"TLS",
	"TEXTSIZE",
	"NCLASS",
}
diff --git a/src/cmd/internal/obj/mips/asm0.go b/src/cmd/internal/obj/mips/asm0.go new file mode 100644 index 0000000..ab8d37b --- /dev/null +++ b/src/cmd/internal/obj/mips/asm0.go @@ -0,0 +1,2121 @@
// cmd/9l/optab.c, cmd/9l/asmout.c from Vita Nuova.
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
// Portions Copyright © 1997-1999 Vita Nuova Limited
// Portions Copyright © 2000-2008 Vita Nuova Holdings Limited (www.vitanuova.com)
// Portions Copyright © 2004,2006 Bruce Ellis
// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
// Revisions Copyright © 2000-2008 Lucent Technologies Inc. and others
// Portions Copyright © 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package mips

import (
	"cmd/internal/obj"
	"cmd/internal/objabi"
	"cmd/internal/sys"
	"fmt"
	"log"
	"sort"
)

// ctxt0 holds state while assembling a single function.
// Each function gets a fresh ctxt0.
// This allows for multiple functions to be safely concurrently assembled.
type ctxt0 struct {
	ctxt       *obj.Link     // link context: diagnostics and target architecture
	newprog    obj.ProgAlloc // allocator for Progs inserted during assembly
	cursym     *obj.LSym     // the function symbol being assembled
	autosize   int32         // span0 sets this to the TEXT frame size plus Arch.FixedFrameSize
	instoffset int64         // offset/constant of the operand most recently classified by aclass
	pc         int64         // Pc of the instruction being encoded (set by span0 before asmout)
}

// Instruction layout.

const (
	// mips64FuncAlign is the alignment, in bytes, to which span0 rounds a
	// function's size up on sys.MIPS64.
	mips64FuncAlign = 8
)

const (
	// r0iszero being non-zero makes cmp accept a zero constant (C_ZCON)
	// where optab asks for a register (C_REG).
	r0iszero = 1
)

// Optab is one row of the instruction table: an opcode, the operand classes
// it accepts, and how the matching instruction is encoded.
type Optab struct {
	as     obj.As         // opcode
	a1     uint8          // class required of p.From (matched through xcmp)
	a2     uint8          // C_REG if the middle register p.Reg must be set, C_NONE if it must not
	a3     uint8          // class required of p.To (matched through xcmp)
	type_  int8           // encoding case handed to asmout; span0 treats 6 as a conditional branch
	size   int8           // encoded size in bytes; 0 for entries that emit nothing
	param  int16          // register associated with the entry (REGSB, REGSP, REGZERO, REGLINK) or 0; NOTE(review): its consumer is not visible here -- confirm
	family sys.ArchFamily // 0 means both sys.MIPS and sys.MIPS64
	flag   uint8          // bit set of the Optab.flag constants below
}

const (
	// Optab.flag
	NOTUSETMP = 1 << iota // p expands to multiple instructions, but does NOT use REGTMP
)

// optab is the instruction table. Columns are, in order:
// as, a1, a2, a3, type_, size, param, family, flag (see Optab).
// buildop sorts every row before the obj.AXXX terminator and slices the
// result into oprange, so the terminator must remain the last row.
var optab = []Optab{
	{obj.ATEXT, C_LEXT, C_NONE, C_TEXTSIZE, 0, 0, 0, sys.MIPS64, 0},
	{obj.ATEXT, C_ADDR, C_NONE, C_TEXTSIZE, 0, 0, 0, 0, 0},

	{AMOVW, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
	{AMOVV, C_REG, C_NONE, C_REG, 1, 4, 0, sys.MIPS64, 0},
	{AMOVB, C_REG, C_NONE, C_REG, 12, 8, 0, 0, NOTUSETMP},
	{AMOVBU, C_REG, C_NONE, C_REG, 13, 4, 0, 0, 0},
	{AMOVWU, C_REG, C_NONE, C_REG, 14, 8, 0, sys.MIPS64, NOTUSETMP},

	{ASUB, C_REG, C_REG, C_REG, 2, 4, 0, 0, 0},
	{ASUBV, C_REG, C_REG, C_REG, 2, 4, 0, sys.MIPS64, 0},
	{AADD, C_REG, C_REG, C_REG, 2, 4, 0, 0, 0},
	{AADDV, C_REG, C_REG, C_REG, 2, 4, 0, sys.MIPS64, 0},
	{AAND, C_REG, C_REG, C_REG, 2, 4, 0, 0, 0},
	{ASUB, C_REG, C_NONE, C_REG, 2, 4, 0, 0, 0},
	{ASUBV, C_REG, C_NONE, C_REG, 2, 4, 0, sys.MIPS64, 0},
	{AADD, C_REG, C_NONE, C_REG, 2, 4, 0, 0, 0},
	{AADDV, C_REG, C_NONE, C_REG, 2, 4, 0, sys.MIPS64, 0},
	{AAND, C_REG, C_NONE, C_REG, 2, 4, 0, 0, 0},
	{ACMOVN, C_REG, C_REG, C_REG, 2, 4, 0, 0, 0},
	{ANEGW, C_REG, C_NONE, C_REG, 2, 4, 0, 0, 0},
	{ANEGV, C_REG, C_NONE, C_REG, 2, 4, 0, sys.MIPS64, 0},

	{ASLL, C_REG, C_NONE, C_REG, 9, 4, 0, 0, 0},
	{ASLL, C_REG, C_REG, C_REG, 9, 4, 0, 0, 0},
	{ASLLV, C_REG, C_NONE, C_REG, 9, 4, 0, sys.MIPS64, 0},
	{ASLLV, C_REG, C_REG, C_REG, 9, 4, 0, sys.MIPS64, 0},
	{ACLO, C_REG, C_NONE, C_REG, 9, 4, 0, 0, 0},

	{AADDF, C_FREG, C_NONE, C_FREG, 32, 4, 0, 0, 0},
	{AADDF, C_FREG, C_REG, C_FREG, 32, 4, 0, 0, 0},
	{ACMPEQF, C_FREG, C_REG, C_NONE, 32, 4, 0, 0, 0},
	{AABSF, C_FREG, C_NONE, C_FREG, 33, 4, 0, 0, 0},
	{AMOVVF, C_FREG, C_NONE, C_FREG, 33, 4, 0, sys.MIPS64, 0},
	{AMOVF, C_FREG, C_NONE, C_FREG, 33, 4, 0, 0, 0},
	{AMOVD, C_FREG, C_NONE, C_FREG, 33, 4, 0, 0, 0},

	{AMOVW, C_REG, C_NONE, C_SEXT, 7, 4, REGSB, sys.MIPS64, 0},
	{AMOVWU, C_REG, C_NONE, C_SEXT, 7, 4, REGSB, sys.MIPS64, 0},
	{AMOVV, C_REG, C_NONE, C_SEXT, 7, 4, REGSB, sys.MIPS64, 0},
	{AMOVB, C_REG, C_NONE, C_SEXT, 7, 4, REGSB, sys.MIPS64, 0},
	{AMOVBU, C_REG, C_NONE, C_SEXT, 7, 4, REGSB, sys.MIPS64, 0},
	{AMOVWL, C_REG, C_NONE, C_SEXT, 7, 4, REGSB, sys.MIPS64, 0},
	{AMOVVL, C_REG, C_NONE, C_SEXT, 7, 4, REGSB, sys.MIPS64, 0},
	{AMOVW, C_REG, C_NONE, C_SAUTO, 7, 4, REGSP, 0, 0},
	{AMOVWU, C_REG, C_NONE, C_SAUTO, 7, 4, REGSP, sys.MIPS64, 0},
	{AMOVV, C_REG, C_NONE, C_SAUTO, 7, 4, REGSP, sys.MIPS64, 0},
	{AMOVB, C_REG, C_NONE, C_SAUTO, 7, 4, REGSP, 0, 0},
	{AMOVBU, C_REG, C_NONE, C_SAUTO, 7, 4, REGSP, 0, 0},
	{AMOVWL, C_REG, C_NONE, C_SAUTO, 7, 4, REGSP, 0, 0},
	{AMOVVL, C_REG, C_NONE, C_SAUTO, 7, 4, REGSP, sys.MIPS64, 0},
	{AMOVW, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, 0, 0},
	{AMOVWU, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, sys.MIPS64, 0},
	{AMOVV, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, sys.MIPS64, 0},
	{AMOVB, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, 0, 0},
	{AMOVBU, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, 0, 0},
	{AMOVWL, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, 0, 0},
	{AMOVVL, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, sys.MIPS64, 0},
	{ASC, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, 0, 0},
	{ASCV, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, sys.MIPS64, 0},

	{AMOVW, C_SEXT, C_NONE, C_REG, 8, 4, REGSB, sys.MIPS64, 0},
	{AMOVWU, C_SEXT, C_NONE, C_REG, 8, 4, REGSB, sys.MIPS64, 0},
	{AMOVV, C_SEXT, C_NONE, C_REG, 8, 4, REGSB, sys.MIPS64, 0},
	{AMOVB, C_SEXT, C_NONE, C_REG, 8, 4, REGSB, sys.MIPS64, 0},
	{AMOVBU, C_SEXT, C_NONE, C_REG, 8, 4, REGSB, sys.MIPS64, 0},
	{AMOVWL, C_SEXT, C_NONE, C_REG, 8, 4, REGSB, sys.MIPS64, 0},
	{AMOVVL, C_SEXT, C_NONE, C_REG, 8, 4, REGSB, sys.MIPS64, 0},
	{AMOVW, C_SAUTO, C_NONE, C_REG, 8, 4, REGSP, 0, 0},
	{AMOVWU, C_SAUTO, C_NONE, C_REG, 8, 4, REGSP, sys.MIPS64, 0},
	{AMOVV, C_SAUTO, C_NONE, C_REG, 8, 4, REGSP, sys.MIPS64, 0},
	{AMOVB, C_SAUTO, C_NONE, C_REG, 8, 4, REGSP, 0, 0},
	{AMOVBU, C_SAUTO, C_NONE, C_REG, 8, 4, REGSP, 0, 0},
	{AMOVWL, C_SAUTO, C_NONE, C_REG, 8, 4, REGSP, 0, 0},
	{AMOVVL, C_SAUTO, C_NONE, C_REG, 8, 4, REGSP, sys.MIPS64, 0},
	{AMOVW, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, 0, 0},
	{AMOVWU, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, sys.MIPS64, 0},
	{AMOVV, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, sys.MIPS64, 0},
	{AMOVB, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, 0, 0},
	{AMOVBU, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, 0, 0},
	{AMOVWL, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, 0, 0},
	{AMOVVL, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, sys.MIPS64, 0},
	{ALL, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, 0, 0},
	{ALLV, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, sys.MIPS64, 0},

	{AMOVW, C_REG, C_NONE, C_LEXT, 35, 12, REGSB, sys.MIPS64, 0},
	{AMOVWU, C_REG, C_NONE, C_LEXT, 35, 12, REGSB, sys.MIPS64, 0},
	{AMOVV, C_REG, C_NONE, C_LEXT, 35, 12, REGSB, sys.MIPS64, 0},
	{AMOVB, C_REG, C_NONE, C_LEXT, 35, 12, REGSB, sys.MIPS64, 0},
	{AMOVBU, C_REG, C_NONE, C_LEXT, 35, 12, REGSB, sys.MIPS64, 0},
	{AMOVW, C_REG, C_NONE, C_LAUTO, 35, 12, REGSP, 0, 0},
	{AMOVWU, C_REG, C_NONE, C_LAUTO, 35, 12, REGSP, sys.MIPS64, 0},
	{AMOVV, C_REG, C_NONE, C_LAUTO, 35, 12, REGSP, sys.MIPS64, 0},
	{AMOVB, C_REG, C_NONE, C_LAUTO, 35, 12, REGSP, 0, 0},
	{AMOVBU, C_REG, C_NONE, C_LAUTO, 35, 12, REGSP, 0, 0},
	{AMOVW, C_REG, C_NONE, C_LOREG, 35, 12, REGZERO, 0, 0},
	{AMOVWU, C_REG, C_NONE, C_LOREG, 35, 12, REGZERO, sys.MIPS64, 0},
	{AMOVV, C_REG, C_NONE, C_LOREG, 35, 12, REGZERO, sys.MIPS64, 0},
	{AMOVB, C_REG, C_NONE, C_LOREG, 35, 12, REGZERO, 0, 0},
	{AMOVBU, C_REG, C_NONE, C_LOREG, 35, 12, REGZERO, 0, 0},
	{ASC, C_REG, C_NONE, C_LOREG, 35, 12, REGZERO, 0, 0},
	{AMOVW, C_REG, C_NONE, C_ADDR, 50, 8, 0, sys.MIPS, 0},
	{AMOVW, C_REG, C_NONE, C_ADDR, 50, 12, 0, sys.MIPS64, 0},
	{AMOVWU, C_REG, C_NONE, C_ADDR, 50, 12, 0, sys.MIPS64, 0},
	{AMOVV, C_REG, C_NONE, C_ADDR, 50, 12, 0, sys.MIPS64, 0},
	{AMOVB, C_REG, C_NONE, C_ADDR, 50, 8, 0, sys.MIPS, 0},
	{AMOVB, C_REG, C_NONE, C_ADDR, 50, 12, 0, sys.MIPS64, 0},
	{AMOVBU, C_REG, C_NONE, C_ADDR, 50, 8, 0, sys.MIPS, 0},
	{AMOVBU, C_REG, C_NONE, C_ADDR, 50, 12, 0, sys.MIPS64, 0},
	{AMOVW, C_REG, C_NONE, C_TLS, 53, 8, 0, 0, NOTUSETMP},
	{AMOVWU, C_REG, C_NONE, C_TLS, 53, 8, 0, sys.MIPS64, NOTUSETMP},
	{AMOVV, C_REG, C_NONE, C_TLS, 53, 8, 0, sys.MIPS64, NOTUSETMP},
	{AMOVB, C_REG, C_NONE, C_TLS, 53, 8, 0, 0, NOTUSETMP},
	{AMOVBU, C_REG, C_NONE, C_TLS, 53, 8, 0, 0, NOTUSETMP},

	{AMOVW, C_LEXT, C_NONE, C_REG, 36, 12, REGSB, sys.MIPS64, 0},
	{AMOVWU, C_LEXT, C_NONE, C_REG, 36, 12, REGSB, sys.MIPS64, 0},
	{AMOVV, C_LEXT, C_NONE, C_REG, 36, 12, REGSB, sys.MIPS64, 0},
	{AMOVB, C_LEXT, C_NONE, C_REG, 36, 12, REGSB, sys.MIPS64, 0},
	{AMOVBU, C_LEXT, C_NONE, C_REG, 36, 12, REGSB, sys.MIPS64, 0},
	{AMOVW, C_LAUTO, C_NONE, C_REG, 36, 12, REGSP, 0, 0},
	{AMOVWU, C_LAUTO, C_NONE, C_REG, 36, 12, REGSP, sys.MIPS64, 0},
	{AMOVV, C_LAUTO, C_NONE, C_REG, 36, 12, REGSP, sys.MIPS64, 0},
	{AMOVB, C_LAUTO, C_NONE, C_REG, 36, 12, REGSP, 0, 0},
	{AMOVBU, C_LAUTO, C_NONE, C_REG, 36, 12, REGSP, 0, 0},
	{AMOVW, C_LOREG, C_NONE, C_REG, 36, 12, REGZERO, 0, 0},
	{AMOVWU, C_LOREG, C_NONE, C_REG, 36, 12, REGZERO, sys.MIPS64, 0},
	{AMOVV, C_LOREG, C_NONE, C_REG, 36, 12, REGZERO, sys.MIPS64, 0},
	{AMOVB, C_LOREG, C_NONE, C_REG, 36, 12, REGZERO, 0, 0},
	{AMOVBU, C_LOREG, C_NONE, C_REG, 36, 12, REGZERO, 0, 0},
	{AMOVW, C_ADDR, C_NONE, C_REG, 51, 8, 0, sys.MIPS, 0},
	{AMOVW, C_ADDR, C_NONE, C_REG, 51, 12, 0, sys.MIPS64, 0},
	{AMOVWU, C_ADDR, C_NONE, C_REG, 51, 12, 0, sys.MIPS64, 0},
	{AMOVV, C_ADDR, C_NONE, C_REG, 51, 12, 0, sys.MIPS64, 0},
	{AMOVB, C_ADDR, C_NONE, C_REG, 51, 8, 0, sys.MIPS, 0},
	{AMOVB, C_ADDR, C_NONE, C_REG, 51, 12, 0, sys.MIPS64, 0},
	{AMOVBU, C_ADDR, C_NONE, C_REG, 51, 8, 0, sys.MIPS, 0},
	{AMOVBU, C_ADDR, C_NONE, C_REG, 51, 12, 0, sys.MIPS64, 0},
	{AMOVW, C_TLS, C_NONE, C_REG, 54, 8, 0, 0, NOTUSETMP},
	{AMOVWU, C_TLS, C_NONE, C_REG, 54, 8, 0, sys.MIPS64, NOTUSETMP},
	{AMOVV, C_TLS, C_NONE, C_REG, 54, 8, 0, sys.MIPS64, NOTUSETMP},
	{AMOVB, C_TLS, C_NONE, C_REG, 54, 8, 0, 0, NOTUSETMP},
	{AMOVBU, C_TLS, C_NONE, C_REG, 54, 8, 0, 0, NOTUSETMP},

	{AMOVW, C_SECON, C_NONE, C_REG, 3, 4, REGSB, sys.MIPS64, 0},
	{AMOVV, C_SECON, C_NONE, C_REG, 3, 4, REGSB, sys.MIPS64, 0},
	{AMOVW, C_SACON, C_NONE, C_REG, 3, 4, REGSP, 0, 0},
	{AMOVV, C_SACON, C_NONE, C_REG, 3, 4, REGSP, sys.MIPS64, 0},
	{AMOVW, C_LECON, C_NONE, C_REG, 52, 8, REGSB, sys.MIPS, NOTUSETMP},
	{AMOVW, C_LECON, C_NONE, C_REG, 52, 12, REGSB, sys.MIPS64, NOTUSETMP},
	{AMOVV, C_LECON, C_NONE, C_REG, 52, 12, REGSB, sys.MIPS64, NOTUSETMP},

	{AMOVW, C_LACON, C_NONE, C_REG, 26, 12, REGSP, 0, 0},
	{AMOVV, C_LACON, C_NONE, C_REG, 26, 12, REGSP, sys.MIPS64, 0},
	{AMOVW, C_ADDCON, C_NONE, C_REG, 3, 4, REGZERO, 0, 0},
	{AMOVV, C_ADDCON, C_NONE, C_REG, 3, 4, REGZERO, sys.MIPS64, 0},
	{AMOVW, C_ANDCON, C_NONE, C_REG, 3, 4, REGZERO, 0, 0},
	{AMOVV, C_ANDCON, C_NONE, C_REG, 3, 4, REGZERO, sys.MIPS64, 0},
	{AMOVW, C_STCON, C_NONE, C_REG, 55, 8, 0, 0, NOTUSETMP},
	{AMOVV, C_STCON, C_NONE, C_REG, 55, 8, 0, sys.MIPS64, NOTUSETMP},

	{AMOVW, C_UCON, C_NONE, C_REG, 24, 4, 0, 0, 0},
	{AMOVV, C_UCON, C_NONE, C_REG, 24, 4, 0, sys.MIPS64, 0},
	{AMOVW, C_LCON, C_NONE, C_REG, 19, 8, 0, 0, NOTUSETMP},
	{AMOVV, C_LCON, C_NONE, C_REG, 19, 8, 0, sys.MIPS64, NOTUSETMP},

	{AMOVW, C_HI, C_NONE, C_REG, 20, 4, 0, 0, 0},
	{AMOVV, C_HI, C_NONE, C_REG, 20, 4, 0, sys.MIPS64, 0},
	{AMOVW, C_LO, C_NONE, C_REG, 20, 4, 0, 0, 0},
	{AMOVV, C_LO, C_NONE, C_REG, 20, 4, 0, sys.MIPS64, 0},
	{AMOVW, C_REG, C_NONE, C_HI, 21, 4, 0, 0, 0},
	{AMOVV, C_REG, C_NONE, C_HI, 21, 4, 0, sys.MIPS64, 0},
	{AMOVW, C_REG, C_NONE, C_LO, 21, 4, 0, 0, 0},
	{AMOVV, C_REG, C_NONE, C_LO, 21, 4, 0, sys.MIPS64, 0},

	{AMUL, C_REG, C_REG, C_NONE, 22, 4, 0, 0, 0},
	{AMUL, C_REG, C_REG, C_REG, 22, 4, 0, 0, 0},
	{AMULV, C_REG, C_REG, C_NONE, 22, 4, 0, sys.MIPS64, 0},

	{AADD, C_ADD0CON, C_REG, C_REG, 4, 4, 0, 0, 0},
	{AADD, C_ADD0CON, C_NONE, C_REG, 4, 4, 0, 0, 0},
	{AADD, C_ANDCON, C_REG, C_REG, 10, 8, 0, 0, 0},
	{AADD, C_ANDCON, C_NONE, C_REG, 10, 8, 0, 0, 0},

	{AADDV, C_ADD0CON, C_REG, C_REG, 4, 4, 0, sys.MIPS64, 0},
	{AADDV, C_ADD0CON, C_NONE, C_REG, 4, 4, 0, sys.MIPS64, 0},
	{AADDV, C_ANDCON, C_REG, C_REG, 10, 8, 0, sys.MIPS64, 0},
	{AADDV, C_ANDCON, C_NONE, C_REG, 10, 8, 0, sys.MIPS64, 0},

	{AAND, C_AND0CON, C_REG, C_REG, 4, 4, 0, 0, 0},
	{AAND, C_AND0CON, C_NONE, C_REG, 4, 4, 0, 0, 0},
	{AAND, C_ADDCON, C_REG, C_REG, 10, 8, 0, 0, 0},
	{AAND, C_ADDCON, C_NONE, C_REG, 10, 8, 0, 0, 0},

	{AADD, C_UCON, C_REG, C_REG, 25, 8, 0, 0, 0},
	{AADD, C_UCON, C_NONE, C_REG, 25, 8, 0, 0, 0},
	{AADDV, C_UCON, C_REG, C_REG, 25, 8, 0, sys.MIPS64, 0},
	{AADDV, C_UCON, C_NONE, C_REG, 25, 8, 0, sys.MIPS64, 0},
	{AAND, C_UCON, C_REG, C_REG, 25, 8, 0, 0, 0},
	{AAND, C_UCON, C_NONE, C_REG, 25, 8, 0, 0, 0},

	{AADD, C_LCON, C_NONE, C_REG, 23, 12, 0, 0, 0},
	{AADDV, C_LCON, C_NONE, C_REG, 23, 12, 0, sys.MIPS64, 0},
	{AAND, C_LCON, C_NONE, C_REG, 23, 12, 0, 0, 0},
	{AADD, C_LCON, C_REG, C_REG, 23, 12, 0, 0, 0},
	{AADDV, C_LCON, C_REG, C_REG, 23, 12, 0, sys.MIPS64, 0},
	{AAND, C_LCON, C_REG, C_REG, 23, 12, 0, 0, 0},

	{ASLL, C_SCON, C_REG, C_REG, 16, 4, 0, 0, 0},
	{ASLL, C_SCON, C_NONE, C_REG, 16, 4, 0, 0, 0},

	{ASLLV, C_SCON, C_REG, C_REG, 16, 4, 0, sys.MIPS64, 0},
	{ASLLV, C_SCON, C_NONE, C_REG, 16, 4, 0, sys.MIPS64, 0},

	{ASYSCALL, C_NONE, C_NONE, C_NONE, 5, 4, 0, 0, 0},

	{ABEQ, C_REG, C_REG, C_SBRA, 6, 4, 0, 0, 0},
	{ABEQ, C_REG, C_NONE, C_SBRA, 6, 4, 0, 0, 0},
	{ABLEZ, C_REG, C_NONE, C_SBRA, 6, 4, 0, 0, 0},
	{ABFPT, C_NONE, C_NONE, C_SBRA, 6, 8, 0, 0, NOTUSETMP},

	{AJMP, C_NONE, C_NONE, C_LBRA, 11, 4, 0, 0, 0},
	{AJAL, C_NONE, C_NONE, C_LBRA, 11, 4, 0, 0, 0},

	{AJMP, C_NONE, C_NONE, C_ZOREG, 18, 4, REGZERO, 0, 0},
	{AJAL, C_NONE, C_NONE, C_ZOREG, 18, 4, REGLINK, 0, 0},

	{AMOVW, C_SEXT, C_NONE, C_FREG, 27, 4, REGSB, sys.MIPS64, 0},
	{AMOVF, C_SEXT, C_NONE, C_FREG, 27, 4, REGSB, sys.MIPS64, 0},
	{AMOVD, C_SEXT, C_NONE, C_FREG, 27, 4, REGSB, sys.MIPS64, 0},
	{AMOVW, C_SAUTO, C_NONE, C_FREG, 27, 4, REGSP, sys.MIPS64, 0},
	{AMOVF, C_SAUTO, C_NONE, C_FREG, 27, 4, REGSP, 0, 0},
	{AMOVD, C_SAUTO, C_NONE, C_FREG, 27, 4, REGSP, 0, 0},
	{AMOVW, C_SOREG, C_NONE, C_FREG, 27, 4, REGZERO, sys.MIPS64, 0},
	{AMOVF, C_SOREG, C_NONE, C_FREG, 27, 4, REGZERO, 0, 0},
	{AMOVD, C_SOREG, C_NONE, C_FREG, 27, 4, REGZERO, 0, 0},

	{AMOVW, C_LEXT, C_NONE, C_FREG, 27, 12, REGSB, sys.MIPS64, 0},
	{AMOVF, C_LEXT, C_NONE, C_FREG, 27, 12, REGSB, sys.MIPS64, 0},
	{AMOVD, C_LEXT, C_NONE, C_FREG, 27, 12, REGSB, sys.MIPS64, 0},
	{AMOVW, C_LAUTO, C_NONE, C_FREG, 27, 12, REGSP, sys.MIPS64, 0},
	{AMOVF, C_LAUTO, C_NONE, C_FREG, 27, 12, REGSP, 0, 0},
	{AMOVD, C_LAUTO, C_NONE, C_FREG, 27, 12, REGSP, 0, 0},
	{AMOVW, C_LOREG, C_NONE, C_FREG, 27, 12, REGZERO, sys.MIPS64, 0},
	{AMOVF, C_LOREG, C_NONE, C_FREG, 27, 12, REGZERO, 0, 0},
	{AMOVD, C_LOREG, C_NONE, C_FREG, 27, 12, REGZERO, 0, 0},
	{AMOVF, C_ADDR, C_NONE, C_FREG, 51, 8, 0, sys.MIPS, 0},
	{AMOVF, C_ADDR, C_NONE, C_FREG, 51, 12, 0, sys.MIPS64, 0},
	{AMOVD, C_ADDR, C_NONE, C_FREG, 51, 8, 0, sys.MIPS, 0},
	{AMOVD, C_ADDR, C_NONE, C_FREG, 51, 12, 0, sys.MIPS64, 0},

	{AMOVW, C_FREG, C_NONE, C_SEXT, 28, 4, REGSB, sys.MIPS64, 0},
	{AMOVF, C_FREG, C_NONE, C_SEXT, 28, 4, REGSB, sys.MIPS64, 0},
	{AMOVD, C_FREG, C_NONE, C_SEXT, 28, 4, REGSB, sys.MIPS64, 0},
	{AMOVW, C_FREG, C_NONE, C_SAUTO, 28, 4, REGSP, sys.MIPS64, 0},
	{AMOVF, C_FREG, C_NONE, C_SAUTO, 28, 4, REGSP, 0, 0},
	{AMOVD, C_FREG, C_NONE, C_SAUTO, 28, 4, REGSP, 0, 0},
	{AMOVW, C_FREG, C_NONE, C_SOREG, 28, 4, REGZERO, sys.MIPS64, 0},
	{AMOVF, C_FREG, C_NONE, C_SOREG, 28, 4, REGZERO, 0, 0},
	{AMOVD, C_FREG, C_NONE, C_SOREG, 28, 4, REGZERO, 0, 0},

	{AMOVW, C_FREG, C_NONE, C_LEXT, 28, 12, REGSB, sys.MIPS64, 0},
	{AMOVF, C_FREG, C_NONE, C_LEXT, 28, 12, REGSB, sys.MIPS64, 0},
	{AMOVD, C_FREG, C_NONE, C_LEXT, 28, 12, REGSB, sys.MIPS64, 0},
	{AMOVW, C_FREG, C_NONE, C_LAUTO, 28, 12, REGSP, sys.MIPS64, 0},
	{AMOVF, C_FREG, C_NONE, C_LAUTO, 28, 12, REGSP, 0, 0},
	{AMOVD, C_FREG, C_NONE, C_LAUTO, 28, 12, REGSP, 0, 0},
	{AMOVW, C_FREG, C_NONE, C_LOREG, 28, 12, REGZERO, sys.MIPS64, 0},
	{AMOVF, C_FREG, C_NONE, C_LOREG, 28, 12, REGZERO, 0, 0},
	{AMOVD, C_FREG, C_NONE, C_LOREG, 28, 12, REGZERO, 0, 0},
	{AMOVF, C_FREG, C_NONE, C_ADDR, 50, 8, 0, sys.MIPS, 0},
	{AMOVF, C_FREG, C_NONE, C_ADDR, 50, 12, 0, sys.MIPS64, 0},
	{AMOVD, C_FREG, C_NONE, C_ADDR, 50, 8, 0, sys.MIPS, 0},
	{AMOVD, C_FREG, C_NONE, C_ADDR, 50, 12, 0, sys.MIPS64, 0},

	{AMOVW, C_REG, C_NONE, C_FREG, 30, 4, 0, 0, 0},
	{AMOVW, C_FREG, C_NONE, C_REG, 31, 4, 0, 0, 0},
	{AMOVV, C_REG, C_NONE, C_FREG, 47, 4, 0, sys.MIPS64, 0},
	{AMOVV, C_FREG, C_NONE, C_REG, 48, 4, 0, sys.MIPS64, 0},

	{AMOVW, C_ADDCON, C_NONE, C_FREG, 34, 8, 0, sys.MIPS64, 0},
	{AMOVW, C_ANDCON, C_NONE, C_FREG, 34, 8, 0, sys.MIPS64, 0},

	{AMOVW, C_REG, C_NONE, C_MREG, 37, 4, 0, 0, 0},
	{AMOVV, C_REG, C_NONE, C_MREG, 37, 4, 0, sys.MIPS64, 0},
	{AMOVW, C_MREG, C_NONE, C_REG, 38, 4, 0, 0, 0},
	{AMOVV, C_MREG, C_NONE, C_REG, 38, 4, 0, sys.MIPS64, 0},

	{AWORD, C_LCON, C_NONE, C_NONE, 40, 4, 0, 0, 0},

	{AMOVW, C_REG, C_NONE, C_FCREG, 41, 4, 0, 0, 0},
	{AMOVV, C_REG, C_NONE, C_FCREG, 41, 4, 0, sys.MIPS64, 0},
	{AMOVW, C_FCREG, C_NONE, C_REG, 42, 4, 0, 0, 0},
	{AMOVV, C_FCREG, C_NONE, C_REG, 42, 4, 0, sys.MIPS64, 0},

	{ATEQ, C_SCON, C_REG, C_REG, 15, 4, 0, 0, 0},
	{ATEQ, C_SCON, C_NONE, C_REG, 15, 4, 0, 0, 0},
	{ACMOVT, C_REG, C_NONE, C_REG, 17, 4, 0, 0, 0},

	{AVMOVB, C_SCON, C_NONE, C_WREG, 56, 4, 0, sys.MIPS64, 0},
	{AVMOVB, C_ADDCON, C_NONE, C_WREG, 56, 4, 0, sys.MIPS64, 0},
	{AVMOVB, C_SOREG, C_NONE, C_WREG, 57, 4, 0, sys.MIPS64, 0},
	{AVMOVB, C_WREG, C_NONE, C_SOREG, 58, 4, 0, sys.MIPS64, 0},

	{ABREAK, C_REG, C_NONE, C_SEXT, 7, 4, REGSB, sys.MIPS64, 0}, /* really CACHE instruction */
	{ABREAK, C_REG, C_NONE, C_SAUTO, 7, 4, REGSP, sys.MIPS64, 0},
	{ABREAK, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, sys.MIPS64, 0},
	{ABREAK, C_NONE, C_NONE, C_NONE, 5, 4, 0, 0, 0},

	{obj.AUNDEF, C_NONE, C_NONE, C_NONE, 49, 4, 0, 0, 0},
	{obj.APCDATA, C_LCON, C_NONE, C_LCON, 0, 0, 0, 0, 0},
	{obj.AFUNCDATA, C_SCON, C_NONE, C_ADDR, 0, 0, 0, 0, 0},
	{obj.ANOP, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0},
	{obj.ANOP, C_LCON, C_NONE, C_NONE, 0, 0, 0, 0, 0}, // nop variants, see #40689
	{obj.ANOP, C_REG, C_NONE, C_NONE, 0, 0, 0, 0, 0},
	{obj.ANOP, C_FREG, C_NONE, C_NONE, 0, 0, 0, 0, 0},
	{obj.ADUFFZERO, C_NONE, C_NONE, C_LBRA, 11, 4, 0, 0, 0}, // same as AJMP
	{obj.ADUFFCOPY, C_NONE, C_NONE, C_LBRA, 11, 4, 0, 0, 0}, // same as AJMP

	// Terminator: buildop stops scanning at the first obj.AXXX row.
	{obj.AXXX, C_NONE, C_NONE, C_NONE, 0, 4, 0, 0, 0},
}

// oprange maps an opcode (masked with obj.AMask) to the optab rows that can
// encode it. buildop fills it in; a nil entry for AOR is how span0, oplook
// and buildop detect that buildop has not run yet.
var oprange [ALAST & obj.AMask][]Optab

// xcmp is the operand-class compatibility matrix computed by buildop:
// xcmp[i][n] is true when an operand of actual class i may be used where an
// optab row asks for class n, i.e. when cmp(n, i) holds.
var xcmp [C_NCLASS][C_NCLASS]bool

// span0 assembles the function cursym: it assigns a Pc to every instruction,
// rewrites conditional branches whose target is too far away, and finally
// encodes the instructions into cursym.P.
//
// The work is done in three steps:
//  1. a first pass that gives each Prog its Pc from the optab sizes;
//  2. repeated passes that expand out-of-range conditional branches and
//     recompute the Pcs, until a pass makes no change;
//  3. a final pass that calls asmout for each Prog and stores the resulting
//     32-bit words using the target byte order.
//
// Functions with no body (Text is nil or has no successor) are left untouched.
// newprog allocates the Progs inserted in step 2.
func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
	if ctxt.Retpoline {
		ctxt.Diag("-spectre=ret not supported on mips")
		ctxt.Retpoline = false // don't keep printing
	}

	p := cursym.Func().Text
	if p == nil || p.Link == nil { // handle external functions and ELF section symbols
		return
	}

	// autosize is the frame size recorded on the TEXT instruction plus the
	// architecture's fixed frame size.
	c := ctxt0{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset + ctxt.Arch.FixedFrameSize)}

	if oprange[AOR&obj.AMask] == nil {
		c.ctxt.Diag("mips ops not initialized, call mips.buildop first")
	}

	// Step 1: initial layout. The TEXT Prog itself sits at Pc 0.
	pc := int64(0)
	p.Pc = pc

	var m int
	var o *Optab
	for p = p.Link; p != nil; p = p.Link {
		p.Pc = pc
		o = c.oplook(p)
		m = int(o.size)
		if m == 0 {
			// Only NOP, FUNCDATA and PCDATA may legitimately occupy no space.
			if p.As != obj.ANOP && p.As != obj.AFUNCDATA && p.As != obj.APCDATA {
				c.ctxt.Diag("zero-width instruction\n%v", p)
			}
			continue
		}

		pc += int64(m)
	}

	c.cursym.Size = pc

	/*
	 * if any procedure is large enough to
	 * generate a large SBRA branch, then
	 * generate extra passes putting branches
	 * around jmps to fix. this is rare.
	 */
	// Step 2: bflag is set whenever a pass rewrites a branch, which forces
	// another pass because the inserted instructions move everything after them.
	bflag := 1

	var otxt int64
	var q *obj.Prog
	for bflag != 0 {
		bflag = 0
		pc = 0
		for p = c.cursym.Func().Text.Link; p != nil; p = p.Link {
			p.Pc = pc
			o = c.oplook(p)

			// very large conditional branches
			// (optab type 6 with a resolved target whose byte distance
			// falls outside [-(1<<17)+10, (1<<17)-10))
			if o.type_ == 6 && p.To.Target() != nil {
				otxt = p.To.Target().Pc - pc
				if otxt < -(1<<17)+10 || otxt >= (1<<17)-10 {
					// Insert a JMP to the original target right after p
					// and make p branch to that JMP instead.
					q = c.newprog()
					q.Link = p.Link
					p.Link = q
					q.As = AJMP
					q.Pos = p.Pos
					q.To.Type = obj.TYPE_BRANCH
					q.To.SetTarget(p.To.Target())
					p.To.SetTarget(q)
					// Insert a second JMP between p and the first one; it
					// jumps over the first JMP to the instruction that
					// originally followed p (the fall-through path).
					q = c.newprog()
					q.Link = p.Link
					p.Link = q
					q.As = AJMP
					q.Pos = p.Pos
					q.To.Type = obj.TYPE_BRANCH
					q.To.SetTarget(q.Link.Link)

					// NOTE(review): addnop is defined elsewhere; presumably
					// it inserts a NOP after the given Prog -- confirm.
					c.addnop(p.Link)
					c.addnop(p)
					bflag = 1
				}
			}

			m = int(o.size)
			if m == 0 {
				if p.As != obj.ANOP && p.As != obj.AFUNCDATA && p.As != obj.APCDATA {
					c.ctxt.Diag("zero-width instruction\n%v", p)
				}
				continue
			}

			pc += int64(m)
		}

		c.cursym.Size = pc
	}
	// On MIPS64 round the function size up to a multiple of mips64FuncAlign.
	if c.ctxt.Arch.Family == sys.MIPS64 {
		pc += -pc & (mips64FuncAlign - 1)
	}
	c.cursym.Size = pc

	/*
	 * lay out the code, emitting code and data relocations.
	 */

	c.cursym.Grow(c.cursym.Size)

	// Step 3: bp walks the symbol's byte buffer; out receives up to four
	// 32-bit words per instruction from asmout.
	bp := c.cursym.P
	var i int32
	var out [4]uint32
	for p := c.cursym.Func().Text.Link; p != nil; p = p.Link {
		c.pc = p.Pc
		o = c.oplook(p)
		if int(o.size) > 4*len(out) {
			log.Fatalf("out array in span0 is too small, need at least %d for %v", o.size/4, p)
		}
		c.asmout(p, o, out[:])
		// Store o.size/4 words using the target byte order.
		for i = 0; i < int32(o.size/4); i++ {
			c.ctxt.Arch.ByteOrder.PutUint32(bp, out[i])
			bp = bp[4:]
		}
	}

	// Mark nonpreemptible instruction sequences.
	// We use REGTMP as a scratch register during call injection,
	// so instruction sequences that use REGTMP are unsafe to
	// preempt asynchronously.
	obj.MarkUnsafePoints(c.ctxt, c.cursym.Func().Text, c.newprog, c.isUnsafePoint, c.isRestartable)
}

// isUnsafePoint returns whether p is an unsafe point.
+func (c *ctxt0) isUnsafePoint(p *obj.Prog) bool { + // If p explicitly uses REGTMP, it's unsafe to preempt, because the + // preemption sequence clobbers REGTMP. + return p.From.Reg == REGTMP || p.To.Reg == REGTMP || p.Reg == REGTMP +} + +// isRestartable returns whether p is a multi-instruction sequence that, +// if preempted, can be restarted. +func (c *ctxt0) isRestartable(p *obj.Prog) bool { + if c.isUnsafePoint(p) { + return false + } + // If p is a multi-instruction sequence with uses REGTMP inserted by + // the assembler in order to materialize a large constant/offset, we + // can restart p (at the start of the instruction sequence), recompute + // the content of REGTMP, upon async preemption. Currently, all cases + // of assembler-inserted REGTMP fall into this category. + // If p doesn't use REGTMP, it can be simply preempted, so we don't + // mark it. + o := c.oplook(p) + return o.size > 4 && o.flag&NOTUSETMP == 0 +} + +func isint32(v int64) bool { + return int64(int32(v)) == v +} + +func isuint32(v uint64) bool { + return uint64(uint32(v)) == v +} + +func (c *ctxt0) aclass(a *obj.Addr) int { + switch a.Type { + case obj.TYPE_NONE: + return C_NONE + + case obj.TYPE_REG: + if REG_R0 <= a.Reg && a.Reg <= REG_R31 { + return C_REG + } + if REG_F0 <= a.Reg && a.Reg <= REG_F31 { + return C_FREG + } + if REG_M0 <= a.Reg && a.Reg <= REG_M31 { + return C_MREG + } + if REG_FCR0 <= a.Reg && a.Reg <= REG_FCR31 { + return C_FCREG + } + if REG_W0 <= a.Reg && a.Reg <= REG_W31 { + return C_WREG + } + if a.Reg == REG_LO { + return C_LO + } + if a.Reg == REG_HI { + return C_HI + } + return C_GOK + + case obj.TYPE_MEM: + switch a.Name { + case obj.NAME_EXTERN, + obj.NAME_STATIC: + if a.Sym == nil { + break + } + c.instoffset = a.Offset + if a.Sym != nil { // use relocation + if a.Sym.Type == objabi.STLSBSS { + return C_TLS + } + return C_ADDR + } + return C_LEXT + + case obj.NAME_AUTO: + if a.Reg == REGSP { + // unset base register for better printing, since + // a.Offset 
is still relative to pseudo-SP. + a.Reg = obj.REG_NONE + } + c.instoffset = int64(c.autosize) + a.Offset + if c.instoffset >= -BIG && c.instoffset < BIG { + return C_SAUTO + } + return C_LAUTO + + case obj.NAME_PARAM: + if a.Reg == REGSP { + // unset base register for better printing, since + // a.Offset is still relative to pseudo-FP. + a.Reg = obj.REG_NONE + } + c.instoffset = int64(c.autosize) + a.Offset + c.ctxt.Arch.FixedFrameSize + if c.instoffset >= -BIG && c.instoffset < BIG { + return C_SAUTO + } + return C_LAUTO + + case obj.NAME_NONE: + c.instoffset = a.Offset + if c.instoffset == 0 { + return C_ZOREG + } + if c.instoffset >= -BIG && c.instoffset < BIG { + return C_SOREG + } + return C_LOREG + } + + return C_GOK + + case obj.TYPE_TEXTSIZE: + return C_TEXTSIZE + + case obj.TYPE_CONST, + obj.TYPE_ADDR: + switch a.Name { + case obj.NAME_NONE: + c.instoffset = a.Offset + if a.Reg != 0 { + if -BIG <= c.instoffset && c.instoffset <= BIG { + return C_SACON + } + if isint32(c.instoffset) { + return C_LACON + } + return C_DACON + } + + case obj.NAME_EXTERN, + obj.NAME_STATIC: + s := a.Sym + if s == nil { + return C_GOK + } + + c.instoffset = a.Offset + if s.Type == objabi.STLSBSS { + return C_STCON // address of TLS variable + } + return C_LECON + + case obj.NAME_AUTO: + if a.Reg == REGSP { + // unset base register for better printing, since + // a.Offset is still relative to pseudo-SP. + a.Reg = obj.REG_NONE + } + c.instoffset = int64(c.autosize) + a.Offset + if c.instoffset >= -BIG && c.instoffset < BIG { + return C_SACON + } + return C_LACON + + case obj.NAME_PARAM: + if a.Reg == REGSP { + // unset base register for better printing, since + // a.Offset is still relative to pseudo-FP. 
+ a.Reg = obj.REG_NONE + } + c.instoffset = int64(c.autosize) + a.Offset + c.ctxt.Arch.FixedFrameSize + if c.instoffset >= -BIG && c.instoffset < BIG { + return C_SACON + } + return C_LACON + + default: + return C_GOK + } + + if c.instoffset >= 0 { + if c.instoffset == 0 { + return C_ZCON + } + if c.instoffset <= 0x7fff { + return C_SCON + } + if c.instoffset <= 0xffff { + return C_ANDCON + } + if c.instoffset&0xffff == 0 && isuint32(uint64(c.instoffset)) { /* && (instoffset & (1<<31)) == 0) */ + return C_UCON + } + if isint32(c.instoffset) || isuint32(uint64(c.instoffset)) { + return C_LCON + } + return C_LCON // C_DCON + } + + if c.instoffset >= -0x8000 { + return C_ADDCON + } + if c.instoffset&0xffff == 0 && isint32(c.instoffset) { + return C_UCON + } + if isint32(c.instoffset) { + return C_LCON + } + return C_LCON // C_DCON + + case obj.TYPE_BRANCH: + return C_SBRA + } + + return C_GOK +} + +func prasm(p *obj.Prog) { + fmt.Printf("%v\n", p) +} + +func (c *ctxt0) oplook(p *obj.Prog) *Optab { + if oprange[AOR&obj.AMask] == nil { + c.ctxt.Diag("mips ops not initialized, call mips.buildop first") + } + + a1 := int(p.Optab) + if a1 != 0 { + return &optab[a1-1] + } + a1 = int(p.From.Class) + if a1 == 0 { + a1 = c.aclass(&p.From) + 1 + p.From.Class = int8(a1) + } + + a1-- + a3 := int(p.To.Class) + if a3 == 0 { + a3 = c.aclass(&p.To) + 1 + p.To.Class = int8(a3) + } + + a3-- + a2 := C_NONE + if p.Reg != 0 { + a2 = C_REG + } + + ops := oprange[p.As&obj.AMask] + c1 := &xcmp[a1] + c3 := &xcmp[a3] + for i := range ops { + op := &ops[i] + if int(op.a2) == a2 && c1[op.a1] && c3[op.a3] && (op.family == 0 || c.ctxt.Arch.Family == op.family) { + p.Optab = uint16(cap(optab) - cap(ops) + i + 1) + return op + } + } + + c.ctxt.Diag("illegal combination %v %v %v %v", p.As, DRconv(a1), DRconv(a2), DRconv(a3)) + prasm(p) + // Turn illegal instruction into an UNDEF, avoid crashing in asmout. 
+ return &Optab{obj.AUNDEF, C_NONE, C_NONE, C_NONE, 49, 4, 0, 0, 0} +} + +func cmp(a int, b int) bool { + if a == b { + return true + } + switch a { + case C_LCON: + if b == C_ZCON || b == C_SCON || b == C_UCON || b == C_ADDCON || b == C_ANDCON { + return true + } + + case C_ADD0CON: + if b == C_ADDCON { + return true + } + fallthrough + + case C_ADDCON: + if b == C_ZCON || b == C_SCON { + return true + } + + case C_AND0CON: + if b == C_ANDCON { + return true + } + fallthrough + + case C_ANDCON: + if b == C_ZCON || b == C_SCON { + return true + } + + case C_UCON: + if b == C_ZCON { + return true + } + + case C_SCON: + if b == C_ZCON { + return true + } + + case C_LACON: + if b == C_SACON { + return true + } + + case C_LBRA: + if b == C_SBRA { + return true + } + + case C_LEXT: + if b == C_SEXT { + return true + } + + case C_LAUTO: + if b == C_SAUTO { + return true + } + + case C_REG: + if b == C_ZCON { + return r0iszero != 0 /*TypeKind(100016)*/ + } + + case C_LOREG: + if b == C_ZOREG || b == C_SOREG { + return true + } + + case C_SOREG: + if b == C_ZOREG { + return true + } + } + + return false +} + +type ocmp []Optab + +func (x ocmp) Len() int { + return len(x) +} + +func (x ocmp) Swap(i, j int) { + x[i], x[j] = x[j], x[i] +} + +func (x ocmp) Less(i, j int) bool { + p1 := &x[i] + p2 := &x[j] + n := int(p1.as) - int(p2.as) + if n != 0 { + return n < 0 + } + n = int(p1.a1) - int(p2.a1) + if n != 0 { + return n < 0 + } + n = int(p1.a2) - int(p2.a2) + if n != 0 { + return n < 0 + } + n = int(p1.a3) - int(p2.a3) + if n != 0 { + return n < 0 + } + return false +} + +func opset(a, b0 obj.As) { + oprange[a&obj.AMask] = oprange[b0] +} + +func buildop(ctxt *obj.Link) { + if oprange[AOR&obj.AMask] != nil { + // Already initialized; stop now. + // This happens in the cmd/asm tests, + // each of which re-initializes the arch. 
+ return + } + + var n int + + for i := 0; i < C_NCLASS; i++ { + for n = 0; n < C_NCLASS; n++ { + if cmp(n, i) { + xcmp[i][n] = true + } + } + } + for n = 0; optab[n].as != obj.AXXX; n++ { + } + sort.Sort(ocmp(optab[:n])) + for i := 0; i < n; i++ { + r := optab[i].as + r0 := r & obj.AMask + start := i + for optab[i].as == r { + i++ + } + oprange[r0] = optab[start:i] + i-- + + switch r { + default: + ctxt.Diag("unknown op in build: %v", r) + ctxt.DiagFlush() + log.Fatalf("bad code") + + case AABSF: + opset(AMOVFD, r0) + opset(AMOVDF, r0) + opset(AMOVWF, r0) + opset(AMOVFW, r0) + opset(AMOVWD, r0) + opset(AMOVDW, r0) + opset(ANEGF, r0) + opset(ANEGD, r0) + opset(AABSD, r0) + opset(ATRUNCDW, r0) + opset(ATRUNCFW, r0) + opset(ASQRTF, r0) + opset(ASQRTD, r0) + + case AMOVVF: + opset(AMOVVD, r0) + opset(AMOVFV, r0) + opset(AMOVDV, r0) + opset(ATRUNCDV, r0) + opset(ATRUNCFV, r0) + + case AADD: + opset(ASGT, r0) + opset(ASGTU, r0) + opset(AADDU, r0) + + case AADDV: + opset(AADDVU, r0) + + case AADDF: + opset(ADIVF, r0) + opset(ADIVD, r0) + opset(AMULF, r0) + opset(AMULD, r0) + opset(ASUBF, r0) + opset(ASUBD, r0) + opset(AADDD, r0) + + case AAND: + opset(AOR, r0) + opset(AXOR, r0) + + case ABEQ: + opset(ABNE, r0) + + case ABLEZ: + opset(ABGEZ, r0) + opset(ABGEZAL, r0) + opset(ABLTZ, r0) + opset(ABLTZAL, r0) + opset(ABGTZ, r0) + + case AMOVB: + opset(AMOVH, r0) + + case AMOVBU: + opset(AMOVHU, r0) + + case AMUL: + opset(AREM, r0) + opset(AREMU, r0) + opset(ADIVU, r0) + opset(AMULU, r0) + opset(ADIV, r0) + opset(AMADD, r0) + opset(AMSUB, r0) + + case AMULV: + opset(ADIVV, r0) + opset(ADIVVU, r0) + opset(AMULVU, r0) + opset(AREMV, r0) + opset(AREMVU, r0) + + case ASLL: + opset(ASRL, r0) + opset(ASRA, r0) + opset(AROTR, r0) + + case ASLLV: + opset(ASRAV, r0) + opset(ASRLV, r0) + opset(AROTRV, r0) + + case ASUB: + opset(ASUBU, r0) + opset(ANOR, r0) + + case ASUBV: + opset(ASUBVU, r0) + + case ASYSCALL: + opset(ASYNC, r0) + opset(ANOOP, r0) + opset(ATLBP, r0) + opset(ATLBR, r0) + 
// OP returns x<<3 | y.
func OP(x, y uint32) uint32 {
	return x<<3 | y
}

// SP returns x<<29 | y<<26, i.e. OP(x, y) moved up by 26 bits.
func SP(x, y uint32) uint32 {
	return OP(x, y) << 26
}

// BCOND returns x<<19 | y<<16, i.e. OP(x, y) moved up by 16 bits.
func BCOND(x, y uint32) uint32 {
	return OP(x, y) << 16
}

// MMU returns SP(2, 0) with the value 16 in the field at bit 21 and
// OP(x, y) in the low bits.
func MMU(x, y uint32) uint32 {
	const sel = 16 << 21
	return SP(2, 0) | sel | OP(x, y)
}

// FPF returns SP(2, 1) with the value 16 in the field at bit 21 and
// OP(x, y) in the low bits.
func FPF(x, y uint32) uint32 {
	const sel = 16 << 21
	return SP(2, 1) | sel | OP(x, y)
}

// FPD is like FPF but places 17 in the field at bit 21.
func FPD(x, y uint32) uint32 {
	const sel = 17 << 21
	return SP(2, 1) | sel | OP(x, y)
}

// FPW is like FPF but places 20 in the field at bit 21.
func FPW(x, y uint32) uint32 {
	const sel = 20 << 21
	return SP(2, 1) | sel | OP(x, y)
}

// FPV is like FPF but places 21 in the field at bit 21.
func FPV(x, y uint32) uint32 {
	const sel = 21 << 21
	return SP(2, 1) | sel | OP(x, y)
}

// OP_RRR merges three 5-bit register numbers into op:
// r2 at bit 21, r1 at bit 16 and r3 at bit 11.
func OP_RRR(op, r1, r2, r3 uint32) uint32 {
	const reg = 31
	word := op
	word |= (r1 & reg) << 16
	word |= (r2 & reg) << 21
	word |= (r3 & reg) << 11
	return word
}

// OP_IRR merges a 16-bit immediate (the low 16 bits of i) and two 5-bit
// register numbers into op: r2 at bit 21 and r3 at bit 16.
func OP_IRR(op, i, r2, r3 uint32) uint32 {
	const reg = 31
	word := op | (i & 0xFFFF)
	word |= (r2 & reg) << 21
	word |= (r3 & reg) << 16
	return word
}

// OP_SRR merges a 5-bit value s at bit 6 and two 5-bit register numbers
// into op: r2 at bit 16 and r3 at bit 11.
func OP_SRR(op, s, r2, r3 uint32) uint32 {
	const field = 31
	word := op
	word |= (s & field) << 6
	word |= (r2 & field) << 16
	word |= (r3 & field) << 11
	return word
}

// OP_FRRR merges three 5-bit register numbers into op:
// r1 at bit 16, r2 at bit 11 and r3 at bit 6.
func OP_FRRR(op, r1, r2, r3 uint32) uint32 {
	const reg = 31
	word := op
	word |= (r1 & reg) << 16
	word |= (r2 & reg) << 11
	word |= (r3 & reg) << 6
	return word
}

// OP_JMP merges the low 26 bits of i into op.
func OP_JMP(op, i uint32) uint32 {
	const target = 0x3FFFFFF
	return op | (i & target)
}

// OP_VI10 builds a word with 0x1e in the top six bits, followed by the low
// 3 bits of op at bit 23, the low 2 bits of df at bit 21, the low 10 bits of
// s10 at bit 11, the low 5 bits of wd at bit 6 and the low 6 bits of minor.
func OP_VI10(op, df uint32, s10 int32, wd, minor uint32) uint32 {
	imm := uint32(s10 & 0x3FF)
	word := uint32(0x1e << 26)
	word |= (op & 7) << 23
	word |= (df & 3) << 21
	word |= imm << 11
	word |= (wd & 31) << 6
	word |= minor & 0x3F
	return word
}

// OP_VMI10 builds a word with 0x1e in the top six bits, followed by the low
// 10 bits of s10 at bit 16, the low 5 bits of rs at bit 11, the low 5 bits of
// wd at bit 6, the low 4 bits of minor at bit 2 and the low 2 bits of df.
func OP_VMI10(s10 int32, rs, wd, minor, df uint32) uint32 {
	imm := uint32(s10 & 0x3FF)
	word := uint32(0x1e << 26)
	word |= imm << 16
	word |= (rs & 31) << 11
	word |= (wd & 31) << 6
	word |= (minor & 15) << 2
	word |= df & 3
	return word
}
uint32(p.From.Reg), uint32(p.Reg)) + // for ABFPT and ABFPF only: always fill delay slot with 0 + // see comments in func preprocess for details. + o2 = 0 + + case 7: /* mov r, soreg ==> sw o(r) */ + r := int(p.To.Reg) + if r == 0 { + r = int(o.param) + } + v := c.regoff(&p.To) + o1 = OP_IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.From.Reg)) + + case 8: /* mov soreg, r ==> lw o(r) */ + r := int(p.From.Reg) + if r == 0 { + r = int(o.param) + } + v := c.regoff(&p.From) + o1 = OP_IRR(c.opirr(-p.As), uint32(v), uint32(r), uint32(p.To.Reg)) + + case 9: /* sll r1,[r2],r3 */ + r := int(p.Reg) + + if r == 0 { + r = int(p.To.Reg) + } + o1 = OP_RRR(c.oprrr(p.As), uint32(r), uint32(p.From.Reg), uint32(p.To.Reg)) + + case 10: /* add $con,[r1],r2 ==> mov $con, t; add t,[r1],r2 */ + v := c.regoff(&p.From) + a := AOR + if v < 0 { + a = AADDU + } + o1 = OP_IRR(c.opirr(a), uint32(v), uint32(0), uint32(REGTMP)) + r := int(p.Reg) + if r == 0 { + r = int(p.To.Reg) + } + o2 = OP_RRR(c.oprrr(p.As), uint32(REGTMP), uint32(r), uint32(p.To.Reg)) + + case 11: /* jmp lbra */ + v := int32(0) + if c.aclass(&p.To) == C_SBRA && p.To.Sym == nil && p.As == AJMP { + // use PC-relative branch for short branches + // BEQ R0, R0, sbra + if p.To.Target() == nil { + v = int32(-4) >> 2 + } else { + v = int32(p.To.Target().Pc-p.Pc-4) >> 2 + } + if (v<<16)>>16 == v { + o1 = OP_IRR(c.opirr(ABEQ), uint32(v), uint32(REGZERO), uint32(REGZERO)) + break + } + } + if p.To.Target() == nil { + v = int32(p.Pc) >> 2 + } else { + v = int32(p.To.Target().Pc) >> 2 + } + o1 = OP_JMP(c.opirr(p.As), uint32(v)) + if p.To.Sym == nil { + p.To.Sym = c.cursym.Func().Text.From.Sym + p.To.Offset = p.To.Target().Pc + } + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 4 + rel.Sym = p.To.Sym + rel.Add = p.To.Offset + if p.As == AJAL { + rel.Type = objabi.R_CALLMIPS + } else { + rel.Type = objabi.R_JMPMIPS + } + + case 12: /* movbs r,r */ + // NOTE: this case does not use REGTMP. 
If it ever does, + // remove the NOTUSETMP flag in optab. + v := 16 + if p.As == AMOVB { + v = 24 + } + o1 = OP_SRR(c.opirr(ASLL), uint32(v), uint32(p.From.Reg), uint32(p.To.Reg)) + o2 = OP_SRR(c.opirr(ASRA), uint32(v), uint32(p.To.Reg), uint32(p.To.Reg)) + + case 13: /* movbu r,r */ + if p.As == AMOVBU { + o1 = OP_IRR(c.opirr(AAND), uint32(0xff), uint32(p.From.Reg), uint32(p.To.Reg)) + } else { + o1 = OP_IRR(c.opirr(AAND), uint32(0xffff), uint32(p.From.Reg), uint32(p.To.Reg)) + } + + case 14: /* movwu r,r */ + // NOTE: this case does not use REGTMP. If it ever does, + // remove the NOTUSETMP flag in optab. + o1 = OP_SRR(c.opirr(-ASLLV), uint32(0), uint32(p.From.Reg), uint32(p.To.Reg)) + o2 = OP_SRR(c.opirr(-ASRLV), uint32(0), uint32(p.To.Reg), uint32(p.To.Reg)) + + case 15: /* teq $c r,r */ + v := c.regoff(&p.From) + r := int(p.Reg) + if r == 0 { + r = REGZERO + } + /* only use 10 bits of trap code */ + o1 = OP_IRR(c.opirr(p.As), (uint32(v)&0x3FF)<<6, uint32(r), uint32(p.To.Reg)) + + case 16: /* sll $c,[r1],r2 */ + v := c.regoff(&p.From) + r := int(p.Reg) + if r == 0 { + r = int(p.To.Reg) + } + + /* OP_SRR will use only the low 5 bits of the shift value */ + if v >= 32 && vshift(p.As) { + o1 = OP_SRR(c.opirr(-p.As), uint32(v-32), uint32(r), uint32(p.To.Reg)) + } else { + o1 = OP_SRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.To.Reg)) + } + + case 17: + o1 = OP_RRR(c.oprrr(p.As), uint32(REGZERO), uint32(p.From.Reg), uint32(p.To.Reg)) + + case 18: /* jmp [r1],0(r2) */ + r := int(p.Reg) + if r == 0 { + r = int(o.param) + } + o1 = OP_RRR(c.oprrr(p.As), uint32(0), uint32(p.To.Reg), uint32(r)) + if p.As == obj.ACALL { + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 0 + rel.Type = objabi.R_CALLIND + } + + case 19: /* mov $lcon,r ==> lu+or */ + // NOTE: this case does not use REGTMP. If it ever does, + // remove the NOTUSETMP flag in optab. 
+ v := c.regoff(&p.From) + o1 = OP_IRR(c.opirr(ALUI), uint32(v>>16), uint32(REGZERO), uint32(p.To.Reg)) + o2 = OP_IRR(c.opirr(AOR), uint32(v), uint32(p.To.Reg), uint32(p.To.Reg)) + + case 20: /* mov lo/hi,r */ + a := OP(2, 0) /* mfhi */ + if p.From.Reg == REG_LO { + a = OP(2, 2) /* mflo */ + } + o1 = OP_RRR(a, uint32(REGZERO), uint32(REGZERO), uint32(p.To.Reg)) + + case 21: /* mov r,lo/hi */ + a := OP(2, 1) /* mthi */ + if p.To.Reg == REG_LO { + a = OP(2, 3) /* mtlo */ + } + o1 = OP_RRR(a, uint32(REGZERO), uint32(p.From.Reg), uint32(REGZERO)) + + case 22: /* mul r1,r2 [r3]*/ + if p.To.Reg != 0 { + r := int(p.Reg) + if r == 0 { + r = int(p.To.Reg) + } + a := SP(3, 4) | 2 /* mul */ + o1 = OP_RRR(a, uint32(p.From.Reg), uint32(r), uint32(p.To.Reg)) + } else { + o1 = OP_RRR(c.oprrr(p.As), uint32(p.From.Reg), uint32(p.Reg), uint32(REGZERO)) + } + + case 23: /* add $lcon,r1,r2 ==> lu+or+add */ + v := c.regoff(&p.From) + o1 = OP_IRR(c.opirr(ALUI), uint32(v>>16), uint32(REGZERO), uint32(REGTMP)) + o2 = OP_IRR(c.opirr(AOR), uint32(v), uint32(REGTMP), uint32(REGTMP)) + r := int(p.Reg) + if r == 0 { + r = int(p.To.Reg) + } + o3 = OP_RRR(c.oprrr(p.As), uint32(REGTMP), uint32(r), uint32(p.To.Reg)) + + case 24: /* mov $ucon,r ==> lu r */ + v := c.regoff(&p.From) + o1 = OP_IRR(c.opirr(ALUI), uint32(v>>16), uint32(REGZERO), uint32(p.To.Reg)) + + case 25: /* add/and $ucon,[r1],r2 ==> lu $con,t; add t,[r1],r2 */ + v := c.regoff(&p.From) + o1 = OP_IRR(c.opirr(ALUI), uint32(v>>16), uint32(REGZERO), uint32(REGTMP)) + r := int(p.Reg) + if r == 0 { + r = int(p.To.Reg) + } + o2 = OP_RRR(c.oprrr(p.As), uint32(REGTMP), uint32(r), uint32(p.To.Reg)) + + case 26: /* mov $lsext/auto/oreg,r ==> lu+or+add */ + v := c.regoff(&p.From) + o1 = OP_IRR(c.opirr(ALUI), uint32(v>>16), uint32(REGZERO), uint32(REGTMP)) + o2 = OP_IRR(c.opirr(AOR), uint32(v), uint32(REGTMP), uint32(REGTMP)) + r := int(p.From.Reg) + if r == 0 { + r = int(o.param) + } + o3 = OP_RRR(c.oprrr(add), uint32(REGTMP), uint32(r), 
uint32(p.To.Reg)) + + case 27: /* mov [sl]ext/auto/oreg,fr ==> lwc1 o(r) */ + v := c.regoff(&p.From) + r := int(p.From.Reg) + if r == 0 { + r = int(o.param) + } + a := -AMOVF + if p.As == AMOVD { + a = -AMOVD + } + switch o.size { + case 12: + o1 = OP_IRR(c.opirr(ALUI), uint32((v+1<<15)>>16), uint32(REGZERO), uint32(REGTMP)) + o2 = OP_RRR(c.oprrr(add), uint32(r), uint32(REGTMP), uint32(REGTMP)) + o3 = OP_IRR(c.opirr(a), uint32(v), uint32(REGTMP), uint32(p.To.Reg)) + + case 4: + o1 = OP_IRR(c.opirr(a), uint32(v), uint32(r), uint32(p.To.Reg)) + } + + case 28: /* mov fr,[sl]ext/auto/oreg ==> swc1 o(r) */ + v := c.regoff(&p.To) + r := int(p.To.Reg) + if r == 0 { + r = int(o.param) + } + a := AMOVF + if p.As == AMOVD { + a = AMOVD + } + switch o.size { + case 12: + o1 = OP_IRR(c.opirr(ALUI), uint32((v+1<<15)>>16), uint32(REGZERO), uint32(REGTMP)) + o2 = OP_RRR(c.oprrr(add), uint32(r), uint32(REGTMP), uint32(REGTMP)) + o3 = OP_IRR(c.opirr(a), uint32(v), uint32(REGTMP), uint32(p.From.Reg)) + + case 4: + o1 = OP_IRR(c.opirr(a), uint32(v), uint32(r), uint32(p.From.Reg)) + } + + case 30: /* movw r,fr */ + a := SP(2, 1) | (4 << 21) /* mtc1 */ + o1 = OP_RRR(a, uint32(p.From.Reg), uint32(0), uint32(p.To.Reg)) + + case 31: /* movw fr,r */ + a := SP(2, 1) | (0 << 21) /* mtc1 */ + o1 = OP_RRR(a, uint32(p.To.Reg), uint32(0), uint32(p.From.Reg)) + + case 32: /* fadd fr1,[fr2],fr3 */ + r := int(p.Reg) + if r == 0 { + r = int(p.To.Reg) + } + o1 = OP_FRRR(c.oprrr(p.As), uint32(p.From.Reg), uint32(r), uint32(p.To.Reg)) + + case 33: /* fabs fr1, fr3 */ + o1 = OP_FRRR(c.oprrr(p.As), uint32(0), uint32(p.From.Reg), uint32(p.To.Reg)) + + case 34: /* mov $con,fr ==> or/add $i,t; mov t,fr */ + v := c.regoff(&p.From) + a := AADDU + if o.a1 == C_ANDCON { + a = AOR + } + o1 = OP_IRR(c.opirr(a), uint32(v), uint32(0), uint32(REGTMP)) + o2 = OP_RRR(SP(2, 1)|(4<<21), uint32(REGTMP), uint32(0), uint32(p.To.Reg)) /* mtc1 */ + + case 35: /* mov r,lext/auto/oreg ==> sw o(REGTMP) */ + v := c.regoff(&p.To) 
+ r := int(p.To.Reg) + if r == 0 { + r = int(o.param) + } + o1 = OP_IRR(c.opirr(ALUI), uint32((v+1<<15)>>16), uint32(REGZERO), uint32(REGTMP)) + o2 = OP_RRR(c.oprrr(add), uint32(r), uint32(REGTMP), uint32(REGTMP)) + o3 = OP_IRR(c.opirr(p.As), uint32(v), uint32(REGTMP), uint32(p.From.Reg)) + + case 36: /* mov lext/auto/oreg,r ==> lw o(REGTMP) */ + v := c.regoff(&p.From) + r := int(p.From.Reg) + if r == 0 { + r = int(o.param) + } + o1 = OP_IRR(c.opirr(ALUI), uint32((v+1<<15)>>16), uint32(REGZERO), uint32(REGTMP)) + o2 = OP_RRR(c.oprrr(add), uint32(r), uint32(REGTMP), uint32(REGTMP)) + o3 = OP_IRR(c.opirr(-p.As), uint32(v), uint32(REGTMP), uint32(p.To.Reg)) + + case 37: /* movw r,mr */ + a := SP(2, 0) | (4 << 21) /* mtc0 */ + if p.As == AMOVV { + a = SP(2, 0) | (5 << 21) /* dmtc0 */ + } + o1 = OP_RRR(a, uint32(p.From.Reg), uint32(0), uint32(p.To.Reg)) + + case 38: /* movw mr,r */ + a := SP(2, 0) | (0 << 21) /* mfc0 */ + if p.As == AMOVV { + a = SP(2, 0) | (1 << 21) /* dmfc0 */ + } + o1 = OP_RRR(a, uint32(p.To.Reg), uint32(0), uint32(p.From.Reg)) + + case 40: /* word */ + o1 = uint32(c.regoff(&p.From)) + + case 41: /* movw f,fcr */ + o1 = OP_RRR(SP(2, 1)|(6<<21), uint32(p.From.Reg), uint32(0), uint32(p.To.Reg)) /* mtcc1 */ + + case 42: /* movw fcr,r */ + o1 = OP_RRR(SP(2, 1)|(2<<21), uint32(p.To.Reg), uint32(0), uint32(p.From.Reg)) /* mfcc1 */ + + case 47: /* movv r,fr */ + a := SP(2, 1) | (5 << 21) /* dmtc1 */ + o1 = OP_RRR(a, uint32(p.From.Reg), uint32(0), uint32(p.To.Reg)) + + case 48: /* movv fr,r */ + a := SP(2, 1) | (1 << 21) /* dmtc1 */ + o1 = OP_RRR(a, uint32(p.To.Reg), uint32(0), uint32(p.From.Reg)) + + case 49: /* undef */ + o1 = 52 /* trap -- teq r0, r0 */ + + /* relocation operations */ + case 50: /* mov r,addr ==> lu + add REGSB, REGTMP + sw o(REGTMP) */ + o1 = OP_IRR(c.opirr(ALUI), uint32(0), uint32(REGZERO), uint32(REGTMP)) + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 4 + rel.Sym = p.To.Sym + rel.Add = p.To.Offset + rel.Type = 
objabi.R_ADDRMIPSU + o2 = OP_IRR(c.opirr(p.As), uint32(0), uint32(REGTMP), uint32(p.From.Reg)) + rel2 := obj.Addrel(c.cursym) + rel2.Off = int32(c.pc + 4) + rel2.Siz = 4 + rel2.Sym = p.To.Sym + rel2.Add = p.To.Offset + rel2.Type = objabi.R_ADDRMIPS + + if o.size == 12 { + o3 = o2 + o2 = OP_RRR(c.oprrr(AADDVU), uint32(REGSB), uint32(REGTMP), uint32(REGTMP)) + rel2.Off += 4 + } + + case 51: /* mov addr,r ==> lu + add REGSB, REGTMP + lw o(REGTMP) */ + o1 = OP_IRR(c.opirr(ALUI), uint32(0), uint32(REGZERO), uint32(REGTMP)) + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 4 + rel.Sym = p.From.Sym + rel.Add = p.From.Offset + rel.Type = objabi.R_ADDRMIPSU + o2 = OP_IRR(c.opirr(-p.As), uint32(0), uint32(REGTMP), uint32(p.To.Reg)) + rel2 := obj.Addrel(c.cursym) + rel2.Off = int32(c.pc + 4) + rel2.Siz = 4 + rel2.Sym = p.From.Sym + rel2.Add = p.From.Offset + rel2.Type = objabi.R_ADDRMIPS + + if o.size == 12 { + o3 = o2 + o2 = OP_RRR(c.oprrr(AADDVU), uint32(REGSB), uint32(REGTMP), uint32(REGTMP)) + rel2.Off += 4 + } + + case 52: /* mov $lext, r ==> lu + add REGSB, r + add */ + // NOTE: this case does not use REGTMP. If it ever does, + // remove the NOTUSETMP flag in optab. + o1 = OP_IRR(c.opirr(ALUI), uint32(0), uint32(REGZERO), uint32(p.To.Reg)) + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 4 + rel.Sym = p.From.Sym + rel.Add = p.From.Offset + rel.Type = objabi.R_ADDRMIPSU + o2 = OP_IRR(c.opirr(add), uint32(0), uint32(p.To.Reg), uint32(p.To.Reg)) + rel2 := obj.Addrel(c.cursym) + rel2.Off = int32(c.pc + 4) + rel2.Siz = 4 + rel2.Sym = p.From.Sym + rel2.Add = p.From.Offset + rel2.Type = objabi.R_ADDRMIPS + + if o.size == 12 { + o3 = o2 + o2 = OP_RRR(c.oprrr(AADDVU), uint32(REGSB), uint32(p.To.Reg), uint32(p.To.Reg)) + rel2.Off += 4 + } + + case 53: /* mov r, tlsvar ==> rdhwr + sw o(r3) */ + // clobbers R3 ! + // load thread pointer with RDHWR, R3 is used for fast kernel emulation on Linux + // NOTE: this case does not use REGTMP. 
If it ever does, + // remove the NOTUSETMP flag in optab. + o1 = (037<<26 + 073) | (29 << 11) | (3 << 16) // rdhwr $29, r3 + o2 = OP_IRR(c.opirr(p.As), uint32(0), uint32(REG_R3), uint32(p.From.Reg)) + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc + 4) + rel.Siz = 4 + rel.Sym = p.To.Sym + rel.Add = p.To.Offset + rel.Type = objabi.R_ADDRMIPSTLS + + case 54: /* mov tlsvar, r ==> rdhwr + lw o(r3) */ + // clobbers R3 ! + // NOTE: this case does not use REGTMP. If it ever does, + // remove the NOTUSETMP flag in optab. + o1 = (037<<26 + 073) | (29 << 11) | (3 << 16) // rdhwr $29, r3 + o2 = OP_IRR(c.opirr(-p.As), uint32(0), uint32(REG_R3), uint32(p.To.Reg)) + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc + 4) + rel.Siz = 4 + rel.Sym = p.From.Sym + rel.Add = p.From.Offset + rel.Type = objabi.R_ADDRMIPSTLS + + case 55: /* mov $tlsvar, r ==> rdhwr + add */ + // clobbers R3 ! + // NOTE: this case does not use REGTMP. If it ever does, + // remove the NOTUSETMP flag in optab. + o1 = (037<<26 + 073) | (29 << 11) | (3 << 16) // rdhwr $29, r3 + o2 = OP_IRR(c.opirr(add), uint32(0), uint32(REG_R3), uint32(p.To.Reg)) + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc + 4) + rel.Siz = 4 + rel.Sym = p.From.Sym + rel.Add = p.From.Offset + rel.Type = objabi.R_ADDRMIPSTLS + + case 56: /* vmov{b,h,w,d} $scon, wr */ + + v := c.regoff(&p.From) + o1 = OP_VI10(110, c.twobitdf(p.As), v, uint32(p.To.Reg), 7) + + case 57: /* vld $soreg, wr */ + v := c.lsoffset(p.As, c.regoff(&p.From)) + o1 = OP_VMI10(v, uint32(p.From.Reg), uint32(p.To.Reg), 8, c.twobitdf(p.As)) + + case 58: /* vst wr, $soreg */ + v := c.lsoffset(p.As, c.regoff(&p.To)) + o1 = OP_VMI10(v, uint32(p.To.Reg), uint32(p.From.Reg), 9, c.twobitdf(p.As)) + } + + out[0] = o1 + out[1] = o2 + out[2] = o3 + out[3] = o4 +} + +func (c *ctxt0) vregoff(a *obj.Addr) int64 { + c.instoffset = 0 + c.aclass(a) + return c.instoffset +} + +func (c *ctxt0) regoff(a *obj.Addr) int32 { + return int32(c.vregoff(a)) +} + +func (c *ctxt0) oprrr(a 
obj.As) uint32 { + switch a { + case AADD: + return OP(4, 0) + case AADDU: + return OP(4, 1) + case ASGT: + return OP(5, 2) + case ASGTU: + return OP(5, 3) + case AAND: + return OP(4, 4) + case AOR: + return OP(4, 5) + case AXOR: + return OP(4, 6) + case ASUB: + return OP(4, 2) + case ASUBU, ANEGW: + return OP(4, 3) + case ANOR: + return OP(4, 7) + case ASLL: + return OP(0, 4) + case ASRL: + return OP(0, 6) + case ASRA: + return OP(0, 7) + case AROTR: + return OP(8, 6) + case ASLLV: + return OP(2, 4) + case ASRLV: + return OP(2, 6) + case ASRAV: + return OP(2, 7) + case AROTRV: + return OP(10, 6) + case AADDV: + return OP(5, 4) + case AADDVU: + return OP(5, 5) + case ASUBV: + return OP(5, 6) + case ASUBVU, ANEGV: + return OP(5, 7) + case AREM, + ADIV: + return OP(3, 2) + case AREMU, + ADIVU: + return OP(3, 3) + case AMUL: + return OP(3, 0) + case AMULU: + return OP(3, 1) + case AREMV, + ADIVV: + return OP(3, 6) + case AREMVU, + ADIVVU: + return OP(3, 7) + case AMULV: + return OP(3, 4) + case AMULVU: + return OP(3, 5) + + case AJMP: + return OP(1, 0) + case AJAL: + return OP(1, 1) + + case ABREAK: + return OP(1, 5) + case ASYSCALL: + return OP(1, 4) + case ATLBP: + return MMU(1, 0) + case ATLBR: + return MMU(0, 1) + case ATLBWI: + return MMU(0, 2) + case ATLBWR: + return MMU(0, 6) + case ARFE: + return MMU(2, 0) + + case ADIVF: + return FPF(0, 3) + case ADIVD: + return FPD(0, 3) + case AMULF: + return FPF(0, 2) + case AMULD: + return FPD(0, 2) + case ASUBF: + return FPF(0, 1) + case ASUBD: + return FPD(0, 1) + case AADDF: + return FPF(0, 0) + case AADDD: + return FPD(0, 0) + case ATRUNCFV: + return FPF(1, 1) + case ATRUNCDV: + return FPD(1, 1) + case ATRUNCFW: + return FPF(1, 5) + case ATRUNCDW: + return FPD(1, 5) + case AMOVFV: + return FPF(4, 5) + case AMOVDV: + return FPD(4, 5) + case AMOVVF: + return FPV(4, 0) + case AMOVVD: + return FPV(4, 1) + case AMOVFW: + return FPF(4, 4) + case AMOVDW: + return FPD(4, 4) + case AMOVWF: + return FPW(4, 0) + case AMOVDF: + 
return FPD(4, 0) + case AMOVWD: + return FPW(4, 1) + case AMOVFD: + return FPF(4, 1) + case AABSF: + return FPF(0, 5) + case AABSD: + return FPD(0, 5) + case AMOVF: + return FPF(0, 6) + case AMOVD: + return FPD(0, 6) + case ANEGF: + return FPF(0, 7) + case ANEGD: + return FPD(0, 7) + case ACMPEQF: + return FPF(6, 2) + case ACMPEQD: + return FPD(6, 2) + case ACMPGTF: + return FPF(7, 4) + case ACMPGTD: + return FPD(7, 4) + case ACMPGEF: + return FPF(7, 6) + case ACMPGED: + return FPD(7, 6) + + case ASQRTF: + return FPF(0, 4) + case ASQRTD: + return FPD(0, 4) + + case ASYNC: + return OP(1, 7) + case ANOOP: + return 0 + + case ACMOVN: + return OP(1, 3) + case ACMOVZ: + return OP(1, 2) + case ACMOVT: + return OP(0, 1) | (1 << 16) + case ACMOVF: + return OP(0, 1) | (0 << 16) + case ACLO: + return SP(3, 4) | OP(4, 1) + case ACLZ: + return SP(3, 4) | OP(4, 0) + case AMADD: + return SP(3, 4) | OP(0, 0) + case AMSUB: + return SP(3, 4) | OP(0, 4) + } + + if a < 0 { + c.ctxt.Diag("bad rrr opcode -%v", -a) + } else { + c.ctxt.Diag("bad rrr opcode %v", a) + } + return 0 +} + +func (c *ctxt0) opirr(a obj.As) uint32 { + switch a { + case AADD: + return SP(1, 0) + case AADDU: + return SP(1, 1) + case ASGT: + return SP(1, 2) + case ASGTU: + return SP(1, 3) + case AAND: + return SP(1, 4) + case AOR: + return SP(1, 5) + case AXOR: + return SP(1, 6) + case ALUI: + return SP(1, 7) + case ASLL: + return OP(0, 0) + case ASRL: + return OP(0, 2) + case ASRA: + return OP(0, 3) + case AROTR: + return OP(0, 2) | 1<<21 + case AADDV: + return SP(3, 0) + case AADDVU: + return SP(3, 1) + + case AJMP: + return SP(0, 2) + case AJAL, + obj.ADUFFZERO, + obj.ADUFFCOPY: + return SP(0, 3) + case ABEQ: + return SP(0, 4) + case -ABEQ: + return SP(2, 4) /* likely */ + case ABNE: + return SP(0, 5) + case -ABNE: + return SP(2, 5) /* likely */ + case ABGEZ: + return SP(0, 1) | BCOND(0, 1) + case -ABGEZ: + return SP(0, 1) | BCOND(0, 3) /* likely */ + case ABGEZAL: + return SP(0, 1) | BCOND(2, 1) + case 
-ABGEZAL: + return SP(0, 1) | BCOND(2, 3) /* likely */ + case ABGTZ: + return SP(0, 7) + case -ABGTZ: + return SP(2, 7) /* likely */ + case ABLEZ: + return SP(0, 6) + case -ABLEZ: + return SP(2, 6) /* likely */ + case ABLTZ: + return SP(0, 1) | BCOND(0, 0) + case -ABLTZ: + return SP(0, 1) | BCOND(0, 2) /* likely */ + case ABLTZAL: + return SP(0, 1) | BCOND(2, 0) + case -ABLTZAL: + return SP(0, 1) | BCOND(2, 2) /* likely */ + case ABFPT: + return SP(2, 1) | (257 << 16) + case -ABFPT: + return SP(2, 1) | (259 << 16) /* likely */ + case ABFPF: + return SP(2, 1) | (256 << 16) + case -ABFPF: + return SP(2, 1) | (258 << 16) /* likely */ + + case AMOVB, + AMOVBU: + return SP(5, 0) + case AMOVH, + AMOVHU: + return SP(5, 1) + case AMOVW, + AMOVWU: + return SP(5, 3) + case AMOVV: + return SP(7, 7) + case AMOVF: + return SP(7, 1) + case AMOVD: + return SP(7, 5) + case AMOVWL: + return SP(5, 2) + case AMOVWR: + return SP(5, 6) + case AMOVVL: + return SP(5, 4) + case AMOVVR: + return SP(5, 5) + + case ABREAK: + return SP(5, 7) + + case -AMOVWL: + return SP(4, 2) + case -AMOVWR: + return SP(4, 6) + case -AMOVVL: + return SP(3, 2) + case -AMOVVR: + return SP(3, 3) + case -AMOVB: + return SP(4, 0) + case -AMOVBU: + return SP(4, 4) + case -AMOVH: + return SP(4, 1) + case -AMOVHU: + return SP(4, 5) + case -AMOVW: + return SP(4, 3) + case -AMOVWU: + return SP(4, 7) + case -AMOVV: + return SP(6, 7) + case -AMOVF: + return SP(6, 1) + case -AMOVD: + return SP(6, 5) + + case ASLLV: + return OP(7, 0) + case ASRLV: + return OP(7, 2) + case ASRAV: + return OP(7, 3) + case AROTRV: + return OP(7, 2) | 1<<21 + case -ASLLV: + return OP(7, 4) + case -ASRLV: + return OP(7, 6) + case -ASRAV: + return OP(7, 7) + case -AROTRV: + return OP(7, 6) | 1<<21 + + case ATEQ: + return OP(6, 4) + case ATNE: + return OP(6, 6) + case -ALL: + return SP(6, 0) + case -ALLV: + return SP(6, 4) + case ASC: + return SP(7, 0) + case ASCV: + return SP(7, 4) + } + + if a < 0 { + c.ctxt.Diag("bad irr opcode -%v", -a) + } 
else { + c.ctxt.Diag("bad irr opcode %v", a) + } + return 0 +} + +func vshift(a obj.As) bool { + switch a { + case ASLLV, + ASRLV, + ASRAV, + AROTRV: + return true + } + return false +} + +// MSA Two-bit Data Format Field Encoding +func (c *ctxt0) twobitdf(a obj.As) uint32 { + switch a { + case AVMOVB: + return 0 + case AVMOVH: + return 1 + case AVMOVW: + return 2 + case AVMOVD: + return 3 + default: + c.ctxt.Diag("unsupported data format %v", a) + } + return 0 +} + +// MSA Load/Store offset have to be multiple of size of data format +func (c *ctxt0) lsoffset(a obj.As, o int32) int32 { + var mod int32 + switch a { + case AVMOVB: + mod = 1 + case AVMOVH: + mod = 2 + case AVMOVW: + mod = 4 + case AVMOVD: + mod = 8 + default: + c.ctxt.Diag("unsupported instruction:%v", a) + } + + if o%mod != 0 { + c.ctxt.Diag("invalid offset for %v: %d is not a multiple of %d", a, o, mod) + } + + return o / mod +} diff --git a/src/cmd/internal/obj/mips/list0.go b/src/cmd/internal/obj/mips/list0.go new file mode 100644 index 0000000..f734e21 --- /dev/null +++ b/src/cmd/internal/obj/mips/list0.go @@ -0,0 +1,83 @@ +// cmd/9l/list.c from Vita Nuova. +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2008 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2008 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package mips + +import ( + "cmd/internal/obj" + "fmt" +) + +func init() { + obj.RegisterRegister(obj.RBaseMIPS, REG_LAST+1, rconv) + obj.RegisterOpcode(obj.ABaseMIPS, Anames) +} + +func rconv(r int) string { + if r == 0 { + return "NONE" + } + if r == REGG { + // Special case. + return "g" + } + if REG_R0 <= r && r <= REG_R31 { + return fmt.Sprintf("R%d", r-REG_R0) + } + if REG_F0 <= r && r <= REG_F31 { + return fmt.Sprintf("F%d", r-REG_F0) + } + if REG_M0 <= r && r <= REG_M31 { + return fmt.Sprintf("M%d", r-REG_M0) + } + if REG_FCR0 <= r && r <= REG_FCR31 { + return fmt.Sprintf("FCR%d", r-REG_FCR0) + } + if REG_W0 <= r && r <= REG_W31 { + return fmt.Sprintf("W%d", r-REG_W0) + } + if r == REG_HI { + return "HI" + } + if r == REG_LO { + return "LO" + } + + return fmt.Sprintf("Rgok(%d)", r-obj.RBaseMIPS) +} + +func DRconv(a int) string { + s := "C_??" 
+ if a >= C_NONE && a <= C_NCLASS { + s = cnames0[a] + } + var fp string + fp += s + return fp +} diff --git a/src/cmd/internal/obj/mips/obj0.go b/src/cmd/internal/obj/mips/obj0.go new file mode 100644 index 0000000..9241dfd --- /dev/null +++ b/src/cmd/internal/obj/mips/obj0.go @@ -0,0 +1,1536 @@ +// cmd/9l/noop.c, cmd/9l/pass.c, cmd/9l/span.c from Vita Nuova. +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2008 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2008 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +package mips + +import ( + "cmd/internal/obj" + "cmd/internal/objabi" + "cmd/internal/sys" + "encoding/binary" + "fmt" + "log" + "math" +) + +func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { + c := ctxt0{ctxt: ctxt, newprog: newprog} + + p.From.Class = 0 + p.To.Class = 0 + + // Rewrite JMP/JAL to symbol as TYPE_BRANCH. + switch p.As { + case AJMP, + AJAL, + ARET, + obj.ADUFFZERO, + obj.ADUFFCOPY: + if p.To.Sym != nil { + p.To.Type = obj.TYPE_BRANCH + } + } + + // Rewrite float constants to values stored in memory. + switch p.As { + case AMOVF: + if p.From.Type == obj.TYPE_FCONST { + f32 := float32(p.From.Val.(float64)) + if math.Float32bits(f32) == 0 { + p.As = AMOVW + p.From.Type = obj.TYPE_REG + p.From.Reg = REGZERO + break + } + p.From.Type = obj.TYPE_MEM + p.From.Sym = ctxt.Float32Sym(f32) + p.From.Name = obj.NAME_EXTERN + p.From.Offset = 0 + } + + case AMOVD: + if p.From.Type == obj.TYPE_FCONST { + f64 := p.From.Val.(float64) + if math.Float64bits(f64) == 0 && c.ctxt.Arch.Family == sys.MIPS64 { + p.As = AMOVV + p.From.Type = obj.TYPE_REG + p.From.Reg = REGZERO + break + } + p.From.Type = obj.TYPE_MEM + p.From.Sym = ctxt.Float64Sym(f64) + p.From.Name = obj.NAME_EXTERN + p.From.Offset = 0 + } + + // Put >32-bit constants in memory and load them + case AMOVV: + if p.From.Type == obj.TYPE_CONST && p.From.Name == obj.NAME_NONE && p.From.Reg == 0 && int64(int32(p.From.Offset)) != p.From.Offset { + p.From.Type = obj.TYPE_MEM + p.From.Sym = ctxt.Int64Sym(p.From.Offset) + p.From.Name = obj.NAME_EXTERN + p.From.Offset = 0 + } + } + + // Rewrite SUB constants into ADD. 
// preprocess is the Preprocess hook of the MIPS LinkArch values. For the
// function cursym it:
//   - validates the frame size recorded on the TEXT instruction and stores
//     the argument and local sizes on the function;
//   - marks leaf functions, branches and branch targets (LABEL/LEAF/SYNC/BRANCH);
//   - expands TEXT into the prologue (stack split check, LR save, SP
//     adjustment, wrapper panic.argp fix-up) and RET into the epilogue;
//   - records Spadj for explicit SP adjustments and rewrites GETCALLERPC;
//   - on 32-bit MIPS splits memory MOVD into two MOVF;
//   - finally adds a NOP after every branch (scheduling is disabled because
//     nosched is always true).
func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
	// TODO(minux): add morestack short-cuts with small fixed frame-size.
	c := ctxt0{ctxt: ctxt, newprog: newprog, cursym: cursym}

	// a switch for enabling/disabling instruction scheduling
	nosched := true

	// Nothing to do for a function without a body.
	if c.cursym.Func().Text == nil || c.cursym.Func().Text.Link == nil {
		return
	}

	p := c.cursym.Func().Text
	textstksiz := p.To.Offset
	if textstksiz == -ctxt.Arch.FixedFrameSize {
		// Historical way to mark NOFRAME.
		p.From.Sym.Set(obj.AttrNoFrame, true)
		textstksiz = 0
	}
	if textstksiz < 0 {
		c.ctxt.Diag("negative frame size %d - did you mean NOFRAME?", textstksiz)
	}
	if p.From.Sym.NoFrame() {
		if textstksiz != 0 {
			c.ctxt.Diag("NOFRAME functions must have a frame size of 0, not %d", textstksiz)
		}
	}

	c.cursym.Func().Args = p.To.Val.(int32)
	c.cursym.Func().Locals = int32(textstksiz)

	/*
	 * find leaf subroutines
	 * expand RET
	 * expand BECOME pseudo
	 */

	for p := c.cursym.Func().Text; p != nil; p = p.Link {
		switch p.As {
		/* too hard, just leave alone */
		case obj.ATEXT:
			// Assume leaf until a call is seen below.
			p.Mark |= LABEL | LEAF | SYNC
			if p.Link != nil {
				p.Link.Mark |= LABEL
			}

		/* too hard, just leave alone */
		case AMOVW,
			AMOVV:
			if p.To.Type == obj.TYPE_REG && p.To.Reg >= REG_SPECIAL {
				p.Mark |= LABEL | SYNC
				break
			}
			if p.From.Type == obj.TYPE_REG && p.From.Reg >= REG_SPECIAL {
				p.Mark |= LABEL | SYNC
			}

		/* too hard, just leave alone */
		case ASYSCALL,
			AWORD,
			ATLBWR,
			ATLBWI,
			ATLBP,
			ATLBR:
			p.Mark |= LABEL | SYNC

		case ANOR:
			if p.To.Type == obj.TYPE_REG {
				if p.To.Reg == REGZERO {
					p.Mark |= LABEL | SYNC
				}
			}

		case ABGEZAL,
			ABLTZAL,
			AJAL,
			obj.ADUFFZERO,
			obj.ADUFFCOPY:
			// Any call clears the LEAF mark on the function's TEXT.
			c.cursym.Func().Text.Mark &^= LEAF
			fallthrough

		case AJMP,
			ABEQ,
			ABGEZ,
			ABGTZ,
			ABLEZ,
			ABLTZ,
			ABNE,
			ABFPT, ABFPF:
			if p.As == ABFPT || p.As == ABFPF {
				// We don't treat ABFPT and ABFPF as branches here,
				// so that we will always fill nop (0x0) in their
				// delay slot during assembly.
				// This is to workaround a kernel FPU emulator bug
				// where it uses the user stack to simulate the
				// instruction in the delay slot if it's not 0x0,
				// and somehow that leads to SIGSEGV when the kernel
				// jump to the stack.
				p.Mark |= SYNC
			} else {
				p.Mark |= BRANCH
			}
			q1 := p.To.Target()
			if q1 != nil {
				// Retarget the branch past any NOPs.
				for q1.As == obj.ANOP {
					q1 = q1.Link
					p.To.SetTarget(q1)
				}

				if q1.Mark&LEAF == 0 {
					q1.Mark |= LABEL
				}
			}
			//else {
			//	p.Mark |= LABEL
			//}
			q1 = p.Link
			if q1 != nil {
				q1.Mark |= LABEL
			}

		case ARET:
			if p.Link != nil {
				p.Link.Mark |= LABEL
			}
		}
	}

	// Pick pointer-sized add/move opcodes for the target family.
	var mov, add obj.As
	if c.ctxt.Arch.Family == sys.MIPS64 {
		add = AADDV
		mov = AMOVV
	} else {
		add = AADDU
		mov = AMOVW
	}

	var q *obj.Prog
	var q1 *obj.Prog
	autosize := int32(0)
	var p1 *obj.Prog
	var p2 *obj.Prog
	for p := c.cursym.Func().Text; p != nil; p = p.Link {
		o := p.As
		switch o {
		case obj.ATEXT:
			autosize = int32(textstksiz)

			if p.Mark&LEAF != 0 && autosize == 0 {
				// A leaf function with no locals has no frame.
				p.From.Sym.Set(obj.AttrNoFrame, true)
			}

			if !p.From.Sym.NoFrame() {
				// If there is a stack frame at all, it includes
				// space to save the LR.
				autosize += int32(c.ctxt.Arch.FixedFrameSize)
			}

			// On MIPS64, round the frame up when bit 2 of its size is set.
			if autosize&4 != 0 && c.ctxt.Arch.Family == sys.MIPS64 {
				autosize += 4
			}

			if autosize == 0 && c.cursym.Func().Text.Mark&LEAF == 0 {
				if c.cursym.Func().Text.From.Sym.NoSplit() {
					if ctxt.Debugvlog {
						ctxt.Logf("save suppressed in: %s\n", c.cursym.Name)
					}

					c.cursym.Func().Text.Mark |= LEAF
				}
			}

			p.To.Offset = int64(autosize) - ctxt.Arch.FixedFrameSize

			if c.cursym.Func().Text.Mark&LEAF != 0 {
				c.cursym.Set(obj.AttrLeaf, true)
				if p.From.Sym.NoFrame() {
					break
				}
			}

			if !p.From.Sym.NoSplit() {
				p = c.stacksplit(p, autosize) // emit split check
			}

			q = p

			if autosize != 0 {
				// Make sure to save link register for non-empty frame, even if
				// it is a leaf function, so that traceback works.
				// Store link register before decrement SP, so if a signal comes
				// during the execution of the function prologue, the traceback
				// code will not see a half-updated stack frame.
				// This sequence is not async preemptible, as if we open a frame
				// at the current SP, it will clobber the saved LR.
				q = c.ctxt.StartUnsafePoint(q, c.newprog)

				q = obj.Appendp(q, newprog)
				q.As = mov
				q.Pos = p.Pos
				q.From.Type = obj.TYPE_REG
				q.From.Reg = REGLINK
				q.To.Type = obj.TYPE_MEM
				q.To.Offset = int64(-autosize)
				q.To.Reg = REGSP

				q = obj.Appendp(q, newprog)
				q.As = add
				q.Pos = p.Pos
				q.From.Type = obj.TYPE_CONST
				q.From.Offset = int64(-autosize)
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REGSP
				q.Spadj = +autosize

				q = c.ctxt.EndUnsafePoint(q, c.newprog, -1)

				// On Linux, in a cgo binary we may get a SIGSETXID signal early on
				// before the signal stack is set, as glibc doesn't allow us to block
				// SIGSETXID. So a signal may land on the current stack and clobber
				// the content below the SP. We store the LR again after the SP is
				// decremented.
				q = obj.Appendp(q, newprog)
				q.As = mov
				q.Pos = p.Pos
				q.From.Type = obj.TYPE_REG
				q.From.Reg = REGLINK
				q.To.Type = obj.TYPE_MEM
				q.To.Offset = 0
				q.To.Reg = REGSP
			}

			if c.cursym.Func().Text.From.Sym.Wrapper() && c.cursym.Func().Text.Mark&LEAF == 0 {
				// if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame
				//
				//	MOV	g_panic(g), R1
				//	BEQ	R1, end
				//	MOV	panic_argp(R1), R2
				//	ADD	$(autosize+FIXED_FRAME), R29, R3
				//	BNE	R2, R3, end
				//	ADD	$FIXED_FRAME, R29, R2
				//	MOV	R2, panic_argp(R1)
				// end:
				//	NOP
				//
				// The NOP is needed to give the jumps somewhere to land.
				// It is a liblink NOP, not an mips NOP: it encodes to 0 instruction bytes.
				//
				// We don't generate this for leafs because that means the wrapped
				// function was inlined into the wrapper.

				q = obj.Appendp(q, newprog)

				q.As = mov
				q.From.Type = obj.TYPE_MEM
				q.From.Reg = REGG
				q.From.Offset = 4 * int64(c.ctxt.Arch.PtrSize) // G.panic
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REG_R1

				q = obj.Appendp(q, newprog)
				q.As = ABEQ
				q.From.Type = obj.TYPE_REG
				q.From.Reg = REG_R1
				q.To.Type = obj.TYPE_BRANCH
				q.Mark |= BRANCH
				p1 = q

				q = obj.Appendp(q, newprog)
				q.As = mov
				q.From.Type = obj.TYPE_MEM
				q.From.Reg = REG_R1
				q.From.Offset = 0 // Panic.argp
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REG_R2

				q = obj.Appendp(q, newprog)
				q.As = add
				q.From.Type = obj.TYPE_CONST
				q.From.Offset = int64(autosize) + ctxt.Arch.FixedFrameSize
				q.Reg = REGSP
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REG_R3

				q = obj.Appendp(q, newprog)
				q.As = ABNE
				q.From.Type = obj.TYPE_REG
				q.From.Reg = REG_R2
				q.Reg = REG_R3
				q.To.Type = obj.TYPE_BRANCH
				q.Mark |= BRANCH
				p2 = q

				q = obj.Appendp(q, newprog)
				q.As = add
				q.From.Type = obj.TYPE_CONST
				q.From.Offset = ctxt.Arch.FixedFrameSize
				q.Reg = REGSP
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REG_R2

				q = obj.Appendp(q, newprog)
				q.As = mov
				q.From.Type = obj.TYPE_REG
				q.From.Reg = REG_R2
				q.To.Type = obj.TYPE_MEM
				q.To.Reg = REG_R1
				q.To.Offset = 0 // Panic.argp

				q = obj.Appendp(q, newprog)

				// Landing pad for both conditional branches above.
				q.As = obj.ANOP
				p1.To.SetTarget(q)
				p2.To.SetTarget(q)
			}

		case ARET:
			if p.From.Type == obj.TYPE_CONST {
				ctxt.Diag("using BECOME (%v) is not supported!", p)
				break
			}

			retSym := p.To.Sym
			p.To.Name = obj.NAME_NONE // clear fields as we may modify p to other instruction
			p.To.Sym = nil

			if c.cursym.Func().Text.Mark&LEAF != 0 {
				if autosize == 0 {
					// Frameless leaf: RET becomes a single jump.
					p.As = AJMP
					p.From = obj.Addr{}
					if retSym != nil { // retjmp
						p.To.Type = obj.TYPE_BRANCH
						p.To.Name = obj.NAME_EXTERN
						p.To.Sym = retSym
					} else {
						p.To.Type = obj.TYPE_MEM
						p.To.Reg = REGLINK
						p.To.Offset = 0
					}
					p.Mark |= BRANCH
					break
				}

				// Leaf with a frame: pop the frame, then jump.
				p.As = add
				p.From.Type = obj.TYPE_CONST
				p.From.Offset = int64(autosize)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = REGSP
				p.Spadj = -autosize

				q = c.newprog()
				q.As = AJMP
				q.Pos = p.Pos
				if retSym != nil { // retjmp
					q.To.Type = obj.TYPE_BRANCH
					q.To.Name = obj.NAME_EXTERN
					q.To.Sym = retSym
				} else {
					q.To.Type = obj.TYPE_MEM
					q.To.Reg = REGLINK
					q.To.Offset = 0
				}
				q.Mark |= BRANCH
				q.Spadj = +autosize

				q.Link = p.Link
				p.Link = q
				break
			}

			// Non-leaf: reload LR from 0(SP), pop the frame, then jump.
			p.As = mov
			p.From.Type = obj.TYPE_MEM
			p.From.Offset = 0
			p.From.Reg = REGSP
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REGLINK

			if autosize != 0 {
				q = c.newprog()
				q.As = add
				q.Pos = p.Pos
				q.From.Type = obj.TYPE_CONST
				q.From.Offset = int64(autosize)
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REGSP
				q.Spadj = -autosize

				q.Link = p.Link
				p.Link = q
			}

			q1 = c.newprog()
			q1.As = AJMP
			q1.Pos = p.Pos
			if retSym != nil { // retjmp
				q1.To.Type = obj.TYPE_BRANCH
				q1.To.Name = obj.NAME_EXTERN
				q1.To.Sym = retSym
			} else {
				q1.To.Type = obj.TYPE_MEM
				q1.To.Offset = 0
				q1.To.Reg = REGLINK
			}
			q1.Mark |= BRANCH
			q1.Spadj = +autosize

			// NOTE(review): q is only assigned in this case when autosize != 0;
			// otherwise it still holds whatever an earlier case left in it.
			q1.Link = q.Link
			q.Link = q1

		case AADD,
			AADDU,
			AADDV,
			AADDVU:
			// Record explicit constant adjustments of SP.
			if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.From.Type == obj.TYPE_CONST {
				p.Spadj = int32(-p.From.Offset)
			}

		case obj.AGETCALLERPC:
			if cursym.Leaf() {
				/* MOV LR, Rd */
				p.As = mov
				p.From.Type = obj.TYPE_REG
				p.From.Reg = REGLINK
			} else {
				/* MOV (RSP), Rd */
				p.As = mov
				p.From.Type = obj.TYPE_MEM
				p.From.Reg = REGSP
			}
		}

		// A write to SP with no recorded Spadj flags the function as
		// SPWRITE; outside assembly input this is reported as fatal.
		if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.Spadj == 0 {
			f := c.cursym.Func()
			if f.FuncFlag&objabi.FuncFlag_SPWRITE == 0 {
				c.cursym.Func().FuncFlag |= objabi.FuncFlag_SPWRITE
				if ctxt.Debugvlog || !ctxt.IsAsm {
					ctxt.Logf("auto-SPWRITE: %s %v\n", c.cursym.Name, p)
					if !ctxt.IsAsm {
						ctxt.Diag("invalid auto-SPWRITE in non-assembly")
						ctxt.DiagFlush()
						log.Fatalf("bad SPWRITE")
					}
				}
			}
		}
	}

	if c.ctxt.Arch.Family == sys.MIPS {
		// rewrite MOVD into two MOVF in 32-bit mode to avoid unaligned memory access
		for p = c.cursym.Func().Text; p != nil; p = p1 {
			p1 = p.Link

			if p.As != AMOVD {
				continue
			}
			if p.From.Type != obj.TYPE_MEM && p.To.Type != obj.TYPE_MEM {
				continue
			}

			// Duplicate the instruction; p and q each move one 32-bit half.
			p.As = AMOVF
			q = c.newprog()
			*q = *p
			q.Link = p.Link
			p.Link = q
			p1 = q.Link

			var addrOff int64
			if c.ctxt.Arch.ByteOrder == binary.BigEndian {
				addrOff = 4 // swap load/save order
			}
			if p.From.Type == obj.TYPE_MEM {
				reg := REG_F0 + (p.To.Reg-REG_F0)&^1
				p.To.Reg = reg
				q.To.Reg = reg + 1
				p.From.Offset += addrOff
				q.From.Offset += 4 - addrOff
			} else if p.To.Type == obj.TYPE_MEM {
				reg := REG_F0 + (p.From.Reg-REG_F0)&^1
				p.From.Reg = reg
				q.From.Reg = reg + 1
				p.To.Offset += addrOff
				q.To.Offset += 4 - addrOff
			}
		}
	}

	if nosched {
		// if we don't do instruction scheduling, simply add
		// NOP after each branch instruction.
		for p = c.cursym.Func().Text; p != nil; p = p.Link {
			if p.Mark&BRANCH != 0 {
				c.addnop(p)
			}
		}
		return
	}

	// instruction scheduling
	// (not reached while nosched is hard-wired to true above)
	q = nil                   // p - 1
	q1 = c.cursym.Func().Text // top of block
	o := 0                    // count of instructions
	for p = c.cursym.Func().Text; p != nil; p = p1 {
		p1 = p.Link
		o++
		if p.Mark&NOSCHED != 0 {
			if q1 != p {
				c.sched(q1, q)
			}
			for ; p != nil; p = p.Link {
				if p.Mark&NOSCHED == 0 {
					break
				}
				q = p
			}
			p1 = p
			q1 = p
			o = 0
			continue
		}
		if p.Mark&(LABEL|SYNC) != 0 {
			if q1 != p {
				c.sched(q1, q)
			}
			q1 = p
			o = 1
		}
		if p.Mark&(BRANCH|SYNC) != 0 {
			c.sched(q1, p)
			q1 = p1
			o = 0
		}
		if o >= NSCHED {
			c.sched(q1, p)
			q1 = p1
			o = 0
		}
		q = p
	}
}
// stacksplit appends the stack-overflow check to the instruction list after
// p and returns the last instruction it emitted (a zero-width NOP that is
// the "stack is fine" branch target).
//
// framesize is the frame size the check must account for. When
// Flag_maymorestack is set, a call to that hook (with LR and REGCTXT saved
// around it) is emitted first. The check itself compares SP against the
// stack guard loaded from g and, on failure, calls the appropriate
// runtime.morestack variant and jumps back to redo the check.
func (c *ctxt0) stacksplit(p *obj.Prog, framesize int32) *obj.Prog {
	var mov, add obj.As

	// Pick pointer-sized add/move opcodes for the target family.
	if c.ctxt.Arch.Family == sys.MIPS64 {
		add = AADDV
		mov = AMOVV
	} else {
		add = AADDU
		mov = AMOVW
	}

	if c.ctxt.Flag_maymorestack != "" {
		// Save LR and REGCTXT.
		frameSize := 2 * c.ctxt.Arch.PtrSize

		p = c.ctxt.StartUnsafePoint(p, c.newprog)

		// MOV	REGLINK, -8/-16(SP)
		p = obj.Appendp(p, c.newprog)
		p.As = mov
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REGLINK
		p.To.Type = obj.TYPE_MEM
		p.To.Offset = int64(-frameSize)
		p.To.Reg = REGSP

		// MOV	REGCTXT, -4/-8(SP)
		p = obj.Appendp(p, c.newprog)
		p.As = mov
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REGCTXT
		p.To.Type = obj.TYPE_MEM
		p.To.Offset = -int64(c.ctxt.Arch.PtrSize)
		p.To.Reg = REGSP

		// ADD	$-8/$-16, SP
		p = obj.Appendp(p, c.newprog)
		p.As = add
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(-frameSize)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REGSP
		p.Spadj = int32(frameSize)

		// JAL	maymorestack
		p = obj.Appendp(p, c.newprog)
		p.As = AJAL
		p.To.Type = obj.TYPE_BRANCH
		// See ../x86/obj6.go
		p.To.Sym = c.ctxt.LookupABI(c.ctxt.Flag_maymorestack, c.cursym.ABI())
		p.Mark |= BRANCH

		// Restore LR and REGCTXT.

		// MOV	0(SP), REGLINK
		p = obj.Appendp(p, c.newprog)
		p.As = mov
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = 0
		p.From.Reg = REGSP
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REGLINK

		// MOV	4/8(SP), REGCTXT
		p = obj.Appendp(p, c.newprog)
		p.As = mov
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = int64(c.ctxt.Arch.PtrSize)
		p.From.Reg = REGSP
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REGCTXT

		// ADD	$8/$16, SP
		p = obj.Appendp(p, c.newprog)
		p.As = add
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(frameSize)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REGSP
		p.Spadj = int32(-frameSize)

		p = c.ctxt.EndUnsafePoint(p, c.newprog, -1)
	}

	// Jump back to here after morestack returns.
	startPred := p

	// MOV	g_stackguard(g), R1
	p = obj.Appendp(p, c.newprog)

	p.As = mov
	p.From.Type = obj.TYPE_MEM
	p.From.Reg = REGG
	p.From.Offset = 2 * int64(c.ctxt.Arch.PtrSize) // G.stackguard0
	if c.cursym.CFunc() {
		p.From.Offset = 3 * int64(c.ctxt.Arch.PtrSize) // G.stackguard1
	}
	p.To.Type = obj.TYPE_REG
	p.To.Reg = REG_R1

	// Mark the stack bound check and morestack call async nonpreemptible.
	// If we get preempted here, when resumed the preemption request is
	// cleared, but we'll still call morestack, which will double the stack
	// unnecessarily. See issue #35470.
	p = c.ctxt.StartUnsafePoint(p, c.newprog)

	// q, when non-nil, is the underflow-guard branch emitted for very
	// large frames; its target is filled in further down.
	var q *obj.Prog
	if framesize <= objabi.StackSmall {
		// small stack: SP < stackguard
		//	AGTU	SP, stackguard, R1
		p = obj.Appendp(p, c.newprog)

		p.As = ASGTU
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REGSP
		p.Reg = REG_R1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_R1
	} else {
		// large stack: SP-framesize < stackguard-StackSmall
		offset := int64(framesize) - objabi.StackSmall
		if framesize > objabi.StackBig {
			// Such a large stack we need to protect against underflow.
			// The runtime guarantees SP > objabi.StackBig, but
			// framesize is large enough that SP-framesize may
			// underflow, causing a direct comparison with the
			// stack guard to incorrectly succeed. We explicitly
			// guard against underflow.
			//
			//	SGTU	$(framesize-StackSmall), SP, R2
			//	BNE	R2, label-of-call-to-morestack

			p = obj.Appendp(p, c.newprog)
			p.As = ASGTU
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = offset
			p.Reg = REGSP
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_R2

			p = obj.Appendp(p, c.newprog)
			q = p
			p.As = ABNE
			p.From.Type = obj.TYPE_REG
			p.From.Reg = REG_R2
			p.To.Type = obj.TYPE_BRANCH
			p.Mark |= BRANCH
		}

		// Check against the stack guard. We've ensured this won't underflow.
		//	ADD	$-(framesize-StackSmall), SP, R2
		//	SGTU	R2, stackguard, R1
		p = obj.Appendp(p, c.newprog)

		p.As = add
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = -offset
		p.Reg = REGSP
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_R2

		p = obj.Appendp(p, c.newprog)
		p.As = ASGTU
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_R2
		p.Reg = REG_R1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_R1
	}

	// q1: BNE	R1, done
	p = obj.Appendp(p, c.newprog)
	q1 := p

	p.As = ABNE
	p.From.Type = obj.TYPE_REG
	p.From.Reg = REG_R1
	p.To.Type = obj.TYPE_BRANCH
	p.Mark |= BRANCH

	// MOV	LINK, R3
	p = obj.Appendp(p, c.newprog)

	p.As = mov
	p.From.Type = obj.TYPE_REG
	p.From.Reg = REGLINK
	p.To.Type = obj.TYPE_REG
	p.To.Reg = REG_R3
	if q != nil {
		// The underflow guard branches straight to the morestack call path.
		q.To.SetTarget(p)
		p.Mark |= LABEL
	}

	p = c.ctxt.EmitEntryStackMap(c.cursym, p, c.newprog)

	// JAL	runtime.morestack(SB)
	p = obj.Appendp(p, c.newprog)

	p.As = AJAL
	p.To.Type = obj.TYPE_BRANCH
	if c.cursym.CFunc() {
		p.To.Sym = c.ctxt.Lookup("runtime.morestackc")
	} else if !c.cursym.Func().Text.From.Sym.NeedCtxt() {
		p.To.Sym = c.ctxt.Lookup("runtime.morestack_noctxt")
	} else {
		p.To.Sym = c.ctxt.Lookup("runtime.morestack")
	}
	p.Mark |= BRANCH

	p = c.ctxt.EndUnsafePoint(p, c.newprog, -1)

	// JMP	start
	p = obj.Appendp(p, c.newprog)

	p.As = AJMP
	p.To.Type = obj.TYPE_BRANCH
	p.To.SetTarget(startPred.Link)
	startPred.Link.Mark |= LABEL
	p.Mark |= BRANCH

	// placeholder for q1's jump target
	p = obj.Appendp(p, c.newprog)

	p.As = obj.ANOP // zero-width place holder
	q1.To.SetTarget(p)

	return p
}
c.ctxt.EndUnsafePoint(p, c.newprog, -1) + + // JMP start + p = obj.Appendp(p, c.newprog) + + p.As = AJMP + p.To.Type = obj.TYPE_BRANCH + p.To.SetTarget(startPred.Link) + startPred.Link.Mark |= LABEL + p.Mark |= BRANCH + + // placeholder for q1's jump target + p = obj.Appendp(p, c.newprog) + + p.As = obj.ANOP // zero-width place holder + q1.To.SetTarget(p) + + return p +} + +func (c *ctxt0) addnop(p *obj.Prog) { + q := c.newprog() + q.As = ANOOP + q.Pos = p.Pos + q.Link = p.Link + p.Link = q +} + +const ( + E_HILO = 1 << 0 + E_FCR = 1 << 1 + E_MCR = 1 << 2 + E_MEM = 1 << 3 + E_MEMSP = 1 << 4 /* uses offset and size */ + E_MEMSB = 1 << 5 /* uses offset and size */ + ANYMEM = E_MEM | E_MEMSP | E_MEMSB + //DELAY = LOAD|BRANCH|FCMP + DELAY = BRANCH /* only schedule branch */ +) + +type Dep struct { + ireg uint32 + freg uint32 + cc uint32 +} + +type Sch struct { + p obj.Prog + set Dep + used Dep + soffset int32 + size uint8 + nop uint8 + comp bool +} + +func (c *ctxt0) sched(p0, pe *obj.Prog) { + var sch [NSCHED]Sch + + /* + * build side structure + */ + s := sch[:] + for p := p0; ; p = p.Link { + s[0].p = *p + c.markregused(&s[0]) + if p == pe { + break + } + s = s[1:] + } + se := s + + for i := cap(sch) - cap(se); i >= 0; i-- { + s = sch[i:] + if s[0].p.Mark&DELAY == 0 { + continue + } + if -cap(s) < -cap(se) { + if !conflict(&s[0], &s[1]) { + continue + } + } + + var t []Sch + var j int + for j = cap(sch) - cap(s) - 1; j >= 0; j-- { + t = sch[j:] + if t[0].comp { + if s[0].p.Mark&BRANCH != 0 { + continue + } + } + if t[0].p.Mark&DELAY != 0 { + if -cap(s) >= -cap(se) || conflict(&t[0], &s[1]) { + continue + } + } + for u := t[1:]; -cap(u) <= -cap(s); u = u[1:] { + if c.depend(&u[0], &t[0]) { + continue + } + } + goto out2 + } + + if s[0].p.Mark&BRANCH != 0 { + s[0].nop = 1 + } + continue + + out2: + // t[0] is the instruction being moved to fill the delay + stmp := t[0] + copy(t[:i-j], t[1:i-j+1]) + s[0] = stmp + + if t[i-j-1].p.Mark&BRANCH != 0 { + // t[i-j] is being 
put into a branch delay slot + // combine its Spadj with the branch instruction + t[i-j-1].p.Spadj += t[i-j].p.Spadj + t[i-j].p.Spadj = 0 + } + + i-- + } + + /* + * put it all back + */ + var p *obj.Prog + var q *obj.Prog + for s, p = sch[:], p0; -cap(s) <= -cap(se); s, p = s[1:], q { + q = p.Link + if q != s[0].p.Link { + *p = s[0].p + p.Link = q + } + for s[0].nop != 0 { + s[0].nop-- + c.addnop(p) + } + } +} + +func (c *ctxt0) markregused(s *Sch) { + p := &s.p + s.comp = c.compound(p) + s.nop = 0 + if s.comp { + s.set.ireg |= 1 << (REGTMP - REG_R0) + s.used.ireg |= 1 << (REGTMP - REG_R0) + } + + ar := 0 /* dest is really reference */ + ad := 0 /* source/dest is really address */ + ld := 0 /* opcode is load instruction */ + sz := 20 /* size of load/store for overlap computation */ + + /* + * flags based on opcode + */ + switch p.As { + case obj.ATEXT: + c.autosize = int32(p.To.Offset + 8) + ad = 1 + + case AJAL: + r := p.Reg + if r == 0 { + r = REGLINK + } + s.set.ireg |= 1 << uint(r-REG_R0) + ar = 1 + ad = 1 + + case ABGEZAL, + ABLTZAL: + s.set.ireg |= 1 << (REGLINK - REG_R0) + fallthrough + case ABEQ, + ABGEZ, + ABGTZ, + ABLEZ, + ABLTZ, + ABNE: + ar = 1 + ad = 1 + + case ABFPT, + ABFPF: + ad = 1 + s.used.cc |= E_FCR + + case ACMPEQD, + ACMPEQF, + ACMPGED, + ACMPGEF, + ACMPGTD, + ACMPGTF: + ar = 1 + s.set.cc |= E_FCR + p.Mark |= FCMP + + case AJMP: + ar = 1 + ad = 1 + + case AMOVB, + AMOVBU: + sz = 1 + ld = 1 + + case AMOVH, + AMOVHU: + sz = 2 + ld = 1 + + case AMOVF, + AMOVW, + AMOVWL, + AMOVWR: + sz = 4 + ld = 1 + + case AMOVD, + AMOVV, + AMOVVL, + AMOVVR: + sz = 8 + ld = 1 + + case ADIV, + ADIVU, + AMUL, + AMULU, + AREM, + AREMU, + ADIVV, + ADIVVU, + AMULV, + AMULVU, + AREMV, + AREMVU: + s.set.cc = E_HILO + fallthrough + case AADD, + AADDU, + AADDV, + AADDVU, + AAND, + ANOR, + AOR, + ASGT, + ASGTU, + ASLL, + ASRA, + ASRL, + ASLLV, + ASRAV, + ASRLV, + ASUB, + ASUBU, + ASUBV, + ASUBVU, + AXOR, + + AADDD, + AADDF, + AADDW, + ASUBD, + ASUBF, + ASUBW, + AMULF, + 
AMULD, + AMULW, + ADIVF, + ADIVD, + ADIVW: + if p.Reg == 0 { + if p.To.Type == obj.TYPE_REG { + p.Reg = p.To.Reg + } + //if(p->reg == NREG) + // print("botch %P\n", p); + } + } + + /* + * flags based on 'to' field + */ + cls := int(p.To.Class) + if cls == 0 { + cls = c.aclass(&p.To) + 1 + p.To.Class = int8(cls) + } + cls-- + switch cls { + default: + fmt.Printf("unknown class %d %v\n", cls, p) + + case C_ZCON, + C_SCON, + C_ADD0CON, + C_AND0CON, + C_ADDCON, + C_ANDCON, + C_UCON, + C_LCON, + C_NONE, + C_SBRA, + C_LBRA, + C_ADDR, + C_TEXTSIZE: + break + + case C_HI, + C_LO: + s.set.cc |= E_HILO + + case C_FCREG: + s.set.cc |= E_FCR + + case C_MREG: + s.set.cc |= E_MCR + + case C_ZOREG, + C_SOREG, + C_LOREG: + cls = int(p.To.Reg) + s.used.ireg |= 1 << uint(cls-REG_R0) + if ad != 0 { + break + } + s.size = uint8(sz) + s.soffset = c.regoff(&p.To) + + m := uint32(ANYMEM) + if cls == REGSB { + m = E_MEMSB + } + if cls == REGSP { + m = E_MEMSP + } + + if ar != 0 { + s.used.cc |= m + } else { + s.set.cc |= m + } + + case C_SACON, + C_LACON: + s.used.ireg |= 1 << (REGSP - REG_R0) + + case C_SECON, + C_LECON: + s.used.ireg |= 1 << (REGSB - REG_R0) + + case C_REG: + if ar != 0 { + s.used.ireg |= 1 << uint(p.To.Reg-REG_R0) + } else { + s.set.ireg |= 1 << uint(p.To.Reg-REG_R0) + } + + case C_FREG: + if ar != 0 { + s.used.freg |= 1 << uint(p.To.Reg-REG_F0) + } else { + s.set.freg |= 1 << uint(p.To.Reg-REG_F0) + } + if ld != 0 && p.From.Type == obj.TYPE_REG { + p.Mark |= LOAD + } + + case C_SAUTO, + C_LAUTO: + s.used.ireg |= 1 << (REGSP - REG_R0) + if ad != 0 { + break + } + s.size = uint8(sz) + s.soffset = c.regoff(&p.To) + + if ar != 0 { + s.used.cc |= E_MEMSP + } else { + s.set.cc |= E_MEMSP + } + + case C_SEXT, + C_LEXT: + s.used.ireg |= 1 << (REGSB - REG_R0) + if ad != 0 { + break + } + s.size = uint8(sz) + s.soffset = c.regoff(&p.To) + + if ar != 0 { + s.used.cc |= E_MEMSB + } else { + s.set.cc |= E_MEMSB + } + } + + /* + * flags based on 'from' field + */ + cls = 
int(p.From.Class) + if cls == 0 { + cls = c.aclass(&p.From) + 1 + p.From.Class = int8(cls) + } + cls-- + switch cls { + default: + fmt.Printf("unknown class %d %v\n", cls, p) + + case C_ZCON, + C_SCON, + C_ADD0CON, + C_AND0CON, + C_ADDCON, + C_ANDCON, + C_UCON, + C_LCON, + C_NONE, + C_SBRA, + C_LBRA, + C_ADDR, + C_TEXTSIZE: + break + + case C_HI, + C_LO: + s.used.cc |= E_HILO + + case C_FCREG: + s.used.cc |= E_FCR + + case C_MREG: + s.used.cc |= E_MCR + + case C_ZOREG, + C_SOREG, + C_LOREG: + cls = int(p.From.Reg) + s.used.ireg |= 1 << uint(cls-REG_R0) + if ld != 0 { + p.Mark |= LOAD + } + s.size = uint8(sz) + s.soffset = c.regoff(&p.From) + + m := uint32(ANYMEM) + if cls == REGSB { + m = E_MEMSB + } + if cls == REGSP { + m = E_MEMSP + } + + s.used.cc |= m + + case C_SACON, + C_LACON: + cls = int(p.From.Reg) + if cls == 0 { + cls = REGSP + } + s.used.ireg |= 1 << uint(cls-REG_R0) + + case C_SECON, + C_LECON: + s.used.ireg |= 1 << (REGSB - REG_R0) + + case C_REG: + s.used.ireg |= 1 << uint(p.From.Reg-REG_R0) + + case C_FREG: + s.used.freg |= 1 << uint(p.From.Reg-REG_F0) + if ld != 0 && p.To.Type == obj.TYPE_REG { + p.Mark |= LOAD + } + + case C_SAUTO, + C_LAUTO: + s.used.ireg |= 1 << (REGSP - REG_R0) + if ld != 0 { + p.Mark |= LOAD + } + if ad != 0 { + break + } + s.size = uint8(sz) + s.soffset = c.regoff(&p.From) + + s.used.cc |= E_MEMSP + + case C_SEXT: + case C_LEXT: + s.used.ireg |= 1 << (REGSB - REG_R0) + if ld != 0 { + p.Mark |= LOAD + } + if ad != 0 { + break + } + s.size = uint8(sz) + s.soffset = c.regoff(&p.From) + + s.used.cc |= E_MEMSB + } + + cls = int(p.Reg) + if cls != 0 { + if REG_F0 <= cls && cls <= REG_F31 { + s.used.freg |= 1 << uint(cls-REG_F0) + } else { + s.used.ireg |= 1 << uint(cls-REG_R0) + } + } + s.set.ireg &^= (1 << (REGZERO - REG_R0)) /* R0 can't be set */ +} + +/* + * test to see if two instructions can be + * interchanged without changing semantics + */ +func (c *ctxt0) depend(sa, sb *Sch) bool { + if 
sa.set.ireg&(sb.set.ireg|sb.used.ireg) != 0 { + return true + } + if sb.set.ireg&sa.used.ireg != 0 { + return true + } + + if sa.set.freg&(sb.set.freg|sb.used.freg) != 0 { + return true + } + if sb.set.freg&sa.used.freg != 0 { + return true + } + + /* + * special case. + * loads from same address cannot pass. + * this is for hardware fifo's and the like + */ + if sa.used.cc&sb.used.cc&E_MEM != 0 { + if sa.p.Reg == sb.p.Reg { + if c.regoff(&sa.p.From) == c.regoff(&sb.p.From) { + return true + } + } + } + + x := (sa.set.cc & (sb.set.cc | sb.used.cc)) | (sb.set.cc & sa.used.cc) + if x != 0 { + /* + * allow SB and SP to pass each other. + * allow SB to pass SB iff doffsets are ok + * anything else conflicts + */ + if x != E_MEMSP && x != E_MEMSB { + return true + } + x = sa.set.cc | sb.set.cc | sa.used.cc | sb.used.cc + if x&E_MEM != 0 { + return true + } + if offoverlap(sa, sb) { + return true + } + } + + return false +} + +func offoverlap(sa, sb *Sch) bool { + if sa.soffset < sb.soffset { + if sa.soffset+int32(sa.size) > sb.soffset { + return true + } + return false + } + if sb.soffset+int32(sb.size) > sa.soffset { + return true + } + return false +} + +/* + * test 2 adjacent instructions + * and find out if inserted instructions + * are desired to prevent stalls. 
+ */ +func conflict(sa, sb *Sch) bool { + if sa.set.ireg&sb.used.ireg != 0 { + return true + } + if sa.set.freg&sb.used.freg != 0 { + return true + } + if sa.set.cc&sb.used.cc != 0 { + return true + } + return false +} + +func (c *ctxt0) compound(p *obj.Prog) bool { + o := c.oplook(p) + if o.size != 4 { + return true + } + if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSB { + return true + } + return false +} + +var Linkmips64 = obj.LinkArch{ + Arch: sys.ArchMIPS64, + Init: buildop, + Preprocess: preprocess, + Assemble: span0, + Progedit: progedit, + DWARFRegisters: MIPSDWARFRegisters, +} + +var Linkmips64le = obj.LinkArch{ + Arch: sys.ArchMIPS64LE, + Init: buildop, + Preprocess: preprocess, + Assemble: span0, + Progedit: progedit, + DWARFRegisters: MIPSDWARFRegisters, +} + +var Linkmips = obj.LinkArch{ + Arch: sys.ArchMIPS, + Init: buildop, + Preprocess: preprocess, + Assemble: span0, + Progedit: progedit, + DWARFRegisters: MIPSDWARFRegisters, +} + +var Linkmipsle = obj.LinkArch{ + Arch: sys.ArchMIPSLE, + Init: buildop, + Preprocess: preprocess, + Assemble: span0, + Progedit: progedit, + DWARFRegisters: MIPSDWARFRegisters, +} diff --git a/src/cmd/internal/obj/objfile.go b/src/cmd/internal/obj/objfile.go new file mode 100644 index 0000000..ff0968e --- /dev/null +++ b/src/cmd/internal/obj/objfile.go @@ -0,0 +1,897 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Writing Go object files. + +package obj + +import ( + "bytes" + "cmd/internal/bio" + "cmd/internal/goobj" + "cmd/internal/notsha256" + "cmd/internal/objabi" + "cmd/internal/sys" + "encoding/binary" + "fmt" + "io" + "log" + "os" + "path/filepath" + "sort" + "strings" +) + +const UnlinkablePkg = "<unlinkable>" // invalid package path, used when compiled without -p flag + +// Entry point of writing new object file. 
// WriteObjFile writes the object file for ctxt to b.
//
// It first writes a header with unset block offsets to reserve space, then
// emits each block in order (string table, autolib, package index, file
// table, the symbol definition and reference lists, reference flags, hashes,
// reloc/aux/data indexes, relocs, aux info, data, and referenced names),
// recording every block's starting offset. Finally it seeks back and
// rewrites the header with the real offsets.
//
// It terminates the process via log.Fatal if the package path is empty or
// the total data size does not fit in 32 bits.
func WriteObjFile(ctxt *Link, b *bio.Writer) {

	debugAsmEmit(ctxt)

	genFuncInfoSyms(ctxt)

	w := writer{
		Writer:  goobj.NewWriter(b),
		ctxt:    ctxt,
		pkgpath: objabi.PathToPrefix(ctxt.Pkgpath),
	}

	// Remember where the object starts so the header can be rewritten.
	start := b.Offset()
	w.init()

	// Header
	// We just reserve the space. We'll fill in the offsets later.
	flags := uint32(0)
	if ctxt.Flag_shared {
		flags |= goobj.ObjFlagShared
	}
	if w.pkgpath == UnlinkablePkg {
		flags |= goobj.ObjFlagUnlinkable
	}
	if w.pkgpath == "" {
		log.Fatal("empty package path")
	}
	if ctxt.IsAsm {
		flags |= goobj.ObjFlagFromAssembly
	}
	h := goobj.Header{
		Magic:       goobj.Magic,
		Fingerprint: ctxt.Fingerprint,
		Flags:       flags,
	}
	h.Write(w.Writer)

	// String table
	w.StringTable()

	// Autolib
	h.Offsets[goobj.BlkAutolib] = w.Offset()
	for i := range ctxt.Imports {
		ctxt.Imports[i].Write(w.Writer)
	}

	// Package references
	h.Offsets[goobj.BlkPkgIdx] = w.Offset()
	for _, pkg := range w.pkglist {
		w.StringRef(pkg)
	}

	// File table (for DWARF and pcln generation).
	h.Offsets[goobj.BlkFile] = w.Offset()
	for _, f := range ctxt.PosTable.FileTable() {
		w.StringRef(filepath.ToSlash(f))
	}

	// Symbol definitions
	h.Offsets[goobj.BlkSymdef] = w.Offset()
	for _, s := range ctxt.defs {
		w.Sym(s)
	}

	// Short hashed symbol definitions
	h.Offsets[goobj.BlkHashed64def] = w.Offset()
	for _, s := range ctxt.hashed64defs {
		w.Sym(s)
	}

	// Hashed symbol definitions
	h.Offsets[goobj.BlkHasheddef] = w.Offset()
	for _, s := range ctxt.hasheddefs {
		w.Sym(s)
	}

	// Non-pkg symbol definitions
	h.Offsets[goobj.BlkNonpkgdef] = w.Offset()
	for _, s := range ctxt.nonpkgdefs {
		w.Sym(s)
	}

	// Non-pkg symbol references
	h.Offsets[goobj.BlkNonpkgref] = w.Offset()
	for _, s := range ctxt.nonpkgrefs {
		w.Sym(s)
	}

	// Referenced package symbol flags
	h.Offsets[goobj.BlkRefFlags] = w.Offset()
	w.refFlags()

	// Hashes
	h.Offsets[goobj.BlkHash64] = w.Offset()
	for _, s := range ctxt.hashed64defs {
		w.Hash64(s)
	}
	h.Offsets[goobj.BlkHash] = w.Offset()
	for _, s := range ctxt.hasheddefs {
		w.Hash(s)
	}
	// TODO: hashedrefs unused/unsupported for now

	// Reloc indexes
	// Each index block holds one running total per defined symbol, in the
	// order of lists below, followed by the grand total.
	h.Offsets[goobj.BlkRelocIdx] = w.Offset()
	nreloc := uint32(0)
	lists := [][]*LSym{ctxt.defs, ctxt.hashed64defs, ctxt.hasheddefs, ctxt.nonpkgdefs}
	for _, list := range lists {
		for _, s := range list {
			w.Uint32(nreloc)
			nreloc += uint32(len(s.R))
		}
	}
	w.Uint32(nreloc)

	// Symbol Info indexes
	h.Offsets[goobj.BlkAuxIdx] = w.Offset()
	naux := uint32(0)
	for _, list := range lists {
		for _, s := range list {
			w.Uint32(naux)
			naux += uint32(nAuxSym(s))
		}
	}
	w.Uint32(naux)

	// Data indexes
	h.Offsets[goobj.BlkDataIdx] = w.Offset()
	dataOff := int64(0)
	for _, list := range lists {
		for _, s := range list {
			w.Uint32(uint32(dataOff))
			dataOff += int64(len(s.P))
			// File-backed symbols contribute the file's contents too.
			if file := s.File(); file != nil {
				dataOff += int64(file.Size)
			}
		}
	}
	// Offsets are stored as uint32; refuse to silently truncate.
	if int64(uint32(dataOff)) != dataOff {
		log.Fatalf("data too large")
	}
	w.Uint32(uint32(dataOff))

	// Relocs
	h.Offsets[goobj.BlkReloc] = w.Offset()
	for _, list := range lists {
		for _, s := range list {
			sort.Sort(relocByOff(s.R)) // some platforms (e.g. PE) requires relocations in address order
			for i := range s.R {
				w.Reloc(&s.R[i])
			}
		}
	}

	// Aux symbol info
	h.Offsets[goobj.BlkAux] = w.Offset()
	for _, list := range lists {
		for _, s := range list {
			w.Aux(s)
		}
	}

	// Data
	h.Offsets[goobj.BlkData] = w.Offset()
	for _, list := range lists {
		for _, s := range list {
			w.Bytes(s.P)
			if file := s.File(); file != nil {
				w.writeFile(ctxt, file)
			}
		}
	}

	// Blocks used only by tools (objdump, nm).

	// Referenced symbol names from other packages
	h.Offsets[goobj.BlkRefName] = w.Offset()
	w.refNames()

	h.Offsets[goobj.BlkEnd] = w.Offset()

	// Fix up block offsets in the header
	end := start + int64(w.Offset())
	b.MustSeek(start, 0)
	h.Write(w.Writer)
	b.MustSeek(end, 0)
}
large") + } + w.Uint32(uint32(dataOff)) + + // Relocs + h.Offsets[goobj.BlkReloc] = w.Offset() + for _, list := range lists { + for _, s := range list { + sort.Sort(relocByOff(s.R)) // some platforms (e.g. PE) requires relocations in address order + for i := range s.R { + w.Reloc(&s.R[i]) + } + } + } + + // Aux symbol info + h.Offsets[goobj.BlkAux] = w.Offset() + for _, list := range lists { + for _, s := range list { + w.Aux(s) + } + } + + // Data + h.Offsets[goobj.BlkData] = w.Offset() + for _, list := range lists { + for _, s := range list { + w.Bytes(s.P) + if file := s.File(); file != nil { + w.writeFile(ctxt, file) + } + } + } + + // Blocks used only by tools (objdump, nm). + + // Referenced symbol names from other packages + h.Offsets[goobj.BlkRefName] = w.Offset() + w.refNames() + + h.Offsets[goobj.BlkEnd] = w.Offset() + + // Fix up block offsets in the header + end := start + int64(w.Offset()) + b.MustSeek(start, 0) + h.Write(w.Writer) + b.MustSeek(end, 0) +} + +type writer struct { + *goobj.Writer + filebuf []byte + ctxt *Link + pkgpath string // the package import path (escaped), "" if unknown + pkglist []string // list of packages referenced, indexed by ctxt.pkgIdx + + // scratch space for writing (the Write methods escape + // as they are interface calls) + tmpSym goobj.Sym + tmpReloc goobj.Reloc + tmpAux goobj.Aux + tmpHash64 goobj.Hash64Type + tmpHash goobj.HashType + tmpRefFlags goobj.RefFlags + tmpRefName goobj.RefName +} + +// prepare package index list +func (w *writer) init() { + w.pkglist = make([]string, len(w.ctxt.pkgIdx)+1) + w.pkglist[0] = "" // dummy invalid package for index 0 + for pkg, i := range w.ctxt.pkgIdx { + w.pkglist[i] = pkg + } +} + +func (w *writer) writeFile(ctxt *Link, file *FileInfo) { + f, err := os.Open(file.Name) + if err != nil { + ctxt.Diag("%v", err) + return + } + defer f.Close() + if w.filebuf == nil { + w.filebuf = make([]byte, 1024) + } + buf := w.filebuf + written := int64(0) + for { + n, err := f.Read(buf) + 
w.Bytes(buf[:n]) + written += int64(n) + if err == io.EOF { + break + } + if err != nil { + ctxt.Diag("%v", err) + return + } + } + if written != file.Size { + ctxt.Diag("copy %s: unexpected length %d != %d", file.Name, written, file.Size) + } +} + +func (w *writer) StringTable() { + w.AddString("") + for _, p := range w.ctxt.Imports { + w.AddString(p.Pkg) + } + for _, pkg := range w.pkglist { + w.AddString(pkg) + } + w.ctxt.traverseSyms(traverseAll, func(s *LSym) { + // Don't put names of builtins into the string table (to save + // space). + if s.PkgIdx == goobj.PkgIdxBuiltin { + return + } + // TODO: this includes references of indexed symbols from other packages, + // for which the linker doesn't need the name. Consider moving them to + // a separate block (for tools only). + if w.ctxt.Flag_noRefName && s.PkgIdx < goobj.PkgIdxSpecial { + // Don't include them if Flag_noRefName + return + } + if w.pkgpath != "" { + s.Name = strings.Replace(s.Name, "\"\".", w.pkgpath+".", -1) + } + w.AddString(s.Name) + }) + + // All filenames are in the postable. + for _, f := range w.ctxt.PosTable.FileTable() { + w.AddString(filepath.ToSlash(f)) + } +} + +// cutoff is the maximum data section size permitted by the linker +// (see issue #9862). +const cutoff = int64(2e9) // 2 GB (or so; looks better in errors than 2^31) + +func (w *writer) Sym(s *LSym) { + abi := uint16(s.ABI()) + if s.Static() { + abi = goobj.SymABIstatic + } + flag := uint8(0) + if s.DuplicateOK() { + flag |= goobj.SymFlagDupok + } + if s.Local() { + flag |= goobj.SymFlagLocal + } + if s.MakeTypelink() { + flag |= goobj.SymFlagTypelink + } + if s.Leaf() { + flag |= goobj.SymFlagLeaf + } + if s.NoSplit() { + flag |= goobj.SymFlagNoSplit + } + if s.ReflectMethod() { + flag |= goobj.SymFlagReflectMethod + } + if strings.HasPrefix(s.Name, "type:") && s.Name[5] != '.' 
&& s.Type == objabi.SRODATA { + flag |= goobj.SymFlagGoType + } + flag2 := uint8(0) + if s.UsedInIface() { + flag2 |= goobj.SymFlagUsedInIface + } + if strings.HasPrefix(s.Name, "go:itab.") && s.Type == objabi.SRODATA { + flag2 |= goobj.SymFlagItab + } + if strings.HasPrefix(s.Name, w.ctxt.Pkgpath) && strings.HasPrefix(s.Name[len(w.ctxt.Pkgpath):], ".") && strings.HasPrefix(s.Name[len(w.ctxt.Pkgpath)+1:], objabi.GlobalDictPrefix) { + flag2 |= goobj.SymFlagDict + } + name := s.Name + if strings.HasPrefix(name, "gofile..") { + name = filepath.ToSlash(name) + } + var align uint32 + if fn := s.Func(); fn != nil { + align = uint32(fn.Align) + } + if s.ContentAddressable() && s.Size != 0 { + // We generally assume data symbols are natually aligned + // (e.g. integer constants), except for strings and a few + // compiler-emitted funcdata. If we dedup a string symbol and + // a non-string symbol with the same content, we should keep + // the largest alignment. + // TODO: maybe the compiler could set the alignment for all + // data symbols more carefully. + switch { + case strings.HasPrefix(s.Name, "go:string."), + strings.HasPrefix(name, "type:.namedata."), + strings.HasPrefix(name, "type:.importpath."), + strings.HasSuffix(name, ".opendefer"), + strings.HasSuffix(name, ".arginfo0"), + strings.HasSuffix(name, ".arginfo1"), + strings.HasSuffix(name, ".argliveinfo"): + // These are just bytes, or varints. + align = 1 + case strings.HasPrefix(name, "gclocals·"): + // It has 32-bit fields. 
+ align = 4 + default: + switch { + case w.ctxt.Arch.PtrSize == 8 && s.Size%8 == 0: + align = 8 + case s.Size%4 == 0: + align = 4 + case s.Size%2 == 0: + align = 2 + default: + align = 1 + } + } + } + if s.Size > cutoff { + w.ctxt.Diag("%s: symbol too large (%d bytes > %d bytes)", s.Name, s.Size, cutoff) + } + o := &w.tmpSym + o.SetName(name, w.Writer) + o.SetABI(abi) + o.SetType(uint8(s.Type)) + o.SetFlag(flag) + o.SetFlag2(flag2) + o.SetSiz(uint32(s.Size)) + o.SetAlign(align) + o.Write(w.Writer) +} + +func (w *writer) Hash64(s *LSym) { + if !s.ContentAddressable() || len(s.R) != 0 { + panic("Hash of non-content-addressable symbol") + } + w.tmpHash64 = contentHash64(s) + w.Bytes(w.tmpHash64[:]) +} + +func (w *writer) Hash(s *LSym) { + if !s.ContentAddressable() { + panic("Hash of non-content-addressable symbol") + } + w.tmpHash = w.contentHash(s) + w.Bytes(w.tmpHash[:]) +} + +// contentHashSection returns a mnemonic for s's section. +// The goal is to prevent content-addressability from moving symbols between sections. +// contentHashSection only distinguishes between sets of sections for which this matters. +// Allowing flexibility increases the effectiveness of content-addressability. +// But in some cases, such as doing addressing based on a base symbol, +// we need to ensure that a symbol is always in a particular section. +// Some of these conditions are duplicated in cmd/link/internal/ld.(*Link).symtab. +// TODO: instead of duplicating them, have the compiler decide where symbols go.
+func contentHashSection(s *LSym) byte { + name := s.Name + if s.IsPcdata() { + return 'P' + } + if strings.HasPrefix(name, "gcargs.") || + strings.HasPrefix(name, "gclocals.") || + strings.HasPrefix(name, "gclocals·") || + strings.HasSuffix(name, ".opendefer") || + strings.HasSuffix(name, ".arginfo0") || + strings.HasSuffix(name, ".arginfo1") || + strings.HasSuffix(name, ".argliveinfo") || + strings.HasSuffix(name, ".wrapinfo") || + strings.HasSuffix(name, ".args_stackmap") || + strings.HasSuffix(name, ".stkobj") { + return 'F' // go:func.* or go:funcrel.* + } + if strings.HasPrefix(name, "type:") { + return 'T' + } + return 0 +} + +func contentHash64(s *LSym) goobj.Hash64Type { + if contentHashSection(s) != 0 { + panic("short hash of non-default-section sym " + s.Name) + } + var b goobj.Hash64Type + copy(b[:], s.P) + return b +} + +// Compute the content hash for a content-addressable symbol. +// We build a content hash based on its content and relocations. +// Depending on the category of the referenced symbol, we choose +// different hash algorithms such that the hash is globally +// consistent. +// - For referenced content-addressable symbol, its content hash +// is globally consistent. +// - For package symbol and builtin symbol, its local index is +// globally consistent. +// - For non-package symbol, its fully-expanded name is globally +// consistent. For now, we require we know the current package +// path so we can always expand symbol names. (Otherwise, +// symbols with relocations are not considered hashable.) +// +// For now, we assume there is no circular dependencies among +// hashed symbols. +func (w *writer) contentHash(s *LSym) goobj.HashType { + h := notsha256.New() + var tmp [14]byte + + // Include the size of the symbol in the hash. 
+ // This preserves the length of symbols, preventing the following two symbols + // from hashing the same: + // + // [2]int{1,2} ≠ [10]int{1,2,0,0,0...} + // + // In this case, if the smaller symbol is alive, the larger is not kept unless + // needed. + binary.LittleEndian.PutUint64(tmp[:8], uint64(s.Size)) + // Some symbols require being in separate sections. + tmp[8] = contentHashSection(s) + h.Write(tmp[:9]) + + // The compiler trims trailing zeros _sometimes_. We just do + // it always. + h.Write(bytes.TrimRight(s.P, "\x00")) + for i := range s.R { + r := &s.R[i] + binary.LittleEndian.PutUint32(tmp[:4], uint32(r.Off)) + tmp[4] = r.Siz + tmp[5] = uint8(r.Type) + binary.LittleEndian.PutUint64(tmp[6:14], uint64(r.Add)) + h.Write(tmp[:]) + rs := r.Sym + if rs == nil { + fmt.Printf("symbol: %s\n", s) + fmt.Printf("relocation: %#v\n", r) + panic("nil symbol target in relocation") + } + switch rs.PkgIdx { + case goobj.PkgIdxHashed64: + h.Write([]byte{0}) + t := contentHash64(rs) + h.Write(t[:]) + case goobj.PkgIdxHashed: + h.Write([]byte{1}) + t := w.contentHash(rs) + h.Write(t[:]) + case goobj.PkgIdxNone: + h.Write([]byte{2}) + io.WriteString(h, rs.Name) // name is already expanded at this point + case goobj.PkgIdxBuiltin: + h.Write([]byte{3}) + binary.LittleEndian.PutUint32(tmp[:4], uint32(rs.SymIdx)) + h.Write(tmp[:4]) + case goobj.PkgIdxSelf: + io.WriteString(h, w.pkgpath) + binary.LittleEndian.PutUint32(tmp[:4], uint32(rs.SymIdx)) + h.Write(tmp[:4]) + default: + io.WriteString(h, rs.Pkg) + binary.LittleEndian.PutUint32(tmp[:4], uint32(rs.SymIdx)) + h.Write(tmp[:4]) + } + } + var b goobj.HashType + copy(b[:], h.Sum(nil)) + return b +} + +func makeSymRef(s *LSym) goobj.SymRef { + if s == nil { + return goobj.SymRef{} + } + if s.PkgIdx == 0 || !s.Indexed() { + fmt.Printf("unindexed symbol reference: %v\n", s) + panic("unindexed symbol reference") + } + return goobj.SymRef{PkgIdx: uint32(s.PkgIdx), SymIdx: uint32(s.SymIdx)} +} + +func (w *writer) Reloc(r *Reloc) { + 
o := &w.tmpReloc + o.SetOff(r.Off) + o.SetSiz(r.Siz) + o.SetType(uint16(r.Type)) + o.SetAdd(r.Add) + o.SetSym(makeSymRef(r.Sym)) + o.Write(w.Writer) +} + +func (w *writer) aux1(typ uint8, rs *LSym) { + o := &w.tmpAux + o.SetType(typ) + o.SetSym(makeSymRef(rs)) + o.Write(w.Writer) +} + +func (w *writer) Aux(s *LSym) { + if s.Gotype != nil { + w.aux1(goobj.AuxGotype, s.Gotype) + } + if fn := s.Func(); fn != nil { + w.aux1(goobj.AuxFuncInfo, fn.FuncInfoSym) + + for _, d := range fn.Pcln.Funcdata { + w.aux1(goobj.AuxFuncdata, d) + } + + if fn.dwarfInfoSym != nil && fn.dwarfInfoSym.Size != 0 { + w.aux1(goobj.AuxDwarfInfo, fn.dwarfInfoSym) + } + if fn.dwarfLocSym != nil && fn.dwarfLocSym.Size != 0 { + w.aux1(goobj.AuxDwarfLoc, fn.dwarfLocSym) + } + if fn.dwarfRangesSym != nil && fn.dwarfRangesSym.Size != 0 { + w.aux1(goobj.AuxDwarfRanges, fn.dwarfRangesSym) + } + if fn.dwarfDebugLinesSym != nil && fn.dwarfDebugLinesSym.Size != 0 { + w.aux1(goobj.AuxDwarfLines, fn.dwarfDebugLinesSym) + } + if fn.Pcln.Pcsp != nil && fn.Pcln.Pcsp.Size != 0 { + w.aux1(goobj.AuxPcsp, fn.Pcln.Pcsp) + } + if fn.Pcln.Pcfile != nil && fn.Pcln.Pcfile.Size != 0 { + w.aux1(goobj.AuxPcfile, fn.Pcln.Pcfile) + } + if fn.Pcln.Pcline != nil && fn.Pcln.Pcline.Size != 0 { + w.aux1(goobj.AuxPcline, fn.Pcln.Pcline) + } + if fn.Pcln.Pcinline != nil && fn.Pcln.Pcinline.Size != 0 { + w.aux1(goobj.AuxPcinline, fn.Pcln.Pcinline) + } + for _, pcSym := range fn.Pcln.Pcdata { + w.aux1(goobj.AuxPcdata, pcSym) + } + + } +} + +// Emits flags of referenced indexed symbols. 
+func (w *writer) refFlags() { + seen := make(map[*LSym]bool) + w.ctxt.traverseSyms(traverseRefs, func(rs *LSym) { // only traverse refs, not auxs, as tools don't need auxs + switch rs.PkgIdx { + case goobj.PkgIdxNone, goobj.PkgIdxHashed64, goobj.PkgIdxHashed, goobj.PkgIdxBuiltin, goobj.PkgIdxSelf: // not an external indexed reference + return + case goobj.PkgIdxInvalid: + panic("unindexed symbol reference") + } + if seen[rs] { + return + } + seen[rs] = true + symref := makeSymRef(rs) + flag2 := uint8(0) + if rs.UsedInIface() { + flag2 |= goobj.SymFlagUsedInIface + } + if flag2 == 0 { + return // no need to write zero flags + } + o := &w.tmpRefFlags + o.SetSym(symref) + o.SetFlag2(flag2) + o.Write(w.Writer) + }) +} + +// Emits names of referenced indexed symbols, used by tools (objdump, nm) +// only. +func (w *writer) refNames() { + if w.ctxt.Flag_noRefName { + return + } + seen := make(map[*LSym]bool) + w.ctxt.traverseSyms(traverseRefs, func(rs *LSym) { // only traverse refs, not auxs, as tools don't need auxs + switch rs.PkgIdx { + case goobj.PkgIdxNone, goobj.PkgIdxHashed64, goobj.PkgIdxHashed, goobj.PkgIdxBuiltin, goobj.PkgIdxSelf: // not an external indexed reference + return + case goobj.PkgIdxInvalid: + panic("unindexed symbol reference") + } + if seen[rs] { + return + } + seen[rs] = true + symref := makeSymRef(rs) + o := &w.tmpRefName + o.SetSym(symref) + o.SetName(rs.Name, w.Writer) + o.Write(w.Writer) + }) + // TODO: output in sorted order? + // Currently tools (cmd/internal/goobj package) doesn't use mmap, + // and it just read it into a map in memory upfront. If it uses + // mmap, if the output is sorted, it probably could avoid reading + // into memory and just do lookups in the mmap'd object file. +} + +// return the number of aux symbols s have. 
+func nAuxSym(s *LSym) int { + n := 0 + if s.Gotype != nil { + n++ + } + if fn := s.Func(); fn != nil { + // FuncInfo is an aux symbol, each Funcdata is an aux symbol + n += 1 + len(fn.Pcln.Funcdata) + if fn.dwarfInfoSym != nil && fn.dwarfInfoSym.Size != 0 { + n++ + } + if fn.dwarfLocSym != nil && fn.dwarfLocSym.Size != 0 { + n++ + } + if fn.dwarfRangesSym != nil && fn.dwarfRangesSym.Size != 0 { + n++ + } + if fn.dwarfDebugLinesSym != nil && fn.dwarfDebugLinesSym.Size != 0 { + n++ + } + if fn.Pcln.Pcsp != nil && fn.Pcln.Pcsp.Size != 0 { + n++ + } + if fn.Pcln.Pcfile != nil && fn.Pcln.Pcfile.Size != 0 { + n++ + } + if fn.Pcln.Pcline != nil && fn.Pcln.Pcline.Size != 0 { + n++ + } + if fn.Pcln.Pcinline != nil && fn.Pcln.Pcinline.Size != 0 { + n++ + } + n += len(fn.Pcln.Pcdata) + } + return n +} + +// generate symbols for FuncInfo. +func genFuncInfoSyms(ctxt *Link) { + infosyms := make([]*LSym, 0, len(ctxt.Text)) + var b bytes.Buffer + symidx := int32(len(ctxt.defs)) + for _, s := range ctxt.Text { + fn := s.Func() + if fn == nil { + continue + } + o := goobj.FuncInfo{ + Args: uint32(fn.Args), + Locals: uint32(fn.Locals), + FuncID: fn.FuncID, + FuncFlag: fn.FuncFlag, + StartLine: fn.StartLine, + } + pc := &fn.Pcln + i := 0 + o.File = make([]goobj.CUFileIndex, len(pc.UsedFiles)) + for f := range pc.UsedFiles { + o.File[i] = f + i++ + } + sort.Slice(o.File, func(i, j int) bool { return o.File[i] < o.File[j] }) + o.InlTree = make([]goobj.InlTreeNode, len(pc.InlTree.nodes)) + for i, inl := range pc.InlTree.nodes { + f, l := ctxt.getFileIndexAndLine(inl.Pos) + o.InlTree[i] = goobj.InlTreeNode{ + Parent: int32(inl.Parent), + File: goobj.CUFileIndex(f), + Line: l, + Func: makeSymRef(inl.Func), + ParentPC: inl.ParentPC, + } + } + + o.Write(&b) + p := b.Bytes() + isym := &LSym{ + Type: objabi.SDATA, // for now, I don't think it matters + PkgIdx: goobj.PkgIdxSelf, + SymIdx: symidx, + P: append([]byte(nil), p...), + Size: int64(len(p)), + } + isym.Set(AttrIndexed, true) + 
symidx++ + infosyms = append(infosyms, isym) + fn.FuncInfoSym = isym + b.Reset() + + dwsyms := []*LSym{fn.dwarfRangesSym, fn.dwarfLocSym, fn.dwarfDebugLinesSym, fn.dwarfInfoSym} + for _, s := range dwsyms { + if s == nil || s.Size == 0 { + continue + } + s.PkgIdx = goobj.PkgIdxSelf + s.SymIdx = symidx + s.Set(AttrIndexed, true) + symidx++ + infosyms = append(infosyms, s) + } + } + ctxt.defs = append(ctxt.defs, infosyms...) +} + +func writeAuxSymDebug(ctxt *Link, par *LSym, aux *LSym) { + // Most aux symbols (ex: funcdata) are not interesting-- + // pick out just the DWARF ones for now. + if aux.Type != objabi.SDWARFLOC && + aux.Type != objabi.SDWARFFCN && + aux.Type != objabi.SDWARFABSFCN && + aux.Type != objabi.SDWARFLINES && + aux.Type != objabi.SDWARFRANGE { + return + } + ctxt.writeSymDebugNamed(aux, "aux for "+par.Name) +} + +func debugAsmEmit(ctxt *Link) { + if ctxt.Debugasm > 0 { + ctxt.traverseSyms(traverseDefs, ctxt.writeSymDebug) + if ctxt.Debugasm > 1 { + fn := func(par *LSym, aux *LSym) { + writeAuxSymDebug(ctxt, par, aux) + } + ctxt.traverseAuxSyms(traverseAux, fn) + } + } +} + +func (ctxt *Link) writeSymDebug(s *LSym) { + ctxt.writeSymDebugNamed(s, s.Name) +} + +func (ctxt *Link) writeSymDebugNamed(s *LSym, name string) { + ver := "" + if ctxt.Debugasm > 1 { + ver = fmt.Sprintf("<%d>", s.ABI()) + } + fmt.Fprintf(ctxt.Bso, "%s%s ", name, ver) + if s.Type != 0 { + fmt.Fprintf(ctxt.Bso, "%v ", s.Type) + } + if s.Static() { + fmt.Fprint(ctxt.Bso, "static ") + } + if s.DuplicateOK() { + fmt.Fprintf(ctxt.Bso, "dupok ") + } + if s.CFunc() { + fmt.Fprintf(ctxt.Bso, "cfunc ") + } + if s.NoSplit() { + fmt.Fprintf(ctxt.Bso, "nosplit ") + } + if s.Func() != nil && s.Func().FuncFlag&objabi.FuncFlag_TOPFRAME != 0 { + fmt.Fprintf(ctxt.Bso, "topframe ") + } + if s.Func() != nil && s.Func().FuncFlag&objabi.FuncFlag_ASM != 0 { + fmt.Fprintf(ctxt.Bso, "asm ") + } + fmt.Fprintf(ctxt.Bso, "size=%d", s.Size) + if s.Type == objabi.STEXT { + fn := s.Func() + 
fmt.Fprintf(ctxt.Bso, " args=%#x locals=%#x funcid=%#x align=%#x", uint64(fn.Args), uint64(fn.Locals), uint64(fn.FuncID), uint64(fn.Align)) + if s.Leaf() { + fmt.Fprintf(ctxt.Bso, " leaf") + } + } + fmt.Fprintf(ctxt.Bso, "\n") + if s.Type == objabi.STEXT { + for p := s.Func().Text; p != nil; p = p.Link { + fmt.Fprintf(ctxt.Bso, "\t%#04x ", uint(int(p.Pc))) + if ctxt.Debugasm > 1 { + io.WriteString(ctxt.Bso, p.String()) + } else { + p.InnermostString(ctxt.Bso) + } + fmt.Fprintln(ctxt.Bso) + } + } + for i := 0; i < len(s.P); i += 16 { + fmt.Fprintf(ctxt.Bso, "\t%#04x", uint(i)) + j := i + for ; j < i+16 && j < len(s.P); j++ { + fmt.Fprintf(ctxt.Bso, " %02x", s.P[j]) + } + for ; j < i+16; j++ { + fmt.Fprintf(ctxt.Bso, " ") + } + fmt.Fprintf(ctxt.Bso, " ") + for j = i; j < i+16 && j < len(s.P); j++ { + c := int(s.P[j]) + b := byte('.') + if ' ' <= c && c <= 0x7e { + b = byte(c) + } + ctxt.Bso.WriteByte(b) + } + + fmt.Fprintf(ctxt.Bso, "\n") + } + + sort.Sort(relocByOff(s.R)) // generate stable output + for _, r := range s.R { + name := "" + ver := "" + if r.Sym != nil { + name = r.Sym.Name + if ctxt.Debugasm > 1 { + ver = fmt.Sprintf("<%d>", r.Sym.ABI()) + } + } else if r.Type == objabi.R_TLS_LE { + name = "TLS" + } + if ctxt.Arch.InFamily(sys.ARM, sys.PPC64) { + fmt.Fprintf(ctxt.Bso, "\trel %d+%d t=%d %s%s+%x\n", int(r.Off), r.Siz, r.Type, name, ver, uint64(r.Add)) + } else { + fmt.Fprintf(ctxt.Bso, "\trel %d+%d t=%d %s%s+%d\n", int(r.Off), r.Siz, r.Type, name, ver, r.Add) + } + } +} + +// relocByOff sorts relocations by their offsets. 
+type relocByOff []Reloc + +func (x relocByOff) Len() int { return len(x) } +func (x relocByOff) Less(i, j int) bool { return x[i].Off < x[j].Off } +func (x relocByOff) Swap(i, j int) { x[i], x[j] = x[j], x[i] } diff --git a/src/cmd/internal/obj/objfile_test.go b/src/cmd/internal/obj/objfile_test.go new file mode 100644 index 0000000..9e99056 --- /dev/null +++ b/src/cmd/internal/obj/objfile_test.go @@ -0,0 +1,144 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package obj + +import ( + "bytes" + "internal/testenv" + "os" + "path/filepath" + "testing" + "unsafe" + + "cmd/internal/goobj" + "cmd/internal/sys" +) + +var dummyArch = LinkArch{Arch: sys.ArchAMD64} + +func TestContentHash64(t *testing.T) { + s1 := &LSym{P: []byte("A")} + s2 := &LSym{P: []byte("A\x00\x00\x00")} + s1.Set(AttrContentAddressable, true) + s2.Set(AttrContentAddressable, true) + h1 := contentHash64(s1) + h2 := contentHash64(s2) + if h1 != h2 { + t.Errorf("contentHash64(s1)=%x, contentHash64(s2)=%x, expect equal", h1, h2) + } + + ctxt := Linknew(&dummyArch) // little endian + s3 := ctxt.Int64Sym(int64('A')) + h3 := contentHash64(s3) + if h1 != h3 { + t.Errorf("contentHash64(s1)=%x, contentHash64(s3)=%x, expect equal", h1, h3) + } +} + +func TestContentHash(t *testing.T) { + syms := []*LSym{ + &LSym{P: []byte("TestSymbol")}, // 0 + &LSym{P: []byte("TestSymbol")}, // 1 + &LSym{P: []byte("TestSymbol2")}, // 2 + &LSym{P: []byte("")}, // 3 + &LSym{P: []byte("")}, // 4 + &LSym{P: []byte("")}, // 5 + &LSym{P: []byte("")}, // 6 + } + for _, s := range syms { + s.Set(AttrContentAddressable, true) + s.PkgIdx = goobj.PkgIdxHashed + } + // s3 references s0 + r := Addrel(syms[3]) + r.Sym = syms[0] + // s4 references s0 + r = Addrel(syms[4]) + r.Sym = syms[0] + // s5 references s1 + r = Addrel(syms[5]) + r.Sym = syms[1] + // s6 references s2 + r = Addrel(syms[6]) + r.Sym = syms[2] + + // 
compute hashes + h := make([]goobj.HashType, len(syms)) + w := &writer{} + for i := range h { + h[i] = w.contentHash(syms[i]) + } + + tests := []struct { + a, b int + equal bool + }{ + {0, 1, true}, // same contents, no relocs + {0, 2, false}, // different contents + {3, 4, true}, // same contents, same relocs + {3, 5, true}, // recursively same contents + {3, 6, false}, // same contents, different relocs + } + for _, test := range tests { + if (h[test.a] == h[test.b]) != test.equal { + eq := "equal" + if !test.equal { + eq = "not equal" + } + t.Errorf("h%d=%x, h%d=%x, expect %s", test.a, h[test.a], test.b, h[test.b], eq) + } + } +} + +func TestSymbolTooLarge(t *testing.T) { // Issue 42054 + testenv.MustHaveGoBuild(t) + if unsafe.Sizeof(uintptr(0)) < 8 { + t.Skip("skip on 32-bit architectures") + } + + tmpdir, err := os.MkdirTemp("", "TestSymbolTooLarge") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(tmpdir) + + src := filepath.Join(tmpdir, "p.go") + err = os.WriteFile(src, []byte("package p; var x [1<<32]byte"), 0666) + if err != nil { + t.Fatalf("failed to write source file: %v\n", err) + } + obj := filepath.Join(tmpdir, "p.o") + cmd := testenv.Command(t, testenv.GoToolPath(t), "tool", "compile", "-p=p", "-o", obj, src) + out, err := cmd.CombinedOutput() + if err == nil { + t.Fatalf("did not fail\noutput: %s", out) + } + const want = "symbol too large" + if !bytes.Contains(out, []byte(want)) { + t.Errorf("unexpected error message: want: %q, got: %s", want, out) + } +} + +func TestNoRefName(t *testing.T) { + // Test that the norefname flag works. + testenv.MustHaveGoBuild(t) + + tmpdir := t.TempDir() + + src := filepath.Join(tmpdir, "x.go") + err := os.WriteFile(src, []byte("package main; import \"fmt\"; func main() { fmt.Println(123) }\n"), 0666) + if err != nil { + t.Fatalf("failed to write source file: %v\n", err) + } + exe := filepath.Join(tmpdir, "x.exe") + + // Build the fmt package with norefname. Not rebuilding all packages to save time. 
+ // Also testing that norefname and non-norefname packages can link together. + cmd := testenv.Command(t, testenv.GoToolPath(t), "build", "-gcflags=fmt=-d=norefname", "-o", exe, src) + out, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("build failed: %v, output:\n%s", err, out) + } +} diff --git a/src/cmd/internal/obj/pass.go b/src/cmd/internal/obj/pass.go new file mode 100644 index 0000000..b91a15d --- /dev/null +++ b/src/cmd/internal/obj/pass.go @@ -0,0 +1,181 @@ +// Inferno utils/6l/pass.c +// https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/pass.c +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package obj + +// Code and data passes. + +// brloop returns the ultimate destination of the series of unconditional jumps beginning at p. +// In the case of an infinite loop, brloop returns nil. +func brloop(p *Prog) *Prog { + c := 0 + for q := p; q != nil; q = q.To.Target() { + if q.As != AJMP || q.To.Target() == nil { + return q + } + c++ + if c >= 5000 { + // infinite loop + return nil + } + } + panic("unreachable") +} + +// checkaddr checks that a has an expected encoding, especially TYPE_CONST vs TYPE_ADDR. +func checkaddr(ctxt *Link, p *Prog, a *Addr) { + switch a.Type { + case TYPE_NONE, TYPE_REGREG2, TYPE_REGLIST: + return + + case TYPE_BRANCH, TYPE_TEXTSIZE: + if a.Reg != 0 || a.Index != 0 || a.Scale != 0 || a.Name != 0 { + break + } + return + + case TYPE_MEM: + return + + case TYPE_CONST: + // TODO(rsc): After fixing SHRQ, check a.Index != 0 too. + if a.Name != 0 || a.Sym != nil || a.Reg != 0 { + ctxt.Diag("argument is TYPE_CONST, should be TYPE_ADDR, in %v", p) + return + } + + if a.Reg != 0 || a.Scale != 0 || a.Name != 0 || a.Sym != nil || a.Val != nil { + break + } + return + + case TYPE_FCONST, TYPE_SCONST: + if a.Reg != 0 || a.Index != 0 || a.Scale != 0 || a.Name != 0 || a.Offset != 0 || a.Sym != nil { + break + } + return + + case TYPE_REG: + // TODO(rsc): After fixing PINSRQ, check a.Offset != 0 too. + // TODO(rsc): After fixing SHRQ, check a.Index != 0 too. 
+ if a.Scale != 0 || a.Name != 0 || a.Sym != nil { + break + } + return + + case TYPE_ADDR: + if a.Val != nil { + break + } + if a.Reg == 0 && a.Index == 0 && a.Scale == 0 && a.Name == 0 && a.Sym == nil { + ctxt.Diag("argument is TYPE_ADDR, should be TYPE_CONST, in %v", p) + } + return + + case TYPE_SHIFT, TYPE_REGREG: + if a.Index != 0 || a.Scale != 0 || a.Name != 0 || a.Sym != nil || a.Val != nil { + break + } + return + + case TYPE_INDIR: + // Expect sym and name to be set, nothing else. + // Technically more is allowed, but this is only used for *name(SB). + if a.Reg != 0 || a.Index != 0 || a.Scale != 0 || a.Name == 0 || a.Offset != 0 || a.Sym == nil || a.Val != nil { + break + } + return + case TYPE_SPECIAL: + if a.Reg != 0 || a.Index != 0 || a.Scale != 0 || a.Name != 0 || a.Class != 0 || a.Sym != nil { + break + } + return + } + + ctxt.Diag("invalid encoding for argument %v", p) +} + +func linkpatch(ctxt *Link, sym *LSym, newprog ProgAlloc) { + for p := sym.Func().Text; p != nil; p = p.Link { + checkaddr(ctxt, p, &p.From) + if p.GetFrom3() != nil { + checkaddr(ctxt, p, p.GetFrom3()) + } + checkaddr(ctxt, p, &p.To) + + if ctxt.Arch.Progedit != nil { + ctxt.Arch.Progedit(ctxt, p, newprog) + } + if p.To.Type != TYPE_BRANCH { + continue + } + if p.To.Val != nil { + continue + } + + if p.To.Sym != nil { + continue + } + q := sym.Func().Text + for q != nil && p.To.Offset != q.Pc { + if q.Forwd != nil && p.To.Offset >= q.Forwd.Pc { + q = q.Forwd + } else { + q = q.Link + } + } + + if q == nil { + name := "<nil>" + if p.To.Sym != nil { + name = p.To.Sym.Name + } + ctxt.Diag("branch out of range (%#x)\n%v [%s]", uint32(p.To.Offset), p, name) + p.To.Type = TYPE_NONE + } + + p.To.SetTarget(q) + } + + if !ctxt.Flag_optimize { + return + } + + // Collapse series of jumps to jumps. 
+ for p := sym.Func().Text; p != nil; p = p.Link { + if p.To.Target() == nil { + continue + } + p.To.SetTarget(brloop(p.To.Target())) + if p.To.Target() != nil && p.To.Type == TYPE_BRANCH { + p.To.Offset = p.To.Target().Pc + } + } +} diff --git a/src/cmd/internal/obj/pcln.go b/src/cmd/internal/obj/pcln.go new file mode 100644 index 0000000..67a0780 --- /dev/null +++ b/src/cmd/internal/obj/pcln.go @@ -0,0 +1,423 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package obj + +import ( + "cmd/internal/goobj" + "cmd/internal/objabi" + "encoding/binary" + "fmt" + "log" +) + +// funcpctab writes to dst a pc-value table mapping the code in func to the values +// returned by valfunc parameterized by arg. The invocation of valfunc to update the +// current value is, for each p, +// +// sym = valfunc(func, p, 0, arg); +// record sym.P as value at p->pc; +// sym = valfunc(func, p, 1, arg); +// +// where func is the function, val is the current value, p is the instruction being +// considered, and arg can be used to further parameterize valfunc. +func funcpctab(ctxt *Link, func_ *LSym, desc string, valfunc func(*Link, *LSym, int32, *Prog, int32, interface{}) int32, arg interface{}) *LSym { + dbg := desc == ctxt.Debugpcln + dst := []byte{} + sym := &LSym{ + Type: objabi.SRODATA, + Attribute: AttrContentAddressable | AttrPcdata, + } + + if dbg { + ctxt.Logf("funcpctab %s [valfunc=%s]\n", func_.Name, desc) + } + + val := int32(-1) + oldval := val + fn := func_.Func() + if fn.Text == nil { + // Return the empty symbol we've built so far. + return sym + } + + pc := fn.Text.Pc + + if dbg { + ctxt.Logf("%6x %6d %v\n", uint64(pc), val, fn.Text) + } + + buf := make([]byte, binary.MaxVarintLen32) + started := false + for p := fn.Text; p != nil; p = p.Link { + // Update val. If it's not changing, keep going. 
+ val = valfunc(ctxt, func_, val, p, 0, arg) + + if val == oldval && started { + val = valfunc(ctxt, func_, val, p, 1, arg) + if dbg { + ctxt.Logf("%6x %6s %v\n", uint64(p.Pc), "", p) + } + continue + } + + // If the pc of the next instruction is the same as the + // pc of this instruction, this instruction is not a real + // instruction. Keep going, so that we only emit a delta + // for a true instruction boundary in the program. + if p.Link != nil && p.Link.Pc == p.Pc { + val = valfunc(ctxt, func_, val, p, 1, arg) + if dbg { + ctxt.Logf("%6x %6s %v\n", uint64(p.Pc), "", p) + } + continue + } + + // The table is a sequence of (value, pc) pairs, where each + // pair states that the given value is in effect from the current position + // up to the given pc, which becomes the new current position. + // To generate the table as we scan over the program instructions, + // we emit a "(value" when pc == func->value, and then + // each time we observe a change in value we emit ", pc) (value". + // When the scan is over, we emit the closing ", pc)". + // + // The table is delta-encoded. The value deltas are signed and + // transmitted in zig-zag form, where a complement bit is placed in bit 0, + // and the pc deltas are unsigned. Both kinds of deltas are sent + // as variable-length little-endian base-128 integers, + // where the 0x80 bit indicates that the integer continues. + + if dbg { + ctxt.Logf("%6x %6d %v\n", uint64(p.Pc), val, p) + } + + if started { + pcdelta := (p.Pc - pc) / int64(ctxt.Arch.MinLC) + n := binary.PutUvarint(buf, uint64(pcdelta)) + dst = append(dst, buf[:n]...) + pc = p.Pc + } + + delta := val - oldval + n := binary.PutVarint(buf, int64(delta)) + dst = append(dst, buf[:n]...) 
+ oldval = val + started = true + val = valfunc(ctxt, func_, val, p, 1, arg) + } + + if started { + if dbg { + ctxt.Logf("%6x done\n", uint64(fn.Text.Pc+func_.Size)) + } + v := (func_.Size - pc) / int64(ctxt.Arch.MinLC) + if v < 0 { + ctxt.Diag("negative pc offset: %v", v) + } + n := binary.PutUvarint(buf, uint64(v)) + dst = append(dst, buf[:n]...) + // add terminating varint-encoded 0, which is just 0 + dst = append(dst, 0) + } + + if dbg { + ctxt.Logf("wrote %d bytes to %p\n", len(dst), dst) + for _, p := range dst { + ctxt.Logf(" %02x", p) + } + ctxt.Logf("\n") + } + + sym.Size = int64(len(dst)) + sym.P = dst + return sym +} + +// pctofileline computes either the file number (arg == 0) +// or the line number (arg == 1) to use at p. +// Because p.Pos applies to p, phase == 0 (before p) +// takes care of the update. +func pctofileline(ctxt *Link, sym *LSym, oldval int32, p *Prog, phase int32, arg interface{}) int32 { + if p.As == ATEXT || p.As == ANOP || p.Pos.Line() == 0 || phase == 1 { + return oldval + } + f, l := ctxt.getFileIndexAndLine(p.Pos) + if arg == nil { + return l + } + pcln := arg.(*Pcln) + pcln.UsedFiles[goobj.CUFileIndex(f)] = struct{}{} + return int32(f) +} + +// pcinlineState holds the state used to create a function's inlining +// tree and the PC-value table that maps PCs to nodes in that tree. +type pcinlineState struct { + globalToLocal map[int]int + localTree InlTree +} + +// addBranch adds a branch from the global inlining tree in ctxt to +// the function's local inlining tree, returning the index in the local tree. +func (s *pcinlineState) addBranch(ctxt *Link, globalIndex int) int { + if globalIndex < 0 { + return -1 + } + + localIndex, ok := s.globalToLocal[globalIndex] + if ok { + return localIndex + } + + // Since tracebacks don't include column information, we could + // use one node for multiple calls of the same function on the + // same line (e.g., f(x) + f(y)). For now, we use one node for + // each inlined call. 
+ call := ctxt.InlTree.nodes[globalIndex] + call.Parent = s.addBranch(ctxt, call.Parent) + localIndex = len(s.localTree.nodes) + s.localTree.nodes = append(s.localTree.nodes, call) + s.globalToLocal[globalIndex] = localIndex + return localIndex +} + +func (s *pcinlineState) setParentPC(ctxt *Link, globalIndex int, pc int32) { + localIndex, ok := s.globalToLocal[globalIndex] + if !ok { + // We know where to unwind to when we need to unwind a body identified + // by globalIndex. But there may be no instructions generated by that + // body (it's empty, or its instructions were CSEd with other things, etc.). + // In that case, we don't need an unwind entry. + // TODO: is this really right? Seems to happen a whole lot... + return + } + s.localTree.setParentPC(localIndex, pc) +} + +// pctoinline computes the index into the local inlining tree to use at p. +// If p is not the result of inlining, pctoinline returns -1. Because p.Pos +// applies to p, phase == 0 (before p) takes care of the update. +func (s *pcinlineState) pctoinline(ctxt *Link, sym *LSym, oldval int32, p *Prog, phase int32, arg interface{}) int32 { + if phase == 1 { + return oldval + } + + posBase := ctxt.PosTable.Pos(p.Pos).Base() + if posBase == nil { + return -1 + } + + globalIndex := posBase.InliningIndex() + if globalIndex < 0 { + return -1 + } + + if s.globalToLocal == nil { + s.globalToLocal = make(map[int]int) + } + + return int32(s.addBranch(ctxt, globalIndex)) +} + +// pctospadj computes the sp adjustment in effect. +// It is oldval plus any adjustment made by p itself. +// The adjustment by p takes effect only after p, so we +// apply the change during phase == 1. 
+func pctospadj(ctxt *Link, sym *LSym, oldval int32, p *Prog, phase int32, arg interface{}) int32 { + if oldval == -1 { // starting + oldval = 0 + } + if phase == 0 { + return oldval + } + if oldval+p.Spadj < -10000 || oldval+p.Spadj > 1100000000 { + ctxt.Diag("overflow in spadj: %d + %d = %d", oldval, p.Spadj, oldval+p.Spadj) + ctxt.DiagFlush() + log.Fatalf("bad code") + } + + return oldval + p.Spadj +} + +// pctopcdata computes the pcdata value in effect at p. +// A PCDATA instruction sets the value in effect at future +// non-PCDATA instructions. +// Since PCDATA instructions have no width in the final code, +// it does not matter which phase we use for the update. +func pctopcdata(ctxt *Link, sym *LSym, oldval int32, p *Prog, phase int32, arg interface{}) int32 { + if phase == 0 || p.As != APCDATA || p.From.Offset != int64(arg.(uint32)) { + return oldval + } + if int64(int32(p.To.Offset)) != p.To.Offset { + ctxt.Diag("overflow in PCDATA instruction: %v", p) + ctxt.DiagFlush() + log.Fatalf("bad code") + } + + return int32(p.To.Offset) +} + +func linkpcln(ctxt *Link, cursym *LSym) { + pcln := &cursym.Func().Pcln + pcln.UsedFiles = make(map[goobj.CUFileIndex]struct{}) + + npcdata := 0 + nfuncdata := 0 + for p := cursym.Func().Text; p != nil; p = p.Link { + // Find the highest ID of any used PCDATA table. This ignores PCDATA table + // that consist entirely of "-1", since that's the assumed default value. + // From.Offset is table ID + // To.Offset is data + if p.As == APCDATA && p.From.Offset >= int64(npcdata) && p.To.Offset != -1 { // ignore -1 as we start at -1, if we only see -1, nothing changed + npcdata = int(p.From.Offset + 1) + } + // Find the highest ID of any FUNCDATA table. 
+ // From.Offset is table ID + if p.As == AFUNCDATA && p.From.Offset >= int64(nfuncdata) { + nfuncdata = int(p.From.Offset + 1) + } + } + + pcln.Pcdata = make([]*LSym, npcdata) + pcln.Funcdata = make([]*LSym, nfuncdata) + + pcln.Pcsp = funcpctab(ctxt, cursym, "pctospadj", pctospadj, nil) + pcln.Pcfile = funcpctab(ctxt, cursym, "pctofile", pctofileline, pcln) + pcln.Pcline = funcpctab(ctxt, cursym, "pctoline", pctofileline, nil) + + // Check that all the Progs used as inline markers are still reachable. + // See issue #40473. + fn := cursym.Func() + inlMarkProgs := make(map[*Prog]struct{}, len(fn.InlMarks)) + for _, inlMark := range fn.InlMarks { + inlMarkProgs[inlMark.p] = struct{}{} + } + for p := fn.Text; p != nil; p = p.Link { + delete(inlMarkProgs, p) + } + if len(inlMarkProgs) > 0 { + ctxt.Diag("one or more instructions used as inline markers are no longer reachable") + } + + pcinlineState := new(pcinlineState) + pcln.Pcinline = funcpctab(ctxt, cursym, "pctoinline", pcinlineState.pctoinline, nil) + for _, inlMark := range fn.InlMarks { + pcinlineState.setParentPC(ctxt, int(inlMark.id), int32(inlMark.p.Pc)) + } + pcln.InlTree = pcinlineState.localTree + if ctxt.Debugpcln == "pctoinline" && len(pcln.InlTree.nodes) > 0 { + ctxt.Logf("-- inlining tree for %s:\n", cursym) + dumpInlTree(ctxt, pcln.InlTree) + ctxt.Logf("--\n") + } + + // tabulate which pc and func data we have. + havepc := make([]uint32, (npcdata+31)/32) + havefunc := make([]uint32, (nfuncdata+31)/32) + for p := fn.Text; p != nil; p = p.Link { + if p.As == AFUNCDATA { + if (havefunc[p.From.Offset/32]>>uint64(p.From.Offset%32))&1 != 0 { + ctxt.Diag("multiple definitions for FUNCDATA $%d", p.From.Offset) + } + havefunc[p.From.Offset/32] |= 1 << uint64(p.From.Offset%32) + } + + if p.As == APCDATA && p.To.Offset != -1 { + havepc[p.From.Offset/32] |= 1 << uint64(p.From.Offset%32) + } + } + + // pcdata. + for i := 0; i < npcdata; i++ { + if (havepc[i/32]>>uint(i%32))&1 == 0 { + // use an empty symbol. 
+ pcln.Pcdata[i] = &LSym{ + Type: objabi.SRODATA, + Attribute: AttrContentAddressable | AttrPcdata, + } + } else { + pcln.Pcdata[i] = funcpctab(ctxt, cursym, "pctopcdata", pctopcdata, interface{}(uint32(i))) + } + } + + // funcdata + if nfuncdata > 0 { + for p := fn.Text; p != nil; p = p.Link { + if p.As != AFUNCDATA { + continue + } + i := int(p.From.Offset) + if p.To.Type != TYPE_MEM || p.To.Offset != 0 { + panic(fmt.Sprintf("bad funcdata: %v", p)) + } + pcln.Funcdata[i] = p.To.Sym + } + } +} + +// PCIter iterates over encoded pcdata tables. +type PCIter struct { + p []byte + PC uint32 + NextPC uint32 + PCScale uint32 + Value int32 + start bool + Done bool +} + +// NewPCIter creates a PCIter with a scale factor for the PC step size. +func NewPCIter(pcScale uint32) *PCIter { + it := new(PCIter) + it.PCScale = pcScale + return it +} + +// Next advances it to the Next pc. +func (it *PCIter) Next() { + it.PC = it.NextPC + if it.Done { + return + } + if len(it.p) == 0 { + it.Done = true + return + } + + // Value delta + val, n := binary.Varint(it.p) + if n <= 0 { + log.Fatalf("bad Value varint in pciterNext: read %v", n) + } + it.p = it.p[n:] + + if val == 0 && !it.start { + it.Done = true + return + } + + it.start = false + it.Value += int32(val) + + // pc delta + pc, n := binary.Uvarint(it.p) + if n <= 0 { + log.Fatalf("bad pc varint in pciterNext: read %v", n) + } + it.p = it.p[n:] + + it.NextPC = it.PC + uint32(pc)*it.PCScale +} + +// init prepares it to iterate over p, +// and advances it to the first pc. +func (it *PCIter) Init(p []byte) { + it.p = p + it.PC = 0 + it.NextPC = 0 + it.Value = -1 + it.start = true + it.Done = false + it.Next() +} diff --git a/src/cmd/internal/obj/plist.go b/src/cmd/internal/obj/plist.go new file mode 100644 index 0000000..fe9d2e1 --- /dev/null +++ b/src/cmd/internal/obj/plist.go @@ -0,0 +1,382 @@ +// Copyright 2013 The Go Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package obj

import (
	"cmd/internal/objabi"
	"cmd/internal/src"
	"fmt"
	"strings"
)

// Plist is the unit of work handed to Flushplist: a chain of Progs
// starting at Firstpc and linked through Prog.Link, together with
// Curfn, which Flushplist forwards unchanged to populateDWARF.
type Plist struct {
	Firstpc *Prog
	Curfn   interface{} // holds a *gc.Node, if non-nil
}

// ProgAlloc is a function that allocates Progs.
// It is used to provide access to cached/bulk-allocated Progs to the assemblers.
type ProgAlloc func() *Prog

// Flushplist processes the Progs in plist in three steps:
//
//  1. It walks the chain from plist.Firstpc, unlinking every Prog and
//     re-linking it behind the most recent ATEXT Prog. AEND Progs and
//     Progs that belong to no named function are dropped. FUNCDATA
//     references to go_args_stackmap and no_pointers_stackmap are
//     rewritten to concrete symbols on the way.
//  2. When ctxt.IsAsm is set, it adds FUNCDATA_ArgsPointerMaps and
//     FUNCDATA_ArgInfo Progs to ABI0 functions whose name starts with
//     `"".` and that do not already carry them.
//  3. For every collected text symbol it runs mkfwd, the optional
//     Arch.ErrorCheck hook, linkpatch, Arch.Preprocess and Arch.Assemble,
//     and then, as long as ctxt.Errors is zero, linkpcln and (only when
//     myimportpath is non-empty) populateDWARF.
//
// If newprog is nil, ctxt.NewProg is used to allocate new Progs.
func Flushplist(ctxt *Link, plist *Plist, newprog ProgAlloc, myimportpath string) {
	// Build list of symbols, and assign instructions to lists.
	var curtext *LSym // symbol of the function currently being collected; nil if none
	var etext *Prog   // last Prog linked into the current function's chain
	var text []*LSym  // every named text symbol seen, in order

	var plink *Prog
	for p := plist.Firstpc; p != nil; p = plink {
		if ctxt.Debugasm > 0 && ctxt.Debugvlog {
			fmt.Printf("obj: %v\n", p)
		}
		// Remember the successor, then detach p from the input chain.
		// p is linked back in at the bottom of the loop only if it is kept.
		plink = p.Link
		p.Link = nil

		switch p.As {
		case AEND:
			continue

		case ATEXT:
			s := p.From.Sym
			if s == nil {
				// func _() { }
				curtext = nil
				continue
			}
			// The ATEXT Prog starts a new chain; it is not linked to the
			// previous function's instructions.
			text = append(text, s)
			etext = p
			curtext = s
			continue

		case AFUNCDATA:
			// Rewrite reference to go_args_stackmap(SB) to the Go-provided declaration information.
			if curtext == nil { // func _() {}
				continue
			}
			// NOTE(review): p.To.Sym is dereferenced without a nil check;
			// presumably every FUNCDATA Prog reaching here names a symbol — confirm.
			switch p.To.Sym.Name {
			case "go_args_stackmap":
				if p.From.Type != TYPE_CONST || p.From.Offset != objabi.FUNCDATA_ArgsPointerMaps {
					ctxt.Diag("%s: FUNCDATA use of go_args_stackmap(SB) without FUNCDATA_ArgsPointerMaps", p.Pos)
				}
				p.To.Sym = ctxt.LookupDerived(curtext, curtext.Name+".args_stackmap")
			case "no_pointers_stackmap":
				if p.From.Type != TYPE_CONST || p.From.Offset != objabi.FUNCDATA_LocalsPointerMaps {
					ctxt.Diag("%s: FUNCDATA use of no_pointers_stackmap(SB) without FUNCDATA_LocalsPointerMaps", p.Pos)
				}
				// funcdata for functions with no local variables in frame.
				// Define two zero-length bitmaps, because the same index is used
				// for the local variables as for the argument frame, and assembly
				// frames have two argument bitmaps, one without results and one with results.
				// Write []uint32{2, 0}.
				b := make([]byte, 8)
				ctxt.Arch.ByteOrder.PutUint32(b, 2)
				s := ctxt.GCLocalsSym(b)
				if !s.OnList() {
					ctxt.Globl(s, int64(len(s.P)), int(RODATA|DUPOK))
				}
				p.To.Sym = s
			}

		}

		if curtext == nil {
			// Instruction outside any named function: leave it unlinked.
			etext = nil
			continue
		}
		// Append p to the current function's chain.
		etext.Link = p
		etext = p
	}

	if newprog == nil {
		newprog = ctxt.NewProg
	}

	// Add reference to Go arguments for assembly functions without them.
	if ctxt.IsAsm {
		for _, s := range text {
			if !strings.HasPrefix(s.Name, "\"\".") {
				continue
			}
			// The current args_stackmap generation in the compiler assumes
			// that the function in question is ABI0, so avoid introducing
			// an args_stackmap reference if the func is not ABI0 (better to
			// have no stackmap than an incorrect/lying stackmap).
			if s.ABI() != ABI0 {
				continue
			}
			// Scan the function once for both FUNCDATA kinds, stopping
			// early when both have been found.
			foundArgMap, foundArgInfo := false, false
			for p := s.Func().Text; p != nil; p = p.Link {
				if p.As == AFUNCDATA && p.From.Type == TYPE_CONST {
					if p.From.Offset == objabi.FUNCDATA_ArgsPointerMaps {
						foundArgMap = true
					}
					if p.From.Offset == objabi.FUNCDATA_ArgInfo {
						foundArgInfo = true
					}
					if foundArgMap && foundArgInfo {
						break
					}
				}
			}
			if !foundArgMap {
				p := Appendp(s.Func().Text, newprog)
				p.As = AFUNCDATA
				p.From.Type = TYPE_CONST
				p.From.Offset = objabi.FUNCDATA_ArgsPointerMaps
				p.To.Type = TYPE_MEM
				p.To.Name = NAME_EXTERN
				p.To.Sym = ctxt.LookupDerived(s, s.Name+".args_stackmap")
			}
			if !foundArgInfo {
				p := Appendp(s.Func().Text, newprog)
				p.As = AFUNCDATA
				p.From.Type = TYPE_CONST
				p.From.Offset = objabi.FUNCDATA_ArgInfo
				p.To.Type = TYPE_MEM
				p.To.Name = NAME_EXTERN
				// The arginfo symbol name carries the numeric ABI as a suffix.
				p.To.Sym = ctxt.LookupDerived(s, fmt.Sprintf("%s.arginfo%d", s.Name, s.ABI()))
			}
		}
	}

	// Turn functions into machine code images.
	for _, s := range text {
		mkfwd(s)
		if ctxt.Arch.ErrorCheck != nil {
			ctxt.Arch.ErrorCheck(ctxt, s)
		}
		linkpatch(ctxt, s, newprog)
		ctxt.Arch.Preprocess(ctxt, s, newprog)
		ctxt.Arch.Assemble(ctxt, s, newprog)
		// Once any error has been counted, skip the pc-line tables and
		// DWARF for this symbol but keep processing the remaining ones.
		if ctxt.Errors > 0 {
			continue
		}
		linkpcln(ctxt, s)
		if myimportpath != "" {
			ctxt.populateDWARF(plist.Curfn, s, myimportpath)
		}
	}
}

// InitTextSym sets s up as a text (function) symbol declared at start
// with the TEXT flags in flag. A nil s is ignored. If s already has
// function info, or is already on a list, a redeclaration diagnostic is
// reported and s is not initialized further. Otherwise the function's
// FuncID, FuncFlag and StartLine are recorded, the symbol attributes
// implied by flag are set, s becomes an objabi.STEXT symbol appended to
// ctxt.Text, and ctxt.dwarfSym is called for it.
func (ctxt *Link) InitTextSym(s *LSym, flag int, start src.XPos) {
	if s == nil {
		// func _() { }
		return
	}
	if s.Func() != nil {
		ctxt.Diag("%s: symbol %s redeclared\n\t%s: other declaration of symbol %s", ctxt.PosTable.Pos(start), s.Name, ctxt.PosTable.Pos(s.Func().Text.Pos), s.Name)
		return
	}
	// Note that the function info is created before the on-list check
	// below, so it exists even when that check reports a redeclaration.
	s.NewFuncInfo()
	if s.OnList() {
		ctxt.Diag("%s: symbol %s redeclared", ctxt.PosTable.Pos(start), s.Name)
		return
	}

	// startLine should be the same line number that would be displayed via
	// pcln, etc for the declaration (i.e., relative line number, as
	// adjusted by //line).
	_, startLine := ctxt.getFileSymbolAndLine(start)

	// TODO(mdempsky): Remove once cmd/asm stops writing "" symbols.
	name := strings.Replace(s.Name, "\"\"", ctxt.Pkgpath, -1)
	s.Func().FuncID = objabi.GetFuncID(name, flag&WRAPPER != 0 || flag&ABIWRAPPER != 0)
	s.Func().FuncFlag = ctxt.toFuncFlag(flag)
	s.Func().StartLine = startLine
	s.Set(AttrOnList, true)
	s.Set(AttrDuplicateOK, flag&DUPOK != 0)
	s.Set(AttrNoSplit, flag&NOSPLIT != 0)
	s.Set(AttrReflectMethod, flag&REFLECTMETHOD != 0)
	s.Set(AttrWrapper, flag&WRAPPER != 0)
	s.Set(AttrABIWrapper, flag&ABIWRAPPER != 0)
	s.Set(AttrNeedCtxt, flag&NEEDCTXT != 0)
	s.Set(AttrNoFrame, flag&NOFRAME != 0)
	s.Type = objabi.STEXT
	ctxt.Text = append(ctxt.Text, s)

	// Set up DWARF entries for s
	ctxt.dwarfSym(s)
}

// toFuncFlag converts TEXT flag bits into an objabi.FuncFlag.
// TOPFRAME in flag yields FuncFlag_TOPFRAME; FuncFlag_ASM is added
// whenever ctxt.IsAsm is set, independent of flag.
func (ctxt *Link) toFuncFlag(flag int) objabi.FuncFlag {
	var out objabi.FuncFlag
	if flag&TOPFRAME != 0 {
		out |= objabi.FuncFlag_TOPFRAME
	}
	if ctxt.IsAsm {
		out |= objabi.FuncFlag_ASM
	}
	return out
}

// Globl is GloblPos with src.NoXPos as the position.
func (ctxt *Link) Globl(s *LSym, size int64, flag int) {
	ctxt.GloblPos(s, size, flag, src.NoXPos)
}

// GloblPos records s as a data symbol of the given size declared at pos.
// If s is already on a list a redeclaration diagnostic is reported, but
// processing continues and s is appended to ctxt.Data again.
// The symbol type defaults to objabi.SBSS when unset and is then refined
// from flag: RODATA wins over NOPTR, which wins over TLSBSS. DUPOK marks
// the symbol as duplicate-OK.
func (ctxt *Link) GloblPos(s *LSym, size int64, flag int, pos src.XPos) {
	if s.OnList() {
		// TODO: print where the first declaration was.
		ctxt.Diag("%s: symbol %s redeclared", ctxt.PosTable.Pos(pos), s.Name)
	}
	s.Set(AttrOnList, true)
	ctxt.Data = append(ctxt.Data, s)
	s.Size = size
	if s.Type == 0 {
		s.Type = objabi.SBSS
	}
	if flag&DUPOK != 0 {
		s.Set(AttrDuplicateOK, true)
	}
	if flag&RODATA != 0 {
		s.Type = objabi.SRODATA
	} else if flag&NOPTR != 0 {
		// NOPTR keeps the data/bss distinction: SDATA becomes
		// SNOPTRDATA, anything else becomes SNOPTRBSS.
		if s.Type == objabi.SDATA {
			s.Type = objabi.SNOPTRDATA
		} else {
			s.Type = objabi.SNOPTRBSS
		}
	} else if flag&TLSBSS != 0 {
		s.Type = objabi.STLSBSS
	}
}

// EmitEntryLiveness generates PCDATA Progs after p to switch to the
// liveness map active at the entry of function s. It returns the last
// Prog generated.
+func (ctxt *Link) EmitEntryLiveness(s *LSym, p *Prog, newprog ProgAlloc) *Prog { + pcdata := ctxt.EmitEntryStackMap(s, p, newprog) + pcdata = ctxt.EmitEntryUnsafePoint(s, pcdata, newprog) + return pcdata +} + +// Similar to EmitEntryLiveness, but just emit stack map. +func (ctxt *Link) EmitEntryStackMap(s *LSym, p *Prog, newprog ProgAlloc) *Prog { + pcdata := Appendp(p, newprog) + pcdata.Pos = s.Func().Text.Pos + pcdata.As = APCDATA + pcdata.From.Type = TYPE_CONST + pcdata.From.Offset = objabi.PCDATA_StackMapIndex + pcdata.To.Type = TYPE_CONST + pcdata.To.Offset = -1 // pcdata starts at -1 at function entry + + return pcdata +} + +// Similar to EmitEntryLiveness, but just emit unsafe point map. +func (ctxt *Link) EmitEntryUnsafePoint(s *LSym, p *Prog, newprog ProgAlloc) *Prog { + pcdata := Appendp(p, newprog) + pcdata.Pos = s.Func().Text.Pos + pcdata.As = APCDATA + pcdata.From.Type = TYPE_CONST + pcdata.From.Offset = objabi.PCDATA_UnsafePoint + pcdata.To.Type = TYPE_CONST + pcdata.To.Offset = -1 + + return pcdata +} + +// StartUnsafePoint generates PCDATA Progs after p to mark the +// beginning of an unsafe point. The unsafe point starts immediately +// after p. +// It returns the last Prog generated. +func (ctxt *Link) StartUnsafePoint(p *Prog, newprog ProgAlloc) *Prog { + pcdata := Appendp(p, newprog) + pcdata.As = APCDATA + pcdata.From.Type = TYPE_CONST + pcdata.From.Offset = objabi.PCDATA_UnsafePoint + pcdata.To.Type = TYPE_CONST + pcdata.To.Offset = objabi.PCDATA_UnsafePointUnsafe + + return pcdata +} + +// EndUnsafePoint generates PCDATA Progs after p to mark the end of an +// unsafe point, restoring the register map index to oldval. +// The unsafe point ends right after p. +// It returns the last Prog generated. 
+func (ctxt *Link) EndUnsafePoint(p *Prog, newprog ProgAlloc, oldval int64) *Prog { + pcdata := Appendp(p, newprog) + pcdata.As = APCDATA + pcdata.From.Type = TYPE_CONST + pcdata.From.Offset = objabi.PCDATA_UnsafePoint + pcdata.To.Type = TYPE_CONST + pcdata.To.Offset = oldval + + return pcdata +} + +// MarkUnsafePoints inserts PCDATAs to mark nonpreemptible and restartable +// instruction sequences, based on isUnsafePoint and isRestartable predicate. +// p0 is the start of the instruction stream. +// isUnsafePoint(p) returns true if p is not safe for async preemption. +// isRestartable(p) returns true if we can restart at the start of p (this Prog) +// upon async preemption. (Currently multi-Prog restartable sequence is not +// supported.) +// isRestartable can be nil. In this case it is treated as always returning false. +// If isUnsafePoint(p) and isRestartable(p) are both true, it is treated as +// an unsafe point. +func MarkUnsafePoints(ctxt *Link, p0 *Prog, newprog ProgAlloc, isUnsafePoint, isRestartable func(*Prog) bool) { + if isRestartable == nil { + // Default implementation: nothing is restartable. + isRestartable = func(*Prog) bool { return false } + } + prev := p0 + prevPcdata := int64(-1) // entry PC data value + prevRestart := int64(0) + for p := prev.Link; p != nil; p, prev = p.Link, p { + if p.As == APCDATA && p.From.Offset == objabi.PCDATA_UnsafePoint { + prevPcdata = p.To.Offset + continue + } + if prevPcdata == objabi.PCDATA_UnsafePointUnsafe { + continue // already unsafe + } + if isUnsafePoint(p) { + q := ctxt.StartUnsafePoint(prev, newprog) + q.Pc = p.Pc + q.Link = p + // Advance to the end of unsafe point. 
+ for p.Link != nil && isUnsafePoint(p.Link) { + p = p.Link + } + if p.Link == nil { + break // Reached the end, don't bother marking the end + } + p = ctxt.EndUnsafePoint(p, newprog, prevPcdata) + p.Pc = p.Link.Pc + continue + } + if isRestartable(p) { + val := int64(objabi.PCDATA_Restart1) + if val == prevRestart { + val = objabi.PCDATA_Restart2 + } + prevRestart = val + q := Appendp(prev, newprog) + q.As = APCDATA + q.From.Type = TYPE_CONST + q.From.Offset = objabi.PCDATA_UnsafePoint + q.To.Type = TYPE_CONST + q.To.Offset = val + q.Pc = p.Pc + q.Link = p + + if p.Link == nil { + break // Reached the end, don't bother marking the end + } + if isRestartable(p.Link) { + // Next Prog is also restartable. No need to mark the end + // of this sequence. We'll just go ahead mark the next one. + continue + } + p = Appendp(p, newprog) + p.As = APCDATA + p.From.Type = TYPE_CONST + p.From.Offset = objabi.PCDATA_UnsafePoint + p.To.Type = TYPE_CONST + p.To.Offset = prevPcdata + p.Pc = p.Link.Pc + } + } +} diff --git a/src/cmd/internal/obj/ppc64/a.out.go b/src/cmd/internal/obj/ppc64/a.out.go new file mode 100644 index 0000000..995f9d9 --- /dev/null +++ b/src/cmd/internal/obj/ppc64/a.out.go @@ -0,0 +1,1092 @@ +// cmd/9c/9.out.h from Vita Nuova. +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2008 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2008 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package ppc64 + +import "cmd/internal/obj" + +//go:generate go run ../stringer.go -i $GOFILE -o anames.go -p ppc64 + +/* + * powerpc 64 + */ +const ( + NSNAME = 8 + NSYM = 50 + NREG = 32 /* number of general registers */ + NFREG = 32 /* number of floating point registers */ +) + +const ( + /* RBasePPC64 = 4096 */ + /* R0=4096 ... R31=4127 */ + REG_R0 = obj.RBasePPC64 + iota + REG_R1 + REG_R2 + REG_R3 + REG_R4 + REG_R5 + REG_R6 + REG_R7 + REG_R8 + REG_R9 + REG_R10 + REG_R11 + REG_R12 + REG_R13 + REG_R14 + REG_R15 + REG_R16 + REG_R17 + REG_R18 + REG_R19 + REG_R20 + REG_R21 + REG_R22 + REG_R23 + REG_R24 + REG_R25 + REG_R26 + REG_R27 + REG_R28 + REG_R29 + REG_R30 + REG_R31 + + // CR bits. Use Book 1, chapter 2 naming for bits. 
Keep aligned to 32 + REG_CR0LT + REG_CR0GT + REG_CR0EQ + REG_CR0SO + REG_CR1LT + REG_CR1GT + REG_CR1EQ + REG_CR1SO + REG_CR2LT + REG_CR2GT + REG_CR2EQ + REG_CR2SO + REG_CR3LT + REG_CR3GT + REG_CR3EQ + REG_CR3SO + REG_CR4LT + REG_CR4GT + REG_CR4EQ + REG_CR4SO + REG_CR5LT + REG_CR5GT + REG_CR5EQ + REG_CR5SO + REG_CR6LT + REG_CR6GT + REG_CR6EQ + REG_CR6SO + REG_CR7LT + REG_CR7GT + REG_CR7EQ + REG_CR7SO + + /* Align FPR and VSR vectors such that when masked with 0x3F they produce + an equivalent VSX register. */ + /* F0=4160 ... F31=4191 */ + REG_F0 + REG_F1 + REG_F2 + REG_F3 + REG_F4 + REG_F5 + REG_F6 + REG_F7 + REG_F8 + REG_F9 + REG_F10 + REG_F11 + REG_F12 + REG_F13 + REG_F14 + REG_F15 + REG_F16 + REG_F17 + REG_F18 + REG_F19 + REG_F20 + REG_F21 + REG_F22 + REG_F23 + REG_F24 + REG_F25 + REG_F26 + REG_F27 + REG_F28 + REG_F29 + REG_F30 + REG_F31 + + /* V0=4192 ... V31=4223 */ + REG_V0 + REG_V1 + REG_V2 + REG_V3 + REG_V4 + REG_V5 + REG_V6 + REG_V7 + REG_V8 + REG_V9 + REG_V10 + REG_V11 + REG_V12 + REG_V13 + REG_V14 + REG_V15 + REG_V16 + REG_V17 + REG_V18 + REG_V19 + REG_V20 + REG_V21 + REG_V22 + REG_V23 + REG_V24 + REG_V25 + REG_V26 + REG_V27 + REG_V28 + REG_V29 + REG_V30 + REG_V31 + + /* VS0=4224 ... 
VS63=4287 */ + REG_VS0 + REG_VS1 + REG_VS2 + REG_VS3 + REG_VS4 + REG_VS5 + REG_VS6 + REG_VS7 + REG_VS8 + REG_VS9 + REG_VS10 + REG_VS11 + REG_VS12 + REG_VS13 + REG_VS14 + REG_VS15 + REG_VS16 + REG_VS17 + REG_VS18 + REG_VS19 + REG_VS20 + REG_VS21 + REG_VS22 + REG_VS23 + REG_VS24 + REG_VS25 + REG_VS26 + REG_VS27 + REG_VS28 + REG_VS29 + REG_VS30 + REG_VS31 + REG_VS32 + REG_VS33 + REG_VS34 + REG_VS35 + REG_VS36 + REG_VS37 + REG_VS38 + REG_VS39 + REG_VS40 + REG_VS41 + REG_VS42 + REG_VS43 + REG_VS44 + REG_VS45 + REG_VS46 + REG_VS47 + REG_VS48 + REG_VS49 + REG_VS50 + REG_VS51 + REG_VS52 + REG_VS53 + REG_VS54 + REG_VS55 + REG_VS56 + REG_VS57 + REG_VS58 + REG_VS59 + REG_VS60 + REG_VS61 + REG_VS62 + REG_VS63 + + REG_CR0 + REG_CR1 + REG_CR2 + REG_CR3 + REG_CR4 + REG_CR5 + REG_CR6 + REG_CR7 + + // MMA accumulator registers, these shadow VSR 0-31 + // e.g MMAx shadows VSRx*4-VSRx*4+3 or + // MMA0 shadows VSR0-VSR3 + REG_A0 + REG_A1 + REG_A2 + REG_A3 + REG_A4 + REG_A5 + REG_A6 + REG_A7 + + REG_MSR + REG_FPSCR + REG_CR + + REG_SPECIAL = REG_CR0 + + REG_CRBIT0 = REG_CR0LT // An alias for a Condition Register bit 0 + + REG_SPR0 = obj.RBasePPC64 + 1024 // first of 1024 registers + + REG_XER = REG_SPR0 + 1 + REG_LR = REG_SPR0 + 8 + REG_CTR = REG_SPR0 + 9 + + REGZERO = REG_R0 /* set to zero */ + REGSP = REG_R1 + REGSB = REG_R2 + REGRET = REG_R3 + REGARG = -1 /* -1 disables passing the first argument in register */ + REGRT1 = REG_R20 /* reserved for runtime, duffzero and duffcopy */ + REGRT2 = REG_R21 /* reserved for runtime, duffcopy */ + REGMIN = REG_R7 /* register variables allocated from here to REGMAX */ + REGCTXT = REG_R11 /* context for closures */ + REGTLS = REG_R13 /* C ABI TLS base pointer */ + REGMAX = REG_R27 + REGEXT = REG_R30 /* external registers allocated from here down */ + REGG = REG_R30 /* G */ + REGTMP = REG_R31 /* used by the linker */ + FREGRET = REG_F0 + FREGMIN = REG_F17 /* first register variable */ + FREGMAX = REG_F26 /* last register variable for 9g only */ + 
FREGEXT = REG_F26 /* first external register */ +) + +// OpenPOWER ABI for Linux Supplement Power Architecture 64-Bit ELF V2 ABI +// https://openpowerfoundation.org/?resource_lib=64-bit-elf-v2-abi-specification-power-architecture +var PPC64DWARFRegisters = map[int16]int16{} + +func init() { + // f assigns dwarfregister[from:to] = (base):(to-from+base) + f := func(from, to, base int16) { + for r := int16(from); r <= to; r++ { + PPC64DWARFRegisters[r] = r - from + base + } + } + f(REG_R0, REG_R31, 0) + f(REG_F0, REG_F31, 32) + f(REG_V0, REG_V31, 77) + f(REG_CR0, REG_CR7, 68) + + f(REG_VS0, REG_VS31, 32) // overlaps F0-F31 + f(REG_VS32, REG_VS63, 77) // overlaps V0-V31 + PPC64DWARFRegisters[REG_LR] = 65 + PPC64DWARFRegisters[REG_CTR] = 66 + PPC64DWARFRegisters[REG_XER] = 76 +} + +/* + * GENERAL: + * + * compiler allocates R3 up as temps + * compiler allocates register variables R7-R27 + * compiler allocates external registers R30 down + * + * compiler allocates register variables F17-F26 + * compiler allocates external registers F26 down + */ +const ( + BIG = 32768 - 8 +) + +const ( + /* mark flags */ + LABEL = 1 << 0 + LEAF = 1 << 1 + FLOAT = 1 << 2 + BRANCH = 1 << 3 + LOAD = 1 << 4 + FCMP = 1 << 5 + SYNC = 1 << 6 + LIST = 1 << 7 + FOLL = 1 << 8 + NOSCHED = 1 << 9 + PFX_X64B = 1 << 10 // A prefixed instruction crossing a 64B boundary +) + +// Values for use in branch instruction BC +// BC BO,BI,label +// BO is type of branch + likely bits described below +// BI is CR value + branch type +// ex: BEQ CR2,label is BC 12,10,label +// 12 = BO_BCR +// 10 = BI_CR2 + BI_EQ + +const ( + BI_CR0 = 0 + BI_CR1 = 4 + BI_CR2 = 8 + BI_CR3 = 12 + BI_CR4 = 16 + BI_CR5 = 20 + BI_CR6 = 24 + BI_CR7 = 28 + BI_LT = 0 + BI_GT = 1 + BI_EQ = 2 + BI_FU = 3 +) + +// Common values for the BO field.
+ +const ( + BO_ALWAYS = 20 // branch unconditionally + BO_BCTR = 16 // decrement ctr, branch on ctr != 0 + BO_NOTBCTR = 18 // decrement ctr, branch on ctr == 0 + BO_BCR = 12 // branch on cr value + BO_BCRBCTR = 8 // decrement ctr, branch on ctr != 0 and cr value + BO_NOTBCR = 4 // branch on not cr value +) + +// Bit settings from the CR + +const ( + C_COND_LT = iota // 0 result is negative + C_COND_GT // 1 result is positive + C_COND_EQ // 2 result is zero + C_COND_SO // 3 summary overflow or FP compare w/ NaN +) + +const ( + C_NONE = iota + C_REGP /* An even numbered gpr which can be used as a gpr pair argument */ + C_REG /* Any gpr register */ + C_FREGP /* An even numbered fpr which can be used as a fpr pair argument */ + C_FREG /* Any fpr register */ + C_VREG /* Any vector register */ + C_VSREGP /* An even numbered vsx register which can be used as a vsx register pair argument */ + C_VSREG /* Any vector-scalar register */ + C_CREG /* The condition register (CR) */ + C_CRBIT /* A single bit of the CR register (0-31) */ + C_SPR /* special processor register */ + C_AREG /* MMA accumulator register */ + C_ZCON /* The constant zero */ + C_U1CON /* 1 bit unsigned constant */ + C_U2CON /* 2 bit unsigned constant */ + C_U3CON /* 3 bit unsigned constant */ + C_U4CON /* 4 bit unsigned constant */ + C_U5CON /* 5 bit unsigned constant */ + C_U8CON /* 8 bit unsigned constant */ + C_U15CON /* 15 bit unsigned constant */ + C_S16CON /* 16 bit signed constant */ + C_U16CON /* 16 bit unsigned constant */ + C_32S16CON /* Any 32 bit constant of the form 0x....0000, signed or unsigned */ + C_32CON /* Any constant which fits into 32 bits. Can be signed or unsigned */ + C_S34CON /* 34 bit signed constant */ + C_64CON /* Any constant which fits into 64 bits.
Can be signed or unsigned */ + C_SACON /* $n(REG) where n <= int16 */ + C_LACON /* $n(REG) where n <= int32 */ + C_DACON /* $n(REG) where n <= int64 */ + C_SBRA /* A short offset argument to a branching instruction */ + C_LBRA /* A long offset argument to a branching instruction */ + C_LBRAPIC /* Like C_LBRA, but requires an extra NOP for potential TOC restore by the linker. */ + C_ZOREG /* An $0+reg memory op */ + C_SOREG /* An $n+reg memory arg where n is a 16 bit signed offset */ + C_LOREG /* An $n+reg memory arg where n is a 32 bit signed offset */ + C_XOREG /* An reg+reg memory arg */ + C_FPSCR /* The fpscr register */ + C_XER /* The xer, holds the carry bit */ + C_LR /* The link register */ + C_CTR /* The count register */ + C_ANY /* Any argument */ + C_GOK /* A non-matched argument */ + C_ADDR /* A symbolic memory location */ + C_TLS_LE /* A thread local, local-exec, type memory arg */ + C_TLS_IE /* A thread local, initial-exec, type memory arg */ + C_TEXTSIZE /* An argument with Type obj.TYPE_TEXTSIZE */ + + C_NCLASS /* must be the last */ + + /* Aliased names which should be cleaned up, or integrated. */ + C_SCON = C_U15CON + C_UCON = C_32S16CON + C_ADDCON = C_S16CON + C_ANDCON = C_U16CON + C_LCON = C_32CON + + /* Aliased names which may be generated by ppc64map for the optab. 
// ppc64 opcode numbers.
//
// Values are assigned by iota, starting at obj.ABasePPC64 + obj.A_ARCHSPECIFIC,
// so each constant's value is determined purely by its position in this block.
// The Anames table (generated by stringer from a.out.go) lists the mnemonics
// in this same order, which means inserting, removing or reordering entries
// here requires regenerating that table.
//
// NOTE(review): ALASTGEN is not defined in this block; presumably it comes
// from the generated opcode tables that continue numbering at ALASTAOUT —
// confirm before changing ALAST.
const (
	AADD = obj.ABasePPC64 + obj.A_ARCHSPECIFIC + iota
	AADDCC
	AADDIS
	AADDV
	AADDVCC
	AADDC
	AADDCCC
	AADDCV
	AADDCVCC
	AADDME
	AADDMECC
	AADDMEVCC
	AADDMEV
	AADDE
	AADDECC
	AADDEVCC
	AADDEV
	AADDZE
	AADDZECC
	AADDZEVCC
	AADDZEV
	AADDEX
	AAND
	AANDCC
	AANDN
	AANDNCC
	AANDISCC
	ABC
	ABCL
	ABEQ
	ABGE // not LT = G/E/U
	ABGT
	ABLE // not GT = L/E/U
	ABLT
	ABNE  // not EQ = L/G/U
	ABVC  // Branch if float not unordered (also branch on not summary overflow)
	ABVS  // Branch if float unordered (also branch on summary overflow)
	ABDNZ // Decrement CTR, and branch if CTR != 0
	ABDZ  // Decrement CTR, and branch if CTR == 0
	ACMP
	ACMPU
	ACMPEQB
	ACNTLZW
	ACNTLZWCC
	ACRAND
	ACRANDN
	ACREQV
	ACRNAND
	ACRNOR
	ACROR
	ACRORN
	ACRXOR
	ADIVW
	ADIVWCC
	ADIVWVCC
	ADIVWV
	ADIVWU
	ADIVWUCC
	ADIVWUVCC
	ADIVWUV
	AMODUD
	AMODUW
	AMODSD
	AMODSW
	AEQV
	AEQVCC
	AEXTSB
	AEXTSBCC
	AEXTSH
	AEXTSHCC
	AFABS
	AFABSCC
	AFADD
	AFADDCC
	AFADDS
	AFADDSCC
	AFCMPO
	AFCMPU
	AFCTIW
	AFCTIWCC
	AFCTIWZ
	AFCTIWZCC
	AFDIV
	AFDIVCC
	AFDIVS
	AFDIVSCC
	AFMADD
	AFMADDCC
	AFMADDS
	AFMADDSCC
	AFMOVD
	AFMOVDCC
	AFMOVDU
	AFMOVS
	AFMOVSU
	AFMOVSX
	AFMOVSZ
	AFMSUB
	AFMSUBCC
	AFMSUBS
	AFMSUBSCC
	AFMUL
	AFMULCC
	AFMULS
	AFMULSCC
	AFNABS
	AFNABSCC
	AFNEG
	AFNEGCC
	AFNMADD
	AFNMADDCC
	AFNMADDS
	AFNMADDSCC
	AFNMSUB
	AFNMSUBCC
	AFNMSUBS
	AFNMSUBSCC
	AFRSP
	AFRSPCC
	AFSUB
	AFSUBCC
	AFSUBS
	AFSUBSCC
	AISEL
	AMOVMW
	ALBAR
	ALHAR
	ALSW
	ALWAR
	ALWSYNC
	AMOVDBR
	AMOVWBR
	AMOVB
	AMOVBU
	AMOVBZ
	AMOVBZU
	AMOVH
	AMOVHBR
	AMOVHU
	AMOVHZ
	AMOVHZU
	AMOVW
	AMOVWU
	AMOVFL
	AMOVCRFS
	AMTFSB0
	AMTFSB0CC
	AMTFSB1
	AMTFSB1CC
	AMULHW
	AMULHWCC
	AMULHWU
	AMULHWUCC
	AMULLW
	AMULLWCC
	AMULLWVCC
	AMULLWV
	ANAND
	ANANDCC
	ANEG
	ANEGCC
	ANEGVCC
	ANEGV
	ANOR
	ANORCC
	AOR
	AORCC
	AORN
	AORNCC
	AORIS
	AREM
	AREMU
	ARFI
	ARLWMI
	ARLWMICC
	ARLWNM
	ARLWNMCC
	ACLRLSLWI
	ASLW
	ASLWCC
	ASRW
	ASRAW
	ASRAWCC
	ASRWCC
	ASTBCCC
	ASTHCCC
	ASTSW
	ASTWCCC
	ASUB
	ASUBCC
	ASUBVCC
	ASUBC
	ASUBCCC
	ASUBCV
	ASUBCVCC
	ASUBME
	ASUBMECC
	ASUBMEVCC
	ASUBMEV
	ASUBV
	ASUBE
	ASUBECC
	ASUBEV
	ASUBEVCC
	ASUBZE
	ASUBZECC
	ASUBZEVCC
	ASUBZEV
	ASYNC
	AXOR
	AXORCC
	AXORIS

	ADCBF
	ADCBI
	ADCBST
	ADCBT
	ADCBTST
	ADCBZ
	AEIEIO
	AICBI
	AISYNC
	APTESYNC
	ATLBIE
	ATLBIEL
	ATLBSYNC
	ATW

	ASYSCALL
	AWORD

	ARFCI

	AFCPSGN
	AFCPSGNCC
	/* optional on 32-bit */
	AFRES
	AFRESCC
	AFRIM
	AFRIMCC
	AFRIP
	AFRIPCC
	AFRIZ
	AFRIZCC
	AFRIN
	AFRINCC
	AFRSQRTE
	AFRSQRTECC
	AFSEL
	AFSELCC
	AFSQRT
	AFSQRTCC
	AFSQRTS
	AFSQRTSCC

	/* 64-bit */

	ACNTLZD
	ACNTLZDCC
	ACMPW /* CMP with L=0 */
	ACMPWU
	ACMPB
	AFTDIV
	AFTSQRT
	ADIVD
	ADIVDCC
	ADIVDE
	ADIVDECC
	ADIVDEU
	ADIVDEUCC
	ADIVDVCC
	ADIVDV
	ADIVDU
	ADIVDUCC
	ADIVDUVCC
	ADIVDUV
	AEXTSW
	AEXTSWCC
	/* AFCFIW; AFCFIWCC */
	AFCFID
	AFCFIDCC
	AFCFIDU
	AFCFIDUCC
	AFCFIDS
	AFCFIDSCC
	AFCTID
	AFCTIDCC
	AFCTIDZ
	AFCTIDZCC
	ALDAR
	AMOVD
	AMOVDU
	AMOVWZ
	AMOVWZU
	AMULHD
	AMULHDCC
	AMULHDU
	AMULHDUCC
	AMULLD
	AMULLDCC
	AMULLDVCC
	AMULLDV
	ARFID
	ARLDMI
	ARLDMICC
	ARLDIMI
	ARLDIMICC
	ARLDC
	ARLDCCC
	ARLDCR
	ARLDCRCC
	ARLDICR
	ARLDICRCC
	ARLDCL
	ARLDCLCC
	ARLDICL
	ARLDICLCC
	ARLDIC
	ARLDICCC
	ACLRLSLDI
	AROTL
	AROTLW
	ASLBIA
	ASLBIE
	ASLBMFEE
	ASLBMFEV
	ASLBMTE
	ASLD
	ASLDCC
	ASRD
	ASRAD
	ASRADCC
	ASRDCC
	AEXTSWSLI
	AEXTSWSLICC
	ASTDCCC
	ATD

	/* 64-bit pseudo operation */
	ADWORD
	AREMD
	AREMDU

	/* more 64-bit operations */
	AHRFID
	APOPCNTD
	APOPCNTW
	APOPCNTB
	ACNTTZW
	ACNTTZWCC
	ACNTTZD
	ACNTTZDCC
	ACOPY
	APASTECC
	ADARN
	AMADDHD
	AMADDHDU
	AMADDLD

	/* Vector */
	ALVEBX
	ALVEHX
	ALVEWX
	ALVX
	ALVXL
	ALVSL
	ALVSR
	ASTVEBX
	ASTVEHX
	ASTVEWX
	ASTVX
	ASTVXL
	AVAND
	AVANDC
	AVNAND
	AVOR
	AVORC
	AVNOR
	AVXOR
	AVEQV
	AVADDUM
	AVADDUBM
	AVADDUHM
	AVADDUWM
	AVADDUDM
	AVADDUQM
	AVADDCU
	AVADDCUQ
	AVADDCUW
	AVADDUS
	AVADDUBS
	AVADDUHS
	AVADDUWS
	AVADDSS
	AVADDSBS
	AVADDSHS
	AVADDSWS
	AVADDE
	AVADDEUQM
	AVADDECUQ
	AVSUBUM
	AVSUBUBM
	AVSUBUHM
	AVSUBUWM
	AVSUBUDM
	AVSUBUQM
	AVSUBCU
	AVSUBCUQ
	AVSUBCUW
	AVSUBUS
	AVSUBUBS
	AVSUBUHS
	AVSUBUWS
	AVSUBSS
	AVSUBSBS
	AVSUBSHS
	AVSUBSWS
	AVSUBE
	AVSUBEUQM
	AVSUBECUQ
	AVMULESB
	AVMULOSB
	AVMULEUB
	AVMULOUB
	AVMULESH
	AVMULOSH
	AVMULEUH
	AVMULOUH
	AVMULESW
	AVMULOSW
	AVMULEUW
	AVMULOUW
	AVMULUWM
	AVPMSUM
	AVPMSUMB
	AVPMSUMH
	AVPMSUMW
	AVPMSUMD
	AVMSUMUDM
	AVR
	AVRLB
	AVRLH
	AVRLW
	AVRLD
	AVS
	AVSLB
	AVSLH
	AVSLW
	AVSL
	AVSLO
	AVSRB
	AVSRH
	AVSRW
	AVSR
	AVSRO
	AVSLD
	AVSRD
	AVSA
	AVSRAB
	AVSRAH
	AVSRAW
	AVSRAD
	AVSOI
	AVSLDOI
	AVCLZ
	AVCLZB
	AVCLZH
	AVCLZW
	AVCLZD
	AVPOPCNT
	AVPOPCNTB
	AVPOPCNTH
	AVPOPCNTW
	AVPOPCNTD
	AVCMPEQ
	AVCMPEQUB
	AVCMPEQUBCC
	AVCMPEQUH
	AVCMPEQUHCC
	AVCMPEQUW
	AVCMPEQUWCC
	AVCMPEQUD
	AVCMPEQUDCC
	AVCMPGT
	AVCMPGTUB
	AVCMPGTUBCC
	AVCMPGTUH
	AVCMPGTUHCC
	AVCMPGTUW
	AVCMPGTUWCC
	AVCMPGTUD
	AVCMPGTUDCC
	AVCMPGTSB
	AVCMPGTSBCC
	AVCMPGTSH
	AVCMPGTSHCC
	AVCMPGTSW
	AVCMPGTSWCC
	AVCMPGTSD
	AVCMPGTSDCC
	AVCMPNEZB
	AVCMPNEZBCC
	AVCMPNEB
	AVCMPNEBCC
	AVCMPNEH
	AVCMPNEHCC
	AVCMPNEW
	AVCMPNEWCC
	AVPERM
	AVPERMXOR
	AVPERMR
	AVBPERMQ
	AVBPERMD
	AVSEL
	AVSPLTB
	AVSPLTH
	AVSPLTW
	AVSPLTISB
	AVSPLTISH
	AVSPLTISW
	AVCIPH
	AVCIPHER
	AVCIPHERLAST
	AVNCIPH
	AVNCIPHER
	AVNCIPHERLAST
	AVSBOX
	AVSHASIGMA
	AVSHASIGMAW
	AVSHASIGMAD
	AVMRGEW
	AVMRGOW

	/* VSX */
	ALXV
	ALXVL
	ALXVLL
	ALXVD2X
	ALXVW4X
	ALXVH8X
	ALXVB16X
	ALXVX
	ALXVDSX
	ASTXV
	ASTXVL
	ASTXVLL
	ASTXVD2X
	ASTXVW4X
	ASTXVH8X
	ASTXVB16X
	ASTXVX
	ALXSDX
	ASTXSDX
	ALXSIWAX
	ALXSIWZX
	ASTXSIWX
	AMFVSRD
	AMFFPRD
	AMFVRD
	AMFVSRWZ
	AMFVSRLD
	AMTVSRD
	AMTFPRD
	AMTVRD
	AMTVSRWA
	AMTVSRWZ
	AMTVSRDD
	AMTVSRWS
	AXXLAND
	AXXLANDC
	AXXLEQV
	AXXLNAND
	AXXLOR
	AXXLORC
	AXXLNOR
	AXXLORQ
	AXXLXOR
	AXXSEL
	AXXMRGHW
	AXXMRGLW
	AXXSPLTW
	AXXSPLTIB
	AXXPERM
	AXXPERMDI
	AXXSLDWI
	AXXBRQ
	AXXBRD
	AXXBRW
	AXXBRH
	AXSCVDPSP
	AXSCVSPDP
	AXSCVDPSPN
	AXSCVSPDPN
	AXVCVDPSP
	AXVCVSPDP
	AXSCVDPSXDS
	AXSCVDPSXWS
	AXSCVDPUXDS
	AXSCVDPUXWS
	AXSCVSXDDP
	AXSCVUXDDP
	AXSCVSXDSP
	AXSCVUXDSP
	AXVCVDPSXDS
	AXVCVDPSXWS
	AXVCVDPUXDS
	AXVCVDPUXWS
	AXVCVSPSXDS
	AXVCVSPSXWS
	AXVCVSPUXDS
	AXVCVSPUXWS
	AXVCVSXDDP
	AXVCVSXWDP
	AXVCVUXDDP
	AXVCVUXWDP
	AXVCVSXDSP
	AXVCVSXWSP
	AXVCVUXDSP
	AXVCVUXWSP
	ALASTAOUT // The last instruction in this list. Also the first opcode generated by ppc64map.

	// aliases: the ppc64 spellings of the portable jump and call opcodes.
	ABR   = obj.AJMP
	ABL   = obj.ACALL
	ALAST = ALASTGEN // The final enumerated instruction value + 1. This is used to size the oprange table.
)
// Anames holds the printable mnemonic of each ppc64-specific opcode.
//
// The first entry is placed at index obj.A_ARCHSPECIFIC and every following
// entry is positional, so the order here has to stay identical to the opcode
// constant block the table was generated from (a.out.go, according to this
// file's stringer header). The file is generated: change the constants and
// re-run the generator rather than editing entries by hand.
var Anames = []string{
	obj.A_ARCHSPECIFIC: "ADD",
	"ADDCC",
	"ADDIS",
	"ADDV",
	"ADDVCC",
	"ADDC",
	"ADDCCC",
	"ADDCV",
	"ADDCVCC",
	"ADDME",
	"ADDMECC",
	"ADDMEVCC",
	"ADDMEV",
	"ADDE",
	"ADDECC",
	"ADDEVCC",
	"ADDEV",
	"ADDZE",
	"ADDZECC",
	"ADDZEVCC",
	"ADDZEV",
	"ADDEX",
	"AND",
	"ANDCC",
	"ANDN",
	"ANDNCC",
	"ANDISCC",
	"BC",
	"BCL",
	"BEQ",
	"BGE",
	"BGT",
	"BLE",
	"BLT",
	"BNE",
	"BVC",
	"BVS",
	"BDNZ",
	"BDZ",
	"CMP",
	"CMPU",
	"CMPEQB",
	"CNTLZW",
	"CNTLZWCC",
	"CRAND",
	"CRANDN",
	"CREQV",
	"CRNAND",
	"CRNOR",
	"CROR",
	"CRORN",
	"CRXOR",
	"DIVW",
	"DIVWCC",
	"DIVWVCC",
	"DIVWV",
	"DIVWU",
	"DIVWUCC",
	"DIVWUVCC",
	"DIVWUV",
	"MODUD",
	"MODUW",
	"MODSD",
	"MODSW",
	"EQV",
	"EQVCC",
	"EXTSB",
	"EXTSBCC",
	"EXTSH",
	"EXTSHCC",
	"FABS",
	"FABSCC",
	"FADD",
	"FADDCC",
	"FADDS",
	"FADDSCC",
	"FCMPO",
	"FCMPU",
	"FCTIW",
	"FCTIWCC",
	"FCTIWZ",
	"FCTIWZCC",
	"FDIV",
	"FDIVCC",
	"FDIVS",
	"FDIVSCC",
	"FMADD",
	"FMADDCC",
	"FMADDS",
	"FMADDSCC",
	"FMOVD",
	"FMOVDCC",
	"FMOVDU",
	"FMOVS",
	"FMOVSU",
	"FMOVSX",
	"FMOVSZ",
	"FMSUB",
	"FMSUBCC",
	"FMSUBS",
	"FMSUBSCC",
	"FMUL",
	"FMULCC",
	"FMULS",
	"FMULSCC",
	"FNABS",
	"FNABSCC",
	"FNEG",
	"FNEGCC",
	"FNMADD",
	"FNMADDCC",
	"FNMADDS",
	"FNMADDSCC",
	"FNMSUB",
	"FNMSUBCC",
	"FNMSUBS",
	"FNMSUBSCC",
	"FRSP",
	"FRSPCC",
	"FSUB",
	"FSUBCC",
	"FSUBS",
	"FSUBSCC",
	"ISEL",
	"MOVMW",
	"LBAR",
	"LHAR",
	"LSW",
	"LWAR",
	"LWSYNC",
	"MOVDBR",
	"MOVWBR",
	"MOVB",
	"MOVBU",
	"MOVBZ",
	"MOVBZU",
	"MOVH",
	"MOVHBR",
	"MOVHU",
	"MOVHZ",
	"MOVHZU",
	"MOVW",
	"MOVWU",
	"MOVFL",
	"MOVCRFS",
	"MTFSB0",
	"MTFSB0CC",
	"MTFSB1",
	"MTFSB1CC",
	"MULHW",
	"MULHWCC",
	"MULHWU",
	"MULHWUCC",
	"MULLW",
	"MULLWCC",
	"MULLWVCC",
	"MULLWV",
	"NAND",
	"NANDCC",
	"NEG",
	"NEGCC",
	"NEGVCC",
	"NEGV",
	"NOR",
	"NORCC",
	"OR",
	"ORCC",
	"ORN",
	"ORNCC",
	"ORIS",
	"REM",
	"REMU",
	"RFI",
	"RLWMI",
	"RLWMICC",
	"RLWNM",
	"RLWNMCC",
	"CLRLSLWI",
	"SLW",
	"SLWCC",
	"SRW",
	"SRAW",
	"SRAWCC",
	"SRWCC",
	"STBCCC",
	"STHCCC",
	"STSW",
	"STWCCC",
	"SUB",
	"SUBCC",
	"SUBVCC",
	"SUBC",
	"SUBCCC",
	"SUBCV",
	"SUBCVCC",
	"SUBME",
	"SUBMECC",
	"SUBMEVCC",
	"SUBMEV",
	"SUBV",
	"SUBE",
	"SUBECC",
	"SUBEV",
	"SUBEVCC",
	"SUBZE",
	"SUBZECC",
	"SUBZEVCC",
	"SUBZEV",
	"SYNC",
	"XOR",
	"XORCC",
	"XORIS",
	"DCBF",
	"DCBI",
	"DCBST",
	"DCBT",
	"DCBTST",
	"DCBZ",
	"EIEIO",
	"ICBI",
	"ISYNC",
	"PTESYNC",
	"TLBIE",
	"TLBIEL",
	"TLBSYNC",
	"TW",
	"SYSCALL",
	"WORD",
	"RFCI",
	"FCPSGN",
	"FCPSGNCC",
	"FRES",
	"FRESCC",
	"FRIM",
	"FRIMCC",
	"FRIP",
	"FRIPCC",
	"FRIZ",
	"FRIZCC",
	"FRIN",
	"FRINCC",
	"FRSQRTE",
	"FRSQRTECC",
	"FSEL",
	"FSELCC",
	"FSQRT",
	"FSQRTCC",
	"FSQRTS",
	"FSQRTSCC",
	"CNTLZD",
	"CNTLZDCC",
	"CMPW",
	"CMPWU",
	"CMPB",
	"FTDIV",
	"FTSQRT",
	"DIVD",
	"DIVDCC",
	"DIVDE",
	"DIVDECC",
	"DIVDEU",
	"DIVDEUCC",
	"DIVDVCC",
	"DIVDV",
	"DIVDU",
	"DIVDUCC",
	"DIVDUVCC",
	"DIVDUV",
	"EXTSW",
	"EXTSWCC",
	"FCFID",
	"FCFIDCC",
	"FCFIDU",
	"FCFIDUCC",
	"FCFIDS",
	"FCFIDSCC",
	"FCTID",
	"FCTIDCC",
	"FCTIDZ",
	"FCTIDZCC",
	"LDAR",
	"MOVD",
	"MOVDU",
	"MOVWZ",
	"MOVWZU",
	"MULHD",
	"MULHDCC",
	"MULHDU",
	"MULHDUCC",
	"MULLD",
	"MULLDCC",
	"MULLDVCC",
	"MULLDV",
	"RFID",
	"RLDMI",
	"RLDMICC",
	"RLDIMI",
	"RLDIMICC",
	"RLDC",
	"RLDCCC",
	"RLDCR",
	"RLDCRCC",
	"RLDICR",
	"RLDICRCC",
	"RLDCL",
	"RLDCLCC",
	"RLDICL",
	"RLDICLCC",
	"RLDIC",
	"RLDICCC",
	"CLRLSLDI",
	"ROTL",
	"ROTLW",
	"SLBIA",
	"SLBIE",
	"SLBMFEE",
	"SLBMFEV",
	"SLBMTE",
	"SLD",
	"SLDCC",
	"SRD",
	"SRAD",
	"SRADCC",
	"SRDCC",
	"EXTSWSLI",
	"EXTSWSLICC",
	"STDCCC",
	"TD",
	"DWORD",
	"REMD",
	"REMDU",
	"HRFID",
	"POPCNTD",
	"POPCNTW",
	"POPCNTB",
	"CNTTZW",
	"CNTTZWCC",
	"CNTTZD",
	"CNTTZDCC",
	"COPY",
	"PASTECC",
	"DARN",
	"MADDHD",
	"MADDHDU",
	"MADDLD",
	"LVEBX",
	"LVEHX",
	"LVEWX",
	"LVX",
	"LVXL",
	"LVSL",
	"LVSR",
	"STVEBX",
	"STVEHX",
	"STVEWX",
	"STVX",
	"STVXL",
	"VAND",
	"VANDC",
	"VNAND",
	"VOR",
	"VORC",
	"VNOR",
	"VXOR",
	"VEQV",
	"VADDUM",
	"VADDUBM",
	"VADDUHM",
	"VADDUWM",
	"VADDUDM",
	"VADDUQM",
	"VADDCU",
	"VADDCUQ",
	"VADDCUW",
	"VADDUS",
	"VADDUBS",
	"VADDUHS",
	"VADDUWS",
	"VADDSS",
	"VADDSBS",
	"VADDSHS",
	"VADDSWS",
	"VADDE",
	"VADDEUQM",
	"VADDECUQ",
	"VSUBUM",
	"VSUBUBM",
	"VSUBUHM",
	"VSUBUWM",
	"VSUBUDM",
	"VSUBUQM",
	"VSUBCU",
	"VSUBCUQ",
	"VSUBCUW",
	"VSUBUS",
	"VSUBUBS",
	"VSUBUHS",
	"VSUBUWS",
	"VSUBSS",
	"VSUBSBS",
	"VSUBSHS",
	"VSUBSWS",
	"VSUBE",
	"VSUBEUQM",
	"VSUBECUQ",
	"VMULESB",
	"VMULOSB",
	"VMULEUB",
	"VMULOUB",
	"VMULESH",
	"VMULOSH",
	"VMULEUH",
	"VMULOUH",
	"VMULESW",
	"VMULOSW",
	"VMULEUW",
	"VMULOUW",
	"VMULUWM",
	"VPMSUM",
	"VPMSUMB",
	"VPMSUMH",
	"VPMSUMW",
	"VPMSUMD",
	"VMSUMUDM",
	"VR",
	"VRLB",
	"VRLH",
	"VRLW",
	"VRLD",
	"VS",
	"VSLB",
	"VSLH",
	"VSLW",
	"VSL",
	"VSLO",
	"VSRB",
	"VSRH",
	"VSRW",
	"VSR",
	"VSRO",
	"VSLD",
	"VSRD",
	"VSA",
	"VSRAB",
	"VSRAH",
	"VSRAW",
	"VSRAD",
	"VSOI",
	"VSLDOI",
	"VCLZ",
	"VCLZB",
	"VCLZH",
	"VCLZW",
	"VCLZD",
	"VPOPCNT",
	"VPOPCNTB",
	"VPOPCNTH",
	"VPOPCNTW",
	"VPOPCNTD",
	"VCMPEQ",
	"VCMPEQUB",
	"VCMPEQUBCC",
	"VCMPEQUH",
	"VCMPEQUHCC",
	"VCMPEQUW",
	"VCMPEQUWCC",
	"VCMPEQUD",
	"VCMPEQUDCC",
	"VCMPGT",
	"VCMPGTUB",
	"VCMPGTUBCC",
	"VCMPGTUH",
	"VCMPGTUHCC",
	"VCMPGTUW",
	"VCMPGTUWCC",
	"VCMPGTUD",
	"VCMPGTUDCC",
	"VCMPGTSB",
	"VCMPGTSBCC",
	"VCMPGTSH",
	"VCMPGTSHCC",
	"VCMPGTSW",
	"VCMPGTSWCC",
	"VCMPGTSD",
	"VCMPGTSDCC",
	"VCMPNEZB",
	"VCMPNEZBCC",
	"VCMPNEB",
	"VCMPNEBCC",
	"VCMPNEH",
	"VCMPNEHCC",
	"VCMPNEW",
	"VCMPNEWCC",
	"VPERM",
	"VPERMXOR",
	"VPERMR",
	"VBPERMQ",
	"VBPERMD",
	"VSEL",
	"VSPLTB",
	"VSPLTH",
	"VSPLTW",
	"VSPLTISB",
	"VSPLTISH",
	"VSPLTISW",
	"VCIPH",
	"VCIPHER",
	"VCIPHERLAST",
	"VNCIPH",
	"VNCIPHER",
	"VNCIPHERLAST",
	"VSBOX",
	"VSHASIGMA",
	"VSHASIGMAW",
	"VSHASIGMAD",
	"VMRGEW",
	"VMRGOW",
	"LXV",
	"LXVL",
	"LXVLL",
	"LXVD2X",
	"LXVW4X",
	"LXVH8X",
	"LXVB16X",
	"LXVX",
	"LXVDSX",
	"STXV",
	"STXVL",
	"STXVLL",
	"STXVD2X",
	"STXVW4X",
	"STXVH8X",
	"STXVB16X",
	"STXVX",
	"LXSDX",
	"STXSDX",
	"LXSIWAX",
	"LXSIWZX",
	"STXSIWX",
	"MFVSRD",
	"MFFPRD",
	"MFVRD",
	"MFVSRWZ",
	"MFVSRLD",
	"MTVSRD",
	"MTFPRD",
	"MTVRD",
	"MTVSRWA",
	"MTVSRWZ",
	"MTVSRDD",
	"MTVSRWS",
	"XXLAND",
	"XXLANDC",
	"XXLEQV",
	"XXLNAND",
	"XXLOR",
	"XXLORC",
	"XXLNOR",
	"XXLORQ",
	"XXLXOR",
	"XXSEL",
	"XXMRGHW",
	"XXMRGLW",
	"XXSPLTW",
	"XXSPLTIB",
	"XXPERM",
	"XXPERMDI",
	"XXSLDWI",
	"XXBRQ",
	"XXBRD",
	"XXBRW",
	"XXBRH",
	"XSCVDPSP",
	"XSCVSPDP",
	"XSCVDPSPN",
	"XSCVSPDPN",
	"XVCVDPSP",
	"XVCVSPDP",
	"XSCVDPSXDS",
	"XSCVDPSXWS",
	"XSCVDPUXDS",
	"XSCVDPUXWS",
	"XSCVSXDDP",
	"XSCVUXDDP",
	"XSCVSXDSP",
	"XSCVUXDSP",
	"XVCVDPSXDS",
	"XVCVDPSXWS",
	"XVCVDPUXDS",
	"XVCVDPUXWS",
	"XVCVSPSXDS",
	"XVCVSPSXWS",
	"XVCVSPUXDS",
	"XVCVSPUXWS",
	"XVCVSXDDP",
	"XVCVSXWDP",
	"XVCVUXDDP",
	"XVCVUXWDP",
	"XVCVSXDSP",
	"XVCVSXWSP",
	"XVCVUXDSP",
	"XVCVUXWSP",
	"LASTAOUT",
}
// cnames9 is the table of printable operand-class names; it has 47 entries,
// the first being "NONE" and the last "NCLASS".
//
// NOTE(review): the entries look like the C_* operand-class constants with the
// "C_" prefix removed, so the table is presumably indexed by class value.
// Confirm against that constant block before adding or reordering names.
// The rows below are grouped by spelling only; the grouping has no effect on
// the resulting slice.
var cnames9 = []string{
	"NONE",

	// Names spelled *REG / *REGP, plus CRBIT and SPR.
	"REGP", "REG", "FREGP", "FREG", "VREG", "VSREGP", "VSREG",
	"CREG", "CRBIT", "SPR", "MREG",

	// Names spelled *CON.
	"ZCON", "U1CON", "U2CON", "U3CON", "U4CON", "U5CON", "U8CON", "U15CON",
	"S16CON", "U16CON", "32S16CON", "32CON", "S34CON", "64CON",

	// Names spelled *ACON.
	"SACON", "LACON", "DACON",

	// Names spelled *BRA*.
	"SBRA", "LBRA", "LBRAPIC",

	// Names spelled *OREG.
	"ZOREG", "SOREG", "LOREG", "XOREG",

	// Individually named entries.
	"FPSCR", "XER", "LR", "CTR",

	// Remaining entries.
	"ANY", "GOK", "ADDR", "TLS_LE", "TLS_IE", "TEXTSIZE", "NCLASS",
}
// ctxt9 holds state while assembling a single function.
// Each function gets a fresh ctxt9.
// This allows for multiple functions to be safely concurrently assembled.
//
// NOTE(review): the roles of the individual fields are not visible in this
// part of the file. The names suggest the shared link context (ctxt), a Prog
// allocator (newprog), the symbol being assembled (cursym), the frame size
// (autosize), the offset of the operand being classified (instoffset) and the
// current program counter (pc) — confirm at the use sites.
type ctxt9 struct {
	ctxt       *obj.Link
	newprog    obj.ProgAlloc
	cursym     *obj.LSym
	autosize   int32
	instoffset int64
	pc         int64
}

// Instruction layout.

const (
	// NOTE(review): no use of r0iszero is visible here; the name suggests it
	// flags that R0 is treated as a constant zero — confirm at the use sites.
	r0iszero = 1
)

// Optab is one row of the optab table: an opcode together with the operand
// class accepted in each argument position, the asmout case that encodes the
// combination, and the number of bytes of text it occupies.
//
// Rows in optab fill a1..a6 with C_* operand-class constants and simply omit
// the positions an instruction does not use, leaving them at the zero value.
type Optab struct {
	as    obj.As // Opcode
	a1    uint8  // p.From argument (obj.Addr). p is of type obj.Prog.
	a2    uint8  // p.Reg argument (int16 Register)
	a3    uint8  // p.RestArgs[0] (obj.AddrPos)
	a4    uint8  // p.RestArgs[1]
	a5    uint8  // p.RestArgs[2]
	a6    uint8  // p.To (obj.Addr)
	type_ int8   // cases in asmout below. E.g., 44 = st r,(ra+rb); 45 = ld (ra+rb), r
	size  int8   // Text space in bytes to lay operation

	// A prefixed instruction is generated by this opcode. This cannot be placed
	// across a 64B PC address. Opcodes should not translate to more than one
	// prefixed instruction. The prefixed instruction should be written first
	// (e.g when Optab.size > 8).
	ispfx bool

	// NOTE(review): none of the optab rows visible in this part of the file
	// set asmout; presumably it is an optional per-row encoder used instead of
	// the type_ case — confirm where it is assigned and called.
	asmout func(*ctxt9, *obj.Prog, *Optab, *[5]uint32)
}
Unused arguments and values should always +// assume the default value for the given type. +// +// optab does not list every valid ppc64 opcode, it enumerates representative +// operand combinations for a class of instruction. The variable oprange indexes +// all valid ppc64 opcodes. +// +// oprange is initialized to point a slice within optab which contains the valid +// operand combinations for a given instruction. This is initialized from buildop. +// +// Likewise, each slice of optab is dynamically sorted using the ocmp Sort interface +// to arrange entries to minimize text size of each opcode. +var optab = []Optab{ + {as: obj.ATEXT, a1: C_LOREG, a6: C_TEXTSIZE, type_: 0, size: 0}, + {as: obj.ATEXT, a1: C_LOREG, a3: C_LCON, a6: C_TEXTSIZE, type_: 0, size: 0}, + {as: obj.ATEXT, a1: C_ADDR, a6: C_TEXTSIZE, type_: 0, size: 0}, + {as: obj.ATEXT, a1: C_ADDR, a3: C_LCON, a6: C_TEXTSIZE, type_: 0, size: 0}, + /* move register */ + {as: AADD, a1: C_REG, a2: C_REG, a6: C_REG, type_: 2, size: 4}, + {as: AADD, a1: C_REG, a6: C_REG, type_: 2, size: 4}, + {as: AADD, a1: C_SCON, a2: C_REG, a6: C_REG, type_: 4, size: 4}, + {as: AADD, a1: C_SCON, a6: C_REG, type_: 4, size: 4}, + {as: AADD, a1: C_ADDCON, a2: C_REG, a6: C_REG, type_: 4, size: 4}, + {as: AADD, a1: C_ADDCON, a6: C_REG, type_: 4, size: 4}, + {as: AADD, a1: C_UCON, a2: C_REG, a6: C_REG, type_: 20, size: 4}, + {as: AADD, a1: C_UCON, a6: C_REG, type_: 20, size: 4}, + {as: AADD, a1: C_ANDCON, a2: C_REG, a6: C_REG, type_: 22, size: 8}, + {as: AADD, a1: C_ANDCON, a6: C_REG, type_: 22, size: 8}, + {as: AADD, a1: C_LCON, a2: C_REG, a6: C_REG, type_: 22, size: 12}, + {as: AADD, a1: C_LCON, a6: C_REG, type_: 22, size: 12}, + {as: AADDIS, a1: C_ADDCON, a2: C_REG, a6: C_REG, type_: 20, size: 4}, + {as: AADDIS, a1: C_ADDCON, a6: C_REG, type_: 20, size: 4}, + {as: AADDC, a1: C_REG, a2: C_REG, a6: C_REG, type_: 2, size: 4}, + {as: AADDC, a1: C_REG, a6: C_REG, type_: 2, size: 4}, + {as: AADDC, a1: C_ADDCON, a2: C_REG, a6: C_REG, 
type_: 4, size: 4}, + {as: AADDC, a1: C_ADDCON, a6: C_REG, type_: 4, size: 4}, + {as: AADDC, a1: C_LCON, a2: C_REG, a6: C_REG, type_: 22, size: 12}, + {as: AADDC, a1: C_LCON, a6: C_REG, type_: 22, size: 12}, + {as: AAND, a1: C_REG, a2: C_REG, a6: C_REG, type_: 6, size: 4}, /* logical, no literal */ + {as: AAND, a1: C_REG, a6: C_REG, type_: 6, size: 4}, + {as: AANDCC, a1: C_REG, a2: C_REG, a6: C_REG, type_: 6, size: 4}, + {as: AANDCC, a1: C_REG, a6: C_REG, type_: 6, size: 4}, + {as: AANDCC, a1: C_ANDCON, a6: C_REG, type_: 58, size: 4}, + {as: AANDCC, a1: C_ANDCON, a2: C_REG, a6: C_REG, type_: 58, size: 4}, + {as: AANDCC, a1: C_UCON, a6: C_REG, type_: 59, size: 4}, + {as: AANDCC, a1: C_UCON, a2: C_REG, a6: C_REG, type_: 59, size: 4}, + {as: AANDCC, a1: C_ADDCON, a6: C_REG, type_: 23, size: 8}, + {as: AANDCC, a1: C_ADDCON, a2: C_REG, a6: C_REG, type_: 23, size: 8}, + {as: AANDCC, a1: C_LCON, a6: C_REG, type_: 23, size: 12}, + {as: AANDCC, a1: C_LCON, a2: C_REG, a6: C_REG, type_: 23, size: 12}, + {as: AANDISCC, a1: C_ANDCON, a6: C_REG, type_: 59, size: 4}, + {as: AANDISCC, a1: C_ANDCON, a2: C_REG, a6: C_REG, type_: 59, size: 4}, + {as: AMULLW, a1: C_REG, a2: C_REG, a6: C_REG, type_: 2, size: 4}, + {as: AMULLW, a1: C_REG, a6: C_REG, type_: 2, size: 4}, + {as: AMULLW, a1: C_ADDCON, a2: C_REG, a6: C_REG, type_: 4, size: 4}, + {as: AMULLW, a1: C_ADDCON, a6: C_REG, type_: 4, size: 4}, + {as: AMULLW, a1: C_ANDCON, a2: C_REG, a6: C_REG, type_: 4, size: 4}, + {as: AMULLW, a1: C_ANDCON, a6: C_REG, type_: 4, size: 4}, + {as: AMULLW, a1: C_LCON, a2: C_REG, a6: C_REG, type_: 22, size: 12}, + {as: AMULLW, a1: C_LCON, a6: C_REG, type_: 22, size: 12}, + {as: ASUBC, a1: C_REG, a2: C_REG, a6: C_REG, type_: 10, size: 4}, + {as: ASUBC, a1: C_REG, a6: C_REG, type_: 10, size: 4}, + {as: ASUBC, a1: C_REG, a3: C_ADDCON, a6: C_REG, type_: 27, size: 4}, + {as: ASUBC, a1: C_REG, a3: C_LCON, a6: C_REG, type_: 28, size: 12}, + {as: AOR, a1: C_REG, a2: C_REG, a6: C_REG, type_: 6, size: 4}, /* 
logical, literal not cc (or/xor) */ + {as: AOR, a1: C_REG, a6: C_REG, type_: 6, size: 4}, + {as: AOR, a1: C_ANDCON, a6: C_REG, type_: 58, size: 4}, + {as: AOR, a1: C_ANDCON, a2: C_REG, a6: C_REG, type_: 58, size: 4}, + {as: AOR, a1: C_UCON, a6: C_REG, type_: 59, size: 4}, + {as: AOR, a1: C_UCON, a2: C_REG, a6: C_REG, type_: 59, size: 4}, + {as: AOR, a1: C_ADDCON, a6: C_REG, type_: 23, size: 8}, + {as: AOR, a1: C_ADDCON, a2: C_REG, a6: C_REG, type_: 23, size: 8}, + {as: AOR, a1: C_LCON, a6: C_REG, type_: 23, size: 12}, + {as: AOR, a1: C_LCON, a2: C_REG, a6: C_REG, type_: 23, size: 12}, + {as: AORIS, a1: C_ANDCON, a6: C_REG, type_: 59, size: 4}, + {as: AORIS, a1: C_ANDCON, a2: C_REG, a6: C_REG, type_: 59, size: 4}, + {as: ADIVW, a1: C_REG, a2: C_REG, a6: C_REG, type_: 2, size: 4}, /* op r1[,r2],r3 */ + {as: ADIVW, a1: C_REG, a6: C_REG, type_: 2, size: 4}, + {as: ASUB, a1: C_REG, a2: C_REG, a6: C_REG, type_: 10, size: 4}, /* op r2[,r1],r3 */ + {as: ASUB, a1: C_REG, a6: C_REG, type_: 10, size: 4}, + {as: ASLW, a1: C_REG, a6: C_REG, type_: 6, size: 4}, + {as: ASLW, a1: C_REG, a2: C_REG, a6: C_REG, type_: 6, size: 4}, + {as: ASLD, a1: C_REG, a6: C_REG, type_: 6, size: 4}, + {as: ASLD, a1: C_REG, a2: C_REG, a6: C_REG, type_: 6, size: 4}, + {as: ASLD, a1: C_SCON, a2: C_REG, a6: C_REG, type_: 25, size: 4}, + {as: ASLD, a1: C_SCON, a6: C_REG, type_: 25, size: 4}, + {as: AEXTSWSLI, a1: C_SCON, a6: C_REG, type_: 25, size: 4}, + {as: AEXTSWSLI, a1: C_SCON, a2: C_REG, a6: C_REG, type_: 25, size: 4}, + {as: ASLW, a1: C_SCON, a2: C_REG, a6: C_REG, type_: 57, size: 4}, + {as: ASLW, a1: C_SCON, a6: C_REG, type_: 57, size: 4}, + {as: ASRAW, a1: C_REG, a6: C_REG, type_: 6, size: 4}, + {as: ASRAW, a1: C_REG, a2: C_REG, a6: C_REG, type_: 6, size: 4}, + {as: ASRAW, a1: C_SCON, a2: C_REG, a6: C_REG, type_: 56, size: 4}, + {as: ASRAW, a1: C_SCON, a6: C_REG, type_: 56, size: 4}, + {as: ASRAD, a1: C_REG, a6: C_REG, type_: 6, size: 4}, + {as: ASRAD, a1: C_REG, a2: C_REG, a6: C_REG, type_: 6, 
size: 4}, + {as: ASRAD, a1: C_SCON, a2: C_REG, a6: C_REG, type_: 56, size: 4}, + {as: ASRAD, a1: C_SCON, a6: C_REG, type_: 56, size: 4}, + {as: ARLWMI, a1: C_SCON, a2: C_REG, a3: C_LCON, a6: C_REG, type_: 62, size: 4}, + {as: ARLWMI, a1: C_SCON, a2: C_REG, a3: C_SCON, a4: C_SCON, a6: C_REG, type_: 102, size: 4}, + {as: ARLWMI, a1: C_REG, a2: C_REG, a3: C_LCON, a6: C_REG, type_: 63, size: 4}, + {as: ARLWMI, a1: C_REG, a2: C_REG, a3: C_SCON, a4: C_SCON, a6: C_REG, type_: 103, size: 4}, + {as: ACLRLSLWI, a1: C_SCON, a2: C_REG, a3: C_LCON, a6: C_REG, type_: 62, size: 4}, + {as: ARLDMI, a1: C_SCON, a2: C_REG, a3: C_LCON, a6: C_REG, type_: 30, size: 4}, + {as: ARLDC, a1: C_SCON, a2: C_REG, a3: C_LCON, a6: C_REG, type_: 29, size: 4}, + {as: ARLDCL, a1: C_SCON, a2: C_REG, a3: C_LCON, a6: C_REG, type_: 29, size: 4}, + {as: ARLDCL, a1: C_REG, a2: C_REG, a3: C_LCON, a6: C_REG, type_: 14, size: 4}, + {as: ARLDICL, a1: C_REG, a2: C_REG, a3: C_LCON, a6: C_REG, type_: 14, size: 4}, + {as: ARLDICL, a1: C_SCON, a2: C_REG, a3: C_LCON, a6: C_REG, type_: 14, size: 4}, + {as: ARLDCL, a1: C_REG, a3: C_LCON, a6: C_REG, type_: 14, size: 4}, + {as: AFADD, a1: C_FREG, a6: C_FREG, type_: 2, size: 4}, + {as: AFADD, a1: C_FREG, a2: C_FREG, a6: C_FREG, type_: 2, size: 4}, + {as: AFABS, a1: C_FREG, a6: C_FREG, type_: 33, size: 4}, + {as: AFABS, a6: C_FREG, type_: 33, size: 4}, + {as: AFMADD, a1: C_FREG, a2: C_FREG, a3: C_FREG, a6: C_FREG, type_: 34, size: 4}, + {as: AFMUL, a1: C_FREG, a6: C_FREG, type_: 32, size: 4}, + {as: AFMUL, a1: C_FREG, a2: C_FREG, a6: C_FREG, type_: 32, size: 4}, + + {as: AMOVBU, a1: C_REG, a6: C_SOREG, type_: 7, size: 4}, + {as: AMOVBU, a1: C_REG, a6: C_XOREG, type_: 108, size: 4}, + {as: AMOVBU, a1: C_SOREG, a6: C_REG, type_: 8, size: 8}, + {as: AMOVBU, a1: C_XOREG, a6: C_REG, type_: 109, size: 8}, + + {as: AMOVBZU, a1: C_REG, a6: C_SOREG, type_: 7, size: 4}, + {as: AMOVBZU, a1: C_REG, a6: C_XOREG, type_: 108, size: 4}, + {as: AMOVBZU, a1: C_SOREG, a6: C_REG, type_: 8, 
size: 4}, + {as: AMOVBZU, a1: C_XOREG, a6: C_REG, type_: 109, size: 4}, + + {as: AMOVHBR, a1: C_REG, a6: C_XOREG, type_: 44, size: 4}, + {as: AMOVHBR, a1: C_XOREG, a6: C_REG, type_: 45, size: 4}, + + {as: AMOVB, a1: C_ADDR, a6: C_REG, type_: 75, size: 12}, + {as: AMOVB, a1: C_LOREG, a6: C_REG, type_: 36, size: 12}, + {as: AMOVB, a1: C_SOREG, a6: C_REG, type_: 8, size: 8}, + {as: AMOVB, a1: C_XOREG, a6: C_REG, type_: 109, size: 8}, + {as: AMOVB, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, + {as: AMOVB, a1: C_REG, a6: C_SOREG, type_: 7, size: 4}, + {as: AMOVB, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, + {as: AMOVB, a1: C_REG, a6: C_XOREG, type_: 108, size: 4}, + {as: AMOVB, a1: C_REG, a6: C_REG, type_: 13, size: 4}, + + {as: AMOVBZ, a1: C_ADDR, a6: C_REG, type_: 75, size: 8}, + {as: AMOVBZ, a1: C_LOREG, a6: C_REG, type_: 36, size: 8}, + {as: AMOVBZ, a1: C_SOREG, a6: C_REG, type_: 8, size: 4}, + {as: AMOVBZ, a1: C_XOREG, a6: C_REG, type_: 109, size: 4}, + {as: AMOVBZ, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, + {as: AMOVBZ, a1: C_REG, a6: C_SOREG, type_: 7, size: 4}, + {as: AMOVBZ, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, + {as: AMOVBZ, a1: C_REG, a6: C_XOREG, type_: 108, size: 4}, + {as: AMOVBZ, a1: C_REG, a6: C_REG, type_: 13, size: 4}, + + {as: AMOVD, a1: C_ADDCON, a6: C_REG, type_: 3, size: 4}, + {as: AMOVD, a1: C_ANDCON, a6: C_REG, type_: 3, size: 4}, + {as: AMOVD, a1: C_UCON, a6: C_REG, type_: 3, size: 4}, + {as: AMOVD, a1: C_LCON, a6: C_REG, type_: 19, size: 8}, + {as: AMOVD, a1: C_SACON, a6: C_REG, type_: 3, size: 4}, + {as: AMOVD, a1: C_LACON, a6: C_REG, type_: 26, size: 8}, + {as: AMOVD, a1: C_ADDR, a6: C_REG, type_: 75, size: 8}, + {as: AMOVD, a1: C_SOREG, a6: C_REG, type_: 8, size: 4}, + {as: AMOVD, a1: C_XOREG, a6: C_REG, type_: 109, size: 4}, + {as: AMOVD, a1: C_SOREG, a6: C_SPR, type_: 107, size: 8}, + {as: AMOVD, a1: C_LOREG, a6: C_REG, type_: 36, size: 8}, + {as: AMOVD, a1: C_TLS_LE, a6: C_REG, type_: 79, size: 8}, + {as: AMOVD, a1: C_TLS_IE, 
a6: C_REG, type_: 80, size: 12}, + {as: AMOVD, a1: C_SPR, a6: C_REG, type_: 66, size: 4}, + {as: AMOVD, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, + {as: AMOVD, a1: C_REG, a6: C_SOREG, type_: 7, size: 4}, + {as: AMOVD, a1: C_REG, a6: C_XOREG, type_: 108, size: 4}, + {as: AMOVD, a1: C_SPR, a6: C_SOREG, type_: 106, size: 8}, + {as: AMOVD, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, + {as: AMOVD, a1: C_REG, a6: C_SPR, type_: 66, size: 4}, + {as: AMOVD, a1: C_REG, a6: C_REG, type_: 13, size: 4}, + + {as: AMOVW, a1: C_ADDCON, a6: C_REG, type_: 3, size: 4}, + {as: AMOVW, a1: C_ANDCON, a6: C_REG, type_: 3, size: 4}, + {as: AMOVW, a1: C_UCON, a6: C_REG, type_: 3, size: 4}, + {as: AMOVW, a1: C_LCON, a6: C_REG, type_: 19, size: 8}, + {as: AMOVW, a1: C_SACON, a6: C_REG, type_: 3, size: 4}, + {as: AMOVW, a1: C_LACON, a6: C_REG, type_: 26, size: 8}, + {as: AMOVW, a1: C_ADDR, a6: C_REG, type_: 75, size: 8}, + {as: AMOVW, a1: C_CREG, a6: C_REG, type_: 68, size: 4}, + {as: AMOVW, a1: C_SOREG, a6: C_REG, type_: 8, size: 4}, + {as: AMOVW, a1: C_LOREG, a6: C_REG, type_: 36, size: 8}, + {as: AMOVW, a1: C_XOREG, a6: C_REG, type_: 109, size: 4}, + {as: AMOVW, a1: C_SPR, a6: C_REG, type_: 66, size: 4}, + {as: AMOVW, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, + {as: AMOVW, a1: C_REG, a6: C_CREG, type_: 69, size: 4}, + {as: AMOVW, a1: C_REG, a6: C_SOREG, type_: 7, size: 4}, + {as: AMOVW, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, + {as: AMOVW, a1: C_REG, a6: C_XOREG, type_: 108, size: 4}, + {as: AMOVW, a1: C_REG, a6: C_SPR, type_: 66, size: 4}, + {as: AMOVW, a1: C_REG, a6: C_REG, type_: 13, size: 4}, + + {as: AFMOVD, a1: C_ADDCON, a6: C_FREG, type_: 24, size: 8}, + {as: AFMOVD, a1: C_SOREG, a6: C_FREG, type_: 8, size: 4}, + {as: AFMOVD, a1: C_XOREG, a6: C_FREG, type_: 109, size: 4}, + {as: AFMOVD, a1: C_LOREG, a6: C_FREG, type_: 36, size: 8}, + {as: AFMOVD, a1: C_ZCON, a6: C_FREG, type_: 24, size: 4}, + {as: AFMOVD, a1: C_ADDR, a6: C_FREG, type_: 75, size: 8}, + {as: AFMOVD, a1: 
C_FREG, a6: C_FREG, type_: 33, size: 4}, + {as: AFMOVD, a1: C_FREG, a6: C_SOREG, type_: 7, size: 4}, + {as: AFMOVD, a1: C_FREG, a6: C_XOREG, type_: 108, size: 4}, + {as: AFMOVD, a1: C_FREG, a6: C_LOREG, type_: 35, size: 8}, + {as: AFMOVD, a1: C_FREG, a6: C_ADDR, type_: 74, size: 8}, + + {as: AFMOVSX, a1: C_XOREG, a6: C_FREG, type_: 45, size: 4}, + {as: AFMOVSX, a1: C_FREG, a6: C_XOREG, type_: 44, size: 4}, + + {as: AFMOVSZ, a1: C_ZOREG, a6: C_FREG, type_: 45, size: 4}, + {as: AFMOVSZ, a1: C_XOREG, a6: C_FREG, type_: 45, size: 4}, + + {as: AMOVFL, a1: C_CREG, a6: C_CREG, type_: 67, size: 4}, + {as: AMOVFL, a1: C_FPSCR, a6: C_CREG, type_: 73, size: 4}, + {as: AMOVFL, a1: C_FPSCR, a6: C_FREG, type_: 53, size: 4}, + {as: AMOVFL, a1: C_FREG, a3: C_LCON, a6: C_FPSCR, type_: 64, size: 4}, + {as: AMOVFL, a1: C_FREG, a6: C_FPSCR, type_: 64, size: 4}, + {as: AMOVFL, a1: C_LCON, a6: C_FPSCR, type_: 65, size: 4}, + {as: AMOVFL, a1: C_REG, a6: C_CREG, type_: 69, size: 4}, + {as: AMOVFL, a1: C_REG, a6: C_LCON, type_: 69, size: 4}, + + {as: ASYSCALL, type_: 5, size: 4}, + {as: ASYSCALL, a1: C_REG, type_: 77, size: 12}, + {as: ASYSCALL, a1: C_SCON, type_: 77, size: 12}, + {as: ABEQ, a6: C_SBRA, type_: 16, size: 4}, + {as: ABEQ, a1: C_CREG, a6: C_SBRA, type_: 16, size: 4}, + {as: ABR, a6: C_LBRA, type_: 11, size: 4}, // b label + {as: ABR, a6: C_LBRAPIC, type_: 11, size: 8}, // b label; nop + {as: ABR, a6: C_LR, type_: 18, size: 4}, // blr + {as: ABR, a6: C_CTR, type_: 18, size: 4}, // bctr + {as: ABC, a1: C_SCON, a2: C_CRBIT, a6: C_SBRA, type_: 16, size: 4}, // bc bo, bi, label + {as: ABC, a1: C_SCON, a2: C_CRBIT, a6: C_LBRA, type_: 17, size: 4}, // bc bo, bi, label + {as: ABC, a1: C_SCON, a2: C_CRBIT, a6: C_LR, type_: 18, size: 4}, // bclr bo, bi + {as: ABC, a1: C_SCON, a2: C_CRBIT, a3: C_SCON, a6: C_LR, type_: 18, size: 4}, // bclr bo, bi, bh + {as: ABC, a1: C_SCON, a2: C_CRBIT, a6: C_CTR, type_: 18, size: 4}, // bcctr bo, bi + {as: ABDNZ, a6: C_SBRA, type_: 16, size: 4}, + {as: 
ASYNC, type_: 46, size: 4}, + {as: AWORD, a1: C_LCON, type_: 40, size: 4}, + {as: ADWORD, a1: C_64CON, type_: 31, size: 8}, + {as: ADWORD, a1: C_LACON, type_: 31, size: 8}, + {as: AADDME, a1: C_REG, a6: C_REG, type_: 47, size: 4}, + {as: AEXTSB, a1: C_REG, a6: C_REG, type_: 48, size: 4}, + {as: AEXTSB, a6: C_REG, type_: 48, size: 4}, + {as: AISEL, a1: C_U5CON, a2: C_REG, a3: C_REG, a6: C_REG, type_: 84, size: 4}, + {as: AISEL, a1: C_CRBIT, a2: C_REG, a3: C_REG, a6: C_REG, type_: 84, size: 4}, + {as: ANEG, a1: C_REG, a6: C_REG, type_: 47, size: 4}, + {as: ANEG, a6: C_REG, type_: 47, size: 4}, + {as: AREM, a1: C_REG, a6: C_REG, type_: 50, size: 12}, + {as: AREM, a1: C_REG, a2: C_REG, a6: C_REG, type_: 50, size: 12}, + {as: AREMU, a1: C_REG, a6: C_REG, type_: 50, size: 16}, + {as: AREMU, a1: C_REG, a2: C_REG, a6: C_REG, type_: 50, size: 16}, + {as: AREMD, a1: C_REG, a6: C_REG, type_: 51, size: 12}, + {as: AREMD, a1: C_REG, a2: C_REG, a6: C_REG, type_: 51, size: 12}, + {as: AMTFSB0, a1: C_SCON, type_: 52, size: 4}, + /* Other ISA 2.05+ instructions */ + {as: APOPCNTD, a1: C_REG, a6: C_REG, type_: 93, size: 4}, /* population count, x-form */ + {as: ACMPB, a1: C_REG, a2: C_REG, a6: C_REG, type_: 92, size: 4}, /* compare byte, x-form */ + {as: ACMPEQB, a1: C_REG, a2: C_REG, a6: C_CREG, type_: 92, size: 4}, /* compare equal byte, x-form, ISA 3.0 */ + {as: ACMPEQB, a1: C_REG, a6: C_REG, type_: 70, size: 4}, + {as: AFTDIV, a1: C_FREG, a2: C_FREG, a6: C_SCON, type_: 92, size: 4}, /* floating test for sw divide, x-form */ + {as: AFTSQRT, a1: C_FREG, a6: C_SCON, type_: 93, size: 4}, /* floating test for sw square root, x-form */ + {as: ACOPY, a1: C_REG, a6: C_REG, type_: 92, size: 4}, /* copy/paste facility, x-form */ + {as: ADARN, a1: C_SCON, a6: C_REG, type_: 92, size: 4}, /* deliver random number, x-form */ + {as: AMADDHD, a1: C_REG, a2: C_REG, a3: C_REG, a6: C_REG, type_: 83, size: 4}, /* multiply-add high/low doubleword, va-form */ + {as: AADDEX, a1: C_REG, a2: C_REG, a3: 
C_SCON, a6: C_REG, type_: 94, size: 4}, /* add extended using alternate carry, z23-form */ + {as: ACRAND, a1: C_CRBIT, a2: C_CRBIT, a6: C_CRBIT, type_: 2, size: 4}, /* logical ops for condition register bits xl-form */ + + /* Vector instructions */ + + /* Vector load */ + {as: ALVEBX, a1: C_XOREG, a6: C_VREG, type_: 45, size: 4}, /* vector load, x-form */ + + /* Vector store */ + {as: ASTVEBX, a1: C_VREG, a6: C_XOREG, type_: 44, size: 4}, /* vector store, x-form */ + + /* Vector logical */ + {as: AVAND, a1: C_VREG, a2: C_VREG, a6: C_VREG, type_: 82, size: 4}, /* vector and, vx-form */ + {as: AVOR, a1: C_VREG, a2: C_VREG, a6: C_VREG, type_: 82, size: 4}, /* vector or, vx-form */ + + /* Vector add */ + {as: AVADDUM, a1: C_VREG, a2: C_VREG, a6: C_VREG, type_: 82, size: 4}, /* vector add unsigned modulo, vx-form */ + {as: AVADDCU, a1: C_VREG, a2: C_VREG, a6: C_VREG, type_: 82, size: 4}, /* vector add & write carry unsigned, vx-form */ + {as: AVADDUS, a1: C_VREG, a2: C_VREG, a6: C_VREG, type_: 82, size: 4}, /* vector add unsigned saturate, vx-form */ + {as: AVADDSS, a1: C_VREG, a2: C_VREG, a6: C_VREG, type_: 82, size: 4}, /* vector add signed saturate, vx-form */ + {as: AVADDE, a1: C_VREG, a2: C_VREG, a3: C_VREG, a6: C_VREG, type_: 83, size: 4}, /* vector add extended, va-form */ + + /* Vector subtract */ + {as: AVSUBUM, a1: C_VREG, a2: C_VREG, a6: C_VREG, type_: 82, size: 4}, /* vector subtract unsigned modulo, vx-form */ + {as: AVSUBCU, a1: C_VREG, a2: C_VREG, a6: C_VREG, type_: 82, size: 4}, /* vector subtract & write carry unsigned, vx-form */ + {as: AVSUBUS, a1: C_VREG, a2: C_VREG, a6: C_VREG, type_: 82, size: 4}, /* vector subtract unsigned saturate, vx-form */ + {as: AVSUBSS, a1: C_VREG, a2: C_VREG, a6: C_VREG, type_: 82, size: 4}, /* vector subtract signed saturate, vx-form */ + {as: AVSUBE, a1: C_VREG, a2: C_VREG, a3: C_VREG, a6: C_VREG, type_: 83, size: 4}, /* vector subtract extended, va-form */ + + /* Vector multiply */ + {as: AVMULESB, a1: C_VREG, a2: 
C_VREG, a6: C_VREG, type_: 82, size: 4}, /* vector multiply, vx-form */ + {as: AVPMSUM, a1: C_VREG, a2: C_VREG, a6: C_VREG, type_: 82, size: 4}, /* vector polynomial multiply & sum, vx-form */ + {as: AVMSUMUDM, a1: C_VREG, a2: C_VREG, a3: C_VREG, a6: C_VREG, type_: 83, size: 4}, /* vector multiply-sum, va-form */ + + /* Vector rotate */ + {as: AVR, a1: C_VREG, a2: C_VREG, a6: C_VREG, type_: 82, size: 4}, /* vector rotate, vx-form */ + + /* Vector shift */ + {as: AVS, a1: C_VREG, a2: C_VREG, a6: C_VREG, type_: 82, size: 4}, /* vector shift, vx-form */ + {as: AVSA, a1: C_VREG, a2: C_VREG, a6: C_VREG, type_: 82, size: 4}, /* vector shift algebraic, vx-form */ + {as: AVSOI, a1: C_ANDCON, a2: C_VREG, a3: C_VREG, a6: C_VREG, type_: 83, size: 4}, /* vector shift by octet immediate, va-form */ + + /* Vector count */ + {as: AVCLZ, a1: C_VREG, a6: C_VREG, type_: 85, size: 4}, /* vector count leading zeros, vx-form */ + {as: AVPOPCNT, a1: C_VREG, a6: C_VREG, type_: 85, size: 4}, /* vector population count, vx-form */ + + /* Vector compare */ + {as: AVCMPEQ, a1: C_VREG, a2: C_VREG, a6: C_VREG, type_: 82, size: 4}, /* vector compare equal, vc-form */ + {as: AVCMPGT, a1: C_VREG, a2: C_VREG, a6: C_VREG, type_: 82, size: 4}, /* vector compare greater than, vc-form */ + {as: AVCMPNEZB, a1: C_VREG, a2: C_VREG, a6: C_VREG, type_: 82, size: 4}, /* vector compare not equal, vx-form */ + + /* Vector merge */ + {as: AVMRGOW, a1: C_VREG, a2: C_VREG, a6: C_VREG, type_: 82, size: 4}, /* vector merge odd word, vx-form */ + + /* Vector permute */ + {as: AVPERM, a1: C_VREG, a2: C_VREG, a3: C_VREG, a6: C_VREG, type_: 83, size: 4}, /* vector permute, va-form */ + + /* Vector bit permute */ + {as: AVBPERMQ, a1: C_VREG, a2: C_VREG, a6: C_VREG, type_: 82, size: 4}, /* vector bit permute, vx-form */ + + /* Vector select */ + {as: AVSEL, a1: C_VREG, a2: C_VREG, a3: C_VREG, a6: C_VREG, type_: 83, size: 4}, /* vector select, va-form */ + + /* Vector splat */ + {as: AVSPLTB, a1: C_SCON, a2: C_VREG, a6: 
C_VREG, type_: 82, size: 4}, /* vector splat, vx-form */ + {as: AVSPLTB, a1: C_ADDCON, a2: C_VREG, a6: C_VREG, type_: 82, size: 4}, + {as: AVSPLTISB, a1: C_SCON, a6: C_VREG, type_: 82, size: 4}, /* vector splat immediate, vx-form */ + {as: AVSPLTISB, a1: C_ADDCON, a6: C_VREG, type_: 82, size: 4}, + + /* Vector AES */ + {as: AVCIPH, a1: C_VREG, a2: C_VREG, a6: C_VREG, type_: 82, size: 4}, /* vector AES cipher, vx-form */ + {as: AVNCIPH, a1: C_VREG, a2: C_VREG, a6: C_VREG, type_: 82, size: 4}, /* vector AES inverse cipher, vx-form */ + {as: AVSBOX, a1: C_VREG, a6: C_VREG, type_: 82, size: 4}, /* vector AES subbytes, vx-form */ + + /* Vector SHA */ + {as: AVSHASIGMA, a1: C_ANDCON, a2: C_VREG, a3: C_ANDCON, a6: C_VREG, type_: 82, size: 4}, /* vector SHA sigma, vx-form */ + + /* VSX vector load */ + {as: ALXVD2X, a1: C_XOREG, a6: C_VSREG, type_: 87, size: 4}, /* vsx vector load, xx1-form */ + {as: ALXV, a1: C_SOREG, a6: C_VSREG, type_: 96, size: 4}, /* vsx vector load, dq-form */ + {as: ALXVL, a1: C_REG, a2: C_REG, a6: C_VSREG, type_: 98, size: 4}, /* vsx vector load length */ + + /* VSX vector store */ + {as: ASTXVD2X, a1: C_VSREG, a6: C_XOREG, type_: 86, size: 4}, /* vsx vector store, xx1-form */ + {as: ASTXV, a1: C_VSREG, a6: C_SOREG, type_: 97, size: 4}, /* vsx vector store, dq-form */ + {as: ASTXVL, a1: C_VSREG, a2: C_REG, a6: C_REG, type_: 99, size: 4}, /* vsx vector store with length x-form */ + + /* VSX scalar load */ + {as: ALXSDX, a1: C_XOREG, a6: C_VSREG, type_: 87, size: 4}, /* vsx scalar load, xx1-form */ + + /* VSX scalar store */ + {as: ASTXSDX, a1: C_VSREG, a6: C_XOREG, type_: 86, size: 4}, /* vsx scalar store, xx1-form */ + + /* VSX scalar as integer load */ + {as: ALXSIWAX, a1: C_XOREG, a6: C_VSREG, type_: 87, size: 4}, /* vsx scalar as integer load, xx1-form */ + + /* VSX scalar store as integer */ + {as: ASTXSIWX, a1: C_VSREG, a6: C_XOREG, type_: 86, size: 4}, /* vsx scalar as integer store, xx1-form */ + + /* VSX move from VSR */ + {as: AMFVSRD, a1: 
C_VSREG, a6: C_REG, type_: 88, size: 4}, + {as: AMFVSRD, a1: C_FREG, a6: C_REG, type_: 88, size: 4}, + + /* VSX move to VSR */ + {as: AMTVSRD, a1: C_REG, a6: C_VSREG, type_: 104, size: 4}, + {as: AMTVSRD, a1: C_REG, a6: C_FREG, type_: 104, size: 4}, + {as: AMTVSRDD, a1: C_REG, a2: C_REG, a6: C_VSREG, type_: 104, size: 4}, + + /* VSX logical */ + {as: AXXLAND, a1: C_VSREG, a2: C_VSREG, a6: C_VSREG, type_: 90, size: 4}, /* vsx and, xx3-form */ + {as: AXXLOR, a1: C_VSREG, a2: C_VSREG, a6: C_VSREG, type_: 90, size: 4}, /* vsx or, xx3-form */ + + /* VSX select */ + {as: AXXSEL, a1: C_VSREG, a2: C_VSREG, a3: C_VSREG, a6: C_VSREG, type_: 91, size: 4}, /* vsx select, xx4-form */ + + /* VSX merge */ + {as: AXXMRGHW, a1: C_VSREG, a2: C_VSREG, a6: C_VSREG, type_: 90, size: 4}, /* vsx merge, xx3-form */ + + /* VSX splat */ + {as: AXXSPLTW, a1: C_VSREG, a3: C_SCON, a6: C_VSREG, type_: 89, size: 4}, /* vsx splat, xx2-form */ + {as: AXXSPLTIB, a1: C_SCON, a6: C_VSREG, type_: 100, size: 4}, /* vsx splat, xx2-form */ + + /* VSX permute */ + {as: AXXPERM, a1: C_VSREG, a2: C_VSREG, a6: C_VSREG, type_: 90, size: 4}, /* vsx permute, xx3-form */ + + /* VSX shift */ + {as: AXXSLDWI, a1: C_VSREG, a2: C_VSREG, a3: C_SCON, a6: C_VSREG, type_: 90, size: 4}, /* vsx shift immediate, xx3-form */ + + /* VSX reverse bytes */ + {as: AXXBRQ, a1: C_VSREG, a6: C_VSREG, type_: 101, size: 4}, /* vsx reverse bytes */ + + /* VSX scalar FP-FP conversion */ + {as: AXSCVDPSP, a1: C_VSREG, a6: C_VSREG, type_: 89, size: 4}, /* vsx scalar fp-fp conversion, xx2-form */ + + /* VSX vector FP-FP conversion */ + {as: AXVCVDPSP, a1: C_VSREG, a6: C_VSREG, type_: 89, size: 4}, /* vsx vector fp-fp conversion, xx2-form */ + + /* VSX scalar FP-integer conversion */ + {as: AXSCVDPSXDS, a1: C_VSREG, a6: C_VSREG, type_: 89, size: 4}, /* vsx scalar fp-integer conversion, xx2-form */ + + /* VSX scalar integer-FP conversion */ + {as: AXSCVSXDDP, a1: C_VSREG, a6: C_VSREG, type_: 89, size: 4}, /* vsx scalar integer-fp 
conversion, xx2-form */ + + /* VSX vector FP-integer conversion */ + {as: AXVCVDPSXDS, a1: C_VSREG, a6: C_VSREG, type_: 89, size: 4}, /* vsx vector fp-integer conversion, xx2-form */ + + /* VSX vector integer-FP conversion */ + {as: AXVCVSXDDP, a1: C_VSREG, a6: C_VSREG, type_: 89, size: 4}, /* vsx vector integer-fp conversion, xx2-form */ + + {as: ACMP, a1: C_REG, a6: C_REG, type_: 70, size: 4}, + {as: ACMP, a1: C_REG, a2: C_CREG, a6: C_REG, type_: 70, size: 4}, + {as: ACMP, a1: C_REG, a6: C_ADDCON, type_: 71, size: 4}, + {as: ACMP, a1: C_REG, a2: C_CREG, a6: C_ADDCON, type_: 71, size: 4}, + {as: ACMPU, a1: C_REG, a6: C_REG, type_: 70, size: 4}, + {as: ACMPU, a1: C_REG, a2: C_CREG, a6: C_REG, type_: 70, size: 4}, + {as: ACMPU, a1: C_REG, a6: C_ANDCON, type_: 71, size: 4}, + {as: ACMPU, a1: C_REG, a2: C_CREG, a6: C_ANDCON, type_: 71, size: 4}, + {as: AFCMPO, a1: C_FREG, a6: C_FREG, type_: 70, size: 4}, + {as: AFCMPO, a1: C_FREG, a2: C_CREG, a6: C_FREG, type_: 70, size: 4}, + {as: ATW, a1: C_LCON, a2: C_REG, a6: C_REG, type_: 60, size: 4}, + {as: ATW, a1: C_LCON, a2: C_REG, a6: C_ADDCON, type_: 61, size: 4}, + {as: ADCBF, a1: C_SOREG, type_: 43, size: 4}, + {as: ADCBF, a1: C_XOREG, type_: 43, size: 4}, + {as: ADCBF, a1: C_XOREG, a2: C_REG, a6: C_SCON, type_: 43, size: 4}, + {as: ADCBF, a1: C_SOREG, a6: C_SCON, type_: 43, size: 4}, + {as: ADCBF, a1: C_XOREG, a6: C_SCON, type_: 43, size: 4}, + {as: ASTDCCC, a1: C_REG, a2: C_REG, a6: C_XOREG, type_: 44, size: 4}, + {as: ASTDCCC, a1: C_REG, a6: C_XOREG, type_: 44, size: 4}, + {as: ALDAR, a1: C_XOREG, a6: C_REG, type_: 45, size: 4}, + {as: ALDAR, a1: C_XOREG, a3: C_ANDCON, a6: C_REG, type_: 45, size: 4}, + {as: AEIEIO, type_: 46, size: 4}, + {as: ATLBIE, a1: C_REG, type_: 49, size: 4}, + {as: ATLBIE, a1: C_SCON, a6: C_REG, type_: 49, size: 4}, + {as: ASLBMFEE, a1: C_REG, a6: C_REG, type_: 55, size: 4}, + {as: ASLBMTE, a1: C_REG, a6: C_REG, type_: 55, size: 4}, + {as: ASTSW, a1: C_REG, a6: C_XOREG, type_: 44, size: 4}, + 
{as: ASTSW, a1: C_REG, a3: C_LCON, a6: C_ZOREG, type_: 41, size: 4}, + {as: ALSW, a1: C_XOREG, a6: C_REG, type_: 45, size: 4}, + {as: ALSW, a1: C_ZOREG, a3: C_LCON, a6: C_REG, type_: 42, size: 4}, + + {as: obj.AUNDEF, type_: 78, size: 4}, + {as: obj.APCDATA, a1: C_LCON, a6: C_LCON, type_: 0, size: 0}, + {as: obj.AFUNCDATA, a1: C_SCON, a6: C_ADDR, type_: 0, size: 0}, + {as: obj.ANOP, type_: 0, size: 0}, + {as: obj.ANOP, a1: C_LCON, type_: 0, size: 0}, // NOP operand variations added for #40689 + {as: obj.ANOP, a1: C_REG, type_: 0, size: 0}, // to preserve previous behavior + {as: obj.ANOP, a1: C_FREG, type_: 0, size: 0}, + {as: obj.ADUFFZERO, a6: C_LBRA, type_: 11, size: 4}, // same as ABR/ABL + {as: obj.ADUFFCOPY, a6: C_LBRA, type_: 11, size: 4}, // same as ABR/ABL + {as: obj.APCALIGN, a1: C_LCON, type_: 0, size: 0}, // align code +} + +var oprange [ALAST & obj.AMask][]Optab + +var xcmp [C_NCLASS][C_NCLASS]bool + +// padding bytes to add to align code as requested. +func addpad(pc, a int64, ctxt *obj.Link, cursym *obj.LSym) int { + // For 16 and 32 byte alignment, there is a tradeoff + // between aligning the code and adding too many NOPs. + switch a { + case 8: + if pc&7 != 0 { + return 4 + } + case 16: + // Align to 16 bytes if possible but add at + // most 2 NOPs. + switch pc & 15 { + case 4, 12: + return 4 + case 8: + return 8 + } + case 32: + // Align to 32 bytes if possible but add at + // most 3 NOPs. + switch pc & 31 { + case 4, 20: + return 12 + case 8, 24: + return 8 + case 12, 28: + return 4 + } + // When 32 byte alignment is requested on Linux, + // promote the function's alignment to 32. On AIX + // the function alignment is not changed which might + // result in 16 byte alignment but that is still fine. 
+ // TODO: alignment on AIX + if ctxt.Headtype != objabi.Haix && cursym.Func().Align < 32 { + cursym.Func().Align = 32 + } + default: + ctxt.Diag("Unexpected alignment: %d for PCALIGN directive\n", a) + } + return 0 +} + +// Get the implied register of a operand which doesn't specify one. These show up +// in handwritten asm like "MOVD R5, foosymbol" where a base register is not supplied, +// or "MOVD R5, foo+10(SP) or pseudo-register is used. The other common case is when +// generating constants in register like "MOVD $constant, Rx". +func (c *ctxt9) getimpliedreg(a *obj.Addr, p *obj.Prog) int { + class := oclass(a) + if class >= C_ZCON && class <= C_64CON { + return REGZERO + } + switch class { + case C_SACON, C_LACON: + return REGSP + case C_LOREG, C_SOREG, C_ZOREG, C_XOREG: + switch a.Name { + case obj.NAME_EXTERN, obj.NAME_STATIC: + return REGSB + case obj.NAME_AUTO, obj.NAME_PARAM: + return REGSP + case obj.NAME_NONE: + return REGZERO + } + } + c.ctxt.Diag("failed to determine implied reg for class %v (%v)", DRconv(oclass(a)), p) + return 0 +} + +func span9(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { + p := cursym.Func().Text + if p == nil || p.Link == nil { // handle external functions and ELF section symbols + return + } + + if oprange[AANDN&obj.AMask] == nil { + ctxt.Diag("ppc64 ops not initialized, call ppc64.buildop first") + } + + c := ctxt9{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset)} + + pc := int64(0) + p.Pc = pc + + var m int + var o *Optab + for p = p.Link; p != nil; p = p.Link { + p.Pc = pc + o = c.oplook(p) + m = int(o.size) + if m == 0 { + if p.As == obj.APCALIGN { + a := c.vregoff(&p.From) + m = addpad(pc, a, ctxt, cursym) + } else { + if p.As != obj.ANOP && p.As != obj.AFUNCDATA && p.As != obj.APCDATA { + ctxt.Diag("zero-width instruction\n%v", p) + } + continue + } + } + pc += int64(m) + } + + c.cursym.Size = pc + + /* + * if any procedure is large enough to + * generate a large SBRA branch, then 
+ * generate extra passes putting branches + * around jmps to fix. this is rare. + */ + bflag := 1 + + var otxt int64 + var q *obj.Prog + var out [5]uint32 + var falign int32 // Track increased alignment requirements for prefix. + for bflag != 0 { + bflag = 0 + pc = 0 + falign = 0 // Note, linker bumps function symbols to funcAlign. + for p = c.cursym.Func().Text.Link; p != nil; p = p.Link { + p.Pc = pc + o = c.oplook(p) + + // very large conditional branches + if (o.type_ == 16 || o.type_ == 17) && p.To.Target() != nil { + otxt = p.To.Target().Pc - pc + if otxt < -(1<<15)+10 || otxt >= (1<<15)-10 { + // Assemble the instruction with a target not too far to figure out BI and BO fields. + // If only the CTR or BI (the CR bit) are tested, the conditional branch can be inverted, + // and only one extra branch is needed to reach the target. + tgt := p.To.Target() + p.To.SetTarget(p.Link) + o.asmout(&c, p, o, &out) + p.To.SetTarget(tgt) + + bo := int64(out[0]>>21) & 31 + bi := int16((out[0] >> 16) & 31) + invertible := false + + if bo&0x14 == 0x14 { + // A conditional branch that is unconditionally taken. This cannot be inverted. + } else if bo&0x10 == 0x10 { + // A branch based on the value of CTR. Invert the CTR comparison against zero bit. + bo ^= 0x2 + invertible = true + } else if bo&0x04 == 0x04 { + // A branch based on CR bit. Invert the BI comparison bit. 
+ bo ^= 0x8 + invertible = true + } + + if invertible { + // Rewrite + // BC bo,...,far_away_target + // NEXT_INSN + // to: + // BC invert(bo),next_insn + // JMP far_away_target + // next_insn: + // NEXT_INSN + p.As = ABC + p.From = obj.Addr{Type: obj.TYPE_CONST, Name: obj.NAME_NONE, Offset: bo} + q = c.newprog() + q.As = ABR + q.To.Type = obj.TYPE_BRANCH + q.To.SetTarget(p.To.Target()) + q.Link = p.Link + p.To.SetTarget(p.Link) + p.Link = q + p.Reg = REG_CRBIT0 + bi + } else { + // Rewrite + // BC ...,far_away_target + // NEXT_INSN + // to + // BC ...,tmp + // JMP next_insn + // tmp: + // JMP far_away_target + // next_insn: + // NEXT_INSN + q = c.newprog() + q.Link = p.Link + p.Link = q + q.As = ABR + q.To.Type = obj.TYPE_BRANCH + q.To.SetTarget(p.To.Target()) + p.To.SetTarget(q) + q = c.newprog() + q.Link = p.Link + p.Link = q + q.As = ABR + q.To.Type = obj.TYPE_BRANCH + q.To.SetTarget(q.Link.Link) + } + bflag = 1 + } + } + + m = int(o.size) + if m == 0 { + if p.As == obj.APCALIGN { + a := c.vregoff(&p.From) + m = addpad(pc, a, ctxt, cursym) + } else { + if p.As != obj.ANOP && p.As != obj.AFUNCDATA && p.As != obj.APCDATA { + ctxt.Diag("zero-width instruction\n%v", p) + } + continue + } + } + + // Prefixed instructions cannot be placed across a 64B boundary. + // Mark and adjust the PC of those which do. A nop will be + // inserted during final assembly. + if o.ispfx { + mark := p.Mark &^ PFX_X64B + if pc&63 == 60 { + p.Pc += 4 + m += 4 + mark |= PFX_X64B + } + + // Marks may be adjusted if a too-far conditional branch is + // fixed up above. Likewise, inserting a NOP may cause a + // branch target to become too far away. We need to run + // another iteration and verify no additional changes + // are needed. + if mark != p.Mark { + bflag = 1 + p.Mark = mark + } + + // Check for 16 or 32B crossing of this prefixed insn. 
// isint32 reports whether v can be represented as an int32 without loss.
func isint32(v int64) bool {
	const (
		lo = -1 << 31
		hi = 1<<31 - 1
	)
	return lo <= v && v <= hi
}

// isuint32 reports whether v can be represented as a uint32 without loss,
// i.e. whether its upper 32 bits are all zero.
func isuint32(v uint64) bool {
	return v>>32 == 0
}
REG_SPR0 <= reg && reg <= REG_SPR0+1023 { + switch reg { + case REG_LR: + return C_LR + + case REG_XER: + return C_XER + + case REG_CTR: + return C_CTR + } + + return C_SPR + } + if REG_A0 <= reg && reg <= REG_A7 { + return C_AREG + } + if reg == REG_FPSCR { + return C_FPSCR + } + return C_GOK +} + +func (c *ctxt9) aclass(a *obj.Addr) int { + switch a.Type { + case obj.TYPE_NONE: + return C_NONE + + case obj.TYPE_REG: + return c.aclassreg(a.Reg) + + case obj.TYPE_MEM: + if a.Index != 0 { + if a.Name != obj.NAME_NONE || a.Offset != 0 { + c.ctxt.Logf("Unexpected Instruction operand index %d offset %d class %d \n", a.Index, a.Offset, a.Class) + + } + return C_XOREG + } + switch a.Name { + case obj.NAME_GOTREF, obj.NAME_TOCREF: + return C_ADDR + + case obj.NAME_EXTERN, + obj.NAME_STATIC: + c.instoffset = a.Offset + if a.Sym == nil { + break + } else if a.Sym.Type == objabi.STLSBSS { + // For PIC builds, use 12 byte got initial-exec TLS accesses. + if c.ctxt.Flag_shared { + return C_TLS_IE + } + // Otherwise, use 8 byte local-exec TLS accesses. + return C_TLS_LE + } else { + return C_ADDR + } + + case obj.NAME_AUTO: + c.instoffset = int64(c.autosize) + a.Offset + + if c.instoffset >= -BIG && c.instoffset < BIG { + return C_SOREG + } + return C_LOREG + + case obj.NAME_PARAM: + c.instoffset = int64(c.autosize) + a.Offset + c.ctxt.Arch.FixedFrameSize + if c.instoffset >= -BIG && c.instoffset < BIG { + return C_SOREG + } + return C_LOREG + + case obj.NAME_NONE: + c.instoffset = a.Offset + if a.Offset == 0 && a.Index == 0 { + return C_ZOREG + } else if c.instoffset >= -BIG && c.instoffset < BIG { + return C_SOREG + } else { + return C_LOREG + } + } + + return C_GOK + + case obj.TYPE_TEXTSIZE: + return C_TEXTSIZE + + case obj.TYPE_FCONST: + // The only cases where FCONST will occur are with float64 +/- 0. + // All other float constants are generated in memory. 
+ f64 := a.Val.(float64) + if f64 == 0 { + if math.Signbit(f64) { + return C_ADDCON + } + return C_ZCON + } + log.Fatalf("Unexpected nonzero FCONST operand %v", a) + + case obj.TYPE_CONST, + obj.TYPE_ADDR: + switch a.Name { + case obj.NAME_NONE: + c.instoffset = a.Offset + if a.Reg != 0 { + if -BIG <= c.instoffset && c.instoffset < BIG { + return C_SACON + } + if isint32(c.instoffset) { + return C_LACON + } + return C_DACON + } + + case obj.NAME_EXTERN, + obj.NAME_STATIC: + s := a.Sym + if s == nil { + return C_GOK + } + c.instoffset = a.Offset + return C_LACON + + case obj.NAME_AUTO: + c.instoffset = int64(c.autosize) + a.Offset + if c.instoffset >= -BIG && c.instoffset < BIG { + return C_SACON + } + return C_LACON + + case obj.NAME_PARAM: + c.instoffset = int64(c.autosize) + a.Offset + c.ctxt.Arch.FixedFrameSize + if c.instoffset >= -BIG && c.instoffset < BIG { + return C_SACON + } + return C_LACON + + default: + return C_GOK + } + + if c.instoffset >= 0 { + sbits := bits.Len64(uint64(c.instoffset)) + switch { + case sbits <= 5: + return C_ZCON + sbits + case sbits <= 8: + return C_U8CON + case sbits <= 15: + return C_U15CON + case sbits <= 16: + return C_U16CON + case sbits <= 31: + // Special case, a positive int32 value which is a multiple of 2^16 + if c.instoffset&0xFFFF == 0 { + return C_U3216CON + } + return C_U32CON + case sbits <= 32: + return C_U32CON + case sbits <= 33: + return C_S34CON + default: + return C_64CON + } + } else { + sbits := bits.Len64(uint64(^c.instoffset)) + switch { + case sbits <= 15: + return C_S16CON + case sbits <= 31: + // Special case, a negative int32 value which is a multiple of 2^16 + if c.instoffset&0xFFFF == 0 { + return C_S3216CON + } + return C_S32CON + case sbits <= 33: + return C_S34CON + default: + return C_64CON + } + } + + case obj.TYPE_BRANCH: + if a.Sym != nil && c.ctxt.Flag_dynlink { + return C_LBRAPIC + } + return C_SBRA + } + + return C_GOK +} + +func prasm(p *obj.Prog) { + fmt.Printf("%v\n", p) +} + +func (c 
*ctxt9) oplook(p *obj.Prog) *Optab { + a1 := int(p.Optab) + if a1 != 0 { + return &optab[a1-1] + } + a1 = int(p.From.Class) + if a1 == 0 { + a1 = c.aclass(&p.From) + 1 + p.From.Class = int8(a1) + } + a1-- + + argsv := [3]int{C_NONE + 1, C_NONE + 1, C_NONE + 1} + for i, ap := range p.RestArgs { + argsv[i] = int(ap.Addr.Class) + if argsv[i] == 0 { + argsv[i] = c.aclass(&ap.Addr) + 1 + ap.Addr.Class = int8(argsv[i]) + } + + } + a3 := argsv[0] - 1 + a4 := argsv[1] - 1 + a5 := argsv[2] - 1 + + a6 := int(p.To.Class) + if a6 == 0 { + a6 = c.aclass(&p.To) + 1 + p.To.Class = int8(a6) + } + a6-- + + a2 := C_NONE + if p.Reg != 0 { + a2 = c.aclassreg(p.Reg) + } + + // c.ctxt.Logf("oplook %v %d %d %d %d\n", p, a1, a2, a3, a4, a5, a6) + ops := oprange[p.As&obj.AMask] + c1 := &xcmp[a1] + c2 := &xcmp[a2] + c3 := &xcmp[a3] + c4 := &xcmp[a4] + c5 := &xcmp[a5] + c6 := &xcmp[a6] + for i := range ops { + op := &ops[i] + if c1[op.a1] && c2[op.a2] && c3[op.a3] && c4[op.a4] && c5[op.a5] && c6[op.a6] { + p.Optab = uint16(cap(optab) - cap(ops) + i + 1) + return op + } + } + + c.ctxt.Diag("illegal combination %v %v %v %v %v %v %v", p.As, DRconv(a1), DRconv(a2), DRconv(a3), DRconv(a4), DRconv(a5), DRconv(a6)) + prasm(p) + if ops == nil { + ops = optab + } + return &ops[0] +} + +// Compare two operand types (ex C_REG, or C_SCON) +// and return true if b is compatible with a. +// +// Argument comparison isn't reflexitive, so care must be taken. +// a is the argument type as found in optab, b is the argument as +// fitted by aclass. 
+func cmp(a int, b int) bool { + if a == b { + return true + } + switch a { + + case C_SPR: + if b == C_LR || b == C_XER || b == C_CTR { + return true + } + + case C_U1CON: + return cmp(C_ZCON, b) + case C_U2CON: + return cmp(C_U1CON, b) + case C_U3CON: + return cmp(C_U2CON, b) + case C_U4CON: + return cmp(C_U3CON, b) + case C_U5CON: + return cmp(C_U4CON, b) + case C_U8CON: + return cmp(C_U5CON, b) + case C_U15CON: + return cmp(C_U8CON, b) + case C_U16CON: + return cmp(C_U15CON, b) + + case C_S16CON: + return cmp(C_U15CON, b) + case C_32CON: + return cmp(C_S16CON, b) || cmp(C_U16CON, b) || cmp(C_32S16CON, b) + case C_S34CON: + return cmp(C_32CON, b) + case C_64CON: + return cmp(C_S34CON, b) + + case C_32S16CON: + return cmp(C_ZCON, b) + + case C_LACON: + return cmp(C_SACON, b) + + case C_LBRA: + return cmp(C_SBRA, b) + + case C_SOREG: + return cmp(C_ZOREG, b) + + case C_LOREG: + return cmp(C_SOREG, b) + + case C_XOREG: + return cmp(C_REG, b) || cmp(C_ZOREG, b) + + // An even/odd register input always matches the regular register types. + case C_REG: + return cmp(C_REGP, b) || (b == C_ZCON && r0iszero != 0) + case C_FREG: + return cmp(C_FREGP, b) + case C_VSREG: + /* Allow any VR argument as a VSR operand. */ + return cmp(C_VSREGP, b) || cmp(C_VREG, b) + + case C_ANY: + return true + } + + return false +} + +// Used when sorting the optab. Sorting is +// done in a way so that the best choice of +// opcode/operand combination is considered first. +func optabLess(i, j int) bool { + p1 := &optab[i] + p2 := &optab[j] + n := int(p1.as) - int(p2.as) + // same opcode + if n != 0 { + return n < 0 + } + // Consider those that generate fewer + // instructions first. 
+ n = int(p1.size) - int(p2.size) + if n != 0 { + return n < 0 + } + // operand order should match + // better choices first + n = int(p1.a1) - int(p2.a1) + if n != 0 { + return n < 0 + } + n = int(p1.a2) - int(p2.a2) + if n != 0 { + return n < 0 + } + n = int(p1.a3) - int(p2.a3) + if n != 0 { + return n < 0 + } + n = int(p1.a4) - int(p2.a4) + if n != 0 { + return n < 0 + } + n = int(p1.a5) - int(p2.a5) + if n != 0 { + return n < 0 + } + n = int(p1.a6) - int(p2.a6) + if n != 0 { + return n < 0 + } + return false +} + +// Add an entry to the opcode table for +// a new opcode b0 with the same operand combinations +// as opcode a. +func opset(a, b0 obj.As) { + oprange[a&obj.AMask] = oprange[b0] +} + +// Build the opcode table +func buildop(ctxt *obj.Link) { + if oprange[AANDN&obj.AMask] != nil { + // Already initialized; stop now. + // This happens in the cmd/asm tests, + // each of which re-initializes the arch. + return + } + + for i := 0; i < C_NCLASS; i++ { + for n := 0; n < C_NCLASS; n++ { + if cmp(n, i) { + xcmp[i][n] = true + } + } + } + for i := range optab { + // Use the legacy assembler function if none provided. + if optab[i].asmout == nil { + optab[i].asmout = asmout + } + } + // Append the generated entries, sort, and fill out oprange. + optab = append(optab, optabGen...) 
+ sort.Slice(optab, optabLess) + for i := 0; i < len(optab); { + r := optab[i].as + r0 := r & obj.AMask + start := i + for i < len(optab) && optab[i].as == r { + i++ + } + oprange[r0] = optab[start:i] + + switch r { + default: + if !opsetGen(r) { + ctxt.Diag("unknown op in build: %v", r) + log.Fatalf("instruction missing from switch in asm9.go:buildop: %v", r) + } + + case ADCBF: /* unary indexed: op (b+a); op (b) */ + opset(ADCBI, r0) + + opset(ADCBST, r0) + opset(ADCBT, r0) + opset(ADCBTST, r0) + opset(ADCBZ, r0) + opset(AICBI, r0) + + case ASTDCCC: /* indexed store: op s,(b+a); op s,(b) */ + opset(ASTWCCC, r0) + opset(ASTHCCC, r0) + opset(ASTBCCC, r0) + + case AREM: /* macro */ + opset(AREM, r0) + + case AREMU: + opset(AREMU, r0) + + case AREMD: + opset(AREMDU, r0) + + case AMULLW: + opset(AMULLD, r0) + + case ADIVW: /* op Rb[,Ra],Rd */ + opset(AMULHW, r0) + + opset(AMULHWCC, r0) + opset(AMULHWU, r0) + opset(AMULHWUCC, r0) + opset(AMULLWCC, r0) + opset(AMULLWVCC, r0) + opset(AMULLWV, r0) + opset(ADIVWCC, r0) + opset(ADIVWV, r0) + opset(ADIVWVCC, r0) + opset(ADIVWU, r0) + opset(ADIVWUCC, r0) + opset(ADIVWUV, r0) + opset(ADIVWUVCC, r0) + opset(AMODUD, r0) + opset(AMODUW, r0) + opset(AMODSD, r0) + opset(AMODSW, r0) + opset(AADDCC, r0) + opset(AADDCV, r0) + opset(AADDCVCC, r0) + opset(AADDV, r0) + opset(AADDVCC, r0) + opset(AADDE, r0) + opset(AADDECC, r0) + opset(AADDEV, r0) + opset(AADDEVCC, r0) + opset(AMULHD, r0) + opset(AMULHDCC, r0) + opset(AMULHDU, r0) + opset(AMULHDUCC, r0) + opset(AMULLDCC, r0) + opset(AMULLDVCC, r0) + opset(AMULLDV, r0) + opset(ADIVD, r0) + opset(ADIVDCC, r0) + opset(ADIVDE, r0) + opset(ADIVDEU, r0) + opset(ADIVDECC, r0) + opset(ADIVDEUCC, r0) + opset(ADIVDVCC, r0) + opset(ADIVDV, r0) + opset(ADIVDU, r0) + opset(ADIVDUV, r0) + opset(ADIVDUVCC, r0) + opset(ADIVDUCC, r0) + + case ACRAND: + opset(ACRANDN, r0) + opset(ACREQV, r0) + opset(ACRNAND, r0) + opset(ACRNOR, r0) + opset(ACROR, r0) + opset(ACRORN, r0) + opset(ACRXOR, r0) + + case 
APOPCNTD: /* popcntd, popcntw, popcntb, cnttzw, cnttzd */ + opset(APOPCNTW, r0) + opset(APOPCNTB, r0) + opset(ACNTTZW, r0) + opset(ACNTTZWCC, r0) + opset(ACNTTZD, r0) + opset(ACNTTZDCC, r0) + + case ACOPY: /* copy, paste. */ + opset(APASTECC, r0) + + case AMADDHD: /* maddhd, maddhdu, maddld */ + opset(AMADDHDU, r0) + opset(AMADDLD, r0) + + case AMOVBZ: /* lbz, stz, rlwm(r/r), lhz, lha, stz, and x variants */ + opset(AMOVH, r0) + opset(AMOVHZ, r0) + + case AMOVBZU: /* lbz[x]u, stb[x]u, lhz[x]u, lha[x]u, sth[u]x, ld[x]u, std[u]x */ + opset(AMOVHU, r0) + + opset(AMOVHZU, r0) + opset(AMOVWU, r0) + opset(AMOVWZU, r0) + opset(AMOVDU, r0) + opset(AMOVMW, r0) + + case ALVEBX: /* lvebx, lvehx, lvewx, lvx, lvxl, lvsl, lvsr */ + opset(ALVEHX, r0) + opset(ALVEWX, r0) + opset(ALVX, r0) + opset(ALVXL, r0) + opset(ALVSL, r0) + opset(ALVSR, r0) + + case ASTVEBX: /* stvebx, stvehx, stvewx, stvx, stvxl */ + opset(ASTVEHX, r0) + opset(ASTVEWX, r0) + opset(ASTVX, r0) + opset(ASTVXL, r0) + + case AVAND: /* vand, vandc, vnand */ + opset(AVAND, r0) + opset(AVANDC, r0) + opset(AVNAND, r0) + + case AVMRGOW: /* vmrgew, vmrgow */ + opset(AVMRGEW, r0) + + case AVOR: /* vor, vorc, vxor, vnor, veqv */ + opset(AVOR, r0) + opset(AVORC, r0) + opset(AVXOR, r0) + opset(AVNOR, r0) + opset(AVEQV, r0) + + case AVADDUM: /* vaddubm, vadduhm, vadduwm, vaddudm, vadduqm */ + opset(AVADDUBM, r0) + opset(AVADDUHM, r0) + opset(AVADDUWM, r0) + opset(AVADDUDM, r0) + opset(AVADDUQM, r0) + + case AVADDCU: /* vaddcuq, vaddcuw */ + opset(AVADDCUQ, r0) + opset(AVADDCUW, r0) + + case AVADDUS: /* vaddubs, vadduhs, vadduws */ + opset(AVADDUBS, r0) + opset(AVADDUHS, r0) + opset(AVADDUWS, r0) + + case AVADDSS: /* vaddsbs, vaddshs, vaddsws */ + opset(AVADDSBS, r0) + opset(AVADDSHS, r0) + opset(AVADDSWS, r0) + + case AVADDE: /* vaddeuqm, vaddecuq */ + opset(AVADDEUQM, r0) + opset(AVADDECUQ, r0) + + case AVSUBUM: /* vsububm, vsubuhm, vsubuwm, vsubudm, vsubuqm */ + opset(AVSUBUBM, r0) + opset(AVSUBUHM, r0) + opset(AVSUBUWM, 
r0) + opset(AVSUBUDM, r0) + opset(AVSUBUQM, r0) + + case AVSUBCU: /* vsubcuq, vsubcuw */ + opset(AVSUBCUQ, r0) + opset(AVSUBCUW, r0) + + case AVSUBUS: /* vsububs, vsubuhs, vsubuws */ + opset(AVSUBUBS, r0) + opset(AVSUBUHS, r0) + opset(AVSUBUWS, r0) + + case AVSUBSS: /* vsubsbs, vsubshs, vsubsws */ + opset(AVSUBSBS, r0) + opset(AVSUBSHS, r0) + opset(AVSUBSWS, r0) + + case AVSUBE: /* vsubeuqm, vsubecuq */ + opset(AVSUBEUQM, r0) + opset(AVSUBECUQ, r0) + + case AVMULESB: /* vmulesb, vmulosb, vmuleub, vmuloub, vmulosh, vmulouh, vmulesw, vmulosw, vmuleuw, vmulouw, vmuluwm */ + opset(AVMULOSB, r0) + opset(AVMULEUB, r0) + opset(AVMULOUB, r0) + opset(AVMULESH, r0) + opset(AVMULOSH, r0) + opset(AVMULEUH, r0) + opset(AVMULOUH, r0) + opset(AVMULESW, r0) + opset(AVMULOSW, r0) + opset(AVMULEUW, r0) + opset(AVMULOUW, r0) + opset(AVMULUWM, r0) + case AVPMSUM: /* vpmsumb, vpmsumh, vpmsumw, vpmsumd */ + opset(AVPMSUMB, r0) + opset(AVPMSUMH, r0) + opset(AVPMSUMW, r0) + opset(AVPMSUMD, r0) + + case AVR: /* vrlb, vrlh, vrlw, vrld */ + opset(AVRLB, r0) + opset(AVRLH, r0) + opset(AVRLW, r0) + opset(AVRLD, r0) + + case AVS: /* vs[l,r], vs[l,r]o, vs[l,r]b, vs[l,r]h, vs[l,r]w, vs[l,r]d */ + opset(AVSLB, r0) + opset(AVSLH, r0) + opset(AVSLW, r0) + opset(AVSL, r0) + opset(AVSLO, r0) + opset(AVSRB, r0) + opset(AVSRH, r0) + opset(AVSRW, r0) + opset(AVSR, r0) + opset(AVSRO, r0) + opset(AVSLD, r0) + opset(AVSRD, r0) + + case AVSA: /* vsrab, vsrah, vsraw, vsrad */ + opset(AVSRAB, r0) + opset(AVSRAH, r0) + opset(AVSRAW, r0) + opset(AVSRAD, r0) + + case AVSOI: /* vsldoi */ + opset(AVSLDOI, r0) + + case AVCLZ: /* vclzb, vclzh, vclzw, vclzd */ + opset(AVCLZB, r0) + opset(AVCLZH, r0) + opset(AVCLZW, r0) + opset(AVCLZD, r0) + + case AVPOPCNT: /* vpopcntb, vpopcnth, vpopcntw, vpopcntd */ + opset(AVPOPCNTB, r0) + opset(AVPOPCNTH, r0) + opset(AVPOPCNTW, r0) + opset(AVPOPCNTD, r0) + + case AVCMPEQ: /* vcmpequb[.], vcmpequh[.], vcmpequw[.], vcmpequd[.] 
*/ + opset(AVCMPEQUB, r0) + opset(AVCMPEQUBCC, r0) + opset(AVCMPEQUH, r0) + opset(AVCMPEQUHCC, r0) + opset(AVCMPEQUW, r0) + opset(AVCMPEQUWCC, r0) + opset(AVCMPEQUD, r0) + opset(AVCMPEQUDCC, r0) + + case AVCMPGT: /* vcmpgt[u,s]b[.], vcmpgt[u,s]h[.], vcmpgt[u,s]w[.], vcmpgt[u,s]d[.] */ + opset(AVCMPGTUB, r0) + opset(AVCMPGTUBCC, r0) + opset(AVCMPGTUH, r0) + opset(AVCMPGTUHCC, r0) + opset(AVCMPGTUW, r0) + opset(AVCMPGTUWCC, r0) + opset(AVCMPGTUD, r0) + opset(AVCMPGTUDCC, r0) + opset(AVCMPGTSB, r0) + opset(AVCMPGTSBCC, r0) + opset(AVCMPGTSH, r0) + opset(AVCMPGTSHCC, r0) + opset(AVCMPGTSW, r0) + opset(AVCMPGTSWCC, r0) + opset(AVCMPGTSD, r0) + opset(AVCMPGTSDCC, r0) + + case AVCMPNEZB: /* vcmpnezb[.] */ + opset(AVCMPNEZBCC, r0) + opset(AVCMPNEB, r0) + opset(AVCMPNEBCC, r0) + opset(AVCMPNEH, r0) + opset(AVCMPNEHCC, r0) + opset(AVCMPNEW, r0) + opset(AVCMPNEWCC, r0) + + case AVPERM: /* vperm */ + opset(AVPERMXOR, r0) + opset(AVPERMR, r0) + + case AVBPERMQ: /* vbpermq, vbpermd */ + opset(AVBPERMD, r0) + + case AVSEL: /* vsel */ + opset(AVSEL, r0) + + case AVSPLTB: /* vspltb, vsplth, vspltw */ + opset(AVSPLTH, r0) + opset(AVSPLTW, r0) + + case AVSPLTISB: /* vspltisb, vspltish, vspltisw */ + opset(AVSPLTISH, r0) + opset(AVSPLTISW, r0) + + case AVCIPH: /* vcipher, vcipherlast */ + opset(AVCIPHER, r0) + opset(AVCIPHERLAST, r0) + + case AVNCIPH: /* vncipher, vncipherlast */ + opset(AVNCIPHER, r0) + opset(AVNCIPHERLAST, r0) + + case AVSBOX: /* vsbox */ + opset(AVSBOX, r0) + + case AVSHASIGMA: /* vshasigmaw, vshasigmad */ + opset(AVSHASIGMAW, r0) + opset(AVSHASIGMAD, r0) + + case ALXVD2X: /* lxvd2x, lxvdsx, lxvw4x, lxvh8x, lxvb16x */ + opset(ALXVDSX, r0) + opset(ALXVW4X, r0) + opset(ALXVH8X, r0) + opset(ALXVB16X, r0) + + case ALXV: /* lxv */ + opset(ALXV, r0) + + case ALXVL: /* lxvl, lxvll, lxvx */ + opset(ALXVLL, r0) + opset(ALXVX, r0) + + case ASTXVD2X: /* stxvd2x, stxvdsx, stxvw4x, stxvh8x, stxvb16x */ + opset(ASTXVW4X, r0) + opset(ASTXVH8X, r0) + opset(ASTXVB16X, r0) + + case 
ASTXV: /* stxv */ + opset(ASTXV, r0) + + case ASTXVL: /* stxvl, stxvll, stvx */ + opset(ASTXVLL, r0) + opset(ASTXVX, r0) + + case ALXSDX: /* lxsdx */ + opset(ALXSDX, r0) + + case ASTXSDX: /* stxsdx */ + opset(ASTXSDX, r0) + + case ALXSIWAX: /* lxsiwax, lxsiwzx */ + opset(ALXSIWZX, r0) + + case ASTXSIWX: /* stxsiwx */ + opset(ASTXSIWX, r0) + + case AMFVSRD: /* mfvsrd, mfvsrwz (and extended mnemonics), mfvsrld */ + opset(AMFFPRD, r0) + opset(AMFVRD, r0) + opset(AMFVSRWZ, r0) + opset(AMFVSRLD, r0) + + case AMTVSRD: /* mtvsrd, mtvsrwa, mtvsrwz (and extended mnemonics), mtvsrdd, mtvsrws */ + opset(AMTFPRD, r0) + opset(AMTVRD, r0) + opset(AMTVSRWA, r0) + opset(AMTVSRWZ, r0) + opset(AMTVSRWS, r0) + + case AXXLAND: /* xxland, xxlandc, xxleqv, xxlnand */ + opset(AXXLANDC, r0) + opset(AXXLEQV, r0) + opset(AXXLNAND, r0) + + case AXXLOR: /* xxlorc, xxlnor, xxlor, xxlxor */ + opset(AXXLORC, r0) + opset(AXXLNOR, r0) + opset(AXXLORQ, r0) + opset(AXXLXOR, r0) + + case AXXSEL: /* xxsel */ + opset(AXXSEL, r0) + + case AXXMRGHW: /* xxmrghw, xxmrglw */ + opset(AXXMRGLW, r0) + + case AXXSPLTW: /* xxspltw */ + opset(AXXSPLTW, r0) + + case AXXSPLTIB: /* xxspltib */ + opset(AXXSPLTIB, r0) + + case AXXPERM: /* xxpermdi */ + opset(AXXPERM, r0) + + case AXXSLDWI: /* xxsldwi */ + opset(AXXPERMDI, r0) + opset(AXXSLDWI, r0) + + case AXXBRQ: /* xxbrq, xxbrd, xxbrw, xxbrh */ + opset(AXXBRD, r0) + opset(AXXBRW, r0) + opset(AXXBRH, r0) + + case AXSCVDPSP: /* xscvdpsp, xscvspdp, xscvdpspn, xscvspdpn */ + opset(AXSCVSPDP, r0) + opset(AXSCVDPSPN, r0) + opset(AXSCVSPDPN, r0) + + case AXVCVDPSP: /* xvcvdpsp, xvcvspdp */ + opset(AXVCVSPDP, r0) + + case AXSCVDPSXDS: /* xscvdpsxds, xscvdpsxws, xscvdpuxds, xscvdpuxws */ + opset(AXSCVDPSXWS, r0) + opset(AXSCVDPUXDS, r0) + opset(AXSCVDPUXWS, r0) + + case AXSCVSXDDP: /* xscvsxddp, xscvuxddp, xscvsxdsp, xscvuxdsp */ + opset(AXSCVUXDDP, r0) + opset(AXSCVSXDSP, r0) + opset(AXSCVUXDSP, r0) + + case AXVCVDPSXDS: /* xvcvdpsxds, xvcvdpsxws, xvcvdpuxds, xvcvdpuxws, 
xvcvspsxds, xvcvspsxws, xvcvspuxds, xvcvspuxws */ + opset(AXVCVDPSXDS, r0) + opset(AXVCVDPSXWS, r0) + opset(AXVCVDPUXDS, r0) + opset(AXVCVDPUXWS, r0) + opset(AXVCVSPSXDS, r0) + opset(AXVCVSPSXWS, r0) + opset(AXVCVSPUXDS, r0) + opset(AXVCVSPUXWS, r0) + + case AXVCVSXDDP: /* xvcvsxddp, xvcvsxwdp, xvcvuxddp, xvcvuxwdp, xvcvsxdsp, xvcvsxwsp, xvcvuxdsp, xvcvuxwsp */ + opset(AXVCVSXWDP, r0) + opset(AXVCVUXDDP, r0) + opset(AXVCVUXWDP, r0) + opset(AXVCVSXDSP, r0) + opset(AXVCVSXWSP, r0) + opset(AXVCVUXDSP, r0) + opset(AXVCVUXWSP, r0) + + case AAND: /* logical op Rb,Rs,Ra; no literal */ + opset(AANDN, r0) + opset(AANDNCC, r0) + opset(AEQV, r0) + opset(AEQVCC, r0) + opset(ANAND, r0) + opset(ANANDCC, r0) + opset(ANOR, r0) + opset(ANORCC, r0) + opset(AORCC, r0) + opset(AORN, r0) + opset(AORNCC, r0) + opset(AXORCC, r0) + + case AADDME: /* op Ra, Rd */ + opset(AADDMECC, r0) + + opset(AADDMEV, r0) + opset(AADDMEVCC, r0) + opset(AADDZE, r0) + opset(AADDZECC, r0) + opset(AADDZEV, r0) + opset(AADDZEVCC, r0) + opset(ASUBME, r0) + opset(ASUBMECC, r0) + opset(ASUBMEV, r0) + opset(ASUBMEVCC, r0) + opset(ASUBZE, r0) + opset(ASUBZECC, r0) + opset(ASUBZEV, r0) + opset(ASUBZEVCC, r0) + + case AADDC: + opset(AADDCCC, r0) + + case ABEQ: + opset(ABGE, r0) + opset(ABGT, r0) + opset(ABLE, r0) + opset(ABLT, r0) + opset(ABNE, r0) + opset(ABVC, r0) + opset(ABVS, r0) + + case ABR: + opset(ABL, r0) + + case ABC: + opset(ABCL, r0) + + case ABDNZ: + opset(ABDZ, r0) + + case AEXTSB: /* op Rs, Ra */ + opset(AEXTSBCC, r0) + + opset(AEXTSH, r0) + opset(AEXTSHCC, r0) + opset(ACNTLZW, r0) + opset(ACNTLZWCC, r0) + opset(ACNTLZD, r0) + opset(AEXTSW, r0) + opset(AEXTSWCC, r0) + opset(ACNTLZDCC, r0) + + case AFABS: /* fop [s,]d */ + opset(AFABSCC, r0) + + opset(AFNABS, r0) + opset(AFNABSCC, r0) + opset(AFNEG, r0) + opset(AFNEGCC, r0) + opset(AFRSP, r0) + opset(AFRSPCC, r0) + opset(AFCTIW, r0) + opset(AFCTIWCC, r0) + opset(AFCTIWZ, r0) + opset(AFCTIWZCC, r0) + opset(AFCTID, r0) + opset(AFCTIDCC, r0) + 
opset(AFCTIDZ, r0) + opset(AFCTIDZCC, r0) + opset(AFCFID, r0) + opset(AFCFIDCC, r0) + opset(AFCFIDU, r0) + opset(AFCFIDUCC, r0) + opset(AFCFIDS, r0) + opset(AFCFIDSCC, r0) + opset(AFRES, r0) + opset(AFRESCC, r0) + opset(AFRIM, r0) + opset(AFRIMCC, r0) + opset(AFRIP, r0) + opset(AFRIPCC, r0) + opset(AFRIZ, r0) + opset(AFRIZCC, r0) + opset(AFRIN, r0) + opset(AFRINCC, r0) + opset(AFRSQRTE, r0) + opset(AFRSQRTECC, r0) + opset(AFSQRT, r0) + opset(AFSQRTCC, r0) + opset(AFSQRTS, r0) + opset(AFSQRTSCC, r0) + + case AFADD: + opset(AFADDS, r0) + opset(AFADDCC, r0) + opset(AFADDSCC, r0) + opset(AFCPSGN, r0) + opset(AFCPSGNCC, r0) + opset(AFDIV, r0) + opset(AFDIVS, r0) + opset(AFDIVCC, r0) + opset(AFDIVSCC, r0) + opset(AFSUB, r0) + opset(AFSUBS, r0) + opset(AFSUBCC, r0) + opset(AFSUBSCC, r0) + + case AFMADD: + opset(AFMADDCC, r0) + opset(AFMADDS, r0) + opset(AFMADDSCC, r0) + opset(AFMSUB, r0) + opset(AFMSUBCC, r0) + opset(AFMSUBS, r0) + opset(AFMSUBSCC, r0) + opset(AFNMADD, r0) + opset(AFNMADDCC, r0) + opset(AFNMADDS, r0) + opset(AFNMADDSCC, r0) + opset(AFNMSUB, r0) + opset(AFNMSUBCC, r0) + opset(AFNMSUBS, r0) + opset(AFNMSUBSCC, r0) + opset(AFSEL, r0) + opset(AFSELCC, r0) + + case AFMUL: + opset(AFMULS, r0) + opset(AFMULCC, r0) + opset(AFMULSCC, r0) + + case AFCMPO: + opset(AFCMPU, r0) + + case AMTFSB0: + opset(AMTFSB0CC, r0) + opset(AMTFSB1, r0) + opset(AMTFSB1CC, r0) + + case ANEG: /* op [Ra,] Rd */ + opset(ANEGCC, r0) + + opset(ANEGV, r0) + opset(ANEGVCC, r0) + + case AOR: /* or/xor Rb,Rs,Ra; ori/xori $uimm,Rs,R */ + opset(AXOR, r0) + + case AORIS: /* oris/xoris $uimm,Rs,Ra */ + opset(AXORIS, r0) + + case ASLW: + opset(ASLWCC, r0) + opset(ASRW, r0) + opset(ASRWCC, r0) + opset(AROTLW, r0) + + case ASLD: + opset(ASLDCC, r0) + opset(ASRD, r0) + opset(ASRDCC, r0) + opset(AROTL, r0) + + case ASRAW: /* sraw Rb,Rs,Ra; srawi sh,Rs,Ra */ + opset(ASRAWCC, r0) + + case AEXTSWSLI: + opset(AEXTSWSLICC, r0) + + case ASRAD: /* sraw Rb,Rs,Ra; srawi sh,Rs,Ra */ + opset(ASRADCC, r0) + + 
case ASUB: /* SUB Ra,Rb,Rd => subf Rd,ra,rb */ + opset(ASUB, r0) + + opset(ASUBCC, r0) + opset(ASUBV, r0) + opset(ASUBVCC, r0) + opset(ASUBCCC, r0) + opset(ASUBCV, r0) + opset(ASUBCVCC, r0) + opset(ASUBE, r0) + opset(ASUBECC, r0) + opset(ASUBEV, r0) + opset(ASUBEVCC, r0) + + case ASYNC: + opset(AISYNC, r0) + opset(ALWSYNC, r0) + opset(APTESYNC, r0) + opset(ATLBSYNC, r0) + + case ARLWMI: + opset(ARLWMICC, r0) + opset(ARLWNM, r0) + opset(ARLWNMCC, r0) + + case ARLDMI: + opset(ARLDMICC, r0) + opset(ARLDIMI, r0) + opset(ARLDIMICC, r0) + + case ARLDC: + opset(ARLDCCC, r0) + + case ARLDCL: + opset(ARLDCR, r0) + opset(ARLDCLCC, r0) + opset(ARLDCRCC, r0) + + case ARLDICL: + opset(ARLDICLCC, r0) + opset(ARLDICR, r0) + opset(ARLDICRCC, r0) + opset(ARLDIC, r0) + opset(ARLDICCC, r0) + opset(ACLRLSLDI, r0) + + case AFMOVD: + opset(AFMOVDCC, r0) + opset(AFMOVDU, r0) + opset(AFMOVS, r0) + opset(AFMOVSU, r0) + + case ALDAR: + opset(ALBAR, r0) + opset(ALHAR, r0) + opset(ALWAR, r0) + + case ASYSCALL: /* just the op; flow of control */ + opset(ARFI, r0) + + opset(ARFCI, r0) + opset(ARFID, r0) + opset(AHRFID, r0) + + case AMOVHBR: + opset(AMOVWBR, r0) + opset(AMOVDBR, r0) + + case ASLBMFEE: + opset(ASLBMFEV, r0) + + case ATW: + opset(ATD, r0) + + case ATLBIE: + opset(ASLBIE, r0) + opset(ATLBIEL, r0) + + case AEIEIO: + opset(ASLBIA, r0) + + case ACMP: + opset(ACMPW, r0) + + case ACMPU: + opset(ACMPWU, r0) + + case ACMPB: + opset(ACMPB, r0) + + case AFTDIV: + opset(AFTDIV, r0) + + case AFTSQRT: + opset(AFTSQRT, r0) + + case AMOVW: /* load/store/move word with sign extension; move 32-bit literals */ + opset(AMOVWZ, r0) /* Same as above, but zero extended */ + + case AADD, + AADDIS, + AANDCC, /* and. Rb,Rs,Ra; andi. 
$uimm,Rs,Ra */ + AANDISCC, + AFMOVSX, + AFMOVSZ, + ALSW, + AMOVD, /* load/store/move 64-bit values, including 32-bit literals with/without sign-extension */ + AMOVB, /* macro: move byte with sign extension */ + AMOVBU, /* macro: move byte with sign extension & update */ + AMOVFL, + /* op $s[,r2],r3; op r1[,r2],r3; no cc/v */ + ASUBC, /* op r1,$s,r3; op r1[,r2],r3 */ + ASTSW, + ASLBMTE, + AWORD, + ADWORD, + ADARN, + AVMSUMUDM, + AADDEX, + ACMPEQB, + ACLRLSLWI, + AMTVSRDD, + APNOP, + AISEL, + obj.ANOP, + obj.ATEXT, + obj.AUNDEF, + obj.AFUNCDATA, + obj.APCALIGN, + obj.APCDATA, + obj.ADUFFZERO, + obj.ADUFFCOPY: + break + } + } +} + +func OPVXX1(o uint32, xo uint32, oe uint32) uint32 { + return o<<26 | xo<<1 | oe<<11 +} + +func OPVXX2(o uint32, xo uint32, oe uint32) uint32 { + return o<<26 | xo<<2 | oe<<11 +} + +func OPVXX2VA(o uint32, xo uint32, oe uint32) uint32 { + return o<<26 | xo<<2 | oe<<16 +} + +func OPVXX3(o uint32, xo uint32, oe uint32) uint32 { + return o<<26 | xo<<3 | oe<<11 +} + +func OPVXX4(o uint32, xo uint32, oe uint32) uint32 { + return o<<26 | xo<<4 | oe<<11 +} + +func OPDQ(o uint32, xo uint32, oe uint32) uint32 { + return o<<26 | xo | oe<<4 +} + +func OPVX(o uint32, xo uint32, oe uint32, rc uint32) uint32 { + return o<<26 | xo | oe<<11 | rc&1 +} + +func OPVC(o uint32, xo uint32, oe uint32, rc uint32) uint32 { + return o<<26 | xo | oe<<11 | (rc&1)<<10 +} + +func OPVCC(o uint32, xo uint32, oe uint32, rc uint32) uint32 { + return o<<26 | xo<<1 | oe<<10 | rc&1 +} + +func OPCC(o uint32, xo uint32, rc uint32) uint32 { + return OPVCC(o, xo, 0, rc) +} + +/* Generate MD-form opcode */ +func OPMD(o, xo, rc uint32) uint32 { + return o<<26 | xo<<2 | rc&1 +} + +/* the order is dest, a/s, b/imm for both arithmetic and logical operations. 
*/ +func AOP_RRR(op uint32, d uint32, a uint32, b uint32) uint32 { + return op | (d&31)<<21 | (a&31)<<16 | (b&31)<<11 +} + +/* VX-form 2-register operands, r/none/r */ +func AOP_RR(op uint32, d uint32, a uint32) uint32 { + return op | (d&31)<<21 | (a&31)<<11 +} + +/* VA-form 4-register operands */ +func AOP_RRRR(op uint32, d uint32, a uint32, b uint32, c uint32) uint32 { + return op | (d&31)<<21 | (a&31)<<16 | (b&31)<<11 | (c&31)<<6 +} + +func AOP_IRR(op uint32, d uint32, a uint32, simm uint32) uint32 { + return op | (d&31)<<21 | (a&31)<<16 | simm&0xFFFF +} + +/* VX-form 2-register + UIM operands */ +func AOP_VIRR(op uint32, d uint32, a uint32, simm uint32) uint32 { + return op | (d&31)<<21 | (simm&0xFFFF)<<16 | (a&31)<<11 +} + +/* VX-form 2-register + ST + SIX operands */ +func AOP_IIRR(op uint32, d uint32, a uint32, sbit uint32, simm uint32) uint32 { + return op | (d&31)<<21 | (a&31)<<16 | (sbit&1)<<15 | (simm&0xF)<<11 +} + +/* VA-form 3-register + SHB operands */ +func AOP_IRRR(op uint32, d uint32, a uint32, b uint32, simm uint32) uint32 { + return op | (d&31)<<21 | (a&31)<<16 | (b&31)<<11 | (simm&0xF)<<6 +} + +/* VX-form 1-register + SIM operands */ +func AOP_IR(op uint32, d uint32, simm uint32) uint32 { + return op | (d&31)<<21 | (simm&31)<<16 +} + +/* XX1-form 3-register operands, 1 VSR operand */ +func AOP_XX1(op uint32, r uint32, a uint32, b uint32) uint32 { + return op | (r&31)<<21 | (a&31)<<16 | (b&31)<<11 | (r&32)>>5 +} + +/* XX2-form 3-register operands, 2 VSR operands */ +func AOP_XX2(op uint32, xt uint32, a uint32, xb uint32) uint32 { + return op | (xt&31)<<21 | (a&3)<<16 | (xb&31)<<11 | (xb&32)>>4 | (xt&32)>>5 +} + +/* XX3-form 3 VSR operands */ +func AOP_XX3(op uint32, xt uint32, xa uint32, xb uint32) uint32 { + return op | (xt&31)<<21 | (xa&31)<<16 | (xb&31)<<11 | (xa&32)>>3 | (xb&32)>>4 | (xt&32)>>5 +} + +/* XX3-form 3 VSR operands + immediate */ +func AOP_XX3I(op uint32, xt uint32, xa uint32, xb uint32, c uint32) uint32 { + return op | 
(xt&31)<<21 | (xa&31)<<16 | (xb&31)<<11 | (c&3)<<8 | (xa&32)>>3 | (xb&32)>>4 | (xt&32)>>5 +} + +/* XX4-form, 4 VSR operands */ +func AOP_XX4(op uint32, xt uint32, xa uint32, xb uint32, xc uint32) uint32 { + return op | (xt&31)<<21 | (xa&31)<<16 | (xb&31)<<11 | (xc&31)<<6 | (xc&32)>>2 | (xa&32)>>3 | (xb&32)>>4 | (xt&32)>>5 +} + +/* DQ-form, VSR register, register + offset operands */ +func AOP_DQ(op uint32, xt uint32, a uint32, b uint32) uint32 { + /* The EA for this instruction form is (RA) + DQ << 4, where DQ is a 12-bit signed integer. */ + /* In order to match the output of the GNU objdump (and make the usage in Go asm easier), the */ + /* instruction is called using the sign extended value (i.e. a valid offset would be -32752 or 32752, */ + /* not -2047 or 2047), so 'b' needs to be adjusted to the expected 12-bit DQ value. Bear in mind that */ + /* bits 0 to 3 in 'dq' need to be zero, otherwise this will generate an illegal instruction. */ + /* If in doubt how this instruction form is encoded, refer to ISA 3.0b, pages 492 and 507. 
*/ + dq := b >> 4 + return op | (xt&31)<<21 | (a&31)<<16 | (dq&4095)<<4 | (xt&32)>>2 +} + +/* Z23-form, 3-register operands + CY field */ +func AOP_Z23I(op uint32, d uint32, a uint32, b uint32, c uint32) uint32 { + return op | (d&31)<<21 | (a&31)<<16 | (b&31)<<11 | (c&3)<<9 +} + +/* X-form, 3-register operands + EH field */ +func AOP_RRRI(op uint32, d uint32, a uint32, b uint32, c uint32) uint32 { + return op | (d&31)<<21 | (a&31)<<16 | (b&31)<<11 | (c & 1) +} + +func LOP_RRR(op uint32, a uint32, s uint32, b uint32) uint32 { + return op | (s&31)<<21 | (a&31)<<16 | (b&31)<<11 +} + +func LOP_IRR(op uint32, a uint32, s uint32, uimm uint32) uint32 { + return op | (s&31)<<21 | (a&31)<<16 | uimm&0xFFFF +} + +func OP_BR(op uint32, li uint32, aa uint32) uint32 { + return op | li&0x03FFFFFC | aa<<1 +} + +func OP_BC(op uint32, bo uint32, bi uint32, bd uint32, aa uint32) uint32 { + return op | (bo&0x1F)<<21 | (bi&0x1F)<<16 | bd&0xFFFC | aa<<1 +} + +func OP_BCR(op uint32, bo uint32, bi uint32) uint32 { + return op | (bo&0x1F)<<21 | (bi&0x1F)<<16 +} + +func OP_RLW(op uint32, a uint32, s uint32, sh uint32, mb uint32, me uint32) uint32 { + return op | (s&31)<<21 | (a&31)<<16 | (sh&31)<<11 | (mb&31)<<6 | (me&31)<<1 +} + +func AOP_RLDIC(op uint32, a uint32, s uint32, sh uint32, m uint32) uint32 { + return op | (s&31)<<21 | (a&31)<<16 | (sh&31)<<11 | ((sh&32)>>5)<<1 | (m&31)<<6 | ((m&32)>>5)<<5 +} + +func AOP_EXTSWSLI(op uint32, a uint32, s uint32, sh uint32) uint32 { + return op | (a&31)<<21 | (s&31)<<16 | (sh&31)<<11 | ((sh&32)>>5)<<1 +} + +func AOP_ISEL(op uint32, t uint32, a uint32, b uint32, bc uint32) uint32 { + return op | (t&31)<<21 | (a&31)<<16 | (b&31)<<11 | (bc&0x1F)<<6 +} + +const ( + /* each rhs is OPVCC(_, _, _, _) */ + OP_ADD = 31<<26 | 266<<1 | 0<<10 | 0 + OP_ADDI = 14<<26 | 0<<1 | 0<<10 | 0 + OP_ADDIS = 15<<26 | 0<<1 | 0<<10 | 0 + OP_ANDI = 28<<26 | 0<<1 | 0<<10 | 0 + OP_EXTSB = 31<<26 | 954<<1 | 0<<10 | 0 + OP_EXTSH = 31<<26 | 922<<1 | 0<<10 | 0 + OP_EXTSW = 31<<26 
| 986<<1 | 0<<10 | 0 + OP_ISEL = 31<<26 | 15<<1 | 0<<10 | 0 + OP_MCRF = 19<<26 | 0<<1 | 0<<10 | 0 + OP_MCRFS = 63<<26 | 64<<1 | 0<<10 | 0 + OP_MCRXR = 31<<26 | 512<<1 | 0<<10 | 0 + OP_MFCR = 31<<26 | 19<<1 | 0<<10 | 0 + OP_MFFS = 63<<26 | 583<<1 | 0<<10 | 0 + OP_MFSPR = 31<<26 | 339<<1 | 0<<10 | 0 + OP_MFSR = 31<<26 | 595<<1 | 0<<10 | 0 + OP_MFSRIN = 31<<26 | 659<<1 | 0<<10 | 0 + OP_MTCRF = 31<<26 | 144<<1 | 0<<10 | 0 + OP_MTFSF = 63<<26 | 711<<1 | 0<<10 | 0 + OP_MTFSFI = 63<<26 | 134<<1 | 0<<10 | 0 + OP_MTSPR = 31<<26 | 467<<1 | 0<<10 | 0 + OP_MTSR = 31<<26 | 210<<1 | 0<<10 | 0 + OP_MTSRIN = 31<<26 | 242<<1 | 0<<10 | 0 + OP_MULLW = 31<<26 | 235<<1 | 0<<10 | 0 + OP_MULLD = 31<<26 | 233<<1 | 0<<10 | 0 + OP_OR = 31<<26 | 444<<1 | 0<<10 | 0 + OP_ORI = 24<<26 | 0<<1 | 0<<10 | 0 + OP_ORIS = 25<<26 | 0<<1 | 0<<10 | 0 + OP_RLWINM = 21<<26 | 0<<1 | 0<<10 | 0 + OP_RLWNM = 23<<26 | 0<<1 | 0<<10 | 0 + OP_SUBF = 31<<26 | 40<<1 | 0<<10 | 0 + OP_RLDIC = 30<<26 | 4<<1 | 0<<10 | 0 + OP_RLDICR = 30<<26 | 2<<1 | 0<<10 | 0 + OP_RLDICL = 30<<26 | 0<<1 | 0<<10 | 0 + OP_RLDCL = 30<<26 | 8<<1 | 0<<10 | 0 + OP_EXTSWSLI = 31<<26 | 445<<2 +) + +func oclass(a *obj.Addr) int { + return int(a.Class) - 1 +} + +const ( + D_FORM = iota + DS_FORM +) + +// This function determines when a non-indexed load or store is D or +// DS form for use in finding the size of the offset field in the instruction. +// The size is needed when setting the offset value in the instruction +// and when generating relocation for that field. +// DS form instructions include: ld, ldu, lwa, std, stdu. All other +// loads and stores with an offset field are D form. This function should +// only be called with the same opcodes as are handled by opstore and opload. 
+func (c *ctxt9) opform(insn uint32) int { + switch insn { + default: + c.ctxt.Diag("bad insn in loadform: %x", insn) + case OPVCC(58, 0, 0, 0), // ld + OPVCC(58, 0, 0, 1), // ldu + OPVCC(58, 0, 0, 0) | 1<<1, // lwa + OPVCC(62, 0, 0, 0), // std + OPVCC(62, 0, 0, 1): //stdu + return DS_FORM + case OP_ADDI, // add + OPVCC(32, 0, 0, 0), // lwz + OPVCC(33, 0, 0, 0), // lwzu + OPVCC(34, 0, 0, 0), // lbz + OPVCC(35, 0, 0, 0), // lbzu + OPVCC(40, 0, 0, 0), // lhz + OPVCC(41, 0, 0, 0), // lhzu + OPVCC(42, 0, 0, 0), // lha + OPVCC(43, 0, 0, 0), // lhau + OPVCC(46, 0, 0, 0), // lmw + OPVCC(48, 0, 0, 0), // lfs + OPVCC(49, 0, 0, 0), // lfsu + OPVCC(50, 0, 0, 0), // lfd + OPVCC(51, 0, 0, 0), // lfdu + OPVCC(36, 0, 0, 0), // stw + OPVCC(37, 0, 0, 0), // stwu + OPVCC(38, 0, 0, 0), // stb + OPVCC(39, 0, 0, 0), // stbu + OPVCC(44, 0, 0, 0), // sth + OPVCC(45, 0, 0, 0), // sthu + OPVCC(47, 0, 0, 0), // stmw + OPVCC(52, 0, 0, 0), // stfs + OPVCC(53, 0, 0, 0), // stfsu + OPVCC(54, 0, 0, 0), // stfd + OPVCC(55, 0, 0, 0): // stfdu + return D_FORM + } + return 0 +} + +// Encode instructions and create relocation for accessing s+d according to the +// instruction op with source or destination (as appropriate) register reg. +func (c *ctxt9) symbolAccess(s *obj.LSym, d int64, reg int16, op uint32, reuse bool) (o1, o2 uint32) { + if c.ctxt.Headtype == objabi.Haix { + // Every symbol access must be made via a TOC anchor. + c.ctxt.Diag("symbolAccess called for %s", s.Name) + } + var base uint32 + form := c.opform(op) + if c.ctxt.Flag_shared { + base = REG_R2 + } else { + base = REG_R0 + } + // If reg can be reused when computing the symbol address, + // use it instead of REGTMP. 
+ if !reuse { + o1 = AOP_IRR(OP_ADDIS, REGTMP, base, 0) + o2 = AOP_IRR(op, uint32(reg), REGTMP, 0) + } else { + o1 = AOP_IRR(OP_ADDIS, uint32(reg), base, 0) + o2 = AOP_IRR(op, uint32(reg), uint32(reg), 0) + } + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 8 + rel.Sym = s + rel.Add = d + if c.ctxt.Flag_shared { + switch form { + case D_FORM: + rel.Type = objabi.R_ADDRPOWER_TOCREL + case DS_FORM: + rel.Type = objabi.R_ADDRPOWER_TOCREL_DS + } + + } else { + switch form { + case D_FORM: + rel.Type = objabi.R_ADDRPOWER + case DS_FORM: + rel.Type = objabi.R_ADDRPOWER_DS + } + } + return +} + +/* + * 32-bit masks + */ +func getmask(m []byte, v uint32) bool { + m[1] = 0 + m[0] = m[1] + if v != ^uint32(0) && v&(1<<31) != 0 && v&1 != 0 { /* MB > ME */ + if getmask(m, ^v) { + i := int(m[0]) + m[0] = m[1] + 1 + m[1] = byte(i - 1) + return true + } + + return false + } + + for i := 0; i < 32; i++ { + if v&(1<<uint(31-i)) != 0 { + m[0] = byte(i) + for { + m[1] = byte(i) + i++ + if i >= 32 || v&(1<<uint(31-i)) == 0 { + break + } + } + + for ; i < 32; i++ { + if v&(1<<uint(31-i)) != 0 { + return false + } + } + return true + } + } + + return false +} + +func (c *ctxt9) maskgen(p *obj.Prog, m []byte, v uint32) { + if !getmask(m, v) { + c.ctxt.Diag("cannot generate mask #%x\n%v", v, p) + } +} + +/* + * 64-bit masks (rldic etc) + */ +func getmask64(m []byte, v uint64) bool { + m[1] = 0 + m[0] = m[1] + for i := 0; i < 64; i++ { + if v&(uint64(1)<<uint(63-i)) != 0 { + m[0] = byte(i) + for { + m[1] = byte(i) + i++ + if i >= 64 || v&(uint64(1)<<uint(63-i)) == 0 { + break + } + } + + for ; i < 64; i++ { + if v&(uint64(1)<<uint(63-i)) != 0 { + return false + } + } + return true + } + } + + return false +} + +func (c *ctxt9) maskgen64(p *obj.Prog, m []byte, v uint64) { + if !getmask64(m, v) { + c.ctxt.Diag("cannot generate mask #%x\n%v", v, p) + } +} + +func loadu32(r int, d int64) uint32 { + v := int32(d >> 16) + if isuint32(uint64(d)) { + return LOP_IRR(OP_ORIS, 
uint32(r), REGZERO, uint32(v)) + } + return AOP_IRR(OP_ADDIS, uint32(r), REGZERO, uint32(v)) +} + +func high16adjusted(d int32) uint16 { + if d&0x8000 != 0 { + return uint16((d >> 16) + 1) + } + return uint16(d >> 16) +} + +func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) { + o1 := uint32(0) + o2 := uint32(0) + o3 := uint32(0) + o4 := uint32(0) + o5 := uint32(0) + + //print("%v => case %d\n", p, o->type); + switch o.type_ { + default: + c.ctxt.Diag("unknown type %d", o.type_) + prasm(p) + + case 0: /* pseudo ops */ + break + + case 2: /* int/cr/fp op Rb,[Ra],Rd */ + r := int(p.Reg) + + if r == 0 { + r = int(p.To.Reg) + } + o1 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(p.From.Reg)) + + case 3: /* mov $soreg/addcon/andcon/ucon, r ==> addis/oris/addi/ori $i,reg',r */ + d := c.vregoff(&p.From) + + v := int32(d) + r := int(p.From.Reg) + if r == 0 { + r = c.getimpliedreg(&p.From, p) + } + if r0iszero != 0 /*TypeKind(100016)*/ && p.To.Reg == 0 && (r != 0 || v != 0) { + c.ctxt.Diag("literal operation on R0\n%v", p) + } + a := OP_ADDI + if o.a1 == C_UCON { + if d&0xffff != 0 { + log.Fatalf("invalid handling of %v", p) + } + // For UCON operands the value is right shifted 16, using ADDIS if the + // value should be signed, ORIS if unsigned. 
+ v >>= 16 + if r == REGZERO && isuint32(uint64(d)) { + o1 = LOP_IRR(OP_ORIS, uint32(p.To.Reg), REGZERO, uint32(v)) + break + } + + a = OP_ADDIS + } else if int64(int16(d)) != d { + // Operand is 16 bit value with sign bit set + if o.a1 == C_ANDCON { + // Needs unsigned 16 bit so use ORI + if r == 0 || r == REGZERO { + o1 = LOP_IRR(uint32(OP_ORI), uint32(p.To.Reg), uint32(0), uint32(v)) + break + } + // With ADDCON, needs signed 16 bit value, fall through to use ADDI + } else if o.a1 != C_ADDCON { + log.Fatalf("invalid handling of %v", p) + } + } + + o1 = AOP_IRR(uint32(a), uint32(p.To.Reg), uint32(r), uint32(v)) + + case 4: /* add/mul $scon,[r1],r2 */ + v := c.regoff(&p.From) + + r := int(p.Reg) + if r == 0 { + r = int(p.To.Reg) + } + if r0iszero != 0 /*TypeKind(100016)*/ && p.To.Reg == 0 { + c.ctxt.Diag("literal operation on R0\n%v", p) + } + if int32(int16(v)) != v { + log.Fatalf("mishandled instruction %v", p) + } + o1 = AOP_IRR(c.opirr(p.As), uint32(p.To.Reg), uint32(r), uint32(v)) + + case 5: /* syscall */ + o1 = c.oprrr(p.As) + + case 6: /* logical op Rb,[Rs,]Ra; no literal */ + r := int(p.Reg) + + if r == 0 { + r = int(p.To.Reg) + } + // AROTL and AROTLW are extended mnemonics, which map to RLDCL and RLWNM. + switch p.As { + case AROTL: + o1 = AOP_RLDIC(OP_RLDCL, uint32(p.To.Reg), uint32(r), uint32(p.From.Reg), uint32(0)) + case AROTLW: + o1 = OP_RLW(OP_RLWNM, uint32(p.To.Reg), uint32(r), uint32(p.From.Reg), 0, 31) + default: + if p.As == AOR && p.From.Type == obj.TYPE_CONST && p.From.Offset == 0 { + // Compile "OR $0, Rx, Ry" into ori. If Rx == Ry == 0, this is the preferred + // hardware no-op. This happens because $0 matches C_REG before C_ZCON. 
+ o1 = LOP_IRR(OP_ORI, uint32(p.To.Reg), uint32(r), 0) + } else { + o1 = LOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(p.From.Reg)) + } + } + + case 7: /* mov r, soreg ==> stw o(r) */ + r := int(p.To.Reg) + + if r == 0 { + r = c.getimpliedreg(&p.To, p) + } + v := c.regoff(&p.To) + if int32(int16(v)) != v { + log.Fatalf("mishandled instruction %v", p) + } + // Offsets in DS form stores must be a multiple of 4 + inst := c.opstore(p.As) + if c.opform(inst) == DS_FORM && v&0x3 != 0 { + log.Fatalf("invalid offset for DS form load/store %v", p) + } + o1 = AOP_IRR(inst, uint32(p.From.Reg), uint32(r), uint32(v)) + + case 8: /* mov soreg, r ==> lbz/lhz/lwz o(r), lbz o(r) + extsb r,r */ + r := int(p.From.Reg) + + if r == 0 { + r = c.getimpliedreg(&p.From, p) + } + v := c.regoff(&p.From) + if int32(int16(v)) != v { + log.Fatalf("mishandled instruction %v", p) + } + // Offsets in DS form loads must be a multiple of 4 + inst := c.opload(p.As) + if c.opform(inst) == DS_FORM && v&0x3 != 0 { + log.Fatalf("invalid offset for DS form load/store %v", p) + } + o1 = AOP_IRR(inst, uint32(p.To.Reg), uint32(r), uint32(v)) + + // Sign extend MOVB operations. This is ignored for other cases (o.size == 4). 
+ o2 = LOP_RRR(OP_EXTSB, uint32(p.To.Reg), uint32(p.To.Reg), 0) + + case 10: /* sub Ra,[Rb],Rd => subf Rd,Ra,Rb */ + r := int(p.Reg) + + if r == 0 { + r = int(p.To.Reg) + } + o1 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), uint32(p.From.Reg), uint32(r)) + + case 11: /* br/bl lbra */ + v := int32(0) + + if p.To.Target() != nil { + v = int32(p.To.Target().Pc - p.Pc) + if v&03 != 0 { + c.ctxt.Diag("odd branch target address\n%v", p) + v &^= 03 + } + + if v < -(1<<25) || v >= 1<<24 { + c.ctxt.Diag("branch too far\n%v", p) + } + } + + o1 = OP_BR(c.opirr(p.As), uint32(v), 0) + if p.To.Sym != nil { + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 4 + rel.Sym = p.To.Sym + v += int32(p.To.Offset) + if v&03 != 0 { + c.ctxt.Diag("odd branch target address\n%v", p) + v &^= 03 + } + + rel.Add = int64(v) + rel.Type = objabi.R_CALLPOWER + } + o2 = 0x60000000 // nop, sometimes overwritten by ld r2, 24(r1) when dynamic linking + + case 13: /* mov[bhwd]{z,} r,r */ + // This needs to handle "MOV* $0, Rx". This shows up because $0 also + // matches C_REG if r0iszero. This happens because C_REG sorts before C_ANDCON + // TODO: fix the above behavior and cleanup this exception. 
+ if p.From.Type == obj.TYPE_CONST { + o1 = LOP_IRR(OP_ADDI, REGZERO, uint32(p.To.Reg), 0) + break + } + if p.To.Type == obj.TYPE_CONST { + c.ctxt.Diag("cannot move into constant 0\n%v", p) + } + + switch p.As { + case AMOVB: + o1 = LOP_RRR(OP_EXTSB, uint32(p.To.Reg), uint32(p.From.Reg), 0) + case AMOVBZ: + o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(p.From.Reg), 0, 24, 31) + case AMOVH: + o1 = LOP_RRR(OP_EXTSH, uint32(p.To.Reg), uint32(p.From.Reg), 0) + case AMOVHZ: + o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(p.From.Reg), 0, 16, 31) + case AMOVW: + o1 = LOP_RRR(OP_EXTSW, uint32(p.To.Reg), uint32(p.From.Reg), 0) + case AMOVWZ: + o1 = OP_RLW(OP_RLDIC, uint32(p.To.Reg), uint32(p.From.Reg), 0, 0, 0) | 1<<5 /* MB=32 */ + case AMOVD: + o1 = LOP_RRR(OP_OR, uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.From.Reg)) + default: + c.ctxt.Diag("internal: bad register move/truncation\n%v", p) + } + + case 14: /* rldc[lr] Rb,Rs,$mask,Ra -- left, right give different masks */ + r := int(p.Reg) + + if r == 0 { + r = int(p.To.Reg) + } + d := c.vregoff(p.GetFrom3()) + var a int + switch p.As { + + // These opcodes expect a mask operand that has to be converted into the + // appropriate operand. The way these were defined, not all valid masks are possible. + // Left here for compatibility in case they were used or generated. 
+ case ARLDCL, ARLDCLCC: + var mask [2]uint8 + c.maskgen64(p, mask[:], uint64(d)) + + a = int(mask[0]) /* MB */ + if mask[1] != 63 { + c.ctxt.Diag("invalid mask for rotate: %x (end != bit 63)\n%v", uint64(d), p) + } + o1 = LOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(p.From.Reg)) + o1 |= (uint32(a) & 31) << 6 + if a&0x20 != 0 { + o1 |= 1 << 5 /* mb[5] is top bit */ + } + + case ARLDCR, ARLDCRCC: + var mask [2]uint8 + c.maskgen64(p, mask[:], uint64(d)) + + a = int(mask[1]) /* ME */ + if mask[0] != 0 { + c.ctxt.Diag("invalid mask for rotate: %x %x (start != 0)\n%v", uint64(d), mask[0], p) + } + o1 = LOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(p.From.Reg)) + o1 |= (uint32(a) & 31) << 6 + if a&0x20 != 0 { + o1 |= 1 << 5 /* mb[5] is top bit */ + } + + // These opcodes use a shift count like the ppc64 asm, no mask conversion done + case ARLDICR, ARLDICRCC: + me := int(d) + sh := c.regoff(&p.From) + if me < 0 || me > 63 || sh > 63 { + c.ctxt.Diag("Invalid me or sh for RLDICR: %x %x\n%v", int(d), sh, p) + } + o1 = AOP_RLDIC(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(sh), uint32(me)) + + case ARLDICL, ARLDICLCC, ARLDIC, ARLDICCC: + mb := int(d) + sh := c.regoff(&p.From) + if mb < 0 || mb > 63 || sh > 63 { + c.ctxt.Diag("Invalid mb or sh for RLDIC, RLDICL: %x %x\n%v", mb, sh, p) + } + o1 = AOP_RLDIC(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(sh), uint32(mb)) + + case ACLRLSLDI: + // This is an extended mnemonic defined in the ISA section C.8.1 + // clrlsldi ra,rs,b,n --> rldic ra,rs,n,b-n + // It maps onto RLDIC so is directly generated here based on the operands from + // the clrlsldi. 
+ n := int32(d) + b := c.regoff(&p.From) + if n > b || b > 63 { + c.ctxt.Diag("Invalid n or b for CLRLSLDI: %x %x\n%v", n, b, p) + } + o1 = AOP_RLDIC(OP_RLDIC, uint32(p.To.Reg), uint32(r), uint32(n), uint32(b)-uint32(n)) + + default: + c.ctxt.Diag("unexpected op in rldc case\n%v", p) + a = 0 + } + + case 17, /* bc bo,bi,lbra (same for now) */ + 16: /* bc bo,bi,sbra */ + a := 0 + + r := int(p.Reg) + + if p.From.Type == obj.TYPE_CONST { + a = int(c.regoff(&p.From)) + } else if p.From.Type == obj.TYPE_REG { + if r != 0 { + c.ctxt.Diag("unexpected register setting for branch with CR: %d\n", r) + } + // BI values for the CR + switch p.From.Reg { + case REG_CR0: + r = BI_CR0 + case REG_CR1: + r = BI_CR1 + case REG_CR2: + r = BI_CR2 + case REG_CR3: + r = BI_CR3 + case REG_CR4: + r = BI_CR4 + case REG_CR5: + r = BI_CR5 + case REG_CR6: + r = BI_CR6 + case REG_CR7: + r = BI_CR7 + default: + c.ctxt.Diag("unrecognized register: expecting CR\n") + } + } + v := int32(0) + if p.To.Target() != nil { + v = int32(p.To.Target().Pc - p.Pc) + } + if v&03 != 0 { + c.ctxt.Diag("odd branch target address\n%v", p) + v &^= 03 + } + + if v < -(1<<16) || v >= 1<<15 { + c.ctxt.Diag("branch too far\n%v", p) + } + o1 = OP_BC(c.opirr(p.As), uint32(a), uint32(r), uint32(v), 0) + + case 18: /* br/bl (lr/ctr); bc/bcl bo,bi,(lr/ctr) */ + var v int32 + var bh uint32 = 0 + if p.As == ABC || p.As == ABCL { + v = c.regoff(&p.From) & 31 + } else { + v = 20 /* unconditional */ + } + r := int(p.Reg) + if r == 0 { + r = 0 + } + switch oclass(&p.To) { + case C_CTR: + o1 = OPVCC(19, 528, 0, 0) + + case C_LR: + o1 = OPVCC(19, 16, 0, 0) + + default: + c.ctxt.Diag("bad optab entry (18): %d\n%v", p.To.Class, p) + v = 0 + } + + // Insert optional branch hint for bclr[l]/bcctr[l] + if p.From3Type() != obj.TYPE_NONE { + bh = uint32(p.GetFrom3().Offset) + if bh == 2 || bh > 3 { + log.Fatalf("BH must be 0,1,3 for %v", p) + } + o1 |= bh << 11 + } + + if p.As == ABL || p.As == ABCL { + o1 |= 1 + } + o1 = OP_BCR(o1, 
uint32(v), uint32(r)) + + case 19: /* mov $lcon,r ==> cau+or */ + d := c.vregoff(&p.From) + o1 = loadu32(int(p.To.Reg), d) + o2 = LOP_IRR(OP_ORI, uint32(p.To.Reg), uint32(p.To.Reg), uint32(int32(d))) + + case 20: /* add $ucon,,r | addis $addcon,r,r */ + v := c.regoff(&p.From) + + r := int(p.Reg) + if r == 0 { + r = int(p.To.Reg) + } + if p.As == AADD && (r0iszero == 0 /*TypeKind(100016)*/ && p.Reg == 0 || r0iszero != 0 /*TypeKind(100016)*/ && p.To.Reg == 0) { + c.ctxt.Diag("literal operation on R0\n%v", p) + } + if p.As == AADDIS { + o1 = AOP_IRR(c.opirr(p.As), uint32(p.To.Reg), uint32(r), uint32(v)) + } else { + o1 = AOP_IRR(c.opirr(AADDIS), uint32(p.To.Reg), uint32(r), uint32(v)>>16) + } + + case 22: /* add $lcon/$andcon,r1,r2 ==> oris+ori+add/ori+add */ + if p.To.Reg == REGTMP || p.Reg == REGTMP { + c.ctxt.Diag("can't synthesize large constant\n%v", p) + } + d := c.vregoff(&p.From) + r := int(p.Reg) + if r == 0 { + r = int(p.To.Reg) + } + if p.From.Sym != nil { + c.ctxt.Diag("%v is not supported", p) + } + // If operand is ANDCON, generate 2 instructions using + // ORI for unsigned value; with LCON 3 instructions. + if o.size == 8 { + o1 = LOP_IRR(OP_ORI, REGTMP, REGZERO, uint32(int32(d))) + o2 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r)) + } else { + o1 = loadu32(REGTMP, d) + o2 = LOP_IRR(OP_ORI, REGTMP, REGTMP, uint32(int32(d))) + o3 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r)) + } + + case 23: /* and $lcon/$addcon,r1,r2 ==> oris+ori+and/addi+and */ + if p.To.Reg == REGTMP || p.Reg == REGTMP { + c.ctxt.Diag("can't synthesize large constant\n%v", p) + } + d := c.vregoff(&p.From) + r := int(p.Reg) + if r == 0 { + r = int(p.To.Reg) + } + + // With ADDCON operand, generate 2 instructions using ADDI for signed value, + // with LCON operand generate 3 instructions. 
+ if o.size == 8 { + o1 = LOP_IRR(OP_ADDI, REGZERO, REGTMP, uint32(int32(d))) + o2 = LOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r)) + } else { + o1 = loadu32(REGTMP, d) + o2 = LOP_IRR(OP_ORI, REGTMP, REGTMP, uint32(int32(d))) + o3 = LOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r)) + } + if p.From.Sym != nil { + c.ctxt.Diag("%v is not supported", p) + } + + case 24: /* lfd fA,float64(0) -> xxlxor xsA,xsaA,xsaA + fneg for -0 */ + o1 = AOP_XX3I(c.oprrr(AXXLXOR), uint32(p.To.Reg), uint32(p.To.Reg), uint32(p.To.Reg), uint32(0)) + // This is needed for -0. + if o.size == 8 { + o2 = AOP_RRR(c.oprrr(AFNEG), uint32(p.To.Reg), 0, uint32(p.To.Reg)) + } + + case 25: + /* sld[.] $sh,rS,rA -> rldicr[.] $sh,rS,mask(0,63-sh),rA; srd[.] -> rldicl */ + v := c.regoff(&p.From) + + if v < 0 { + v = 0 + } else if v > 63 { + v = 63 + } + r := int(p.Reg) + if r == 0 { + r = int(p.To.Reg) + } + var a int + op := uint32(0) + switch p.As { + case ASLD, ASLDCC: + a = int(63 - v) + op = OP_RLDICR + + case ASRD, ASRDCC: + a = int(v) + v = 64 - v + op = OP_RLDICL + case AROTL: + a = int(0) + op = OP_RLDICL + case AEXTSWSLI, AEXTSWSLICC: + a = int(v) + default: + c.ctxt.Diag("unexpected op in sldi case\n%v", p) + a = 0 + o1 = 0 + } + + if p.As == AEXTSWSLI || p.As == AEXTSWSLICC { + o1 = AOP_EXTSWSLI(OP_EXTSWSLI, uint32(r), uint32(p.To.Reg), uint32(v)) + + } else { + o1 = AOP_RLDIC(op, uint32(p.To.Reg), uint32(r), uint32(v), uint32(a)) + } + if p.As == ASLDCC || p.As == ASRDCC || p.As == AEXTSWSLICC { + o1 |= 1 // Set the condition code bit + } + + case 26: /* mov $lsext/auto/oreg,,r2 ==> addis+addi */ + v := c.vregoff(&p.From) + r := int(p.From.Reg) + + switch p.From.Name { + case obj.NAME_EXTERN, obj.NAME_STATIC: + // Load a 32 bit constant, or relocation depending on if a symbol is attached + o1, o2 = c.symbolAccess(p.From.Sym, v, p.To.Reg, OP_ADDI, true) + default: + if r == 0 { + r = c.getimpliedreg(&p.From, p) + } + // Add a 32 bit offset to a register. 
+ o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), uint32(r), uint32(high16adjusted(int32(v)))) + o2 = AOP_IRR(OP_ADDI, uint32(p.To.Reg), uint32(p.To.Reg), uint32(v)) + } + + case 27: /* subc ra,$simm,rd => subfic rd,ra,$simm */ + v := c.regoff(p.GetFrom3()) + + r := int(p.From.Reg) + o1 = AOP_IRR(c.opirr(p.As), uint32(p.To.Reg), uint32(r), uint32(v)) + + case 28: /* subc r1,$lcon,r2 ==> cau+or+subfc */ + if p.To.Reg == REGTMP || p.From.Reg == REGTMP { + c.ctxt.Diag("can't synthesize large constant\n%v", p) + } + v := c.regoff(p.GetFrom3()) + o1 = AOP_IRR(OP_ADDIS, REGTMP, REGZERO, uint32(v)>>16) + o2 = LOP_IRR(OP_ORI, REGTMP, REGTMP, uint32(v)) + o3 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), uint32(p.From.Reg), REGTMP) + if p.From.Sym != nil { + c.ctxt.Diag("%v is not supported", p) + } + + case 29: /* rldic[lr]? $sh,s,$mask,a -- left, right, plain give different masks */ + v := c.regoff(&p.From) + + d := c.vregoff(p.GetFrom3()) + var mask [2]uint8 + c.maskgen64(p, mask[:], uint64(d)) + var a int + switch p.As { + case ARLDC, ARLDCCC: + a = int(mask[0]) /* MB */ + if int32(mask[1]) != (63 - v) { + c.ctxt.Diag("invalid mask for shift: %x %x (shift %d)\n%v", uint64(d), mask[1], v, p) + } + + case ARLDCL, ARLDCLCC: + a = int(mask[0]) /* MB */ + if mask[1] != 63 { + c.ctxt.Diag("invalid mask for shift: %x %s (shift %d)\n%v", uint64(d), mask[1], v, p) + } + + case ARLDCR, ARLDCRCC: + a = int(mask[1]) /* ME */ + if mask[0] != 0 { + c.ctxt.Diag("invalid mask for shift: %x %x (shift %d)\n%v", uint64(d), mask[0], v, p) + } + + default: + c.ctxt.Diag("unexpected op in rldic case\n%v", p) + a = 0 + } + + o1 = AOP_RRR(c.opirr(p.As), uint32(p.Reg), uint32(p.To.Reg), (uint32(v) & 0x1F)) + o1 |= (uint32(a) & 31) << 6 + if v&0x20 != 0 { + o1 |= 1 << 1 + } + if a&0x20 != 0 { + o1 |= 1 << 5 /* mb[5] is top bit */ + } + + case 30: /* rldimi $sh,s,$mask,a */ + v := c.regoff(&p.From) + + d := c.vregoff(p.GetFrom3()) + + // Original opcodes had mask operands which had to be converted to a 
shift count as expected by + // the ppc64 asm. + switch p.As { + case ARLDMI, ARLDMICC: + var mask [2]uint8 + c.maskgen64(p, mask[:], uint64(d)) + if int32(mask[1]) != (63 - v) { + c.ctxt.Diag("invalid mask for shift: %x %x (shift %d)\n%v", uint64(d), mask[1], v, p) + } + o1 = AOP_RRR(c.opirr(p.As), uint32(p.Reg), uint32(p.To.Reg), (uint32(v) & 0x1F)) + o1 |= (uint32(mask[0]) & 31) << 6 + if v&0x20 != 0 { + o1 |= 1 << 1 + } + if mask[0]&0x20 != 0 { + o1 |= 1 << 5 /* mb[5] is top bit */ + } + + // Opcodes with shift count operands. + case ARLDIMI, ARLDIMICC: + o1 = AOP_RRR(c.opirr(p.As), uint32(p.Reg), uint32(p.To.Reg), (uint32(v) & 0x1F)) + o1 |= (uint32(d) & 31) << 6 + if d&0x20 != 0 { + o1 |= 1 << 5 + } + if v&0x20 != 0 { + o1 |= 1 << 1 + } + } + + case 31: /* dword */ + d := c.vregoff(&p.From) + + if c.ctxt.Arch.ByteOrder == binary.BigEndian { + o1 = uint32(d >> 32) + o2 = uint32(d) + } else { + o1 = uint32(d) + o2 = uint32(d >> 32) + } + + if p.From.Sym != nil { + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 8 + rel.Sym = p.From.Sym + rel.Add = p.From.Offset + rel.Type = objabi.R_ADDR + o2 = 0 + o1 = o2 + } + + case 32: /* fmul frc,fra,frd */ + r := int(p.Reg) + + if r == 0 { + r = int(p.To.Reg) + } + o1 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), 0) | (uint32(p.From.Reg)&31)<<6 + + case 33: /* fabs [frb,]frd; fmr. 
frb,frd */ + r := int(p.From.Reg) + + if oclass(&p.From) == C_NONE { + r = int(p.To.Reg) + } + o1 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), 0, uint32(r)) + + case 34: /* FMADDx fra,frb,frc,frt (t=a*c±b) */ + o1 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg)) | (uint32(p.GetFrom3().Reg)&31)<<6 + + case 35: /* mov r,lext/lauto/loreg ==> cau $(v>>16),sb,r'; store o(r') */ + v := c.regoff(&p.To) + + r := int(p.To.Reg) + if r == 0 { + r = c.getimpliedreg(&p.To, p) + } + // Offsets in DS form stores must be a multiple of 4 + inst := c.opstore(p.As) + if c.opform(inst) == DS_FORM && v&0x3 != 0 { + log.Fatalf("invalid offset for DS form load/store %v", p) + } + o1 = AOP_IRR(OP_ADDIS, REGTMP, uint32(r), uint32(high16adjusted(v))) + o2 = AOP_IRR(inst, uint32(p.From.Reg), REGTMP, uint32(v)) + + case 36: /* mov b/bz/h/hz lext/lauto/lreg,r ==> lbz+extsb/lbz/lha/lhz etc */ + v := c.regoff(&p.From) + + r := int(p.From.Reg) + if r == 0 { + r = c.getimpliedreg(&p.From, p) + } + if o.a6 == C_REG { + o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), uint32(r), uint32(high16adjusted(v))) + o2 = AOP_IRR(c.opload(p.As), uint32(p.To.Reg), uint32(p.To.Reg), uint32(v)) + } else { + o1 = AOP_IRR(OP_ADDIS, uint32(REGTMP), uint32(r), uint32(high16adjusted(v))) + o2 = AOP_IRR(c.opload(p.As), uint32(p.To.Reg), uint32(REGTMP), uint32(v)) + } + + // Sign extend MOVB if needed + o3 = LOP_RRR(OP_EXTSB, uint32(p.To.Reg), uint32(p.To.Reg), 0) + + case 40: /* word */ + o1 = uint32(c.regoff(&p.From)) + + case 41: /* stswi */ + if p.To.Type == obj.TYPE_MEM && p.To.Index == 0 && p.To.Offset != 0 { + c.ctxt.Diag("Invalid addressing mode used in index type instruction: %v", p.As) + } + + o1 = AOP_RRR(c.opirr(p.As), uint32(p.From.Reg), uint32(p.To.Reg), 0) | (uint32(c.regoff(p.GetFrom3()))&0x7F)<<11 + + case 42: /* lswi */ + if p.From.Type == obj.TYPE_MEM && p.From.Index == 0 && p.From.Offset != 0 { + c.ctxt.Diag("Invalid addressing mode used in index type instruction: %v", p.As) + } + 
o1 = AOP_RRR(c.opirr(p.As), uint32(p.To.Reg), uint32(p.From.Reg), 0) | (uint32(c.regoff(p.GetFrom3()))&0x7F)<<11 + + case 43: /* data cache instructions: op (Ra+[Rb]), [th|l] */ + /* TH field for dcbt/dcbtst: */ + /* 0 = Block access - program will soon access EA. */ + /* 8-15 = Stream access - sequence of access (data stream). See section 4.3.2 of the ISA for details. */ + /* 16 = Block access - program will soon make a transient access to EA. */ + /* 17 = Block access - program will not access EA for a long time. */ + + /* L field for dcbf: */ + /* 0 = invalidates the block containing EA in all processors. */ + /* 1 = same as 0, but with limited scope (i.e. block in the current processor will not be reused soon). */ + /* 3 = same as 1, but with even more limited scope (i.e. block in the current processor primary cache will not be reused soon). */ + if p.To.Type == obj.TYPE_NONE { + o1 = AOP_RRR(c.oprrr(p.As), 0, uint32(p.From.Index), uint32(p.From.Reg)) + } else { + th := c.regoff(&p.To) + o1 = AOP_RRR(c.oprrr(p.As), uint32(th), uint32(p.From.Index), uint32(p.From.Reg)) + } + + case 44: /* indexed store */ + o1 = AOP_RRR(c.opstorex(p.As), uint32(p.From.Reg), uint32(p.To.Index), uint32(p.To.Reg)) + + case 45: /* indexed load */ + switch p.As { + /* The assembler accepts a 4-operand l*arx instruction. 
The fourth operand is an Exclusive Access Hint (EH) */ + /* The EH field can be used as a lock acquire/release hint as follows: */ + /* 0 = Atomic Update (fetch-and-operate or similar algorithm) */ + /* 1 = Exclusive Access (lock acquire and release) */ + case ALBAR, ALHAR, ALWAR, ALDAR: + if p.From3Type() != obj.TYPE_NONE { + eh := int(c.regoff(p.GetFrom3())) + if eh > 1 { + c.ctxt.Diag("illegal EH field\n%v", p) + } + o1 = AOP_RRRI(c.oploadx(p.As), uint32(p.To.Reg), uint32(p.From.Index), uint32(p.From.Reg), uint32(eh)) + } else { + o1 = AOP_RRR(c.oploadx(p.As), uint32(p.To.Reg), uint32(p.From.Index), uint32(p.From.Reg)) + } + default: + o1 = AOP_RRR(c.oploadx(p.As), uint32(p.To.Reg), uint32(p.From.Index), uint32(p.From.Reg)) + } + case 46: /* plain op */ + o1 = c.oprrr(p.As) + + case 47: /* op Ra, Rd; also op [Ra,] Rd */ + r := int(p.From.Reg) + + if r == 0 { + r = int(p.To.Reg) + } + o1 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), 0) + + case 48: /* op Rs, Ra */ + r := int(p.From.Reg) + + if r == 0 { + r = int(p.To.Reg) + } + o1 = LOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), 0) + + case 49: /* op Rb; op $n, Rb */ + if p.From.Type != obj.TYPE_REG { /* tlbie $L, rB */ + v := c.regoff(&p.From) & 1 + o1 = AOP_RRR(c.oprrr(p.As), 0, 0, uint32(p.To.Reg)) | uint32(v)<<21 + } else { + o1 = AOP_RRR(c.oprrr(p.As), 0, 0, uint32(p.From.Reg)) + } + + case 50: /* rem[u] r1[,r2],r3 */ + r := int(p.Reg) + + if r == 0 { + r = int(p.To.Reg) + } + v := c.oprrr(p.As) + t := v & (1<<10 | 1) /* OE|Rc */ + o1 = AOP_RRR(v&^t, REGTMP, uint32(r), uint32(p.From.Reg)) + o2 = AOP_RRR(OP_MULLW, REGTMP, REGTMP, uint32(p.From.Reg)) + o3 = AOP_RRR(OP_SUBF|t, uint32(p.To.Reg), REGTMP, uint32(r)) + if p.As == AREMU { + o4 = o3 + + /* Clear top 32 bits */ + o3 = OP_RLW(OP_RLDIC, REGTMP, REGTMP, 0, 0, 0) | 1<<5 + } + + case 51: /* remd[u] r1[,r2],r3 */ + r := int(p.Reg) + + if r == 0 { + r = int(p.To.Reg) + } + v := c.oprrr(p.As) + t := v & (1<<10 | 1) /* OE|Rc */ + o1 = 
AOP_RRR(v&^t, REGTMP, uint32(r), uint32(p.From.Reg)) + o2 = AOP_RRR(OP_MULLD, REGTMP, REGTMP, uint32(p.From.Reg)) + o3 = AOP_RRR(OP_SUBF|t, uint32(p.To.Reg), REGTMP, uint32(r)) + /* cases 50,51: removed; can be reused. */ + + /* cases 50,51: removed; can be reused. */ + + case 52: /* mtfsbNx cr(n) */ + v := c.regoff(&p.From) & 31 + + o1 = AOP_RRR(c.oprrr(p.As), uint32(v), 0, 0) + + case 53: /* mffsX ,fr1 */ + o1 = AOP_RRR(OP_MFFS, uint32(p.To.Reg), 0, 0) + + case 55: /* op Rb, Rd */ + o1 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), 0, uint32(p.From.Reg)) + + case 56: /* sra $sh,[s,]a; srd $sh,[s,]a */ + v := c.regoff(&p.From) + + r := int(p.Reg) + if r == 0 { + r = int(p.To.Reg) + } + o1 = AOP_RRR(c.opirr(p.As), uint32(r), uint32(p.To.Reg), uint32(v)&31) + if (p.As == ASRAD || p.As == ASRADCC) && (v&0x20 != 0) { + o1 |= 1 << 1 /* mb[5] */ + } + + case 57: /* slw $sh,[s,]a -> rlwinm ... */ + v := c.regoff(&p.From) + + r := int(p.Reg) + if r == 0 { + r = int(p.To.Reg) + } + + /* + * Let user (gs) shoot himself in the foot. + * qc has already complained. 
+ * + if(v < 0 || v > 31) + ctxt->diag("illegal shift %ld\n%v", v, p); + */ + if v < 0 { + v = 0 + } else if v > 32 { + v = 32 + } + var mask [2]uint8 + switch p.As { + case AROTLW: + mask[0], mask[1] = 0, 31 + case ASRW, ASRWCC: + mask[0], mask[1] = uint8(v), 31 + v = 32 - v + default: + mask[0], mask[1] = 0, uint8(31-v) + } + o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(r), uint32(v), uint32(mask[0]), uint32(mask[1])) + if p.As == ASLWCC || p.As == ASRWCC { + o1 |= 1 // set the condition code + } + + case 58: /* logical $andcon,[s],a */ + v := c.regoff(&p.From) + + r := int(p.Reg) + if r == 0 { + r = int(p.To.Reg) + } + o1 = LOP_IRR(c.opirr(p.As), uint32(p.To.Reg), uint32(r), uint32(v)) + + case 59: /* or/xor/and $ucon,,r | oris/xoris/andis $addcon,r,r */ + v := c.regoff(&p.From) + + r := int(p.Reg) + if r == 0 { + r = int(p.To.Reg) + } + switch p.As { + case AOR: + o1 = LOP_IRR(c.opirr(AORIS), uint32(p.To.Reg), uint32(r), uint32(v)>>16) /* oris, xoris, andis. */ + case AXOR: + o1 = LOP_IRR(c.opirr(AXORIS), uint32(p.To.Reg), uint32(r), uint32(v)>>16) + case AANDCC: + o1 = LOP_IRR(c.opirr(AANDISCC), uint32(p.To.Reg), uint32(r), uint32(v)>>16) + default: + o1 = LOP_IRR(c.opirr(p.As), uint32(p.To.Reg), uint32(r), uint32(v)) + } + + case 60: /* tw to,a,b */ + r := int(c.regoff(&p.From) & 31) + + o1 = AOP_RRR(c.oprrr(p.As), uint32(r), uint32(p.Reg), uint32(p.To.Reg)) + + case 61: /* tw to,a,$simm */ + r := int(c.regoff(&p.From) & 31) + + v := c.regoff(&p.To) + o1 = AOP_IRR(c.opirr(p.As), uint32(r), uint32(p.Reg), uint32(v)) + + case 62: /* rlwmi $sh,s,$mask,a */ + v := c.regoff(&p.From) + switch p.As { + case ACLRLSLWI: + n := c.regoff(p.GetFrom3()) + // This is an extended mnemonic described in the ISA C.8.2 + // clrlslwi ra,rs,b,n -> rlwinm ra,rs,n,b-n,31-n + // It maps onto rlwinm which is directly generated here. 
+ if n > v || v >= 32 { + c.ctxt.Diag("Invalid n or b for CLRLSLWI: %x %x\n%v", v, n, p) + } + + o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(p.Reg), uint32(n), uint32(v-n), uint32(31-n)) + default: + var mask [2]uint8 + c.maskgen(p, mask[:], uint32(c.regoff(p.GetFrom3()))) + o1 = AOP_RRR(c.opirr(p.As), uint32(p.Reg), uint32(p.To.Reg), uint32(v)) + o1 |= (uint32(mask[0])&31)<<6 | (uint32(mask[1])&31)<<1 + } + + case 63: /* rlwmi b,s,$mask,a */ + var mask [2]uint8 + c.maskgen(p, mask[:], uint32(c.regoff(p.GetFrom3()))) + o1 = AOP_RRR(c.oprrr(p.As), uint32(p.Reg), uint32(p.To.Reg), uint32(p.From.Reg)) + o1 |= (uint32(mask[0])&31)<<6 | (uint32(mask[1])&31)<<1 + + case 64: /* mtfsf fr[, $m] {,fpcsr} */ + var v int32 + if p.From3Type() != obj.TYPE_NONE { + v = c.regoff(p.GetFrom3()) & 255 + } else { + v = 255 + } + o1 = OP_MTFSF | uint32(v)<<17 | uint32(p.From.Reg)<<11 + + case 65: /* MOVFL $imm,FPSCR(n) => mtfsfi crfd,imm */ + if p.To.Reg == 0 { + c.ctxt.Diag("must specify FPSCR(n)\n%v", p) + } + o1 = OP_MTFSFI | (uint32(p.To.Reg)&15)<<23 | (uint32(c.regoff(&p.From))&31)<<12 + + case 66: /* mov spr,r1; mov r1,spr */ + var r int + var v int32 + if REG_R0 <= p.From.Reg && p.From.Reg <= REG_R31 { + r = int(p.From.Reg) + v = int32(p.To.Reg) + o1 = OPVCC(31, 467, 0, 0) /* mtspr */ + } else { + r = int(p.To.Reg) + v = int32(p.From.Reg) + o1 = OPVCC(31, 339, 0, 0) /* mfspr */ + } + + o1 = AOP_RRR(o1, uint32(r), 0, 0) | (uint32(v)&0x1f)<<16 | ((uint32(v)>>5)&0x1f)<<11 + + case 67: /* mcrf crfD,crfS */ + if p.From.Reg == REG_CR || p.To.Reg == REG_CR { + c.ctxt.Diag("CR argument must be a conditional register field (CR0-CR7)\n%v", p) + } + o1 = AOP_RRR(OP_MCRF, ((uint32(p.To.Reg) & 7) << 2), ((uint32(p.From.Reg) & 7) << 2), 0) + + case 68: /* mfcr rD; mfocrf CRM,rD */ + o1 = AOP_RRR(OP_MFCR, uint32(p.To.Reg), 0, 0) /* form, whole register */ + if p.From.Reg != REG_CR { + v := uint32(1) << uint(7-(p.From.Reg&7)) /* CR(n) */ + o1 |= 1<<20 | v<<12 /* new form, mfocrf */ + } + + 
case 69: /* mtcrf CRM,rS, mtocrf CRx,rS */ + var v uint32 + if p.To.Reg == REG_CR { + v = 0xff + } else if p.To.Offset != 0 { // MOVFL gpr, constant + v = uint32(p.To.Offset) + } else { // p.To.Reg == REG_CRx + v = 1 << uint(7-(p.To.Reg&7)) + } + // Use mtocrf form if only one CR field moved. + if bits.OnesCount32(v) == 1 { + v |= 1 << 8 + } + + o1 = AOP_RRR(OP_MTCRF, uint32(p.From.Reg), 0, 0) | uint32(v)<<12 + + case 70: /* [f]cmp r,r,cr*/ + var r int + if p.Reg == 0 { + r = 0 + } else { + r = (int(p.Reg) & 7) << 2 + } + o1 = AOP_RRR(c.oprrr(p.As), uint32(r), uint32(p.From.Reg), uint32(p.To.Reg)) + + case 71: /* cmp[l] r,i,cr*/ + var r int + if p.Reg == 0 { + r = 0 + } else { + r = (int(p.Reg) & 7) << 2 + } + o1 = AOP_RRR(c.opirr(p.As), uint32(r), uint32(p.From.Reg), 0) | uint32(c.regoff(&p.To))&0xffff + + case 72: /* slbmte (Rb+Rs -> slb[Rb]) -> Rs, Rb */ + o1 = AOP_RRR(c.oprrr(p.As), uint32(p.From.Reg), 0, uint32(p.To.Reg)) + + case 73: /* mcrfs crfD,crfS */ + if p.From.Type != obj.TYPE_REG || p.From.Reg != REG_FPSCR || p.To.Type != obj.TYPE_REG || p.To.Reg < REG_CR0 || REG_CR7 < p.To.Reg { + c.ctxt.Diag("illegal FPSCR/CR field number\n%v", p) + } + o1 = AOP_RRR(OP_MCRFS, ((uint32(p.To.Reg) & 7) << 2), ((0 & 7) << 2), 0) + + case 77: /* syscall $scon, syscall Rx */ + if p.From.Type == obj.TYPE_CONST { + if p.From.Offset > BIG || p.From.Offset < -BIG { + c.ctxt.Diag("illegal syscall, sysnum too large: %v", p) + } + o1 = AOP_IRR(OP_ADDI, REGZERO, REGZERO, uint32(p.From.Offset)) + } else if p.From.Type == obj.TYPE_REG { + o1 = LOP_RRR(OP_OR, REGZERO, uint32(p.From.Reg), uint32(p.From.Reg)) + } else { + c.ctxt.Diag("illegal syscall: %v", p) + o1 = 0x7fe00008 // trap always + } + + o2 = c.oprrr(p.As) + o3 = AOP_RRR(c.oprrr(AXOR), REGZERO, REGZERO, REGZERO) // XOR R0, R0 + + case 78: /* undef */ + o1 = 0 /* "An instruction consisting entirely of binary 0s is guaranteed + always to be an illegal instruction." 
*/ + + /* relocation operations */ + case 74: + v := c.vregoff(&p.To) + // Offsets in DS form stores must be a multiple of 4 + inst := c.opstore(p.As) + if c.opform(inst) == DS_FORM && v&0x3 != 0 { + log.Fatalf("invalid offset for DS form load/store %v", p) + } + // Can't reuse base for store instructions. + o1, o2 = c.symbolAccess(p.To.Sym, v, p.From.Reg, inst, false) + + case 75: // 32 bit offset symbol loads (got/toc/addr) + v := p.From.Offset + + // Offsets in DS form loads must be a multiple of 4 + inst := c.opload(p.As) + if c.opform(inst) == DS_FORM && v&0x3 != 0 { + log.Fatalf("invalid offset for DS form load/store %v", p) + } + switch p.From.Name { + case obj.NAME_GOTREF, obj.NAME_TOCREF: + if v != 0 { + c.ctxt.Diag("invalid offset for GOT/TOC access %v", p) + } + o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R2, 0) + o2 = AOP_IRR(inst, uint32(p.To.Reg), uint32(p.To.Reg), 0) + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 8 + rel.Sym = p.From.Sym + switch p.From.Name { + case obj.NAME_GOTREF: + rel.Type = objabi.R_ADDRPOWER_GOT + case obj.NAME_TOCREF: + rel.Type = objabi.R_ADDRPOWER_TOCREL_DS + } + default: + reuseBaseReg := o.a6 == C_REG + // Reuse To.Reg as base register if it is a GPR. 
+ o1, o2 = c.symbolAccess(p.From.Sym, v, p.To.Reg, inst, reuseBaseReg) + } + + o3 = LOP_RRR(OP_EXTSB, uint32(p.To.Reg), uint32(p.To.Reg), 0) + + case 79: + if p.From.Offset != 0 { + c.ctxt.Diag("invalid offset against tls var %v", p) + } + o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R13, 0) + o2 = AOP_IRR(OP_ADDI, uint32(p.To.Reg), uint32(p.To.Reg), 0) + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 8 + rel.Sym = p.From.Sym + rel.Type = objabi.R_POWER_TLS_LE + + case 80: + if p.From.Offset != 0 { + c.ctxt.Diag("invalid offset against tls var %v", p) + } + o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R2, 0) + o2 = AOP_IRR(c.opload(AMOVD), uint32(p.To.Reg), uint32(p.To.Reg), 0) + o3 = AOP_RRR(OP_ADD, uint32(p.To.Reg), uint32(p.To.Reg), REG_R13) + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 8 + rel.Sym = p.From.Sym + rel.Type = objabi.R_POWER_TLS_IE + rel = obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + 8 + rel.Siz = 4 + rel.Sym = p.From.Sym + rel.Type = objabi.R_POWER_TLS + + case 82: /* vector instructions, VX-form and VC-form */ + if p.From.Type == obj.TYPE_REG { + /* reg reg none OR reg reg reg */ + /* 3-register operand order: VRA, VRB, VRT */ + /* 2-register operand order: VRA, VRT */ + o1 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg)) + } else if p.From3Type() == obj.TYPE_CONST { + /* imm imm reg reg */ + /* operand order: SIX, VRA, ST, VRT */ + six := int(c.regoff(&p.From)) + st := int(c.regoff(p.GetFrom3())) + o1 = AOP_IIRR(c.opiirr(p.As), uint32(p.To.Reg), uint32(p.Reg), uint32(st), uint32(six)) + } else if p.From3Type() == obj.TYPE_NONE && p.Reg != 0 { + /* imm reg reg */ + /* operand order: UIM, VRB, VRT */ + uim := int(c.regoff(&p.From)) + o1 = AOP_VIRR(c.opirr(p.As), uint32(p.To.Reg), uint32(p.Reg), uint32(uim)) + } else { + /* imm reg */ + /* operand order: SIM, VRT */ + sim := int(c.regoff(&p.From)) + o1 = AOP_IR(c.opirr(p.As), uint32(p.To.Reg), uint32(sim)) + } + + case 83: /* 
vector instructions, VA-form */ + if p.From.Type == obj.TYPE_REG { + /* reg reg reg reg */ + /* 4-register operand order: VRA, VRB, VRC, VRT */ + o1 = AOP_RRRR(c.oprrr(p.As), uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg), uint32(p.GetFrom3().Reg)) + } else if p.From.Type == obj.TYPE_CONST { + /* imm reg reg reg */ + /* operand order: SHB, VRA, VRB, VRT */ + shb := int(c.regoff(&p.From)) + o1 = AOP_IRRR(c.opirrr(p.As), uint32(p.To.Reg), uint32(p.Reg), uint32(p.GetFrom3().Reg), uint32(shb)) + } + + case 84: // ISEL BC,RA,RB,RT -> isel rt,ra,rb,bc + bc := c.vregoff(&p.From) + if o.a1 == C_CRBIT { + // CR bit is encoded as a register, not a constant. + bc = int64(p.From.Reg) + } + + // rt = To.Reg, ra = p.Reg, rb = p.From3.Reg + o1 = AOP_ISEL(OP_ISEL, uint32(p.To.Reg), uint32(p.Reg), uint32(p.GetFrom3().Reg), uint32(bc)) + + case 85: /* vector instructions, VX-form */ + /* reg none reg */ + /* 2-register operand order: VRB, VRT */ + o1 = AOP_RR(c.oprrr(p.As), uint32(p.To.Reg), uint32(p.From.Reg)) + + case 86: /* VSX indexed store, XX1-form */ + /* reg reg reg */ + /* 3-register operand order: XT, (RB)(RA*1) */ + o1 = AOP_XX1(c.opstorex(p.As), uint32(p.From.Reg), uint32(p.To.Index), uint32(p.To.Reg)) + + case 87: /* VSX indexed load, XX1-form */ + /* reg reg reg */ + /* 3-register operand order: (RB)(RA*1), XT */ + o1 = AOP_XX1(c.oploadx(p.As), uint32(p.To.Reg), uint32(p.From.Index), uint32(p.From.Reg)) + + case 88: /* VSX mfvsr* instructions, XX1-form XS,RA */ + o1 = AOP_XX1(c.oprrr(p.As), uint32(p.From.Reg), uint32(p.To.Reg), uint32(p.Reg)) + + case 89: /* VSX instructions, XX2-form */ + /* reg none reg OR reg imm reg */ + /* 2-register operand order: XB, XT or XB, UIM, XT*/ + uim := int(c.regoff(p.GetFrom3())) + o1 = AOP_XX2(c.oprrr(p.As), uint32(p.To.Reg), uint32(uim), uint32(p.From.Reg)) + + case 90: /* VSX instructions, XX3-form */ + if p.From3Type() == obj.TYPE_NONE { + /* reg reg reg */ + /* 3-register operand order: XA, XB, XT */ + o1 = 
AOP_XX3(c.oprrr(p.As), uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg)) + } else if p.From3Type() == obj.TYPE_CONST { + /* reg reg reg imm */ + /* operand order: XA, XB, DM, XT */ + dm := int(c.regoff(p.GetFrom3())) + o1 = AOP_XX3I(c.oprrr(p.As), uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg), uint32(dm)) + } + + case 91: /* VSX instructions, XX4-form */ + /* reg reg reg reg */ + /* 3-register operand order: XA, XB, XC, XT */ + o1 = AOP_XX4(c.oprrr(p.As), uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg), uint32(p.GetFrom3().Reg)) + + case 92: /* X-form instructions, 3-operands */ + if p.To.Type == obj.TYPE_CONST { + /* imm reg reg */ + xf := int32(p.From.Reg) + if REG_F0 <= xf && xf <= REG_F31 { + /* operand order: FRA, FRB, BF */ + bf := int(c.regoff(&p.To)) << 2 + o1 = AOP_RRR(c.opirr(p.As), uint32(bf), uint32(p.From.Reg), uint32(p.Reg)) + } else { + /* operand order: RA, RB, L */ + l := int(c.regoff(&p.To)) + o1 = AOP_RRR(c.opirr(p.As), uint32(l), uint32(p.From.Reg), uint32(p.Reg)) + } + } else if p.From3Type() == obj.TYPE_CONST { + /* reg reg imm */ + /* operand order: RB, L, RA */ + l := int(c.regoff(p.GetFrom3())) + o1 = AOP_RRR(c.opirr(p.As), uint32(l), uint32(p.To.Reg), uint32(p.From.Reg)) + } else if p.To.Type == obj.TYPE_REG { + cr := int32(p.To.Reg) + if REG_CR0 <= cr && cr <= REG_CR7 { + /* cr reg reg */ + /* operand order: RA, RB, BF */ + bf := (int(p.To.Reg) & 7) << 2 + o1 = AOP_RRR(c.opirr(p.As), uint32(bf), uint32(p.From.Reg), uint32(p.Reg)) + } else if p.From.Type == obj.TYPE_CONST { + /* reg imm */ + /* operand order: L, RT */ + l := int(c.regoff(&p.From)) + o1 = AOP_RRR(c.opirr(p.As), uint32(p.To.Reg), uint32(l), uint32(p.Reg)) + } else { + switch p.As { + case ACOPY, APASTECC: + o1 = AOP_RRR(c.opirr(p.As), uint32(1), uint32(p.From.Reg), uint32(p.To.Reg)) + default: + /* reg reg reg */ + /* operand order: RS, RB, RA */ + o1 = AOP_RRR(c.oprrr(p.As), uint32(p.From.Reg), uint32(p.To.Reg), uint32(p.Reg)) + } + } + } + + case 93: /* X-form 
instructions, 2-operands */ + if p.To.Type == obj.TYPE_CONST { + /* imm reg */ + /* operand order: FRB, BF */ + bf := int(c.regoff(&p.To)) << 2 + o1 = AOP_RR(c.opirr(p.As), uint32(bf), uint32(p.From.Reg)) + } else if p.Reg == 0 { + /* popcnt* r,r, X-form */ + /* operand order: RS, RA */ + o1 = AOP_RRR(c.oprrr(p.As), uint32(p.From.Reg), uint32(p.To.Reg), uint32(p.Reg)) + } + + case 94: /* Z23-form instructions, 4-operands */ + /* reg reg reg imm */ + /* operand order: RA, RB, CY, RT */ + cy := int(c.regoff(p.GetFrom3())) + o1 = AOP_Z23I(c.oprrr(p.As), uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg), uint32(cy)) + + case 96: /* VSX load, DQ-form */ + /* reg imm reg */ + /* operand order: (RA)(DQ), XT */ + dq := int16(c.regoff(&p.From)) + if (dq & 15) != 0 { + c.ctxt.Diag("invalid offset for DQ form load/store %v", dq) + } + o1 = AOP_DQ(c.opload(p.As), uint32(p.To.Reg), uint32(p.From.Reg), uint32(dq)) + + case 97: /* VSX store, DQ-form */ + /* reg imm reg */ + /* operand order: XT, (RA)(DQ) */ + dq := int16(c.regoff(&p.To)) + if (dq & 15) != 0 { + c.ctxt.Diag("invalid offset for DQ form load/store %v", dq) + } + o1 = AOP_DQ(c.opstore(p.As), uint32(p.From.Reg), uint32(p.To.Reg), uint32(dq)) + case 98: /* VSX indexed load or load with length (also left-justified), x-form */ + /* vsreg, reg, reg */ + o1 = AOP_XX1(c.opload(p.As), uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg)) + case 99: /* VSX store with length (also left-justified) x-form */ + /* reg, reg, vsreg */ + o1 = AOP_XX1(c.opstore(p.As), uint32(p.From.Reg), uint32(p.Reg), uint32(p.To.Reg)) + case 100: /* VSX X-form XXSPLTIB */ + if p.From.Type == obj.TYPE_CONST { + /* imm reg */ + uim := int(c.regoff(&p.From)) + /* imm reg */ + /* Use AOP_XX1 form with 0 for one of the registers. 
*/ + o1 = AOP_XX1(c.oprrr(p.As), uint32(p.To.Reg), uint32(0), uint32(uim)) + } else { + c.ctxt.Diag("invalid ops for %v", p.As) + } + case 101: + o1 = AOP_XX2(c.oprrr(p.As), uint32(p.To.Reg), uint32(0), uint32(p.From.Reg)) + + case 102: /* RLWMI $sh,rs,$mb,$me,rt (M-form opcode)*/ + mb := uint32(c.regoff(&p.RestArgs[0].Addr)) + me := uint32(c.regoff(&p.RestArgs[1].Addr)) + sh := uint32(c.regoff(&p.From)) + o1 = OP_RLW(c.opirr(p.As), uint32(p.To.Reg), uint32(p.Reg), sh, mb, me) + + case 103: /* RLWMI rb,rs,$mb,$me,rt (M-form opcode)*/ + mb := uint32(c.regoff(&p.RestArgs[0].Addr)) + me := uint32(c.regoff(&p.RestArgs[1].Addr)) + o1 = OP_RLW(c.oprrr(p.As), uint32(p.To.Reg), uint32(p.Reg), uint32(p.From.Reg), mb, me) + + case 104: /* VSX mtvsr* instructions, XX1-form RA,RB,XT */ + o1 = AOP_XX1(c.oprrr(p.As), uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg)) + + case 106: /* MOVD spr, soreg */ + v := int32(p.From.Reg) + o1 = OPVCC(31, 339, 0, 0) /* mfspr */ + o1 = AOP_RRR(o1, uint32(REGTMP), 0, 0) | (uint32(v)&0x1f)<<16 | ((uint32(v)>>5)&0x1f)<<11 + so := c.regoff(&p.To) + o2 = AOP_IRR(c.opstore(AMOVD), uint32(REGTMP), uint32(p.To.Reg), uint32(so)) + if so&0x3 != 0 { + log.Fatalf("invalid offset for DS form load/store %v", p) + } + if p.To.Reg == REGTMP { + log.Fatalf("SPR move to memory will clobber R31 %v", p) + } + + case 107: /* MOVD soreg, spr */ + v := int32(p.From.Reg) + so := c.regoff(&p.From) + o1 = AOP_IRR(c.opload(AMOVD), uint32(REGTMP), uint32(v), uint32(so)) + o2 = OPVCC(31, 467, 0, 0) /* mtspr */ + v = int32(p.To.Reg) + o2 = AOP_RRR(o2, uint32(REGTMP), 0, 0) | (uint32(v)&0x1f)<<16 | ((uint32(v)>>5)&0x1f)<<11 + if so&0x3 != 0 { + log.Fatalf("invalid offset for DS form load/store %v", p) + } + + case 108: /* mov r, xoreg ==> stwx rx,ry */ + r := int(p.To.Reg) + o1 = AOP_RRR(c.opstorex(p.As), uint32(p.From.Reg), uint32(p.To.Index), uint32(r)) + + case 109: /* mov xoreg, r ==> lbzx/lhzx/lwzx rx,ry, lbzx rx,ry + extsb r,r */ + r := int(p.From.Reg) + + o1 = 
AOP_RRR(c.oploadx(p.As), uint32(p.To.Reg), uint32(p.From.Index), uint32(r)) + // Sign extend MOVB operations. This is ignored for other cases (o.size == 4). + o2 = LOP_RRR(OP_EXTSB, uint32(p.To.Reg), uint32(p.To.Reg), 0) + } + + out[0] = o1 + out[1] = o2 + out[2] = o3 + out[3] = o4 + out[4] = o5 +} + +// vregoff returns the 64-bit value left in c.instoffset after classifying a. +// It first clears c.instoffset, then calls c.aclass(a) when a is non-nil, so a +// nil operand yields 0. +// NOTE(review): presumably aclass stores the operand's offset or constant in +// c.instoffset as a side effect; confirm against aclass. +func (c *ctxt9) vregoff(a *obj.Addr) int64 { + c.instoffset = 0 + if a != nil { + c.aclass(a) + } + return c.instoffset +} + +// regoff returns vregoff(a) converted to int32. The conversion keeps only the +// low 32 bits, so a value that does not fit in an int32 is truncated. +func (c *ctxt9) regoff(a *obj.Addr) int32 { + return int32(c.vregoff(a)) +} + +func (c *ctxt9) oprrr(a obj.As) uint32 { + switch a { + case AADD: + return OPVCC(31, 266, 0, 0) + case AADDCC: + return OPVCC(31, 266, 0, 1) + case AADDV: + return OPVCC(31, 266, 1, 0) + case AADDVCC: + return OPVCC(31, 266, 1, 1) + case AADDC: + return OPVCC(31, 10, 0, 0) + case AADDCCC: + return OPVCC(31, 10, 0, 1) + case AADDCV: + return OPVCC(31, 10, 1, 0) + case AADDCVCC: + return OPVCC(31, 10, 1, 1) + case AADDE: + return OPVCC(31, 138, 0, 0) + case AADDECC: + return OPVCC(31, 138, 0, 1) + case AADDEV: + return OPVCC(31, 138, 1, 0) + case AADDEVCC: + return OPVCC(31, 138, 1, 1) + case AADDME: + return OPVCC(31, 234, 0, 0) + case AADDMECC: + return OPVCC(31, 234, 0, 1) + case AADDMEV: + return OPVCC(31, 234, 1, 0) + case AADDMEVCC: + return OPVCC(31, 234, 1, 1) + case AADDZE: + return OPVCC(31, 202, 0, 0) + case AADDZECC: + return OPVCC(31, 202, 0, 1) + case AADDZEV: + return OPVCC(31, 202, 1, 0) + case AADDZEVCC: + return OPVCC(31, 202, 1, 1) + case AADDEX: + return OPVCC(31, 170, 0, 0) /* addex - v3.0b */ + + case AAND: + return OPVCC(31, 28, 0, 0) + case AANDCC: + return OPVCC(31, 28, 0, 1) + case AANDN: + return OPVCC(31, 60, 0, 0) + case AANDNCC: + return OPVCC(31, 60, 0, 1) + + case ACMP: + return OPVCC(31, 0, 0, 0) | 1<<21 /* L=1 */ + case ACMPU: + return OPVCC(31, 32, 0, 0) | 1<<21 + case ACMPW: + return OPVCC(31, 0, 0, 0) /* L=0 */ + case ACMPWU: + return OPVCC(31, 32, 0, 0) + case ACMPB: + return OPVCC(31, 508, 0, 0) /* cmpb - v2.05 */ + case ACMPEQB:
+ return OPVCC(31, 224, 0, 0) /* cmpeqb - v3.00 */ + + case ACNTLZW: + return OPVCC(31, 26, 0, 0) + case ACNTLZWCC: + return OPVCC(31, 26, 0, 1) + case ACNTLZD: + return OPVCC(31, 58, 0, 0) + case ACNTLZDCC: + return OPVCC(31, 58, 0, 1) + + case ACRAND: + return OPVCC(19, 257, 0, 0) + case ACRANDN: + return OPVCC(19, 129, 0, 0) + case ACREQV: + return OPVCC(19, 289, 0, 0) + case ACRNAND: + return OPVCC(19, 225, 0, 0) + case ACRNOR: + return OPVCC(19, 33, 0, 0) + case ACROR: + return OPVCC(19, 449, 0, 0) + case ACRORN: + return OPVCC(19, 417, 0, 0) + case ACRXOR: + return OPVCC(19, 193, 0, 0) + + case ADCBF: + return OPVCC(31, 86, 0, 0) + case ADCBI: + return OPVCC(31, 470, 0, 0) + case ADCBST: + return OPVCC(31, 54, 0, 0) + case ADCBT: + return OPVCC(31, 278, 0, 0) + case ADCBTST: + return OPVCC(31, 246, 0, 0) + case ADCBZ: + return OPVCC(31, 1014, 0, 0) + + case AMODUD: + return OPVCC(31, 265, 0, 0) /* modud - v3.0 */ + case AMODUW: + return OPVCC(31, 267, 0, 0) /* moduw - v3.0 */ + case AMODSD: + return OPVCC(31, 777, 0, 0) /* modsd - v3.0 */ + case AMODSW: + return OPVCC(31, 779, 0, 0) /* modsw - v3.0 */ + + case ADIVW, AREM: + return OPVCC(31, 491, 0, 0) + + case ADIVWCC: + return OPVCC(31, 491, 0, 1) + + case ADIVWV: + return OPVCC(31, 491, 1, 0) + + case ADIVWVCC: + return OPVCC(31, 491, 1, 1) + + case ADIVWU, AREMU: + return OPVCC(31, 459, 0, 0) + + case ADIVWUCC: + return OPVCC(31, 459, 0, 1) + + case ADIVWUV: + return OPVCC(31, 459, 1, 0) + + case ADIVWUVCC: + return OPVCC(31, 459, 1, 1) + + case ADIVD, AREMD: + return OPVCC(31, 489, 0, 0) + + case ADIVDCC: + return OPVCC(31, 489, 0, 1) + + case ADIVDE: + return OPVCC(31, 425, 0, 0) + + case ADIVDECC: + return OPVCC(31, 425, 0, 1) + + case ADIVDEU: + return OPVCC(31, 393, 0, 0) + + case ADIVDEUCC: + return OPVCC(31, 393, 0, 1) + + case ADIVDV: + return OPVCC(31, 489, 1, 0) + + case ADIVDVCC: + return OPVCC(31, 489, 1, 1) + + case ADIVDU, AREMDU: + return OPVCC(31, 457, 0, 0) + + case ADIVDUCC: + return 
OPVCC(31, 457, 0, 1) + + case ADIVDUV: + return OPVCC(31, 457, 1, 0) + + case ADIVDUVCC: + return OPVCC(31, 457, 1, 1) + + case AEIEIO: + return OPVCC(31, 854, 0, 0) + + case AEQV: + return OPVCC(31, 284, 0, 0) + case AEQVCC: + return OPVCC(31, 284, 0, 1) + + case AEXTSB: + return OPVCC(31, 954, 0, 0) + case AEXTSBCC: + return OPVCC(31, 954, 0, 1) + case AEXTSH: + return OPVCC(31, 922, 0, 0) + case AEXTSHCC: + return OPVCC(31, 922, 0, 1) + case AEXTSW: + return OPVCC(31, 986, 0, 0) + case AEXTSWCC: + return OPVCC(31, 986, 0, 1) + + case AFABS: + return OPVCC(63, 264, 0, 0) + case AFABSCC: + return OPVCC(63, 264, 0, 1) + case AFADD: + return OPVCC(63, 21, 0, 0) + case AFADDCC: + return OPVCC(63, 21, 0, 1) + case AFADDS: + return OPVCC(59, 21, 0, 0) + case AFADDSCC: + return OPVCC(59, 21, 0, 1) + case AFCMPO: + return OPVCC(63, 32, 0, 0) + case AFCMPU: + return OPVCC(63, 0, 0, 0) + case AFCFID: + return OPVCC(63, 846, 0, 0) + case AFCFIDCC: + return OPVCC(63, 846, 0, 1) + case AFCFIDU: + return OPVCC(63, 974, 0, 0) + case AFCFIDUCC: + return OPVCC(63, 974, 0, 1) + case AFCFIDS: + return OPVCC(59, 846, 0, 0) + case AFCFIDSCC: + return OPVCC(59, 846, 0, 1) + case AFCTIW: + return OPVCC(63, 14, 0, 0) + case AFCTIWCC: + return OPVCC(63, 14, 0, 1) + case AFCTIWZ: + return OPVCC(63, 15, 0, 0) + case AFCTIWZCC: + return OPVCC(63, 15, 0, 1) + case AFCTID: + return OPVCC(63, 814, 0, 0) + case AFCTIDCC: + return OPVCC(63, 814, 0, 1) + case AFCTIDZ: + return OPVCC(63, 815, 0, 0) + case AFCTIDZCC: + return OPVCC(63, 815, 0, 1) + case AFDIV: + return OPVCC(63, 18, 0, 0) + case AFDIVCC: + return OPVCC(63, 18, 0, 1) + case AFDIVS: + return OPVCC(59, 18, 0, 0) + case AFDIVSCC: + return OPVCC(59, 18, 0, 1) + case AFMADD: + return OPVCC(63, 29, 0, 0) + case AFMADDCC: + return OPVCC(63, 29, 0, 1) + case AFMADDS: + return OPVCC(59, 29, 0, 0) + case AFMADDSCC: + return OPVCC(59, 29, 0, 1) + + case AFMOVS, AFMOVD: + return OPVCC(63, 72, 0, 0) /* load */ + case AFMOVDCC: + return OPVCC(63, 
72, 0, 1) + case AFMSUB: + return OPVCC(63, 28, 0, 0) + case AFMSUBCC: + return OPVCC(63, 28, 0, 1) + case AFMSUBS: + return OPVCC(59, 28, 0, 0) + case AFMSUBSCC: + return OPVCC(59, 28, 0, 1) + case AFMUL: + return OPVCC(63, 25, 0, 0) + case AFMULCC: + return OPVCC(63, 25, 0, 1) + case AFMULS: + return OPVCC(59, 25, 0, 0) + case AFMULSCC: + return OPVCC(59, 25, 0, 1) + case AFNABS: + return OPVCC(63, 136, 0, 0) + case AFNABSCC: + return OPVCC(63, 136, 0, 1) + case AFNEG: + return OPVCC(63, 40, 0, 0) + case AFNEGCC: + return OPVCC(63, 40, 0, 1) + case AFNMADD: + return OPVCC(63, 31, 0, 0) + case AFNMADDCC: + return OPVCC(63, 31, 0, 1) + case AFNMADDS: + return OPVCC(59, 31, 0, 0) + case AFNMADDSCC: + return OPVCC(59, 31, 0, 1) + case AFNMSUB: + return OPVCC(63, 30, 0, 0) + case AFNMSUBCC: + return OPVCC(63, 30, 0, 1) + case AFNMSUBS: + return OPVCC(59, 30, 0, 0) + case AFNMSUBSCC: + return OPVCC(59, 30, 0, 1) + case AFCPSGN: + return OPVCC(63, 8, 0, 0) + case AFCPSGNCC: + return OPVCC(63, 8, 0, 1) + case AFRES: + return OPVCC(59, 24, 0, 0) + case AFRESCC: + return OPVCC(59, 24, 0, 1) + case AFRIM: + return OPVCC(63, 488, 0, 0) + case AFRIMCC: + return OPVCC(63, 488, 0, 1) + case AFRIP: + return OPVCC(63, 456, 0, 0) + case AFRIPCC: + return OPVCC(63, 456, 0, 1) + case AFRIZ: + return OPVCC(63, 424, 0, 0) + case AFRIZCC: + return OPVCC(63, 424, 0, 1) + case AFRIN: + return OPVCC(63, 392, 0, 0) + case AFRINCC: + return OPVCC(63, 392, 0, 1) + case AFRSP: + return OPVCC(63, 12, 0, 0) + case AFRSPCC: + return OPVCC(63, 12, 0, 1) + case AFRSQRTE: + return OPVCC(63, 26, 0, 0) + case AFRSQRTECC: + return OPVCC(63, 26, 0, 1) + case AFSEL: + return OPVCC(63, 23, 0, 0) + case AFSELCC: + return OPVCC(63, 23, 0, 1) + case AFSQRT: + return OPVCC(63, 22, 0, 0) + case AFSQRTCC: + return OPVCC(63, 22, 0, 1) + case AFSQRTS: + return OPVCC(59, 22, 0, 0) + case AFSQRTSCC: + return OPVCC(59, 22, 0, 1) + case AFSUB: + return OPVCC(63, 20, 0, 0) + case AFSUBCC: + return OPVCC(63, 20, 0, 1) 
+ case AFSUBS: + return OPVCC(59, 20, 0, 0) + case AFSUBSCC: + return OPVCC(59, 20, 0, 1) + + case AICBI: + return OPVCC(31, 982, 0, 0) + case AISYNC: + return OPVCC(19, 150, 0, 0) + + case AMTFSB0: + return OPVCC(63, 70, 0, 0) + case AMTFSB0CC: + return OPVCC(63, 70, 0, 1) + case AMTFSB1: + return OPVCC(63, 38, 0, 0) + case AMTFSB1CC: + return OPVCC(63, 38, 0, 1) + + case AMULHW: + return OPVCC(31, 75, 0, 0) + case AMULHWCC: + return OPVCC(31, 75, 0, 1) + case AMULHWU: + return OPVCC(31, 11, 0, 0) + case AMULHWUCC: + return OPVCC(31, 11, 0, 1) + case AMULLW: + return OPVCC(31, 235, 0, 0) + case AMULLWCC: + return OPVCC(31, 235, 0, 1) + case AMULLWV: + return OPVCC(31, 235, 1, 0) + case AMULLWVCC: + return OPVCC(31, 235, 1, 1) + + case AMULHD: + return OPVCC(31, 73, 0, 0) + case AMULHDCC: + return OPVCC(31, 73, 0, 1) + case AMULHDU: + return OPVCC(31, 9, 0, 0) + case AMULHDUCC: + return OPVCC(31, 9, 0, 1) + case AMULLD: + return OPVCC(31, 233, 0, 0) + case AMULLDCC: + return OPVCC(31, 233, 0, 1) + case AMULLDV: + return OPVCC(31, 233, 1, 0) + case AMULLDVCC: + return OPVCC(31, 233, 1, 1) + + case ANAND: + return OPVCC(31, 476, 0, 0) + case ANANDCC: + return OPVCC(31, 476, 0, 1) + case ANEG: + return OPVCC(31, 104, 0, 0) + case ANEGCC: + return OPVCC(31, 104, 0, 1) + case ANEGV: + return OPVCC(31, 104, 1, 0) + case ANEGVCC: + return OPVCC(31, 104, 1, 1) + case ANOR: + return OPVCC(31, 124, 0, 0) + case ANORCC: + return OPVCC(31, 124, 0, 1) + case AOR: + return OPVCC(31, 444, 0, 0) + case AORCC: + return OPVCC(31, 444, 0, 1) + case AORN: + return OPVCC(31, 412, 0, 0) + case AORNCC: + return OPVCC(31, 412, 0, 1) + + case APOPCNTD: + return OPVCC(31, 506, 0, 0) /* popcntd - v2.06 */ + case APOPCNTW: + return OPVCC(31, 378, 0, 0) /* popcntw - v2.06 */ + case APOPCNTB: + return OPVCC(31, 122, 0, 0) /* popcntb - v2.02 */ + case ACNTTZW: + return OPVCC(31, 538, 0, 0) /* cnttzw - v3.00 */ + case ACNTTZWCC: + return OPVCC(31, 538, 0, 1) /* cnttzw. 
- v3.00 */ + case ACNTTZD: + return OPVCC(31, 570, 0, 0) /* cnttzd - v3.00 */ + case ACNTTZDCC: + return OPVCC(31, 570, 0, 1) /* cnttzd. - v3.00 */ + + case ARFI: + return OPVCC(19, 50, 0, 0) + case ARFCI: + return OPVCC(19, 51, 0, 0) + case ARFID: + return OPVCC(19, 18, 0, 0) + case AHRFID: + return OPVCC(19, 274, 0, 0) + + case ARLWMI: + return OPVCC(20, 0, 0, 0) + case ARLWMICC: + return OPVCC(20, 0, 0, 1) + case ARLWNM: + return OPVCC(23, 0, 0, 0) + case ARLWNMCC: + return OPVCC(23, 0, 0, 1) + + case ARLDCL: + return OPVCC(30, 8, 0, 0) /* rldcl */ + case ARLDCLCC: + return OPVCC(30, 8, 0, 1) /* rldcl. - same XO as rldcl, Rc=1 (was XO 0, which is the rldicl. pattern) */ + + case ARLDCR: + return OPVCC(30, 9, 0, 0) + case ARLDCRCC: + return OPVCC(30, 9, 0, 1) + + case ARLDICL: + return OPVCC(30, 0, 0, 0) + case ARLDICLCC: + return OPVCC(30, 0, 0, 1) + case ARLDICR: + return OPMD(30, 1, 0) // rldicr + case ARLDICRCC: + return OPMD(30, 1, 1) // rldicr. + + case ARLDIC: + return OPMD(30, 2, 0) // rldic + case ARLDICCC: + return OPMD(30, 2, 1) // rldic. + + case ASYSCALL: + return OPVCC(17, 1, 0, 0) + + case ASLW: + return OPVCC(31, 24, 0, 0) + case ASLWCC: + return OPVCC(31, 24, 0, 1) + case ASLD: + return OPVCC(31, 27, 0, 0) + case ASLDCC: + return OPVCC(31, 27, 0, 1) + + case ASRAW: + return OPVCC(31, 792, 0, 0) + case ASRAWCC: + return OPVCC(31, 792, 0, 1) + case ASRAD: + return OPVCC(31, 794, 0, 0) + case ASRADCC: + return OPVCC(31, 794, 0, 1) + + case AEXTSWSLI: + return OPVCC(31, 445, 0, 0) + case AEXTSWSLICC: + return OPVCC(31, 445, 0, 1) + + case ASRW: + return OPVCC(31, 536, 0, 0) + case ASRWCC: + return OPVCC(31, 536, 0, 1) + case ASRD: + return OPVCC(31, 539, 0, 0) + case ASRDCC: + return OPVCC(31, 539, 0, 1) + + case ASUB: + return OPVCC(31, 40, 0, 0) + case ASUBCC: + return OPVCC(31, 40, 0, 1) + case ASUBV: + return OPVCC(31, 40, 1, 0) + case ASUBVCC: + return OPVCC(31, 40, 1, 1) + case ASUBC: + return OPVCC(31, 8, 0, 0) + case ASUBCCC: + return OPVCC(31, 8, 0, 1) + case ASUBCV: + return OPVCC(31, 8, 1, 0) + case ASUBCVCC: + return OPVCC(31, 
8, 1, 1) + case ASUBE: + return OPVCC(31, 136, 0, 0) + case ASUBECC: + return OPVCC(31, 136, 0, 1) + case ASUBEV: + return OPVCC(31, 136, 1, 0) + case ASUBEVCC: + return OPVCC(31, 136, 1, 1) + case ASUBME: + return OPVCC(31, 232, 0, 0) + case ASUBMECC: + return OPVCC(31, 232, 0, 1) + case ASUBMEV: + return OPVCC(31, 232, 1, 0) + case ASUBMEVCC: + return OPVCC(31, 232, 1, 1) + case ASUBZE: + return OPVCC(31, 200, 0, 0) + case ASUBZECC: + return OPVCC(31, 200, 0, 1) + case ASUBZEV: + return OPVCC(31, 200, 1, 0) + case ASUBZEVCC: + return OPVCC(31, 200, 1, 1) + + case ASYNC: + return OPVCC(31, 598, 0, 0) + case ALWSYNC: + return OPVCC(31, 598, 0, 0) | 1<<21 + + case APTESYNC: + return OPVCC(31, 598, 0, 0) | 2<<21 + + case ATLBIE: + return OPVCC(31, 306, 0, 0) + case ATLBIEL: + return OPVCC(31, 274, 0, 0) + case ATLBSYNC: + return OPVCC(31, 566, 0, 0) + case ASLBIA: + return OPVCC(31, 498, 0, 0) + case ASLBIE: + return OPVCC(31, 434, 0, 0) + case ASLBMFEE: + return OPVCC(31, 915, 0, 0) + case ASLBMFEV: + return OPVCC(31, 851, 0, 0) + case ASLBMTE: + return OPVCC(31, 402, 0, 0) + + case ATW: + return OPVCC(31, 4, 0, 0) + case ATD: + return OPVCC(31, 68, 0, 0) + + /* Vector (VMX/Altivec) instructions */ + /* ISA 2.03 enables these for PPC970. For POWERx processors, these */ + /* are enabled starting at POWER6 (ISA 2.05). 
*/ + case AVAND: + return OPVX(4, 1028, 0, 0) /* vand - v2.03 */ + case AVANDC: + return OPVX(4, 1092, 0, 0) /* vandc - v2.03 */ + case AVNAND: + return OPVX(4, 1412, 0, 0) /* vnand - v2.07 */ + + case AVOR: + return OPVX(4, 1156, 0, 0) /* vor - v2.03 */ + case AVORC: + return OPVX(4, 1348, 0, 0) /* vorc - v2.07 */ + case AVNOR: + return OPVX(4, 1284, 0, 0) /* vnor - v2.03 */ + case AVXOR: + return OPVX(4, 1220, 0, 0) /* vxor - v2.03 */ + case AVEQV: + return OPVX(4, 1668, 0, 0) /* veqv - v2.07 */ + + case AVADDUBM: + return OPVX(4, 0, 0, 0) /* vaddubm - v2.03 */ + case AVADDUHM: + return OPVX(4, 64, 0, 0) /* vadduhm - v2.03 */ + case AVADDUWM: + return OPVX(4, 128, 0, 0) /* vadduwm - v2.03 */ + case AVADDUDM: + return OPVX(4, 192, 0, 0) /* vaddudm - v2.07 */ + case AVADDUQM: + return OPVX(4, 256, 0, 0) /* vadduqm - v2.07 */ + + case AVADDCUQ: + return OPVX(4, 320, 0, 0) /* vaddcuq - v2.07 */ + case AVADDCUW: + return OPVX(4, 384, 0, 0) /* vaddcuw - v2.03 */ + + case AVADDUBS: + return OPVX(4, 512, 0, 0) /* vaddubs - v2.03 */ + case AVADDUHS: + return OPVX(4, 576, 0, 0) /* vadduhs - v2.03 */ + case AVADDUWS: + return OPVX(4, 640, 0, 0) /* vadduws - v2.03 */ + + case AVADDSBS: + return OPVX(4, 768, 0, 0) /* vaddsbs - v2.03 */ + case AVADDSHS: + return OPVX(4, 832, 0, 0) /* vaddshs - v2.03 */ + case AVADDSWS: + return OPVX(4, 896, 0, 0) /* vaddsws - v2.03 */ + + case AVADDEUQM: + return OPVX(4, 60, 0, 0) /* vaddeuqm - v2.07 */ + case AVADDECUQ: + return OPVX(4, 61, 0, 0) /* vaddecuq - v2.07 */ + + case AVMULESB: + return OPVX(4, 776, 0, 0) /* vmulesb - v2.03 */ + case AVMULOSB: + return OPVX(4, 264, 0, 0) /* vmulosb - v2.03 */ + case AVMULEUB: + return OPVX(4, 520, 0, 0) /* vmuleub - v2.03 */ + case AVMULOUB: + return OPVX(4, 8, 0, 0) /* vmuloub - v2.03 */ + case AVMULESH: + return OPVX(4, 840, 0, 0) /* vmulesh - v2.03 */ + case AVMULOSH: + return OPVX(4, 328, 0, 0) /* vmulosh - v2.03 */ + case AVMULEUH: + return OPVX(4, 584, 0, 0) /* vmuleuh - v2.03 */ + case 
AVMULOUH: + return OPVX(4, 72, 0, 0) /* vmulouh - v2.03 */ + case AVMULESW: + return OPVX(4, 904, 0, 0) /* vmulesw - v2.07 */ + case AVMULOSW: + return OPVX(4, 392, 0, 0) /* vmulosw - v2.07 */ + case AVMULEUW: + return OPVX(4, 648, 0, 0) /* vmuleuw - v2.07 */ + case AVMULOUW: + return OPVX(4, 136, 0, 0) /* vmulouw - v2.07 */ + case AVMULUWM: + return OPVX(4, 137, 0, 0) /* vmuluwm - v2.07 */ + + case AVPMSUMB: + return OPVX(4, 1032, 0, 0) /* vpmsumb - v2.07 */ + case AVPMSUMH: + return OPVX(4, 1096, 0, 0) /* vpmsumh - v2.07 */ + case AVPMSUMW: + return OPVX(4, 1160, 0, 0) /* vpmsumw - v2.07 */ + case AVPMSUMD: + return OPVX(4, 1224, 0, 0) /* vpmsumd - v2.07 */ + + case AVMSUMUDM: + return OPVX(4, 35, 0, 0) /* vmsumudm - v3.00b */ + + case AVSUBUBM: + return OPVX(4, 1024, 0, 0) /* vsububm - v2.03 */ + case AVSUBUHM: + return OPVX(4, 1088, 0, 0) /* vsubuhm - v2.03 */ + case AVSUBUWM: + return OPVX(4, 1152, 0, 0) /* vsubuwm - v2.03 */ + case AVSUBUDM: + return OPVX(4, 1216, 0, 0) /* vsubudm - v2.07 */ + case AVSUBUQM: + return OPVX(4, 1280, 0, 0) /* vsubuqm - v2.07 */ + + case AVSUBCUQ: + return OPVX(4, 1344, 0, 0) /* vsubcuq - v2.07 */ + case AVSUBCUW: + return OPVX(4, 1408, 0, 0) /* vsubcuw - v2.03 */ + + case AVSUBUBS: + return OPVX(4, 1536, 0, 0) /* vsububs - v2.03 */ + case AVSUBUHS: + return OPVX(4, 1600, 0, 0) /* vsubuhs - v2.03 */ + case AVSUBUWS: + return OPVX(4, 1664, 0, 0) /* vsubuws - v2.03 */ + + case AVSUBSBS: + return OPVX(4, 1792, 0, 0) /* vsubsbs - v2.03 */ + case AVSUBSHS: + return OPVX(4, 1856, 0, 0) /* vsubshs - v2.03 */ + case AVSUBSWS: + return OPVX(4, 1920, 0, 0) /* vsubsws - v2.03 */ + + case AVSUBEUQM: + return OPVX(4, 62, 0, 0) /* vsubeuqm - v2.07 */ + case AVSUBECUQ: + return OPVX(4, 63, 0, 0) /* vsubecuq - v2.07 */ + + case AVRLB: + return OPVX(4, 4, 0, 0) /* vrlb - v2.03 */ + case AVRLH: + return OPVX(4, 68, 0, 0) /* vrlh - v2.03 */ + case AVRLW: + return OPVX(4, 132, 0, 0) /* vrlw - v2.03 */ + case AVRLD: + return OPVX(4, 196, 0, 0) /* 
vrld - v2.07 */ + + case AVMRGOW: + return OPVX(4, 1676, 0, 0) /* vmrgow - v2.07 */ + case AVMRGEW: + return OPVX(4, 1932, 0, 0) /* vmrgew - v2.07 */ + + case AVSLB: + return OPVX(4, 260, 0, 0) /* vslb - v2.03 */ + case AVSLH: + return OPVX(4, 324, 0, 0) /* vslh - v2.03 */ + case AVSLW: + return OPVX(4, 388, 0, 0) /* vslw - v2.03 */ + case AVSL: + return OPVX(4, 452, 0, 0) /* vsl - v2.03 */ + case AVSLO: + return OPVX(4, 1036, 0, 0) /* vslo - v2.03 */ + case AVSRB: + return OPVX(4, 516, 0, 0) /* vsrb - v2.03 */ + case AVSRH: + return OPVX(4, 580, 0, 0) /* vsrh - v2.03 */ + case AVSRW: + return OPVX(4, 644, 0, 0) /* vsrw - v2.03 */ + case AVSR: + return OPVX(4, 708, 0, 0) /* vsr - v2.03 */ + case AVSRO: + return OPVX(4, 1100, 0, 0) /* vsro - v2.03 */ + case AVSLD: + return OPVX(4, 1476, 0, 0) /* vsld - v2.07 */ + case AVSRD: + return OPVX(4, 1732, 0, 0) /* vsrd - v2.07 */ + + case AVSRAB: + return OPVX(4, 772, 0, 0) /* vsrab - v2.03 */ + case AVSRAH: + return OPVX(4, 836, 0, 0) /* vsrah - v2.03 */ + case AVSRAW: + return OPVX(4, 900, 0, 0) /* vsraw - v2.03 */ + case AVSRAD: + return OPVX(4, 964, 0, 0) /* vsrad - v2.07 */ + + case AVBPERMQ: + return OPVC(4, 1356, 0, 0) /* vbpermq - v2.07 */ + case AVBPERMD: + return OPVC(4, 1484, 0, 0) /* vbpermd - v3.00 */ + + case AVCLZB: + return OPVX(4, 1794, 0, 0) /* vclzb - v2.07 */ + case AVCLZH: + return OPVX(4, 1858, 0, 0) /* vclzh - v2.07 */ + case AVCLZW: + return OPVX(4, 1922, 0, 0) /* vclzw - v2.07 */ + case AVCLZD: + return OPVX(4, 1986, 0, 0) /* vclzd - v2.07 */ + + case AVPOPCNTB: + return OPVX(4, 1795, 0, 0) /* vpopcntb - v2.07 */ + case AVPOPCNTH: + return OPVX(4, 1859, 0, 0) /* vpopcnth - v2.07 */ + case AVPOPCNTW: + return OPVX(4, 1923, 0, 0) /* vpopcntw - v2.07 */ + case AVPOPCNTD: + return OPVX(4, 1987, 0, 0) /* vpopcntd - v2.07 */ + + case AVCMPEQUB: + return OPVC(4, 6, 0, 0) /* vcmpequb - v2.03 */ + case AVCMPEQUBCC: + return OPVC(4, 6, 0, 1) /* vcmpequb. 
- v2.03 */ + case AVCMPEQUH: + return OPVC(4, 70, 0, 0) /* vcmpequh - v2.03 */ + case AVCMPEQUHCC: + return OPVC(4, 70, 0, 1) /* vcmpequh. - v2.03 */ + case AVCMPEQUW: + return OPVC(4, 134, 0, 0) /* vcmpequw - v2.03 */ + case AVCMPEQUWCC: + return OPVC(4, 134, 0, 1) /* vcmpequw. - v2.03 */ + case AVCMPEQUD: + return OPVC(4, 199, 0, 0) /* vcmpequd - v2.07 */ + case AVCMPEQUDCC: + return OPVC(4, 199, 0, 1) /* vcmpequd. - v2.07 */ + + case AVCMPGTUB: + return OPVC(4, 518, 0, 0) /* vcmpgtub - v2.03 */ + case AVCMPGTUBCC: + return OPVC(4, 518, 0, 1) /* vcmpgtub. - v2.03 */ + case AVCMPGTUH: + return OPVC(4, 582, 0, 0) /* vcmpgtuh - v2.03 */ + case AVCMPGTUHCC: + return OPVC(4, 582, 0, 1) /* vcmpgtuh. - v2.03 */ + case AVCMPGTUW: + return OPVC(4, 646, 0, 0) /* vcmpgtuw - v2.03 */ + case AVCMPGTUWCC: + return OPVC(4, 646, 0, 1) /* vcmpgtuw. - v2.03 */ + case AVCMPGTUD: + return OPVC(4, 711, 0, 0) /* vcmpgtud - v2.07 */ + case AVCMPGTUDCC: + return OPVC(4, 711, 0, 1) /* vcmpgtud. v2.07 */ + case AVCMPGTSB: + return OPVC(4, 774, 0, 0) /* vcmpgtsb - v2.03 */ + case AVCMPGTSBCC: + return OPVC(4, 774, 0, 1) /* vcmpgtsb. - v2.03 */ + case AVCMPGTSH: + return OPVC(4, 838, 0, 0) /* vcmpgtsh - v2.03 */ + case AVCMPGTSHCC: + return OPVC(4, 838, 0, 1) /* vcmpgtsh. - v2.03 */ + case AVCMPGTSW: + return OPVC(4, 902, 0, 0) /* vcmpgtsw - v2.03 */ + case AVCMPGTSWCC: + return OPVC(4, 902, 0, 1) /* vcmpgtsw. - v2.03 */ + case AVCMPGTSD: + return OPVC(4, 967, 0, 0) /* vcmpgtsd - v2.07 */ + case AVCMPGTSDCC: + return OPVC(4, 967, 0, 1) /* vcmpgtsd. - v2.07 */ + + case AVCMPNEZB: + return OPVC(4, 263, 0, 0) /* vcmpnezb - v3.00 */ + case AVCMPNEZBCC: + return OPVC(4, 263, 0, 1) /* vcmpnezb. - v3.00 */ + case AVCMPNEB: + return OPVC(4, 7, 0, 0) /* vcmpneb - v3.00 */ + case AVCMPNEBCC: + return OPVC(4, 7, 0, 1) /* vcmpneb. - v3.00 */ + case AVCMPNEH: + return OPVC(4, 71, 0, 0) /* vcmpneh - v3.00 */ + case AVCMPNEHCC: + return OPVC(4, 71, 0, 1) /* vcmpneh. 
- v3.00 */ + case AVCMPNEW: + return OPVC(4, 135, 0, 0) /* vcmpnew - v3.00 */ + case AVCMPNEWCC: + return OPVC(4, 135, 0, 1) /* vcmpnew. - v3.00 */ + + case AVPERM: + return OPVX(4, 43, 0, 0) /* vperm - v2.03 */ + case AVPERMXOR: + return OPVX(4, 45, 0, 0) /* vpermxor - v2.07 */ + case AVPERMR: + return OPVX(4, 59, 0, 0) /* vpermr - v3.0 */ + + case AVSEL: + return OPVX(4, 42, 0, 0) /* vsel - v2.03 */ + + case AVCIPHER: + return OPVX(4, 1288, 0, 0) /* vcipher - v2.07 */ + case AVCIPHERLAST: + return OPVX(4, 1289, 0, 0) /* vcipherlast - v2.07 */ + case AVNCIPHER: + return OPVX(4, 1352, 0, 0) /* vncipher - v2.07 */ + case AVNCIPHERLAST: + return OPVX(4, 1353, 0, 0) /* vncipherlast - v2.07 */ + case AVSBOX: + return OPVX(4, 1480, 0, 0) /* vsbox - v2.07 */ + /* End of vector instructions */ + + /* Vector scalar (VSX) instructions */ + /* ISA 2.06 enables these for POWER7. */ + case AMFVSRD, AMFVRD, AMFFPRD: + return OPVXX1(31, 51, 0) /* mfvsrd - v2.07 */ + case AMFVSRWZ: + return OPVXX1(31, 115, 0) /* mfvsrwz - v2.07 */ + case AMFVSRLD: + return OPVXX1(31, 307, 0) /* mfvsrld - v3.00 */ + + case AMTVSRD, AMTFPRD, AMTVRD: + return OPVXX1(31, 179, 0) /* mtvsrd - v2.07 */ + case AMTVSRWA: + return OPVXX1(31, 211, 0) /* mtvsrwa - v2.07 */ + case AMTVSRWZ: + return OPVXX1(31, 243, 0) /* mtvsrwz - v2.07 */ + case AMTVSRDD: + return OPVXX1(31, 435, 0) /* mtvsrdd - v3.00 */ + case AMTVSRWS: + return OPVXX1(31, 403, 0) /* mtvsrws - v3.00 */ + + case AXXLAND: + return OPVXX3(60, 130, 0) /* xxland - v2.06 */ + case AXXLANDC: + return OPVXX3(60, 138, 0) /* xxlandc - v2.06 */ + case AXXLEQV: + return OPVXX3(60, 186, 0) /* xxleqv - v2.07 */ + case AXXLNAND: + return OPVXX3(60, 178, 0) /* xxlnand - v2.07 */ + + case AXXLORC: + return OPVXX3(60, 170, 0) /* xxlorc - v2.07 */ + case AXXLNOR: + return OPVXX3(60, 162, 0) /* xxlnor - v2.06 */ + case AXXLOR, AXXLORQ: + return OPVXX3(60, 146, 0) /* xxlor - v2.06 */ + case AXXLXOR: + return OPVXX3(60, 154, 0) /* xxlxor - v2.06 */ + + case 
AXXSEL: + return OPVXX4(60, 3, 0) /* xxsel - v2.06 */ + + case AXXMRGHW: + return OPVXX3(60, 18, 0) /* xxmrghw - v2.06 */ + case AXXMRGLW: + return OPVXX3(60, 50, 0) /* xxmrglw - v2.06 */ + + case AXXSPLTW: + return OPVXX2(60, 164, 0) /* xxspltw - v2.06 */ + + case AXXSPLTIB: + return OPVCC(60, 360, 0, 0) /* xxspltib - v3.0 */ + + case AXXPERM: + return OPVXX3(60, 26, 0) /* xxperm - v2.06 */ + case AXXPERMDI: + return OPVXX3(60, 10, 0) /* xxpermdi - v2.06 */ + + case AXXSLDWI: + return OPVXX3(60, 2, 0) /* xxsldwi - v2.06 */ + + case AXXBRQ: + return OPVXX2VA(60, 475, 31) /* xxbrq - v3.0 */ + case AXXBRD: + return OPVXX2VA(60, 475, 23) /* xxbrd - v3.0 */ + case AXXBRW: + return OPVXX2VA(60, 475, 15) /* xxbrw - v3.0 */ + case AXXBRH: + return OPVXX2VA(60, 475, 7) /* xxbrh - v3.0 */ + + case AXSCVDPSP: + return OPVXX2(60, 265, 0) /* xscvdpsp - v2.06 */ + case AXSCVSPDP: + return OPVXX2(60, 329, 0) /* xscvspdp - v2.06 */ + case AXSCVDPSPN: + return OPVXX2(60, 267, 0) /* xscvdpspn - v2.07 */ + case AXSCVSPDPN: + return OPVXX2(60, 331, 0) /* xscvspdpn - v2.07 */ + + case AXVCVDPSP: + return OPVXX2(60, 393, 0) /* xvcvdpsp - v2.06 */ + case AXVCVSPDP: + return OPVXX2(60, 457, 0) /* xvcvspdp - v2.06 */ + + case AXSCVDPSXDS: + return OPVXX2(60, 344, 0) /* xscvdpsxds - v2.06 */ + case AXSCVDPSXWS: + return OPVXX2(60, 88, 0) /* xscvdpsxws - v2.06 */ + case AXSCVDPUXDS: + return OPVXX2(60, 328, 0) /* xscvdpuxds - v2.06 */ + case AXSCVDPUXWS: + return OPVXX2(60, 72, 0) /* xscvdpuxws - v2.06 */ + + case AXSCVSXDDP: + return OPVXX2(60, 376, 0) /* xscvsxddp - v2.06 */ + case AXSCVUXDDP: + return OPVXX2(60, 360, 0) /* xscvuxddp - v2.06 */ + case AXSCVSXDSP: + return OPVXX2(60, 312, 0) /* xscvsxdsp - v2.06 */ + case AXSCVUXDSP: + return OPVXX2(60, 296, 0) /* xscvuxdsp - v2.06 */ + + case AXVCVDPSXDS: + return OPVXX2(60, 472, 0) /* xvcvdpsxds - v2.06 */ + case AXVCVDPSXWS: + return OPVXX2(60, 216, 0) /* xvcvdpsxws - v2.06 */ + case AXVCVDPUXDS: + return OPVXX2(60, 456, 0) /* 
xvcvdpuxds - v2.06 */ + case AXVCVDPUXWS: + return OPVXX2(60, 200, 0) /* xvcvdpuxws - v2.06 */ + case AXVCVSPSXDS: + return OPVXX2(60, 408, 0) /* xvcvspsxds - v2.07 */ + case AXVCVSPSXWS: + return OPVXX2(60, 152, 0) /* xvcvspsxws - v2.07 */ + case AXVCVSPUXDS: + return OPVXX2(60, 392, 0) /* xvcvspuxds - v2.07 */ + case AXVCVSPUXWS: + return OPVXX2(60, 136, 0) /* xvcvspuxws - v2.07 */ + + case AXVCVSXDDP: + return OPVXX2(60, 504, 0) /* xvcvsxddp - v2.06 */ + case AXVCVSXWDP: + return OPVXX2(60, 248, 0) /* xvcvsxwdp - v2.06 */ + case AXVCVUXDDP: + return OPVXX2(60, 488, 0) /* xvcvuxddp - v2.06 */ + case AXVCVUXWDP: + return OPVXX2(60, 232, 0) /* xvcvuxwdp - v2.06 */ + case AXVCVSXDSP: + return OPVXX2(60, 440, 0) /* xvcvsxdsp - v2.06 */ + case AXVCVSXWSP: + return OPVXX2(60, 184, 0) /* xvcvsxwsp - v2.06 */ + case AXVCVUXDSP: + return OPVXX2(60, 424, 0) /* xvcvuxdsp - v2.06 */ + case AXVCVUXWSP: + return OPVXX2(60, 168, 0) /* xvcvuxwsp - v2.06 */ + /* End of VSX instructions */ + + case AMADDHD: + return OPVX(4, 48, 0, 0) /* maddhd - v3.00 */ + case AMADDHDU: + return OPVX(4, 49, 0, 0) /* maddhdu - v3.00 */ + case AMADDLD: + return OPVX(4, 51, 0, 0) /* maddld - v3.00 */ + + case AXOR: + return OPVCC(31, 316, 0, 0) + case AXORCC: + return OPVCC(31, 316, 0, 1) + } + + c.ctxt.Diag("bad r/r, r/r/r or r/r/r/r opcode %v", a) + return 0 +} + +func (c *ctxt9) opirrr(a obj.As) uint32 { + switch a { + /* Vector (VMX/Altivec) instructions */ + /* ISA 2.03 enables these for PPC970. For POWERx processors, these */ + /* are enabled starting at POWER6 (ISA 2.05). */ + case AVSLDOI: + return OPVX(4, 44, 0, 0) /* vsldoi - v2.03 */ + } + + c.ctxt.Diag("bad i/r/r/r opcode %v", a) + return 0 +} + +func (c *ctxt9) opiirr(a obj.As) uint32 { + switch a { + /* Vector (VMX/Altivec) instructions */ + /* ISA 2.07 enables these for POWER8 and beyond. 
*/ + case AVSHASIGMAW: + return OPVX(4, 1666, 0, 0) /* vshasigmaw - v2.07 */ + case AVSHASIGMAD: + return OPVX(4, 1730, 0, 0) /* vshasigmad - v2.07 */ + } + + c.ctxt.Diag("bad i/i/r/r opcode %v", a) + return 0 +} + +func (c *ctxt9) opirr(a obj.As) uint32 { + switch a { + case AADD: + return OPVCC(14, 0, 0, 0) + case AADDC: + return OPVCC(12, 0, 0, 0) + case AADDCCC: + return OPVCC(13, 0, 0, 0) + case AADDIS: + return OPVCC(15, 0, 0, 0) /* ADDIS */ + + case AANDCC: + return OPVCC(28, 0, 0, 0) + case AANDISCC: + return OPVCC(29, 0, 0, 0) /* ANDIS. */ + + case ABR: + return OPVCC(18, 0, 0, 0) + case ABL: + return OPVCC(18, 0, 0, 0) | 1 + case obj.ADUFFZERO: + return OPVCC(18, 0, 0, 0) | 1 + case obj.ADUFFCOPY: + return OPVCC(18, 0, 0, 0) | 1 + case ABC: + return OPVCC(16, 0, 0, 0) + case ABCL: + return OPVCC(16, 0, 0, 0) | 1 + + case ABEQ: + return AOP_RRR(16<<26, BO_BCR, BI_EQ, 0) + case ABGE: + return AOP_RRR(16<<26, BO_NOTBCR, BI_LT, 0) + case ABGT: + return AOP_RRR(16<<26, BO_BCR, BI_GT, 0) + case ABLE: + return AOP_RRR(16<<26, BO_NOTBCR, BI_GT, 0) + case ABLT: + return AOP_RRR(16<<26, BO_BCR, BI_LT, 0) + case ABNE: + return AOP_RRR(16<<26, BO_NOTBCR, BI_EQ, 0) + case ABVC: + return AOP_RRR(16<<26, BO_NOTBCR, BI_FU, 0) + case ABVS: + return AOP_RRR(16<<26, BO_BCR, BI_FU, 0) + case ABDZ: + return AOP_RRR(16<<26, BO_NOTBCTR, 0, 0) + case ABDNZ: + return AOP_RRR(16<<26, BO_BCTR, 0, 0) + + case ACMP: + return OPVCC(11, 0, 0, 0) | 1<<21 /* L=1 */ + case ACMPU: + return OPVCC(10, 0, 0, 0) | 1<<21 + case ACMPW: + return OPVCC(11, 0, 0, 0) /* L=0 */ + case ACMPWU: + return OPVCC(10, 0, 0, 0) + case ACMPEQB: + return OPVCC(31, 224, 0, 0) /* cmpeqb - v3.00 */ + + case ALSW: + return OPVCC(31, 597, 0, 0) + + case ACOPY: + return OPVCC(31, 774, 0, 0) /* copy - v3.00 */ + case APASTECC: + return OPVCC(31, 902, 0, 1) /* paste. 
- v3.00 */ + case ADARN: + return OPVCC(31, 755, 0, 0) /* darn - v3.00 */ + + case AMULLW, AMULLD: + return OPVCC(7, 0, 0, 0) /* mulli works with MULLW or MULLD */ + + case AOR: + return OPVCC(24, 0, 0, 0) + case AORIS: + return OPVCC(25, 0, 0, 0) /* ORIS */ + + case ARLWMI: + return OPVCC(20, 0, 0, 0) /* rlwimi */ + case ARLWMICC: + return OPVCC(20, 0, 0, 1) + case ARLDMI: + return OPMD(30, 3, 0) /* rldimi */ + case ARLDMICC: + return OPMD(30, 3, 1) /* rldimi. */ + case ARLDIMI: + return OPMD(30, 3, 0) /* rldimi */ + case ARLDIMICC: + return OPMD(30, 3, 1) /* rldimi. */ + case ARLWNM: + return OPVCC(21, 0, 0, 0) /* rlwinm */ + case ARLWNMCC: + return OPVCC(21, 0, 0, 1) + + case ARLDCL: + return OPMD(30, 0, 0) /* rldicl */ + case ARLDCLCC: + return OPMD(30, 0, 1) /* rldicl. */ + case ARLDCR: + return OPMD(30, 1, 0) /* rldicr */ + case ARLDCRCC: + return OPMD(30, 1, 1) /* rldicr. */ + case ARLDC: + return OPMD(30, 2, 0) /* rldic */ + case ARLDCCC: + return OPMD(30, 2, 1) /* rldic. */ + + case ASRAW: + return OPVCC(31, 824, 0, 0) + case ASRAWCC: + return OPVCC(31, 824, 0, 1) + case ASRAD: + return OPVCC(31, (413 << 1), 0, 0) + case ASRADCC: + return OPVCC(31, (413 << 1), 0, 1) + case AEXTSWSLI: + return OPVCC(31, 445, 0, 0) + case AEXTSWSLICC: + return OPVCC(31, 445, 0, 1) + + case ASTSW: + return OPVCC(31, 725, 0, 0) + + case ASUBC: + return OPVCC(8, 0, 0, 0) + + case ATW: + return OPVCC(3, 0, 0, 0) + case ATD: + return OPVCC(2, 0, 0, 0) + + /* Vector (VMX/Altivec) instructions */ + /* ISA 2.03 enables these for PPC970. For POWERx processors, these */ + /* are enabled starting at POWER6 (ISA 2.05). 
*/ + case AVSPLTB: + return OPVX(4, 524, 0, 0) /* vspltb - v2.03 */ + case AVSPLTH: + return OPVX(4, 588, 0, 0) /* vsplth - v2.03 */ + case AVSPLTW: + return OPVX(4, 652, 0, 0) /* vspltw - v2.03 */ + + case AVSPLTISB: + return OPVX(4, 780, 0, 0) /* vspltisb - v2.03 */ + case AVSPLTISH: + return OPVX(4, 844, 0, 0) /* vspltish - v2.03 */ + case AVSPLTISW: + return OPVX(4, 908, 0, 0) /* vspltisw - v2.03 */ + /* End of vector instructions */ + + case AFTDIV: + return OPVCC(63, 128, 0, 0) /* ftdiv - v2.06 */ + case AFTSQRT: + return OPVCC(63, 160, 0, 0) /* ftsqrt - v2.06 */ + + case AXOR: + return OPVCC(26, 0, 0, 0) /* XORIL */ + case AXORIS: + return OPVCC(27, 0, 0, 0) /* XORIS */ + } + + c.ctxt.Diag("bad opcode i/r or i/r/r %v", a) + return 0 +} + +/* + * load o(a),d + */ +func (c *ctxt9) opload(a obj.As) uint32 { + switch a { + case AMOVD: + return OPVCC(58, 0, 0, 0) /* ld */ + case AMOVDU: + return OPVCC(58, 0, 0, 1) /* ldu */ + case AMOVWZ: + return OPVCC(32, 0, 0, 0) /* lwz */ + case AMOVWZU: + return OPVCC(33, 0, 0, 0) /* lwzu */ + case AMOVW: + return OPVCC(58, 0, 0, 0) | 1<<1 /* lwa */ + case ALXV: + return OPDQ(61, 1, 0) /* lxv - ISA v3.0 */ + case ALXVL: + return OPVXX1(31, 269, 0) /* lxvl - ISA v3.0 */ + case ALXVLL: + return OPVXX1(31, 301, 0) /* lxvll - ISA v3.0 */ + case ALXVX: + return OPVXX1(31, 268, 0) /* lxvx - ISA v3.0 */ + + /* no AMOVWU */ + case AMOVB, AMOVBZ: + return OPVCC(34, 0, 0, 0) + /* load */ + + case AMOVBU, AMOVBZU: + return OPVCC(35, 0, 0, 0) + case AFMOVD: + return OPVCC(50, 0, 0, 0) + case AFMOVDU: + return OPVCC(51, 0, 0, 0) + case AFMOVS: + return OPVCC(48, 0, 0, 0) + case AFMOVSU: + return OPVCC(49, 0, 0, 0) + case AMOVH: + return OPVCC(42, 0, 0, 0) + case AMOVHU: + return OPVCC(43, 0, 0, 0) + case AMOVHZ: + return OPVCC(40, 0, 0, 0) + case AMOVHZU: + return OPVCC(41, 0, 0, 0) + case AMOVMW: + return OPVCC(46, 0, 0, 0) /* lmw */ + } + + c.ctxt.Diag("bad load opcode %v", a) + return 0 +} + +/* + * indexed load a(b),d + */ +func (c 
*ctxt9) oploadx(a obj.As) uint32 { + switch a { + case AMOVWZ: + return OPVCC(31, 23, 0, 0) /* lwzx */ + case AMOVWZU: + return OPVCC(31, 55, 0, 0) /* lwzux */ + case AMOVW: + return OPVCC(31, 341, 0, 0) /* lwax */ + case AMOVWU: + return OPVCC(31, 373, 0, 0) /* lwaux */ + + case AMOVB, AMOVBZ: + return OPVCC(31, 87, 0, 0) /* lbzx */ + + case AMOVBU, AMOVBZU: + return OPVCC(31, 119, 0, 0) /* lbzux */ + case AFMOVD: + return OPVCC(31, 599, 0, 0) /* lfdx */ + case AFMOVDU: + return OPVCC(31, 631, 0, 0) /* lfdux */ + case AFMOVS: + return OPVCC(31, 535, 0, 0) /* lfsx */ + case AFMOVSU: + return OPVCC(31, 567, 0, 0) /* lfsux */ + case AFMOVSX: + return OPVCC(31, 855, 0, 0) /* lfiwax - power6, isa 2.05 */ + case AFMOVSZ: + return OPVCC(31, 887, 0, 0) /* lfiwzx - power7, isa 2.06 */ + case AMOVH: + return OPVCC(31, 343, 0, 0) /* lhax */ + case AMOVHU: + return OPVCC(31, 375, 0, 0) /* lhaux */ + case AMOVHBR: + return OPVCC(31, 790, 0, 0) /* lhbrx */ + case AMOVWBR: + return OPVCC(31, 534, 0, 0) /* lwbrx */ + case AMOVDBR: + return OPVCC(31, 532, 0, 0) /* ldbrx */ + case AMOVHZ: + return OPVCC(31, 279, 0, 0) /* lhzx */ + case AMOVHZU: + return OPVCC(31, 311, 0, 0) /* lhzux */ + case ALBAR: + return OPVCC(31, 52, 0, 0) /* lbarx */ + case ALHAR: + return OPVCC(31, 116, 0, 0) /* lharx */ + case ALWAR: + return OPVCC(31, 20, 0, 0) /* lwarx */ + case ALDAR: + return OPVCC(31, 84, 0, 0) /* ldarx */ + case ALSW: + return OPVCC(31, 533, 0, 0) /* lswx */ + case AMOVD: + return OPVCC(31, 21, 0, 0) /* ldx */ + case AMOVDU: + return OPVCC(31, 53, 0, 0) /* ldux */ + + /* Vector (VMX/Altivec) instructions */ + case ALVEBX: + return OPVCC(31, 7, 0, 0) /* lvebx - v2.03 */ + case ALVEHX: + return OPVCC(31, 39, 0, 0) /* lvehx - v2.03 */ + case ALVEWX: + return OPVCC(31, 71, 0, 0) /* lvewx - v2.03 */ + case ALVX: + return OPVCC(31, 103, 0, 0) /* lvx - v2.03 */ + case ALVXL: + return OPVCC(31, 359, 0, 0) /* lvxl - v2.03 */ + case ALVSL: + return OPVCC(31, 6, 0, 0) /* lvsl - v2.03 */ + case 
ALVSR: + return OPVCC(31, 38, 0, 0) /* lvsr - v2.03 */ + /* End of vector instructions */ + + /* Vector scalar (VSX) instructions */ + case ALXVX: + return OPVXX1(31, 268, 0) /* lxvx - ISA v3.0 */ + case ALXVD2X: + return OPVXX1(31, 844, 0) /* lxvd2x - v2.06 */ + case ALXVW4X: + return OPVXX1(31, 780, 0) /* lxvw4x - v2.06 */ + case ALXVH8X: + return OPVXX1(31, 812, 0) /* lxvh8x - v3.00 */ + case ALXVB16X: + return OPVXX1(31, 876, 0) /* lxvb16x - v3.00 */ + case ALXVDSX: + return OPVXX1(31, 332, 0) /* lxvdsx - v2.06 */ + case ALXSDX: + return OPVXX1(31, 588, 0) /* lxsdx - v2.06 */ + case ALXSIWAX: + return OPVXX1(31, 76, 0) /* lxsiwax - v2.07 */ + case ALXSIWZX: + return OPVXX1(31, 12, 0) /* lxsiwzx - v2.07 */ + } + + c.ctxt.Diag("bad loadx opcode %v", a) + return 0 +} + +/* + * store s,o(d) + */ +func (c *ctxt9) opstore(a obj.As) uint32 { + switch a { + case AMOVB, AMOVBZ: + return OPVCC(38, 0, 0, 0) /* stb */ + + case AMOVBU, AMOVBZU: + return OPVCC(39, 0, 0, 0) /* stbu */ + case AFMOVD: + return OPVCC(54, 0, 0, 0) /* stfd */ + case AFMOVDU: + return OPVCC(55, 0, 0, 0) /* stfdu */ + case AFMOVS: + return OPVCC(52, 0, 0, 0) /* stfs */ + case AFMOVSU: + return OPVCC(53, 0, 0, 0) /* stfsu */ + + case AMOVHZ, AMOVH: + return OPVCC(44, 0, 0, 0) /* sth */ + + case AMOVHZU, AMOVHU: + return OPVCC(45, 0, 0, 0) /* sthu */ + case AMOVMW: + return OPVCC(47, 0, 0, 0) /* stmw */ + case ASTSW: + return OPVCC(31, 725, 0, 0) /* stswi */ + + case AMOVWZ, AMOVW: + return OPVCC(36, 0, 0, 0) /* stw */ + + case AMOVWZU, AMOVWU: + return OPVCC(37, 0, 0, 0) /* stwu */ + case AMOVD: + return OPVCC(62, 0, 0, 0) /* std */ + case AMOVDU: + return OPVCC(62, 0, 0, 1) /* stdu */ + case ASTXV: + return OPDQ(61, 5, 0) /* stxv ISA 3.0 */ + case ASTXVL: + return OPVXX1(31, 397, 0) /* stxvl ISA 3.0 */ + case ASTXVLL: + return OPVXX1(31, 429, 0) /* stxvll ISA 3.0 */ + case ASTXVX: + return OPVXX1(31, 396, 0) /* stxvx - ISA v3.0 */ + + } + + c.ctxt.Diag("unknown store opcode %v", a) + return 0 +} + 
+/* + * indexed store s,a(b) + * + * opstorex maps the assembler opcode a to the uint32 built by OPVCC or + * OPVXX1 for the indexed store instruction named in the comment beside each case. + * An opcode without a case is reported through c.ctxt.Diag and 0 is returned. + */ +func (c *ctxt9) opstorex(a obj.As) uint32 { + switch a { + case AMOVB, AMOVBZ: + return OPVCC(31, 215, 0, 0) /* stbx */ + + case AMOVBU, AMOVBZU: + return OPVCC(31, 247, 0, 0) /* stbux */ + case AFMOVD: + return OPVCC(31, 727, 0, 0) /* stfdx */ + case AFMOVDU: + return OPVCC(31, 759, 0, 0) /* stfdux */ + case AFMOVS: + return OPVCC(31, 663, 0, 0) /* stfsx */ + case AFMOVSU: + return OPVCC(31, 695, 0, 0) /* stfsux */ + case AFMOVSX: + return OPVCC(31, 983, 0, 0) /* stfiwx */ + + case AMOVHZ, AMOVH: + return OPVCC(31, 407, 0, 0) /* sthx */ + case AMOVHBR: + return OPVCC(31, 918, 0, 0) /* sthbrx */ + + case AMOVHZU, AMOVHU: + return OPVCC(31, 439, 0, 0) /* sthux */ + + case AMOVWZ, AMOVW: + return OPVCC(31, 151, 0, 0) /* stwx */ + + case AMOVWZU, AMOVWU: + return OPVCC(31, 183, 0, 0) /* stwux */ + case ASTSW: + return OPVCC(31, 661, 0, 0) /* stswx */ + case AMOVWBR: + return OPVCC(31, 662, 0, 0) /* stwbrx */ + case AMOVDBR: + return OPVCC(31, 660, 0, 0) /* stdbrx */ + case ASTBCCC: + return OPVCC(31, 694, 0, 1) /* stbcx. */ + case ASTHCCC: + return OPVCC(31, 726, 0, 1) /* sthcx. */ + case ASTWCCC: + return OPVCC(31, 150, 0, 1) /* stwcx. */ + case ASTDCCC: + return OPVCC(31, 214, 0, 1) /* stdcx. 
*/ + case AMOVD: + return OPVCC(31, 149, 0, 0) /* stdx */ + case AMOVDU: + return OPVCC(31, 181, 0, 0) /* stdux */ + + /* Vector (VMX/Altivec) instructions */ + case ASTVEBX: + return OPVCC(31, 135, 0, 0) /* stvebx - v2.03 */ + case ASTVEHX: + return OPVCC(31, 167, 0, 0) /* stvehx - v2.03 */ + case ASTVEWX: + return OPVCC(31, 199, 0, 0) /* stvewx - v2.03 */ + case ASTVX: + return OPVCC(31, 231, 0, 0) /* stvx - v2.03 */ + case ASTVXL: + return OPVCC(31, 487, 0, 0) /* stvxl - v2.03 */ + /* End of vector instructions */ + + /* Vector scalar (VSX) instructions */ + case ASTXVX: + return OPVXX1(31, 396, 0) /* stxvx - v3.0 */ + case ASTXVD2X: + return OPVXX1(31, 972, 0) /* stxvd2x - v2.06 */ + case ASTXVW4X: + return OPVXX1(31, 908, 0) /* stxvw4x - v2.06 */ + case ASTXVH8X: + return OPVXX1(31, 940, 0) /* stxvh8x - v3.0 */ + case ASTXVB16X: + return OPVXX1(31, 1004, 0) /* stxvb16x - v3.0 */ + + case ASTXSDX: + return OPVXX1(31, 716, 0) /* stxsdx - v2.06 */ + + case ASTXSIWX: + return OPVXX1(31, 140, 0) /* stxsiwx - v2.07 */ + + /* End of vector scalar instructions */ + + } + + c.ctxt.Diag("unknown storex opcode %v", a) + return 0 +} diff --git a/src/cmd/internal/obj/ppc64/asm9_gtables.go b/src/cmd/internal/obj/ppc64/asm9_gtables.go new file mode 100644 index 0000000..953b148 --- /dev/null +++ b/src/cmd/internal/obj/ppc64/asm9_gtables.go @@ -0,0 +1,1660 @@ +// DO NOT EDIT +// generated by: ppc64map -fmt=encoder pp64.csv + +package ppc64 + +import ( + "cmd/internal/obj" +) + +const ( + AXXSETACCZ = ALASTAOUT + iota + AXXMTACC + AXXMFACC + AXXGENPCVWM + AXXGENPCVHM + AXXGENPCVDM + AXXGENPCVBM + AXVTLSBB + AXVI8GER4SPP + AXVI8GER4PP + AXVI8GER4 + AXVI4GER8PP + AXVI4GER8 + AXVI16GER2SPP + AXVI16GER2S + AXVI16GER2PP + AXVI16GER2 + AXVF64GERPP + AXVF64GERPN + AXVF64GERNP + AXVF64GERNN + AXVF64GER + AXVF32GERPP + AXVF32GERPN + AXVF32GERNP + AXVF32GERNN + AXVF32GER + AXVF16GER2PP + AXVF16GER2PN + AXVF16GER2NP + AXVF16GER2NN + AXVF16GER2 + AXVCVSPBF16 + AXVCVBF16SPN + AXVBF16GER2PP 
+ AXVBF16GER2PN + AXVBF16GER2NP + AXVBF16GER2NN + AXVBF16GER2 + AXSMINCQP + AXSMAXCQP + AXSCVUQQP + AXSCVSQQP + AXSCVQPUQZ + AXSCVQPSQZ + AXSCMPGTQP + AXSCMPGEQP + AXSCMPEQQP + AVSTRIHRCC + AVSTRIHR + AVSTRIHLCC + AVSTRIHL + AVSTRIBRCC + AVSTRIBR + AVSTRIBLCC + AVSTRIBL + AVSRQ + AVSRDBI + AVSRAQ + AVSLQ + AVSLDBI + AVRLQNM + AVRLQMI + AVRLQ + AVPEXTD + AVPDEPD + AVMULOUD + AVMULOSD + AVMULLD + AVMULHUW + AVMULHUD + AVMULHSW + AVMULHSD + AVMULEUD + AVMULESD + AVMSUMCUD + AVMODUW + AVMODUQ + AVMODUD + AVMODSW + AVMODSQ + AVMODSD + AVINSWVRX + AVINSWVLX + AVINSWRX + AVINSWLX + AVINSW + AVINSHVRX + AVINSHVLX + AVINSHRX + AVINSHLX + AVINSDRX + AVINSDLX + AVINSD + AVINSBVRX + AVINSBVLX + AVINSBRX + AVINSBLX + AVGNB + AVEXTSD2Q + AVEXTRACTWM + AVEXTRACTQM + AVEXTRACTHM + AVEXTRACTDM + AVEXTRACTBM + AVEXTDUWVRX + AVEXTDUWVLX + AVEXTDUHVRX + AVEXTDUHVLX + AVEXTDUBVRX + AVEXTDUBVLX + AVEXTDDVRX + AVEXTDDVLX + AVEXPANDWM + AVEXPANDQM + AVEXPANDHM + AVEXPANDDM + AVEXPANDBM + AVDIVUW + AVDIVUQ + AVDIVUD + AVDIVSW + AVDIVSQ + AVDIVSD + AVDIVEUW + AVDIVEUQ + AVDIVEUD + AVDIVESW + AVDIVESQ + AVDIVESD + AVCTZDM + AVCNTMBW + AVCNTMBH + AVCNTMBD + AVCNTMBB + AVCMPUQ + AVCMPSQ + AVCMPGTUQCC + AVCMPGTUQ + AVCMPGTSQCC + AVCMPGTSQ + AVCMPEQUQCC + AVCMPEQUQ + AVCLZDM + AVCLRRB + AVCLRLB + AVCFUGED + ASTXVRWX + ASTXVRHX + ASTXVRDX + ASTXVRBX + ASTXVPX + ASTXVP + ASETNBCR + ASETNBC + ASETBCR + ASETBC + APEXTD + APDEPD + AMTVSRWM + AMTVSRQM + AMTVSRHM + AMTVSRDM + AMTVSRBMI + AMTVSRBM + ALXVRWX + ALXVRHX + ALXVRDX + ALXVRBX + ALXVPX + ALXVP + ALXVKQ + ADCTFIXQQ + ADCFFIXQQ + ACNTTZDM + ACNTLZDM + ACFUGED + ABRW + ABRH + ABRD + AHASHSTP + AHASHST + AHASHCHKP + AHASHCHK + AXXSPLTIW + AXXSPLTIDP + AXXSPLTI32DX + AXXPERMX + AXXEVAL + AXXBLENDVW + AXXBLENDVH + AXXBLENDVD + AXXBLENDVB + APSTXVP + APSTXV + APSTXSSP + APSTXSD + APSTW + APSTQ + APSTH + APSTFS + APSTFD + APSTD + APSTB + APNOP + APMXVI8GER4SPP + APMXVI8GER4PP + APMXVI8GER4 + APMXVI4GER8PP + APMXVI4GER8 + APMXVI16GER2SPP + 
APMXVI16GER2S + APMXVI16GER2PP + APMXVI16GER2 + APMXVF64GERPP + APMXVF64GERPN + APMXVF64GERNP + APMXVF64GERNN + APMXVF64GER + APMXVF32GERPP + APMXVF32GERPN + APMXVF32GERNP + APMXVF32GERNN + APMXVF32GER + APMXVF16GER2PP + APMXVF16GER2PN + APMXVF16GER2NP + APMXVF16GER2NN + APMXVF16GER2 + APMXVBF16GER2PP + APMXVBF16GER2PN + APMXVBF16GER2NP + APMXVBF16GER2NN + APMXVBF16GER2 + APLXVP + APLXV + APLXSSP + APLXSD + APLWZ + APLWA + APLQ + APLHZ + APLHA + APLFS + APLFD + APLD + APLBZ + APADDI + ALASTGEN + AFIRSTGEN = AXXSETACCZ +) + +var GenAnames = []string{ + "XXSETACCZ", + "XXMTACC", + "XXMFACC", + "XXGENPCVWM", + "XXGENPCVHM", + "XXGENPCVDM", + "XXGENPCVBM", + "XVTLSBB", + "XVI8GER4SPP", + "XVI8GER4PP", + "XVI8GER4", + "XVI4GER8PP", + "XVI4GER8", + "XVI16GER2SPP", + "XVI16GER2S", + "XVI16GER2PP", + "XVI16GER2", + "XVF64GERPP", + "XVF64GERPN", + "XVF64GERNP", + "XVF64GERNN", + "XVF64GER", + "XVF32GERPP", + "XVF32GERPN", + "XVF32GERNP", + "XVF32GERNN", + "XVF32GER", + "XVF16GER2PP", + "XVF16GER2PN", + "XVF16GER2NP", + "XVF16GER2NN", + "XVF16GER2", + "XVCVSPBF16", + "XVCVBF16SPN", + "XVBF16GER2PP", + "XVBF16GER2PN", + "XVBF16GER2NP", + "XVBF16GER2NN", + "XVBF16GER2", + "XSMINCQP", + "XSMAXCQP", + "XSCVUQQP", + "XSCVSQQP", + "XSCVQPUQZ", + "XSCVQPSQZ", + "XSCMPGTQP", + "XSCMPGEQP", + "XSCMPEQQP", + "VSTRIHRCC", + "VSTRIHR", + "VSTRIHLCC", + "VSTRIHL", + "VSTRIBRCC", + "VSTRIBR", + "VSTRIBLCC", + "VSTRIBL", + "VSRQ", + "VSRDBI", + "VSRAQ", + "VSLQ", + "VSLDBI", + "VRLQNM", + "VRLQMI", + "VRLQ", + "VPEXTD", + "VPDEPD", + "VMULOUD", + "VMULOSD", + "VMULLD", + "VMULHUW", + "VMULHUD", + "VMULHSW", + "VMULHSD", + "VMULEUD", + "VMULESD", + "VMSUMCUD", + "VMODUW", + "VMODUQ", + "VMODUD", + "VMODSW", + "VMODSQ", + "VMODSD", + "VINSWVRX", + "VINSWVLX", + "VINSWRX", + "VINSWLX", + "VINSW", + "VINSHVRX", + "VINSHVLX", + "VINSHRX", + "VINSHLX", + "VINSDRX", + "VINSDLX", + "VINSD", + "VINSBVRX", + "VINSBVLX", + "VINSBRX", + "VINSBLX", + "VGNB", + "VEXTSD2Q", + "VEXTRACTWM", + 
"VEXTRACTQM", + "VEXTRACTHM", + "VEXTRACTDM", + "VEXTRACTBM", + "VEXTDUWVRX", + "VEXTDUWVLX", + "VEXTDUHVRX", + "VEXTDUHVLX", + "VEXTDUBVRX", + "VEXTDUBVLX", + "VEXTDDVRX", + "VEXTDDVLX", + "VEXPANDWM", + "VEXPANDQM", + "VEXPANDHM", + "VEXPANDDM", + "VEXPANDBM", + "VDIVUW", + "VDIVUQ", + "VDIVUD", + "VDIVSW", + "VDIVSQ", + "VDIVSD", + "VDIVEUW", + "VDIVEUQ", + "VDIVEUD", + "VDIVESW", + "VDIVESQ", + "VDIVESD", + "VCTZDM", + "VCNTMBW", + "VCNTMBH", + "VCNTMBD", + "VCNTMBB", + "VCMPUQ", + "VCMPSQ", + "VCMPGTUQCC", + "VCMPGTUQ", + "VCMPGTSQCC", + "VCMPGTSQ", + "VCMPEQUQCC", + "VCMPEQUQ", + "VCLZDM", + "VCLRRB", + "VCLRLB", + "VCFUGED", + "STXVRWX", + "STXVRHX", + "STXVRDX", + "STXVRBX", + "STXVPX", + "STXVP", + "SETNBCR", + "SETNBC", + "SETBCR", + "SETBC", + "PEXTD", + "PDEPD", + "MTVSRWM", + "MTVSRQM", + "MTVSRHM", + "MTVSRDM", + "MTVSRBMI", + "MTVSRBM", + "LXVRWX", + "LXVRHX", + "LXVRDX", + "LXVRBX", + "LXVPX", + "LXVP", + "LXVKQ", + "DCTFIXQQ", + "DCFFIXQQ", + "CNTTZDM", + "CNTLZDM", + "CFUGED", + "BRW", + "BRH", + "BRD", + "HASHSTP", + "HASHST", + "HASHCHKP", + "HASHCHK", + "XXSPLTIW", + "XXSPLTIDP", + "XXSPLTI32DX", + "XXPERMX", + "XXEVAL", + "XXBLENDVW", + "XXBLENDVH", + "XXBLENDVD", + "XXBLENDVB", + "PSTXVP", + "PSTXV", + "PSTXSSP", + "PSTXSD", + "PSTW", + "PSTQ", + "PSTH", + "PSTFS", + "PSTFD", + "PSTD", + "PSTB", + "PNOP", + "PMXVI8GER4SPP", + "PMXVI8GER4PP", + "PMXVI8GER4", + "PMXVI4GER8PP", + "PMXVI4GER8", + "PMXVI16GER2SPP", + "PMXVI16GER2S", + "PMXVI16GER2PP", + "PMXVI16GER2", + "PMXVF64GERPP", + "PMXVF64GERPN", + "PMXVF64GERNP", + "PMXVF64GERNN", + "PMXVF64GER", + "PMXVF32GERPP", + "PMXVF32GERPN", + "PMXVF32GERNP", + "PMXVF32GERNN", + "PMXVF32GER", + "PMXVF16GER2PP", + "PMXVF16GER2PN", + "PMXVF16GER2NP", + "PMXVF16GER2NN", + "PMXVF16GER2", + "PMXVBF16GER2PP", + "PMXVBF16GER2PN", + "PMXVBF16GER2NP", + "PMXVBF16GER2NN", + "PMXVBF16GER2", + "PLXVP", + "PLXV", + "PLXSSP", + "PLXSD", + "PLWZ", + "PLWA", + "PLQ", + "PLHZ", + "PLHA", + "PLFS", + "PLFD", + "PLD", 
+ "PLBZ", + "PADDI", +} + +var GenOpcodes = [...]uint32{ + 0x7c030162, // AXXSETACCZ + 0x7c010162, // AXXMTACC + 0x7c000162, // AXXMFACC + 0xf0000768, // AXXGENPCVWM + 0xf000072a, // AXXGENPCVHM + 0xf000076a, // AXXGENPCVDM + 0xf0000728, // AXXGENPCVBM + 0xf002076c, // AXVTLSBB + 0xec000318, // AXVI8GER4SPP + 0xec000010, // AXVI8GER4PP + 0xec000018, // AXVI8GER4 + 0xec000110, // AXVI4GER8PP + 0xec000118, // AXVI4GER8 + 0xec000150, // AXVI16GER2SPP + 0xec000158, // AXVI16GER2S + 0xec000358, // AXVI16GER2PP + 0xec000258, // AXVI16GER2 + 0xec0001d0, // AXVF64GERPP + 0xec0005d0, // AXVF64GERPN + 0xec0003d0, // AXVF64GERNP + 0xec0007d0, // AXVF64GERNN + 0xec0001d8, // AXVF64GER + 0xec0000d0, // AXVF32GERPP + 0xec0004d0, // AXVF32GERPN + 0xec0002d0, // AXVF32GERNP + 0xec0006d0, // AXVF32GERNN + 0xec0000d8, // AXVF32GER + 0xec000090, // AXVF16GER2PP + 0xec000490, // AXVF16GER2PN + 0xec000290, // AXVF16GER2NP + 0xec000690, // AXVF16GER2NN + 0xec000098, // AXVF16GER2 + 0xf011076c, // AXVCVSPBF16 + 0xf010076c, // AXVCVBF16SPN + 0xec000190, // AXVBF16GER2PP + 0xec000590, // AXVBF16GER2PN + 0xec000390, // AXVBF16GER2NP + 0xec000790, // AXVBF16GER2NN + 0xec000198, // AXVBF16GER2 + 0xfc0005c8, // AXSMINCQP + 0xfc000548, // AXSMAXCQP + 0xfc030688, // AXSCVUQQP + 0xfc0b0688, // AXSCVSQQP + 0xfc000688, // AXSCVQPUQZ + 0xfc080688, // AXSCVQPSQZ + 0xfc0001c8, // AXSCMPGTQP + 0xfc000188, // AXSCMPGEQP + 0xfc000088, // AXSCMPEQQP + 0x1003040d, // AVSTRIHRCC + 0x1003000d, // AVSTRIHR + 0x1002040d, // AVSTRIHLCC + 0x1002000d, // AVSTRIHL + 0x1001040d, // AVSTRIBRCC + 0x1001000d, // AVSTRIBR + 0x1000040d, // AVSTRIBLCC + 0x1000000d, // AVSTRIBL + 0x10000205, // AVSRQ + 0x10000216, // AVSRDBI + 0x10000305, // AVSRAQ + 0x10000105, // AVSLQ + 0x10000016, // AVSLDBI + 0x10000145, // AVRLQNM + 0x10000045, // AVRLQMI + 0x10000005, // AVRLQ + 0x1000058d, // AVPEXTD + 0x100005cd, // AVPDEPD + 0x100000c8, // AVMULOUD + 0x100001c8, // AVMULOSD + 0x100001c9, // AVMULLD + 0x10000289, // AVMULHUW + 
0x100002c9, // AVMULHUD + 0x10000389, // AVMULHSW + 0x100003c9, // AVMULHSD + 0x100002c8, // AVMULEUD + 0x100003c8, // AVMULESD + 0x10000017, // AVMSUMCUD + 0x1000068b, // AVMODUW + 0x1000060b, // AVMODUQ + 0x100006cb, // AVMODUD + 0x1000078b, // AVMODSW + 0x1000070b, // AVMODSQ + 0x100007cb, // AVMODSD + 0x1000018f, // AVINSWVRX + 0x1000008f, // AVINSWVLX + 0x1000038f, // AVINSWRX + 0x1000028f, // AVINSWLX + 0x100000cf, // AVINSW + 0x1000014f, // AVINSHVRX + 0x1000004f, // AVINSHVLX + 0x1000034f, // AVINSHRX + 0x1000024f, // AVINSHLX + 0x100003cf, // AVINSDRX + 0x100002cf, // AVINSDLX + 0x100001cf, // AVINSD + 0x1000010f, // AVINSBVRX + 0x1000000f, // AVINSBVLX + 0x1000030f, // AVINSBRX + 0x1000020f, // AVINSBLX + 0x100004cc, // AVGNB + 0x101b0602, // AVEXTSD2Q + 0x100a0642, // AVEXTRACTWM + 0x100c0642, // AVEXTRACTQM + 0x10090642, // AVEXTRACTHM + 0x100b0642, // AVEXTRACTDM + 0x10080642, // AVEXTRACTBM + 0x1000001d, // AVEXTDUWVRX + 0x1000001c, // AVEXTDUWVLX + 0x1000001b, // AVEXTDUHVRX + 0x1000001a, // AVEXTDUHVLX + 0x10000019, // AVEXTDUBVRX + 0x10000018, // AVEXTDUBVLX + 0x1000001f, // AVEXTDDVRX + 0x1000001e, // AVEXTDDVLX + 0x10020642, // AVEXPANDWM + 0x10040642, // AVEXPANDQM + 0x10010642, // AVEXPANDHM + 0x10030642, // AVEXPANDDM + 0x10000642, // AVEXPANDBM + 0x1000008b, // AVDIVUW + 0x1000000b, // AVDIVUQ + 0x100000cb, // AVDIVUD + 0x1000018b, // AVDIVSW + 0x1000010b, // AVDIVSQ + 0x100001cb, // AVDIVSD + 0x1000028b, // AVDIVEUW + 0x1000020b, // AVDIVEUQ + 0x100002cb, // AVDIVEUD + 0x1000038b, // AVDIVESW + 0x1000030b, // AVDIVESQ + 0x100003cb, // AVDIVESD + 0x100007c4, // AVCTZDM + 0x101c0642, // AVCNTMBW + 0x101a0642, // AVCNTMBH + 0x101e0642, // AVCNTMBD + 0x10180642, // AVCNTMBB + 0x10000101, // AVCMPUQ + 0x10000141, // AVCMPSQ + 0x10000687, // AVCMPGTUQCC + 0x10000287, // AVCMPGTUQ + 0x10000787, // AVCMPGTSQCC + 0x10000387, // AVCMPGTSQ + 0x100005c7, // AVCMPEQUQCC + 0x100001c7, // AVCMPEQUQ + 0x10000784, // AVCLZDM + 0x100001cd, // AVCLRRB + 
0x1000018d, // AVCLRLB + 0x1000054d, // AVCFUGED + 0x7c00019a, // ASTXVRWX + 0x7c00015a, // ASTXVRHX + 0x7c0001da, // ASTXVRDX + 0x7c00011a, // ASTXVRBX + 0x7c00039a, // ASTXVPX + 0x18000001, // ASTXVP + 0x7c0003c0, // ASETNBCR + 0x7c000380, // ASETNBC + 0x7c000340, // ASETBCR + 0x7c000300, // ASETBC + 0x7c000178, // APEXTD + 0x7c000138, // APDEPD + 0x10120642, // AMTVSRWM + 0x10140642, // AMTVSRQM + 0x10110642, // AMTVSRHM + 0x10130642, // AMTVSRDM + 0x10000014, // AMTVSRBMI + 0x10100642, // AMTVSRBM + 0x7c00009a, // ALXVRWX + 0x7c00005a, // ALXVRHX + 0x7c0000da, // ALXVRDX + 0x7c00001a, // ALXVRBX + 0x7c00029a, // ALXVPX + 0x18000000, // ALXVP + 0xf01f02d0, // ALXVKQ + 0xfc0107c4, // ADCTFIXQQ + 0xfc0007c4, // ADCFFIXQQ + 0x7c000476, // ACNTTZDM + 0x7c000076, // ACNTLZDM + 0x7c0001b8, // ACFUGED + 0x7c000136, // ABRW + 0x7c0001b6, // ABRH + 0x7c000176, // ABRD + 0x7c000524, // AHASHSTP + 0x7c0005a4, // AHASHST + 0x7c000564, // AHASHCHKP + 0x7c0005e4, // AHASHCHK + 0x80060000, // AXXSPLTIW + 0x80040000, // AXXSPLTIDP + 0x80000000, // AXXSPLTI32DX + 0x88000000, // AXXPERMX + 0x88000010, // AXXEVAL + 0x84000020, // AXXBLENDVW + 0x84000010, // AXXBLENDVH + 0x84000030, // AXXBLENDVD + 0x84000000, // AXXBLENDVB + 0xf8000000, // APSTXVP + 0xd8000000, // APSTXV + 0xbc000000, // APSTXSSP + 0xb8000000, // APSTXSD + 0x90000000, // APSTW + 0xf0000000, // APSTQ + 0xb0000000, // APSTH + 0xd0000000, // APSTFS + 0xd8000000, // APSTFD + 0xf4000000, // APSTD + 0x98000000, // APSTB + 0x00000000, // APNOP + 0xec000318, // APMXVI8GER4SPP + 0xec000010, // APMXVI8GER4PP + 0xec000018, // APMXVI8GER4 + 0xec000110, // APMXVI4GER8PP + 0xec000118, // APMXVI4GER8 + 0xec000150, // APMXVI16GER2SPP + 0xec000158, // APMXVI16GER2S + 0xec000358, // APMXVI16GER2PP + 0xec000258, // APMXVI16GER2 + 0xec0001d0, // APMXVF64GERPP + 0xec0005d0, // APMXVF64GERPN + 0xec0003d0, // APMXVF64GERNP + 0xec0007d0, // APMXVF64GERNN + 0xec0001d8, // APMXVF64GER + 0xec0000d0, // APMXVF32GERPP + 0xec0004d0, // 
APMXVF32GERPN + 0xec0002d0, // APMXVF32GERNP + 0xec0006d0, // APMXVF32GERNN + 0xec0000d8, // APMXVF32GER + 0xec000090, // APMXVF16GER2PP + 0xec000490, // APMXVF16GER2PN + 0xec000290, // APMXVF16GER2NP + 0xec000690, // APMXVF16GER2NN + 0xec000098, // APMXVF16GER2 + 0xec000190, // APMXVBF16GER2PP + 0xec000590, // APMXVBF16GER2PN + 0xec000390, // APMXVBF16GER2NP + 0xec000790, // APMXVBF16GER2NN + 0xec000198, // APMXVBF16GER2 + 0xe8000000, // APLXVP + 0xc8000000, // APLXV + 0xac000000, // APLXSSP + 0xa8000000, // APLXSD + 0x80000000, // APLWZ + 0xa4000000, // APLWA + 0xe0000000, // APLQ + 0xa0000000, // APLHZ + 0xa8000000, // APLHA + 0xc0000000, // APLFS + 0xc8000000, // APLFD + 0xe4000000, // APLD + 0x88000000, // APLBZ + 0x38000000, // APADDI +} + +var GenPfxOpcodes = [...]uint32{ + 0x05000000, // AXXSPLTIW + 0x05000000, // AXXSPLTIDP + 0x05000000, // AXXSPLTI32DX + 0x05000000, // AXXPERMX + 0x05000000, // AXXEVAL + 0x05000000, // AXXBLENDVW + 0x05000000, // AXXBLENDVH + 0x05000000, // AXXBLENDVD + 0x05000000, // AXXBLENDVB + 0x04000000, // APSTXVP + 0x04000000, // APSTXV + 0x04000000, // APSTXSSP + 0x04000000, // APSTXSD + 0x06000000, // APSTW + 0x04000000, // APSTQ + 0x06000000, // APSTH + 0x06000000, // APSTFS + 0x06000000, // APSTFD + 0x04000000, // APSTD + 0x06000000, // APSTB + 0x07000000, // APNOP + 0x07900000, // APMXVI8GER4SPP + 0x07900000, // APMXVI8GER4PP + 0x07900000, // APMXVI8GER4 + 0x07900000, // APMXVI4GER8PP + 0x07900000, // APMXVI4GER8 + 0x07900000, // APMXVI16GER2SPP + 0x07900000, // APMXVI16GER2S + 0x07900000, // APMXVI16GER2PP + 0x07900000, // APMXVI16GER2 + 0x07900000, // APMXVF64GERPP + 0x07900000, // APMXVF64GERPN + 0x07900000, // APMXVF64GERNP + 0x07900000, // APMXVF64GERNN + 0x07900000, // APMXVF64GER + 0x07900000, // APMXVF32GERPP + 0x07900000, // APMXVF32GERPN + 0x07900000, // APMXVF32GERNP + 0x07900000, // APMXVF32GERNN + 0x07900000, // APMXVF32GER + 0x07900000, // APMXVF16GER2PP + 0x07900000, // APMXVF16GER2PN + 0x07900000, // 
APMXVF16GER2NP + 0x07900000, // APMXVF16GER2NN + 0x07900000, // APMXVF16GER2 + 0x07900000, // APMXVBF16GER2PP + 0x07900000, // APMXVBF16GER2PN + 0x07900000, // APMXVBF16GER2NP + 0x07900000, // APMXVBF16GER2NN + 0x07900000, // APMXVBF16GER2 + 0x04000000, // APLXVP + 0x04000000, // APLXV + 0x04000000, // APLXSSP + 0x04000000, // APLXSD + 0x06000000, // APLWZ + 0x04000000, // APLWA + 0x04000000, // APLQ + 0x06000000, // APLHZ + 0x06000000, // APLHA + 0x06000000, // APLFS + 0x06000000, // APLFD + 0x04000000, // APLD + 0x06000000, // APLBZ + 0x06000000, // APADDI +} + +var optabGen = []Optab{ + {as: ABRW, a1: C_REG, a6: C_REG, asmout: type_brw, size: 4}, + {as: ADCFFIXQQ, a1: C_VREG, a6: C_FREGP, asmout: type_xscvuqqp, size: 4}, + {as: ADCTFIXQQ, a1: C_FREGP, a6: C_VREG, asmout: type_xscvuqqp, size: 4}, + {as: AHASHCHKP, a1: C_SOREG, a6: C_REG, asmout: type_hashchkp, size: 4}, + {as: AHASHSTP, a1: C_REG, a6: C_SOREG, asmout: type_hashstp, size: 4}, + {as: ALXVKQ, a1: C_U5CON, a6: C_VSREG, asmout: type_lxvkq, size: 4}, + {as: ALXVP, a1: C_SOREG, a6: C_VSREGP, asmout: type_lxvp, size: 4}, + {as: ALXVPX, a1: C_XOREG, a6: C_VSREGP, asmout: type_lxvpx, size: 4}, + {as: ALXVRWX, a1: C_XOREG, a6: C_VSREG, asmout: type_lxvrwx, size: 4}, + {as: AMTVSRBMI, a1: C_U16CON, a6: C_VREG, asmout: type_mtvsrbmi, size: 4}, + {as: AMTVSRWM, a1: C_REG, a6: C_VREG, asmout: type_xscvuqqp, size: 4}, + {as: APADDI, a1: C_REG, a3: C_S34CON, a4: C_U1CON, a6: C_REG, asmout: type_paddi, ispfx: true, size: 8}, + {as: APEXTD, a1: C_REG, a2: C_REG, a6: C_REG, asmout: type_pextd, size: 4}, + {as: APLFS, a1: C_LOREG, a3: C_U1CON, a6: C_FREG, asmout: type_plxssp, ispfx: true, size: 8}, + {as: APLQ, a1: C_LOREG, a3: C_U1CON, a6: C_REGP, asmout: type_plxssp, ispfx: true, size: 8}, + {as: APLWZ, a1: C_LOREG, a3: C_U1CON, a6: C_REG, asmout: type_plxssp, ispfx: true, size: 8}, + {as: APLXSSP, a1: C_LOREG, a3: C_U1CON, a6: C_VREG, asmout: type_plxssp, ispfx: true, size: 8}, + {as: APLXV, a1: C_LOREG, a3: 
C_U1CON, a6: C_VSREG, asmout: type_plxv, ispfx: true, size: 8}, + {as: APLXVP, a1: C_LOREG, a3: C_U1CON, a6: C_VSREGP, asmout: type_plxvp, ispfx: true, size: 8}, + {as: APMXVF32GERPP, a1: C_VSREG, a2: C_VSREG, a3: C_U4CON, a4: C_U4CON, a6: C_AREG, asmout: type_pmxvf32gerpp, ispfx: true, size: 8}, + {as: APMXVF64GERPP, a1: C_VSREGP, a2: C_VSREG, a3: C_U4CON, a4: C_U2CON, a6: C_AREG, asmout: type_pmxvf64gerpp, ispfx: true, size: 8}, + {as: APMXVI16GER2SPP, a1: C_VSREG, a2: C_VSREG, a3: C_U4CON, a4: C_U4CON, a5: C_U2CON, a6: C_AREG, asmout: type_pmxvi16ger2spp, ispfx: true, size: 8}, + {as: APMXVI4GER8PP, a1: C_VSREG, a2: C_VSREG, a3: C_U4CON, a4: C_U4CON, a5: C_U8CON, a6: C_AREG, asmout: type_pmxvi4ger8pp, ispfx: true, size: 8}, + {as: APMXVI8GER4SPP, a1: C_VSREG, a2: C_VSREG, a3: C_U4CON, a4: C_U4CON, a5: C_U4CON, a6: C_AREG, asmout: type_pmxvi8ger4spp, ispfx: true, size: 8}, + {as: APNOP, asmout: type_pnop, ispfx: true, size: 8}, + {as: APSTFS, a1: C_FREG, a3: C_U1CON, a6: C_LOREG, asmout: type_pstxssp, ispfx: true, size: 8}, + {as: APSTQ, a1: C_REGP, a3: C_U1CON, a6: C_LOREG, asmout: type_pstxssp, ispfx: true, size: 8}, + {as: APSTW, a1: C_REG, a3: C_U1CON, a6: C_LOREG, asmout: type_pstxssp, ispfx: true, size: 8}, + {as: APSTXSSP, a1: C_VREG, a3: C_U1CON, a6: C_LOREG, asmout: type_pstxssp, ispfx: true, size: 8}, + {as: APSTXV, a1: C_VSREG, a3: C_U1CON, a6: C_LOREG, asmout: type_pstxv, ispfx: true, size: 8}, + {as: APSTXVP, a1: C_VSREGP, a3: C_U1CON, a6: C_LOREG, asmout: type_pstxvp, ispfx: true, size: 8}, + {as: ASETNBCR, a1: C_CRBIT, a6: C_REG, asmout: type_setnbcr, size: 4}, + {as: ASTXVP, a1: C_VSREGP, a6: C_SOREG, asmout: type_stxvp, size: 4}, + {as: ASTXVPX, a1: C_VSREGP, a6: C_XOREG, asmout: type_stxvpx, size: 4}, + {as: ASTXVRWX, a1: C_VSREG, a6: C_XOREG, asmout: type_stxvrwx, size: 4}, + {as: AVCLRRB, a1: C_VREG, a2: C_REG, a6: C_VREG, asmout: type_xsmincqp, size: 4}, + {as: AVCMPUQ, a1: C_VREG, a2: C_VREG, a6: C_CREG, asmout: type_vcmpuq, size: 4}, + {as: 
AVCNTMBW, a1: C_VREG, a3: C_U1CON, a6: C_REG, asmout: type_vcntmbw, size: 4}, + {as: AVEXTDUWVRX, a1: C_VREG, a2: C_VREG, a3: C_REG, a6: C_VREG, asmout: type_vmsumcud, size: 4}, + {as: AVEXTRACTWM, a1: C_VREG, a6: C_REG, asmout: type_xscvuqqp, size: 4}, + {as: AVGNB, a1: C_VREG, a3: C_U3CON, a6: C_REG, asmout: type_vgnb, size: 4}, + {as: AVINSW, a1: C_REG, a3: C_U4CON, a6: C_VREG, asmout: type_vinsw, size: 4}, + {as: AVINSWRX, a1: C_REG, a2: C_REG, a6: C_VREG, asmout: type_xsmincqp, size: 4}, + {as: AVINSWVRX, a1: C_REG, a2: C_VREG, a6: C_VREG, asmout: type_xsmincqp, size: 4}, + {as: AVMSUMCUD, a1: C_VREG, a2: C_VREG, a3: C_VREG, a6: C_VREG, asmout: type_vmsumcud, size: 4}, + {as: AVSRDBI, a1: C_VREG, a2: C_VREG, a3: C_U3CON, a6: C_VREG, asmout: type_vsrdbi, size: 4}, + {as: AXSCVUQQP, a1: C_VREG, a6: C_VREG, asmout: type_xscvuqqp, size: 4}, + {as: AXSMINCQP, a1: C_VREG, a2: C_VREG, a6: C_VREG, asmout: type_xsmincqp, size: 4}, + {as: AXVCVSPBF16, a1: C_VSREG, a6: C_VSREG, asmout: type_xvcvspbf16, size: 4}, + {as: AXVI8GER4SPP, a1: C_VSREG, a2: C_VSREG, a6: C_AREG, asmout: type_xvi8ger4spp, size: 4}, + {as: AXVTLSBB, a1: C_VSREG, a6: C_CREG, asmout: type_xvtlsbb, size: 4}, + {as: AXXBLENDVW, a1: C_VSREG, a2: C_VSREG, a3: C_VSREG, a6: C_VSREG, asmout: type_xxblendvw, ispfx: true, size: 8}, + {as: AXXEVAL, a1: C_VSREG, a2: C_VSREG, a3: C_VSREG, a4: C_U8CON, a6: C_VSREG, asmout: type_xxeval, ispfx: true, size: 8}, + {as: AXXGENPCVWM, a1: C_VREG, a3: C_U5CON, a6: C_VSREG, asmout: type_xxgenpcvwm, size: 4}, + {as: AXXPERMX, a1: C_VSREG, a2: C_VSREG, a3: C_VSREG, a4: C_U3CON, a6: C_VSREG, asmout: type_xxpermx, ispfx: true, size: 8}, + {as: AXXSETACCZ, a6: C_AREG, asmout: type_xxsetaccz, size: 4}, + {as: AXXSPLTI32DX, a1: C_U1CON, a3: C_U32CON, a6: C_VSREG, asmout: type_xxsplti32dx, ispfx: true, size: 8}, + {as: AXXSPLTIW, a1: C_U32CON, a6: C_VSREG, asmout: type_xxspltiw, ispfx: true, size: 8}, +} + +// brw RA,RS +func type_brw(c *ctxt9, p *obj.Prog, t *Optab, out 
*[5]uint32) { + o0 := GenOpcodes[p.As-AXXSETACCZ] + o0 |= uint32(p.To.Reg&0x1f) << 16 // RA + o0 |= uint32(p.From.Reg&0x1f) << 21 // RS + out[0] = o0 +} + +// hashchkp RB,offset(RA) +func type_hashchkp(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenOpcodes[p.As-AXXSETACCZ] + o0 |= uint32(p.To.Reg&0x1f) << 11 // RB + o0 |= uint32((p.From.Offset>>8)&0x1) << 0 // DX + o0 |= uint32((p.From.Offset>>3)&0x1f) << 21 // D + o0 |= uint32(p.From.Reg&0x1f) << 16 // RA + if p.From.Offset&0xfffffe07 != 0xfffffe00 { + c.ctxt.Diag("Constant(%d) must within the range of [-512,-8] in steps of 8\n%v", p.From.Offset, p) + } + out[0] = o0 +} + +// hashstp RB,offset(RA) +func type_hashstp(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenOpcodes[p.As-AXXSETACCZ] + o0 |= uint32(p.From.Reg&0x1f) << 11 // RB + o0 |= uint32((p.To.Offset>>8)&0x1) << 0 // DX + o0 |= uint32((p.To.Offset>>3)&0x1f) << 21 // D + o0 |= uint32(p.To.Reg&0x1f) << 16 // RA + if p.To.Offset&0xfffffe07 != 0xfffffe00 { + c.ctxt.Diag("Constant(%d) must within the range of [-512,-8] in steps of 8\n%v", p.To.Offset, p) + } + out[0] = o0 +} + +// lxvkq XT,UIM +func type_lxvkq(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenOpcodes[p.As-AXXSETACCZ] + o0 |= uint32((p.To.Reg>>5)&0x1) << 0 // TX + o0 |= uint32(p.To.Reg&0x1f) << 21 // T + o0 |= uint32(p.From.Offset&0x1f) << 11 // UIM + out[0] = o0 +} + +// lxvp XTp,DQ(RA) +func type_lxvp(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenOpcodes[p.As-AXXSETACCZ] + o0 |= uint32((p.To.Reg>>5)&0x1) << 21 // TX + o0 |= uint32((p.To.Reg>>1)&0xf) << 22 // Tp + o0 |= uint32((p.From.Offset>>4)&0xfff) << 4 // DQ + o0 |= uint32(p.From.Reg&0x1f) << 16 // RA + if p.From.Offset&0xf != 0 { + c.ctxt.Diag("Constant 0x%x (%d) is not a multiple of 16\n%v", p.From.Offset, p.From.Offset, p) + } + out[0] = o0 +} + +// lxvpx XTp,RA,RB +func type_lxvpx(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenOpcodes[p.As-AXXSETACCZ] + o0 |= 
uint32((p.To.Reg>>5)&0x1) << 21 // TX + o0 |= uint32((p.To.Reg>>1)&0xf) << 22 // Tp + o0 |= uint32(p.From.Index&0x1f) << 16 // RA + o0 |= uint32(p.From.Reg&0x1f) << 11 // RB + out[0] = o0 +} + +// lxvrwx XT,RA,RB +func type_lxvrwx(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenOpcodes[p.As-AXXSETACCZ] + o0 |= uint32((p.To.Reg>>5)&0x1) << 0 // TX + o0 |= uint32(p.To.Reg&0x1f) << 21 // T + o0 |= uint32(p.From.Index&0x1f) << 16 // RA + o0 |= uint32(p.From.Reg&0x1f) << 11 // RB + out[0] = o0 +} + +// mtvsrbmi VRT,bm +func type_mtvsrbmi(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenOpcodes[p.As-AXXSETACCZ] + o0 |= uint32(p.To.Reg&0x1f) << 21 // VRT + o0 |= uint32((p.From.Offset>>6)&0x3ff) << 6 // b0 + o0 |= uint32((p.From.Offset>>1)&0x1f) << 16 // b1 + o0 |= uint32(p.From.Offset&0x1) << 0 // b2 + out[0] = o0 +} + +// paddi RT,RA,SI,R +func type_paddi(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenPfxOpcodes[p.As-AXXSPLTIW] + o1 := GenOpcodes[p.As-AXXSETACCZ] + o1 |= uint32(p.To.Reg&0x1f) << 21 // RT + o1 |= uint32(p.From.Reg&0x1f) << 16 // RA + o0 |= uint32((p.RestArgs[0].Addr.Offset>>16)&0x3ffff) << 0 // si0 + o1 |= uint32(p.RestArgs[0].Addr.Offset&0xffff) << 0 // si1 + o0 |= uint32(p.RestArgs[1].Addr.Offset&0x1) << 20 // R + out[1] = o1 + out[0] = o0 +} + +// pextd RA,RS,RB +func type_pextd(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenOpcodes[p.As-AXXSETACCZ] + o0 |= uint32(p.To.Reg&0x1f) << 16 // RA + o0 |= uint32(p.From.Reg&0x1f) << 21 // RS + o0 |= uint32(p.Reg&0x1f) << 11 // RB + out[0] = o0 +} + +// plxssp VRT,D(RA),R +func type_plxssp(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenPfxOpcodes[p.As-AXXSPLTIW] + o1 := GenOpcodes[p.As-AXXSETACCZ] + o1 |= uint32(p.To.Reg&0x1f) << 21 // VRT + o0 |= uint32((p.From.Offset>>16)&0x3ffff) << 0 // d0 + o1 |= uint32(p.From.Offset&0xffff) << 0 // d1 + o1 |= uint32(p.From.Reg&0x1f) << 16 // RA + o0 |= uint32(p.RestArgs[0].Addr.Offset&0x1) << 20 // R 
+ out[1] = o1 + out[0] = o0 +} + +// plxv XT,D(RA),R +func type_plxv(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenPfxOpcodes[p.As-AXXSPLTIW] + o1 := GenOpcodes[p.As-AXXSETACCZ] + o1 |= uint32((p.To.Reg>>5)&0x1) << 26 // TX + o1 |= uint32(p.To.Reg&0x1f) << 21 // T + o0 |= uint32((p.From.Offset>>16)&0x3ffff) << 0 // d0 + o1 |= uint32(p.From.Offset&0xffff) << 0 // d1 + o1 |= uint32(p.From.Reg&0x1f) << 16 // RA + o0 |= uint32(p.RestArgs[0].Addr.Offset&0x1) << 20 // R + out[1] = o1 + out[0] = o0 +} + +// plxvp XTp,D(RA),R +func type_plxvp(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenPfxOpcodes[p.As-AXXSPLTIW] + o1 := GenOpcodes[p.As-AXXSETACCZ] + o1 |= uint32((p.To.Reg>>5)&0x1) << 21 // TX + o1 |= uint32((p.To.Reg>>1)&0xf) << 22 // Tp + o0 |= uint32((p.From.Offset>>16)&0x3ffff) << 0 // d0 + o1 |= uint32(p.From.Offset&0xffff) << 0 // d1 + o1 |= uint32(p.From.Reg&0x1f) << 16 // RA + o0 |= uint32(p.RestArgs[0].Addr.Offset&0x1) << 20 // R + out[1] = o1 + out[0] = o0 +} + +// pmxvf32gerpp AT,XA,XB,XMSK,YMSK +func type_pmxvf32gerpp(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenPfxOpcodes[p.As-AXXSPLTIW] + o1 := GenOpcodes[p.As-AXXSETACCZ] + o1 |= uint32(p.To.Reg&0x7) << 23 // AT + o1 |= uint32((p.From.Reg>>5)&0x1) << 2 // AX + o1 |= uint32(p.From.Reg&0x1f) << 16 // A + o1 |= uint32((p.Reg>>5)&0x1) << 1 // BX + o1 |= uint32(p.Reg&0x1f) << 11 // B + o0 |= uint32(p.RestArgs[0].Addr.Offset&0xf) << 4 // XMSK + o0 |= uint32(p.RestArgs[1].Addr.Offset&0xf) << 0 // YMSK + out[1] = o1 + out[0] = o0 +} + +// pmxvf64gerpp AT,XAp,XB,XMSK,YMSK +func type_pmxvf64gerpp(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenPfxOpcodes[p.As-AXXSPLTIW] + o1 := GenOpcodes[p.As-AXXSETACCZ] + o1 |= uint32(p.To.Reg&0x7) << 23 // AT + o1 |= uint32((p.From.Reg>>5)&0x1) << 2 // AX + o1 |= uint32(p.From.Reg&0x1f) << 16 // Ap + o1 |= uint32((p.Reg>>5)&0x1) << 1 // BX + o1 |= uint32(p.Reg&0x1f) << 11 // B + o0 |= 
uint32(p.RestArgs[0].Addr.Offset&0xf) << 4 // XMSK + o0 |= uint32(p.RestArgs[1].Addr.Offset&0x3) << 2 // YMSK + out[1] = o1 + out[0] = o0 +} + +// pmxvi16ger2spp AT,XA,XB,XMSK,YMSK,PMSK +func type_pmxvi16ger2spp(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenPfxOpcodes[p.As-AXXSPLTIW] + o1 := GenOpcodes[p.As-AXXSETACCZ] + o1 |= uint32(p.To.Reg&0x7) << 23 // AT + o1 |= uint32((p.From.Reg>>5)&0x1) << 2 // AX + o1 |= uint32(p.From.Reg&0x1f) << 16 // A + o1 |= uint32((p.Reg>>5)&0x1) << 1 // BX + o1 |= uint32(p.Reg&0x1f) << 11 // B + o0 |= uint32(p.RestArgs[0].Addr.Offset&0xf) << 4 // XMSK + o0 |= uint32(p.RestArgs[1].Addr.Offset&0xf) << 0 // YMSK + o0 |= uint32(p.RestArgs[2].Addr.Offset&0x3) << 14 // PMSK + out[1] = o1 + out[0] = o0 +} + +// pmxvi4ger8pp AT,XA,XB,XMSK,YMSK,PMSK +func type_pmxvi4ger8pp(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenPfxOpcodes[p.As-AXXSPLTIW] + o1 := GenOpcodes[p.As-AXXSETACCZ] + o1 |= uint32(p.To.Reg&0x7) << 23 // AT + o1 |= uint32((p.From.Reg>>5)&0x1) << 2 // AX + o1 |= uint32(p.From.Reg&0x1f) << 16 // A + o1 |= uint32((p.Reg>>5)&0x1) << 1 // BX + o1 |= uint32(p.Reg&0x1f) << 11 // B + o0 |= uint32(p.RestArgs[0].Addr.Offset&0xf) << 4 // XMSK + o0 |= uint32(p.RestArgs[1].Addr.Offset&0xf) << 0 // YMSK + o0 |= uint32(p.RestArgs[2].Addr.Offset&0xff) << 8 // PMSK + out[1] = o1 + out[0] = o0 +} + +// pmxvi8ger4spp AT,XA,XB,XMSK,YMSK,PMSK +func type_pmxvi8ger4spp(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenPfxOpcodes[p.As-AXXSPLTIW] + o1 := GenOpcodes[p.As-AXXSETACCZ] + o1 |= uint32(p.To.Reg&0x7) << 23 // AT + o1 |= uint32((p.From.Reg>>5)&0x1) << 2 // AX + o1 |= uint32(p.From.Reg&0x1f) << 16 // A + o1 |= uint32((p.Reg>>5)&0x1) << 1 // BX + o1 |= uint32(p.Reg&0x1f) << 11 // B + o0 |= uint32(p.RestArgs[0].Addr.Offset&0xf) << 4 // XMSK + o0 |= uint32(p.RestArgs[1].Addr.Offset&0xf) << 0 // YMSK + o0 |= uint32(p.RestArgs[2].Addr.Offset&0xf) << 12 // PMSK + out[1] = o1 + out[0] = o0 +} + +// pnop +func 
type_pnop(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenPfxOpcodes[p.As-AXXSPLTIW] + o1 := GenOpcodes[p.As-AXXSETACCZ] + out[1] = o1 + out[0] = o0 +} + +// pstxssp VRS,D(RA),R +func type_pstxssp(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenPfxOpcodes[p.As-AXXSPLTIW] + o1 := GenOpcodes[p.As-AXXSETACCZ] + o1 |= uint32(p.From.Reg&0x1f) << 21 // VRS + o0 |= uint32((p.To.Offset>>16)&0x3ffff) << 0 // d0 + o1 |= uint32(p.To.Offset&0xffff) << 0 // d1 + o1 |= uint32(p.To.Reg&0x1f) << 16 // RA + o0 |= uint32(p.RestArgs[0].Addr.Offset&0x1) << 20 // R + out[1] = o1 + out[0] = o0 +} + +// pstxv XS,D(RA),R +func type_pstxv(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenPfxOpcodes[p.As-AXXSPLTIW] + o1 := GenOpcodes[p.As-AXXSETACCZ] + o1 |= uint32((p.From.Reg>>5)&0x1) << 26 // SX + o1 |= uint32(p.From.Reg&0x1f) << 21 // S + o0 |= uint32((p.To.Offset>>16)&0x3ffff) << 0 // d0 + o1 |= uint32(p.To.Offset&0xffff) << 0 // d1 + o1 |= uint32(p.To.Reg&0x1f) << 16 // RA + o0 |= uint32(p.RestArgs[0].Addr.Offset&0x1) << 20 // R + out[1] = o1 + out[0] = o0 +} + +// pstxvp XSp,D(RA),R +func type_pstxvp(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenPfxOpcodes[p.As-AXXSPLTIW] + o1 := GenOpcodes[p.As-AXXSETACCZ] + o1 |= uint32((p.From.Reg>>5)&0x1) << 21 // SX + o1 |= uint32((p.From.Reg>>1)&0xf) << 22 // Sp + o0 |= uint32((p.To.Offset>>16)&0x3ffff) << 0 // d0 + o1 |= uint32(p.To.Offset&0xffff) << 0 // d1 + o1 |= uint32(p.To.Reg&0x1f) << 16 // RA + o0 |= uint32(p.RestArgs[0].Addr.Offset&0x1) << 20 // R + out[1] = o1 + out[0] = o0 +} + +// setnbcr RT,BI +func type_setnbcr(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenOpcodes[p.As-AXXSETACCZ] + o0 |= uint32(p.To.Reg&0x1f) << 21 // RT + o0 |= uint32(p.From.Reg&0x1f) << 16 // BI + out[0] = o0 +} + +// stxvp XSp,DQ(RA) +func type_stxvp(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenOpcodes[p.As-AXXSETACCZ] + o0 |= uint32((p.From.Reg>>5)&0x1) << 21 // SX + o0 |= 
uint32((p.From.Reg>>1)&0xf) << 22 // Sp + o0 |= uint32((p.To.Offset>>4)&0xfff) << 4 // DQ + o0 |= uint32(p.To.Reg&0x1f) << 16 // RA + if p.To.Offset&0xf != 0 { + c.ctxt.Diag("Constant 0x%x (%d) is not a multiple of 16\n%v", p.To.Offset, p.To.Offset, p) + } + out[0] = o0 +} + +// stxvpx XSp,RA,RB +func type_stxvpx(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenOpcodes[p.As-AXXSETACCZ] + o0 |= uint32((p.From.Reg>>5)&0x1) << 21 // SX + o0 |= uint32((p.From.Reg>>1)&0xf) << 22 // Sp + o0 |= uint32(p.To.Index&0x1f) << 16 // RA + o0 |= uint32(p.To.Reg&0x1f) << 11 // RB + out[0] = o0 +} + +// stxvrwx XS,RA,RB +func type_stxvrwx(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenOpcodes[p.As-AXXSETACCZ] + o0 |= uint32((p.From.Reg>>5)&0x1) << 0 // SX + o0 |= uint32(p.From.Reg&0x1f) << 21 // S + o0 |= uint32(p.To.Index&0x1f) << 16 // RA + o0 |= uint32(p.To.Reg&0x1f) << 11 // RB + out[0] = o0 +} + +// vcmpuq BF,VRA,VRB +func type_vcmpuq(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenOpcodes[p.As-AXXSETACCZ] + o0 |= uint32(p.To.Reg&0x7) << 23 // BF + o0 |= uint32(p.From.Reg&0x1f) << 16 // VRA + o0 |= uint32(p.Reg&0x1f) << 11 // VRB + out[0] = o0 +} + +// vcntmbw RT,VRB,MP +func type_vcntmbw(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenOpcodes[p.As-AXXSETACCZ] + o0 |= uint32(p.To.Reg&0x1f) << 21 // RT + o0 |= uint32(p.From.Reg&0x1f) << 11 // VRB + o0 |= uint32(p.RestArgs[0].Addr.Offset&0x1) << 16 // MP + out[0] = o0 +} + +// vgnb RT,VRB,N +func type_vgnb(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenOpcodes[p.As-AXXSETACCZ] + o0 |= uint32(p.To.Reg&0x1f) << 21 // RT + o0 |= uint32(p.From.Reg&0x1f) << 11 // VRB + o0 |= uint32(p.RestArgs[0].Addr.Offset&0x7) << 16 // N + out[0] = o0 +} + +// vinsw VRT,RB,UIM +func type_vinsw(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenOpcodes[p.As-AXXSETACCZ] + o0 |= uint32(p.To.Reg&0x1f) << 21 // VRT + o0 |= uint32(p.From.Reg&0x1f) << 11 // RB + o0 |= 
uint32(p.RestArgs[0].Addr.Offset&0xf) << 16 // UIM + out[0] = o0 +} + +// vmsumcud VRT,VRA,VRB,VRC +func type_vmsumcud(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenOpcodes[p.As-AXXSETACCZ] + o0 |= uint32(p.To.Reg&0x1f) << 21 // VRT + o0 |= uint32(p.From.Reg&0x1f) << 16 // VRA + o0 |= uint32(p.Reg&0x1f) << 11 // VRB + o0 |= uint32(p.RestArgs[0].Addr.Reg&0x1f) << 6 // VRC + out[0] = o0 +} + +// vsrdbi VRT,VRA,VRB,SH +func type_vsrdbi(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenOpcodes[p.As-AXXSETACCZ] + o0 |= uint32(p.To.Reg&0x1f) << 21 // VRT + o0 |= uint32(p.From.Reg&0x1f) << 16 // VRA + o0 |= uint32(p.Reg&0x1f) << 11 // VRB + o0 |= uint32(p.RestArgs[0].Addr.Offset&0x7) << 6 // SH + out[0] = o0 +} + +// xscvuqqp VRT,VRB +func type_xscvuqqp(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenOpcodes[p.As-AXXSETACCZ] + o0 |= uint32(p.To.Reg&0x1f) << 21 // VRT + o0 |= uint32(p.From.Reg&0x1f) << 11 // VRB + out[0] = o0 +} + +// xsmincqp VRT,VRA,VRB +func type_xsmincqp(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenOpcodes[p.As-AXXSETACCZ] + o0 |= uint32(p.To.Reg&0x1f) << 21 // VRT + o0 |= uint32(p.From.Reg&0x1f) << 16 // VRA + o0 |= uint32(p.Reg&0x1f) << 11 // VRB + out[0] = o0 +} + +// xvcvspbf16 XT,XB +func type_xvcvspbf16(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenOpcodes[p.As-AXXSETACCZ] + o0 |= uint32((p.To.Reg>>5)&0x1) << 0 // TX + o0 |= uint32(p.To.Reg&0x1f) << 21 // T + o0 |= uint32((p.From.Reg>>5)&0x1) << 1 // BX + o0 |= uint32(p.From.Reg&0x1f) << 11 // B + out[0] = o0 +} + +// xvi8ger4spp AT,XA,XB +func type_xvi8ger4spp(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenOpcodes[p.As-AXXSETACCZ] + o0 |= uint32(p.To.Reg&0x7) << 23 // AT + o0 |= uint32((p.From.Reg>>5)&0x1) << 2 // AX + o0 |= uint32(p.From.Reg&0x1f) << 16 // A + o0 |= uint32((p.Reg>>5)&0x1) << 1 // BX + o0 |= uint32(p.Reg&0x1f) << 11 // B + out[0] = o0 +} + +// xvtlsbb BF,XB +func type_xvtlsbb(c *ctxt9, p 
*obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenOpcodes[p.As-AXXSETACCZ] + o0 |= uint32(p.To.Reg&0x7) << 23 // BF + o0 |= uint32((p.From.Reg>>5)&0x1) << 1 // BX + o0 |= uint32(p.From.Reg&0x1f) << 11 // B + out[0] = o0 +} + +// xxblendvw XT,XA,XB,XC +func type_xxblendvw(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenPfxOpcodes[p.As-AXXSPLTIW] + o1 := GenOpcodes[p.As-AXXSETACCZ] + o1 |= uint32((p.To.Reg>>5)&0x1) << 0 // TX + o1 |= uint32(p.To.Reg&0x1f) << 21 // T + o1 |= uint32((p.From.Reg>>5)&0x1) << 2 // AX + o1 |= uint32(p.From.Reg&0x1f) << 16 // A + o1 |= uint32((p.Reg>>5)&0x1) << 1 // BX + o1 |= uint32(p.Reg&0x1f) << 11 // B + o1 |= uint32((p.RestArgs[0].Addr.Reg>>5)&0x1) << 3 // CX + o1 |= uint32(p.RestArgs[0].Addr.Reg&0x1f) << 6 // C + out[1] = o1 + out[0] = o0 +} + +// xxeval XT,XA,XB,XC,IMM +func type_xxeval(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenPfxOpcodes[p.As-AXXSPLTIW] + o1 := GenOpcodes[p.As-AXXSETACCZ] + o1 |= uint32((p.To.Reg>>5)&0x1) << 0 // TX + o1 |= uint32(p.To.Reg&0x1f) << 21 // T + o1 |= uint32((p.From.Reg>>5)&0x1) << 2 // AX + o1 |= uint32(p.From.Reg&0x1f) << 16 // A + o1 |= uint32((p.Reg>>5)&0x1) << 1 // BX + o1 |= uint32(p.Reg&0x1f) << 11 // B + o1 |= uint32((p.RestArgs[0].Addr.Reg>>5)&0x1) << 3 // CX + o1 |= uint32(p.RestArgs[0].Addr.Reg&0x1f) << 6 // C + o0 |= uint32(p.RestArgs[1].Addr.Offset&0xff) << 0 // IMM + out[1] = o1 + out[0] = o0 +} + +// xxgenpcvwm XT,VRB,IMM +func type_xxgenpcvwm(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenOpcodes[p.As-AXXSETACCZ] + o0 |= uint32((p.To.Reg>>5)&0x1) << 0 // TX + o0 |= uint32(p.To.Reg&0x1f) << 21 // T + o0 |= uint32(p.From.Reg&0x1f) << 11 // VRB + o0 |= uint32(p.RestArgs[0].Addr.Offset&0x1f) << 16 // IMM + out[0] = o0 +} + +// xxpermx XT,XA,XB,XC,UIM +func type_xxpermx(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenPfxOpcodes[p.As-AXXSPLTIW] + o1 := GenOpcodes[p.As-AXXSETACCZ] + o1 |= uint32((p.To.Reg>>5)&0x1) << 0 // TX + o1 |= 
uint32(p.To.Reg&0x1f) << 21 // T + o1 |= uint32((p.From.Reg>>5)&0x1) << 2 // AX + o1 |= uint32(p.From.Reg&0x1f) << 16 // A + o1 |= uint32((p.Reg>>5)&0x1) << 1 // BX + o1 |= uint32(p.Reg&0x1f) << 11 // B + o1 |= uint32((p.RestArgs[0].Addr.Reg>>5)&0x1) << 3 // CX + o1 |= uint32(p.RestArgs[0].Addr.Reg&0x1f) << 6 // C + o0 |= uint32(p.RestArgs[1].Addr.Offset&0x7) << 0 // UIM + out[1] = o1 + out[0] = o0 +} + +// xxsetaccz AT +func type_xxsetaccz(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenOpcodes[p.As-AXXSETACCZ] + o0 |= uint32(p.To.Reg&0x7) << 23 // AT + out[0] = o0 +} + +// xxsplti32dx XT,IX,IMM32 +func type_xxsplti32dx(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenPfxOpcodes[p.As-AXXSPLTIW] + o1 := GenOpcodes[p.As-AXXSETACCZ] + o1 |= uint32((p.To.Reg>>5)&0x1) << 16 // TX + o1 |= uint32(p.To.Reg&0x1f) << 21 // T + o1 |= uint32(p.From.Offset&0x1) << 17 // IX + o0 |= uint32((p.RestArgs[0].Addr.Offset>>16)&0xffff) << 0 // imm0 + o1 |= uint32(p.RestArgs[0].Addr.Offset&0xffff) << 0 // imm1 + out[1] = o1 + out[0] = o0 +} + +// xxspltiw XT,IMM32 +func type_xxspltiw(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) { + o0 := GenPfxOpcodes[p.As-AXXSPLTIW] + o1 := GenOpcodes[p.As-AXXSETACCZ] + o1 |= uint32((p.To.Reg>>5)&0x1) << 16 // TX + o1 |= uint32(p.To.Reg&0x1f) << 21 // T + o0 |= uint32((p.From.Offset>>16)&0xffff) << 0 // imm0 + o1 |= uint32(p.From.Offset&0xffff) << 0 // imm1 + out[1] = o1 + out[0] = o0 +} + +func opsetGen(from obj.As) bool { + r0 := from & obj.AMask + switch from { + case ABRW: + opset(ABRH, r0) + opset(ABRD, r0) + case ADCFFIXQQ: + case ADCTFIXQQ: + case AHASHCHKP: + opset(AHASHCHK, r0) + case AHASHSTP: + opset(AHASHST, r0) + case ALXVKQ: + case ALXVP: + case ALXVPX: + case ALXVRWX: + opset(ALXVRHX, r0) + opset(ALXVRDX, r0) + opset(ALXVRBX, r0) + case AMTVSRBMI: + case AMTVSRWM: + opset(AMTVSRQM, r0) + opset(AMTVSRHM, r0) + opset(AMTVSRDM, r0) + opset(AMTVSRBM, r0) + case APADDI: + case APEXTD: + opset(APDEPD, r0) + 
opset(ACNTTZDM, r0) + opset(ACNTLZDM, r0) + opset(ACFUGED, r0) + case APLFS: + opset(APLFD, r0) + case APLQ: + case APLWZ: + opset(APLWA, r0) + opset(APLHZ, r0) + opset(APLHA, r0) + opset(APLD, r0) + opset(APLBZ, r0) + case APLXSSP: + opset(APLXSD, r0) + case APLXV: + case APLXVP: + case APMXVF32GERPP: + opset(APMXVF32GERPN, r0) + opset(APMXVF32GERNP, r0) + opset(APMXVF32GERNN, r0) + opset(APMXVF32GER, r0) + case APMXVF64GERPP: + opset(APMXVF64GERPN, r0) + opset(APMXVF64GERNP, r0) + opset(APMXVF64GERNN, r0) + opset(APMXVF64GER, r0) + case APMXVI16GER2SPP: + opset(APMXVI16GER2S, r0) + opset(APMXVI16GER2PP, r0) + opset(APMXVI16GER2, r0) + opset(APMXVF16GER2PP, r0) + opset(APMXVF16GER2PN, r0) + opset(APMXVF16GER2NP, r0) + opset(APMXVF16GER2NN, r0) + opset(APMXVF16GER2, r0) + opset(APMXVBF16GER2PP, r0) + opset(APMXVBF16GER2PN, r0) + opset(APMXVBF16GER2NP, r0) + opset(APMXVBF16GER2NN, r0) + opset(APMXVBF16GER2, r0) + case APMXVI4GER8PP: + opset(APMXVI4GER8, r0) + case APMXVI8GER4SPP: + opset(APMXVI8GER4PP, r0) + opset(APMXVI8GER4, r0) + case APNOP: + case APSTFS: + opset(APSTFD, r0) + case APSTQ: + case APSTW: + opset(APSTH, r0) + opset(APSTD, r0) + opset(APSTB, r0) + case APSTXSSP: + opset(APSTXSD, r0) + case APSTXV: + case APSTXVP: + case ASETNBCR: + opset(ASETNBC, r0) + opset(ASETBCR, r0) + opset(ASETBC, r0) + case ASTXVP: + case ASTXVPX: + case ASTXVRWX: + opset(ASTXVRHX, r0) + opset(ASTXVRDX, r0) + opset(ASTXVRBX, r0) + case AVCLRRB: + opset(AVCLRLB, r0) + case AVCMPUQ: + opset(AVCMPSQ, r0) + case AVCNTMBW: + opset(AVCNTMBH, r0) + opset(AVCNTMBD, r0) + opset(AVCNTMBB, r0) + case AVEXTDUWVRX: + opset(AVEXTDUWVLX, r0) + opset(AVEXTDUHVRX, r0) + opset(AVEXTDUHVLX, r0) + opset(AVEXTDUBVRX, r0) + opset(AVEXTDUBVLX, r0) + opset(AVEXTDDVRX, r0) + opset(AVEXTDDVLX, r0) + case AVEXTRACTWM: + opset(AVEXTRACTQM, r0) + opset(AVEXTRACTHM, r0) + opset(AVEXTRACTDM, r0) + opset(AVEXTRACTBM, r0) + case AVGNB: + case AVINSW: + opset(AVINSD, r0) + case AVINSWRX: + opset(AVINSWLX, r0) 
+ opset(AVINSHRX, r0) + opset(AVINSHLX, r0) + opset(AVINSDRX, r0) + opset(AVINSDLX, r0) + opset(AVINSBRX, r0) + opset(AVINSBLX, r0) + case AVINSWVRX: + opset(AVINSWVLX, r0) + opset(AVINSHVRX, r0) + opset(AVINSHVLX, r0) + opset(AVINSBVRX, r0) + opset(AVINSBVLX, r0) + case AVMSUMCUD: + case AVSRDBI: + opset(AVSLDBI, r0) + case AXSCVUQQP: + opset(AXSCVSQQP, r0) + opset(AXSCVQPUQZ, r0) + opset(AXSCVQPSQZ, r0) + opset(AVSTRIHRCC, r0) + opset(AVSTRIHR, r0) + opset(AVSTRIHLCC, r0) + opset(AVSTRIHL, r0) + opset(AVSTRIBRCC, r0) + opset(AVSTRIBR, r0) + opset(AVSTRIBLCC, r0) + opset(AVSTRIBL, r0) + opset(AVEXTSD2Q, r0) + opset(AVEXPANDWM, r0) + opset(AVEXPANDQM, r0) + opset(AVEXPANDHM, r0) + opset(AVEXPANDDM, r0) + opset(AVEXPANDBM, r0) + case AXSMINCQP: + opset(AXSMAXCQP, r0) + opset(AXSCMPGTQP, r0) + opset(AXSCMPGEQP, r0) + opset(AXSCMPEQQP, r0) + opset(AVSRQ, r0) + opset(AVSRAQ, r0) + opset(AVSLQ, r0) + opset(AVRLQNM, r0) + opset(AVRLQMI, r0) + opset(AVRLQ, r0) + opset(AVPEXTD, r0) + opset(AVPDEPD, r0) + opset(AVMULOUD, r0) + opset(AVMULOSD, r0) + opset(AVMULLD, r0) + opset(AVMULHUW, r0) + opset(AVMULHUD, r0) + opset(AVMULHSW, r0) + opset(AVMULHSD, r0) + opset(AVMULEUD, r0) + opset(AVMULESD, r0) + opset(AVMODUW, r0) + opset(AVMODUQ, r0) + opset(AVMODUD, r0) + opset(AVMODSW, r0) + opset(AVMODSQ, r0) + opset(AVMODSD, r0) + opset(AVDIVUW, r0) + opset(AVDIVUQ, r0) + opset(AVDIVUD, r0) + opset(AVDIVSW, r0) + opset(AVDIVSQ, r0) + opset(AVDIVSD, r0) + opset(AVDIVEUW, r0) + opset(AVDIVEUQ, r0) + opset(AVDIVEUD, r0) + opset(AVDIVESW, r0) + opset(AVDIVESQ, r0) + opset(AVDIVESD, r0) + opset(AVCTZDM, r0) + opset(AVCMPGTUQCC, r0) + opset(AVCMPGTUQ, r0) + opset(AVCMPGTSQCC, r0) + opset(AVCMPGTSQ, r0) + opset(AVCMPEQUQCC, r0) + opset(AVCMPEQUQ, r0) + opset(AVCLZDM, r0) + opset(AVCFUGED, r0) + case AXVCVSPBF16: + opset(AXVCVBF16SPN, r0) + case AXVI8GER4SPP: + opset(AXVI8GER4PP, r0) + opset(AXVI8GER4, r0) + opset(AXVI4GER8PP, r0) + opset(AXVI4GER8, r0) + opset(AXVI16GER2SPP, r0) + 
opset(AXVI16GER2S, r0) + opset(AXVI16GER2PP, r0) + opset(AXVI16GER2, r0) + opset(AXVF64GERPP, r0) + opset(AXVF64GERPN, r0) + opset(AXVF64GERNP, r0) + opset(AXVF64GERNN, r0) + opset(AXVF64GER, r0) + opset(AXVF32GERPP, r0) + opset(AXVF32GERPN, r0) + opset(AXVF32GERNP, r0) + opset(AXVF32GERNN, r0) + opset(AXVF32GER, r0) + opset(AXVF16GER2PP, r0) + opset(AXVF16GER2PN, r0) + opset(AXVF16GER2NP, r0) + opset(AXVF16GER2NN, r0) + opset(AXVF16GER2, r0) + opset(AXVBF16GER2PP, r0) + opset(AXVBF16GER2PN, r0) + opset(AXVBF16GER2NP, r0) + opset(AXVBF16GER2NN, r0) + opset(AXVBF16GER2, r0) + case AXVTLSBB: + case AXXBLENDVW: + opset(AXXBLENDVH, r0) + opset(AXXBLENDVD, r0) + opset(AXXBLENDVB, r0) + case AXXEVAL: + case AXXGENPCVWM: + opset(AXXGENPCVHM, r0) + opset(AXXGENPCVDM, r0) + opset(AXXGENPCVBM, r0) + case AXXPERMX: + case AXXSETACCZ: + opset(AXXMTACC, r0) + opset(AXXMFACC, r0) + case AXXSPLTI32DX: + case AXXSPLTIW: + opset(AXXSPLTIDP, r0) + default: + return false + } + return true +} diff --git a/src/cmd/internal/obj/ppc64/asm_test.go b/src/cmd/internal/obj/ppc64/asm_test.go new file mode 100644 index 0000000..89fc9ba --- /dev/null +++ b/src/cmd/internal/obj/ppc64/asm_test.go @@ -0,0 +1,555 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package ppc64 + +import ( + "bytes" + "fmt" + "internal/testenv" + "math" + "os" + "path/filepath" + "regexp" + "strings" + "testing" + + "cmd/internal/obj" + "cmd/internal/objabi" +) + +var platformEnvs = [][]string{ + {"GOOS=aix", "GOARCH=ppc64"}, + {"GOOS=linux", "GOARCH=ppc64"}, + {"GOOS=linux", "GOARCH=ppc64le"}, +} + +const invalidPCAlignSrc = ` +TEXT test(SB),0,$0-0 +ADD $2, R3 +PCALIGN $64 +RET +` + +const validPCAlignSrc = ` +TEXT test(SB),0,$0-0 +ADD $2, R3 +PCALIGN $16 +MOVD $8, R16 +ADD $8, R4 +PCALIGN $32 +ADD $8, R3 +PCALIGN $8 +ADD $4, R8 +RET +` + +const x64pgm = ` +TEXT test(SB),0,$0-0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +PNOP +` +const x32pgm = ` +TEXT test(SB),0,$0-0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +PNOP +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +` + +const x16pgm = ` +TEXT test(SB),0,$0-0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +PNOP +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +` + +const x0pgm = ` +TEXT test(SB),0,$0-0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +PNOP +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +` +const x64pgmA64 = ` +TEXT test(SB),0,$0-0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +PNOP +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +PNOP +` + +const x64pgmA32 = ` +TEXT test(SB),0,$0-0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +PNOP +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +OR R0, R0 +PNOP +` + +// Test that nops are inserted when crossing 64B boundaries, and +// alignment is adjusted to avoid crossing. 
+func TestPfxAlign(t *testing.T) { + testenv.MustHaveGoBuild(t) + + dir, err := os.MkdirTemp("", "testpfxalign") + if err != nil { + t.Fatalf("could not create directory: %v", err) + } + defer os.RemoveAll(dir) + + pgms := []struct { + text []byte + align string + hasNop bool + }{ + {[]byte(x0pgm), "align=0x0", false}, // No alignment or nop adjustments needed + {[]byte(x16pgm), "align=0x20", false}, // Increased alignment needed + {[]byte(x32pgm), "align=0x40", false}, // Worst case alignment needed + {[]byte(x64pgm), "align=0x0", true}, // 0 aligned is default (16B) alignment + {[]byte(x64pgmA64), "align=0x40", true}, // extra alignment + nop + {[]byte(x64pgmA32), "align=0x20", true}, // extra alignment + nop + } + + for _, pgm := range pgms { + tmpfile := filepath.Join(dir, "x.s") + err = os.WriteFile(tmpfile, pgm.text, 0644) + if err != nil { + t.Fatalf("can't write output: %v\n", err) + } + cmd := testenv.Command(t, testenv.GoToolPath(t), "tool", "asm", "-S", "-o", filepath.Join(dir, "test.o"), tmpfile) + cmd.Env = append(os.Environ(), "GOOS=linux", "GOARCH=ppc64le") + out, err := cmd.CombinedOutput() + if err != nil { + t.Errorf("Failed to compile %v: %v\n", pgm, err) + } + if !strings.Contains(string(out), pgm.align) { + t.Errorf(fmt.Sprintf("Fatal, misaligned text with prefixed instructions:\n%s\n", string(out))) + } + hasNop := strings.Contains(string(out), "00 00 00 60") + if hasNop != pgm.hasNop { + t.Errorf(fmt.Sprintf("Fatal, prefixed instruction is missing nop padding:\n%s\n", string(out))) + } + } +} + +// TestLarge generates a very large file to verify that large +// program builds successfully, and branches which exceed the +// range of BC are rewritten to reach. 
+func TestLarge(t *testing.T) { + if testing.Short() { + t.Skip("Skip in short mode") + } + testenv.MustHaveGoBuild(t) + + dir, err := os.MkdirTemp("", "testlarge") + if err != nil { + t.Fatalf("could not create directory: %v", err) + } + defer os.RemoveAll(dir) + + // A few interesting test cases for long conditional branch fixups + tests := []struct { + jmpinsn string + backpattern []string + fwdpattern []string + }{ + // Test the interesting cases of conditional branch rewrites for too-far targets. Simple conditional + // branches can be made to reach with one JMP insertion, compound conditionals require two. + // + // beq <-> bne conversion (insert one jump) + {"BEQ", + []string{``, + `0x20030 131120\s\(.*\)\tBC\t\$4,\sCR0EQ,\s131128`, + `0x20034 131124\s\(.*\)\tJMP\t0`}, + []string{``, + `0x0000 00000\s\(.*\)\tBC\t\$4,\sCR0EQ,\s8`, + `0x0004 00004\s\(.*\)\tJMP\t131128`}, + }, + {"BNE", + []string{``, + `0x20030 131120\s\(.*\)\tBC\t\$12,\sCR0EQ,\s131128`, + `0x20034 131124\s\(.*\)\tJMP\t0`}, + []string{``, + `0x0000 00000\s\(.*\)\tBC\t\$12,\sCR0EQ,\s8`, + `0x0004 00004\s\(.*\)\tJMP\t131128`}}, + // bdnz (BC 16,0,tgt) <-> bdz (BC 18,0,+4) conversion (insert one jump) + {"BC 16,0,", + []string{``, + `0x20030 131120\s\(.*\)\tBC\t\$18,\sCR0LT,\s131128`, + `0x20034 131124\s\(.*\)\tJMP\t0`}, + []string{``, + `0x0000 00000\s\(.*\)\tBC\t\$18,\sCR0LT,\s8`, + `0x0004 00004\s\(.*\)\tJMP\t131128`}}, + {"BC 18,0,", + []string{``, + `0x20030 131120\s\(.*\)\tBC\t\$16,\sCR0LT,\s131128`, + `0x20034 131124\s\(.*\)\tJMP\t0`}, + []string{``, + `0x0000 00000\s\(.*\)\tBC\t\$16,\sCR0LT,\s8`, + `0x0004 00004\s\(.*\)\tJMP\t131128`}}, + // bdnzt (BC 8,0,tgt) <-> bdnzt (BC 8,0,+4) conversion (insert two jumps) + {"BC 8,0,", + []string{``, + `0x20034 131124\s\(.*\)\tBC\t\$8,\sCR0LT,\s131132`, + `0x20038 131128\s\(.*\)\tJMP\t131136`, + `0x2003c 131132\s\(.*\)\tJMP\t0\n`}, + []string{``, + `0x0000 00000\s\(.*\)\tBC\t\$8,\sCR0LT,\s8`, + `0x0004 00004\s\(.*\)\tJMP\t12`, + `0x0008 
00008\s\(.*\)\tJMP\t131136\n`}}, + } + + for _, test := range tests { + // generate a very large function + buf := bytes.NewBuffer(make([]byte, 0, 7000000)) + gen(buf, test.jmpinsn) + + tmpfile := filepath.Join(dir, "x.s") + err = os.WriteFile(tmpfile, buf.Bytes(), 0644) + if err != nil { + t.Fatalf("can't write output: %v\n", err) + } + + // Test on all supported ppc64 platforms + for _, platenv := range platformEnvs { + cmd := testenv.Command(t, testenv.GoToolPath(t), "tool", "asm", "-S", "-o", filepath.Join(dir, "test.o"), tmpfile) + cmd.Env = append(os.Environ(), platenv...) + out, err := cmd.CombinedOutput() + if err != nil { + t.Errorf("Assemble failed (%v): %v, output: %s", platenv, err, out) + } + matched, err := regexp.MatchString(strings.Join(test.fwdpattern, "\n\t*"), string(out)) + if err != nil { + t.Fatal(err) + } + if !matched { + t.Errorf("Failed to detect long forward BC fixup in (%v):%s\n", platenv, out) + } + matched, err = regexp.MatchString(strings.Join(test.backpattern, "\n\t*"), string(out)) + if err != nil { + t.Fatal(err) + } + if !matched { + t.Errorf("Failed to detect long backward BC fixup in (%v):%s\n", platenv, out) + } + } + } +} + +// gen generates a very large program with a very long forward and backwards conditional branch. +func gen(buf *bytes.Buffer, jmpinsn string) { + fmt.Fprintln(buf, "TEXT f(SB),0,$0-0") + fmt.Fprintln(buf, "label_start:") + fmt.Fprintln(buf, jmpinsn, "label_end") + for i := 0; i < (1<<15 + 10); i++ { + fmt.Fprintln(buf, "MOVD R0, R1") + } + fmt.Fprintln(buf, jmpinsn, "label_start") + fmt.Fprintln(buf, "label_end:") + fmt.Fprintln(buf, "MOVD R0, R1") + fmt.Fprintln(buf, "RET") +} + +// TestPCalign generates two asm files containing the +// PCALIGN directive, to verify correct values are and +// accepted, and incorrect values are flagged in error. 
+func TestPCalign(t *testing.T) { + var pattern8 = `0x...8\s.*ADD\s..,\sR8` + var pattern16 = `0x...[80]\s.*MOVD\s..,\sR16` + var pattern32 = `0x...0\s.*ADD\s..,\sR3` + + testenv.MustHaveGoBuild(t) + + dir, err := os.MkdirTemp("", "testpcalign") + if err != nil { + t.Fatalf("could not create directory: %v", err) + } + defer os.RemoveAll(dir) + + // generate a test with valid uses of PCALIGN + + tmpfile := filepath.Join(dir, "x.s") + err = os.WriteFile(tmpfile, []byte(validPCAlignSrc), 0644) + if err != nil { + t.Fatalf("can't write output: %v\n", err) + } + + // build generated file without errors and assemble it + cmd := testenv.Command(t, testenv.GoToolPath(t), "tool", "asm", "-o", filepath.Join(dir, "x.o"), "-S", tmpfile) + cmd.Env = append(os.Environ(), "GOARCH=ppc64le", "GOOS=linux") + out, err := cmd.CombinedOutput() + if err != nil { + t.Errorf("Build failed: %v, output: %s", err, out) + } + + matched, err := regexp.MatchString(pattern8, string(out)) + if err != nil { + t.Fatal(err) + } + if !matched { + t.Errorf("The 8 byte alignment is not correct: %t, output:%s\n", matched, out) + } + + matched, err = regexp.MatchString(pattern16, string(out)) + if err != nil { + t.Fatal(err) + } + if !matched { + t.Errorf("The 16 byte alignment is not correct: %t, output:%s\n", matched, out) + } + + matched, err = regexp.MatchString(pattern32, string(out)) + if err != nil { + t.Fatal(err) + } + if !matched { + t.Errorf("The 32 byte alignment is not correct: %t, output:%s\n", matched, out) + } + + // generate a test with invalid use of PCALIGN + + tmpfile = filepath.Join(dir, "xi.s") + err = os.WriteFile(tmpfile, []byte(invalidPCAlignSrc), 0644) + if err != nil { + t.Fatalf("can't write output: %v\n", err) + } + + // build test with errors and check for messages + cmd = testenv.Command(t, testenv.GoToolPath(t), "tool", "asm", "-o", filepath.Join(dir, "xi.o"), "-S", tmpfile) + cmd.Env = append(os.Environ(), "GOARCH=ppc64le", "GOOS=linux") + out, err = cmd.CombinedOutput() 
+ if !strings.Contains(string(out), "Unexpected alignment") { + t.Errorf("Invalid alignment not detected for PCALIGN\n") + } +} + +// Verify register constants are correctly aligned. Much of the ppc64 assembler assumes masking out significant +// bits will produce a valid register number: +// REG_Rx & 31 == x +// REG_Fx & 31 == x +// REG_Vx & 31 == x +// REG_VSx & 63 == x +// REG_SPRx & 1023 == x +// REG_CRx & 7 == x +// +// VR and FPR disjointly overlap VSR, interpreting as VSR registers should produce the correctly overlapped VSR. +// REG_FPx & 63 == x +// REG_Vx & 63 == x + 32 +func TestRegValueAlignment(t *testing.T) { + tstFunc := func(rstart, rend, msk, rout int) { + for i := rstart; i <= rend; i++ { + if i&msk != rout { + t.Errorf("%v is not aligned to 0x%X (expected %d, got %d)\n", rconv(i), msk, rout, rstart&msk) + } + rout++ + } + } + var testType = []struct { + rstart int + rend int + msk int + rout int + }{ + {REG_VS0, REG_VS63, 63, 0}, + {REG_R0, REG_R31, 31, 0}, + {REG_F0, REG_F31, 31, 0}, + {REG_V0, REG_V31, 31, 0}, + {REG_V0, REG_V31, 63, 32}, + {REG_F0, REG_F31, 63, 0}, + {REG_SPR0, REG_SPR0 + 1023, 1023, 0}, + {REG_CR0, REG_CR7, 7, 0}, + {REG_CR0LT, REG_CR7SO, 31, 0}, + } + for _, t := range testType { + tstFunc(t.rstart, t.rend, t.msk, t.rout) + } +} + +// Verify interesting obj.Addr arguments are classified correctly. 
+func TestAddrClassifier(t *testing.T) { + type cmplx struct { + pic int + pic_dyn int + dyn int + nonpic int + } + tsts := [...]struct { + arg obj.Addr + output interface{} + }{ + // Supported register type args + {obj.Addr{Type: obj.TYPE_REG, Reg: REG_R1}, C_REG}, + {obj.Addr{Type: obj.TYPE_REG, Reg: REG_R2}, C_REGP}, + {obj.Addr{Type: obj.TYPE_REG, Reg: REG_F1}, C_FREG}, + {obj.Addr{Type: obj.TYPE_REG, Reg: REG_F2}, C_FREGP}, + {obj.Addr{Type: obj.TYPE_REG, Reg: REG_V2}, C_VREG}, + {obj.Addr{Type: obj.TYPE_REG, Reg: REG_VS1}, C_VSREG}, + {obj.Addr{Type: obj.TYPE_REG, Reg: REG_VS2}, C_VSREGP}, + {obj.Addr{Type: obj.TYPE_REG, Reg: REG_CR}, C_CREG}, + {obj.Addr{Type: obj.TYPE_REG, Reg: REG_CR1}, C_CREG}, + {obj.Addr{Type: obj.TYPE_REG, Reg: REG_CR1SO}, C_CRBIT}, + {obj.Addr{Type: obj.TYPE_REG, Reg: REG_SPR0}, C_SPR}, + {obj.Addr{Type: obj.TYPE_REG, Reg: REG_SPR0 + 1}, C_XER}, + {obj.Addr{Type: obj.TYPE_REG, Reg: REG_SPR0 + 8}, C_LR}, + {obj.Addr{Type: obj.TYPE_REG, Reg: REG_SPR0 + 9}, C_CTR}, + {obj.Addr{Type: obj.TYPE_REG, Reg: REG_FPSCR}, C_FPSCR}, + {obj.Addr{Type: obj.TYPE_REG, Reg: REG_A1}, C_AREG}, + + // Memory type arguments. 
+ {obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_GOTREF}, C_ADDR}, + {obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_TOCREF}, C_ADDR}, + {obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: &obj.LSym{Type: objabi.STLSBSS}}, cmplx{C_TLS_IE, C_TLS_IE, C_TLS_LE, C_TLS_LE}}, + {obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: &obj.LSym{Type: objabi.SDATA}}, C_ADDR}, + {obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_AUTO}, C_SOREG}, + {obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_AUTO, Offset: BIG}, C_LOREG}, + {obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_AUTO, Offset: -BIG - 1}, C_LOREG}, + {obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_PARAM}, C_SOREG}, + {obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_PARAM, Offset: BIG}, C_LOREG}, + {obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_PARAM, Offset: -BIG - 33}, C_LOREG}, // 33 is FixedFrameSize-1 + {obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_NONE}, C_ZOREG}, + {obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_NONE, Index: REG_R4}, C_XOREG}, + {obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_NONE, Offset: 1}, C_SOREG}, + {obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_NONE, Offset: BIG}, C_LOREG}, + {obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_NONE, Offset: -BIG - 33}, C_LOREG}, + + // Misc (golang initializes -0.0 to 0.0, hence the obfuscation below) + {obj.Addr{Type: obj.TYPE_TEXTSIZE}, C_TEXTSIZE}, + {obj.Addr{Type: obj.TYPE_FCONST, Val: 0.0}, C_ZCON}, + {obj.Addr{Type: obj.TYPE_FCONST, Val: math.Float64frombits(0x8000000000000000)}, C_S16CON}, + + // Address type arguments + {obj.Addr{Type: obj.TYPE_ADDR, Reg: REG_R0, Name: obj.NAME_NONE, Offset: 1}, C_SACON}, + {obj.Addr{Type: obj.TYPE_ADDR, Reg: REG_R0, Name: obj.NAME_NONE, Offset: BIG}, C_LACON}, + {obj.Addr{Type: obj.TYPE_ADDR, Reg: REG_R0, Name: obj.NAME_NONE, Offset: -BIG - 1}, C_LACON}, + {obj.Addr{Type: obj.TYPE_ADDR, Reg: REG_R0, Name: obj.NAME_NONE, Offset: 1 << 32}, C_DACON}, + {obj.Addr{Type: obj.TYPE_ADDR, Name: obj.NAME_EXTERN, Sym: &obj.LSym{Type: objabi.SDATA}}, 
C_LACON}, + {obj.Addr{Type: obj.TYPE_ADDR, Name: obj.NAME_STATIC, Sym: &obj.LSym{Type: objabi.SDATA}}, C_LACON}, + {obj.Addr{Type: obj.TYPE_ADDR, Reg: REG_R0, Name: obj.NAME_AUTO, Offset: 1}, C_SACON}, + {obj.Addr{Type: obj.TYPE_ADDR, Reg: REG_R0, Name: obj.NAME_AUTO, Offset: BIG}, C_LACON}, + {obj.Addr{Type: obj.TYPE_ADDR, Reg: REG_R0, Name: obj.NAME_AUTO, Offset: -BIG - 1}, C_LACON}, + {obj.Addr{Type: obj.TYPE_ADDR, Reg: REG_R0, Name: obj.NAME_PARAM, Offset: 1}, C_SACON}, + {obj.Addr{Type: obj.TYPE_ADDR, Reg: REG_R0, Name: obj.NAME_PARAM, Offset: BIG}, C_LACON}, + {obj.Addr{Type: obj.TYPE_ADDR, Reg: REG_R0, Name: obj.NAME_PARAM, Offset: -BIG - 33}, C_LACON}, // 33 is FixedFrameSize-1 + + // Constant type arguments + {obj.Addr{Type: obj.TYPE_CONST, Name: obj.NAME_NONE, Offset: 0}, C_ZCON}, + {obj.Addr{Type: obj.TYPE_CONST, Name: obj.NAME_NONE, Offset: 1}, C_U1CON}, + {obj.Addr{Type: obj.TYPE_CONST, Name: obj.NAME_NONE, Offset: 2}, C_U2CON}, + {obj.Addr{Type: obj.TYPE_CONST, Name: obj.NAME_NONE, Offset: 4}, C_U3CON}, + {obj.Addr{Type: obj.TYPE_CONST, Name: obj.NAME_NONE, Offset: 8}, C_U4CON}, + {obj.Addr{Type: obj.TYPE_CONST, Name: obj.NAME_NONE, Offset: 16}, C_U5CON}, + {obj.Addr{Type: obj.TYPE_CONST, Name: obj.NAME_NONE, Offset: 32}, C_U8CON}, + {obj.Addr{Type: obj.TYPE_CONST, Name: obj.NAME_NONE, Offset: 1 << 14}, C_U15CON}, + {obj.Addr{Type: obj.TYPE_CONST, Name: obj.NAME_NONE, Offset: 1 << 15}, C_U16CON}, + {obj.Addr{Type: obj.TYPE_CONST, Name: obj.NAME_NONE, Offset: 1 << 16}, C_U3216CON}, + {obj.Addr{Type: obj.TYPE_CONST, Name: obj.NAME_NONE, Offset: 1 + 1<<16}, C_U32CON}, + {obj.Addr{Type: obj.TYPE_CONST, Name: obj.NAME_NONE, Offset: 1 << 32}, C_S34CON}, + {obj.Addr{Type: obj.TYPE_CONST, Name: obj.NAME_NONE, Offset: 1 << 33}, C_64CON}, + {obj.Addr{Type: obj.TYPE_CONST, Name: obj.NAME_NONE, Offset: -1}, C_S16CON}, + {obj.Addr{Type: obj.TYPE_CONST, Name: obj.NAME_NONE, Offset: -0x10000}, C_S3216CON}, + {obj.Addr{Type: obj.TYPE_CONST, Name: obj.NAME_NONE, 
Offset: -0x10001}, C_S32CON}, + {obj.Addr{Type: obj.TYPE_CONST, Name: obj.NAME_NONE, Offset: -(1 << 33)}, C_S34CON}, + {obj.Addr{Type: obj.TYPE_CONST, Name: obj.NAME_NONE, Offset: -(1 << 34)}, C_64CON}, + + // Branch like arguments + {obj.Addr{Type: obj.TYPE_BRANCH, Sym: &obj.LSym{Type: objabi.SDATA}}, cmplx{C_SBRA, C_LBRAPIC, C_LBRAPIC, C_SBRA}}, + {obj.Addr{Type: obj.TYPE_BRANCH}, C_SBRA}, + } + + pic_ctxt9 := ctxt9{ctxt: &obj.Link{Flag_shared: true, Arch: &Linkppc64}, autosize: 0} + pic_dyn_ctxt9 := ctxt9{ctxt: &obj.Link{Flag_shared: true, Flag_dynlink: true, Arch: &Linkppc64}, autosize: 0} + dyn_ctxt9 := ctxt9{ctxt: &obj.Link{Flag_dynlink: true, Arch: &Linkppc64}, autosize: 0} + nonpic_ctxt9 := ctxt9{ctxt: &obj.Link{Arch: &Linkppc64}, autosize: 0} + ctxts := [...]*ctxt9{&pic_ctxt9, &pic_dyn_ctxt9, &dyn_ctxt9, &nonpic_ctxt9} + name := [...]string{"pic", "pic_dyn", "dyn", "nonpic"} + for _, tst := range tsts { + var expect []int + switch tst.output.(type) { + case cmplx: + v := tst.output.(cmplx) + expect = []int{v.pic, v.pic_dyn, v.dyn, v.nonpic} + case int: + expect = []int{tst.output.(int), tst.output.(int), tst.output.(int), tst.output.(int)} + } + for i := range ctxts { + if output := ctxts[i].aclass(&tst.arg); output != expect[i] { + t.Errorf("%s.aclass(%v) = %v, expected %v\n", name[i], tst.arg, DRconv(output), DRconv(expect[i])) + } + } + } +} diff --git a/src/cmd/internal/obj/ppc64/doc.go b/src/cmd/internal/obj/ppc64/doc.go new file mode 100644 index 0000000..28340e4 --- /dev/null +++ b/src/cmd/internal/obj/ppc64/doc.go @@ -0,0 +1,254 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package ppc64 implements a PPC64 assembler that assembles Go asm into +the corresponding PPC64 instructions as defined by the Power ISA 3.0B. 
+ +This document provides information on how to write code in Go assembler +for PPC64, focusing on the differences between Go and PPC64 assembly language. +It assumes some knowledge of PPC64 assembler. The original implementation of +PPC64 in Go defined many opcodes that are different from PPC64 opcodes, but +updates to the Go assembly language used mnemonics that are mostly similar if not +identical to the PPC64 mnemonics, such as VMX and VSX instructions. Not all detail +is included here; refer to the Power ISA document if interested in more detail. + +Starting with Go 1.15 the Go objdump supports the -gnu option, which provides a +side by side view of the Go assembler and the PPC64 assembler output. This is +extremely helpful in determining what final PPC64 assembly is generated from the +corresponding Go assembly. + +In the examples below, the Go assembly is on the left, PPC64 assembly on the right. + +1. Operand ordering + +In Go asm, the last operand (right) is the target operand, but with PPC64 asm, +the first operand (left) is the target. The order of the remaining operands is +not consistent: in general opcodes with 3 operands that perform math or logical +operations have their operands in reverse order. Opcodes for vector instructions +and those with more than 3 operands usually have operands in the same order except +for the target operand, which is first in PPC64 asm and last in Go asm. + +Example: + + ADD R3, R4, R5 <=> add r5, r4, r3 + +2. Constant operands + +In Go asm, an operand that starts with '$' indicates a constant value. If the +instruction using the constant has an immediate version of the opcode, then an +immediate value is used with the opcode if possible. + +Example: + + ADD $1, R3, R4 <=> addi r4, r3, 1 + +3. Opcodes setting condition codes + +In PPC64 asm, some instructions other than compares have variations that can set +the condition code where meaningful. This is indicated by adding '.' to the end +of the PPC64 instruction.
In Go asm, these instructions have 'CC' at the end of +the opcode. The possible settings of the condition code depend on the instruction. +CR0 is the default for fixed-point instructions; CR1 for floating point; CR6 for +vector instructions. + +Example: + + ANDCC R3, R4, R5 <=> and. r5, r3, r4 (set CR0) + +4. Loads and stores from memory + +In Go asm, opcodes starting with 'MOV' indicate a load or store. When the target +is a memory reference, then it is a store; when the target is a register and the +source is a memory reference, then it is a load. + +MOV{B,H,W,D} variations identify the size as byte, halfword, word, doubleword. + +Adding 'Z' to the opcode for a load indicates zero extend; if omitted it is sign extend. +Adding 'U' to a load or store indicates an update of the base register with the offset. +Adding 'BR' to an opcode indicates byte-reversed load or store, or the order opposite +of the expected endian order. If 'BR' is used then zero extend is assumed. + +Memory references n(Ra) indicate the address in Ra + n. When used with an update form +of an opcode, the value in Ra is incremented by n. + +Memory references (Ra+Rb) or (Ra)(Rb) indicate the address Ra + Rb, used by indexed +loads or stores. Both forms are accepted. When used with an update then the base register +is updated by the value in the index register. + +Examples: + + MOVD (R3), R4 <=> ld r4,0(r3) + MOVW (R3), R4 <=> lwa r4,0(r3) + MOVWZU 4(R3), R4 <=> lwzu r4,4(r3) + MOVWZ (R3+R5), R4 <=> lwzx r4,r3,r5 + MOVHZ (R3), R4 <=> lhz r4,0(r3) + MOVHU 2(R3), R4 <=> lhau r4,2(r3) + MOVBZ (R3), R4 <=> lbz r4,0(r3) + + MOVD R4,(R3) <=> std r4,0(r3) + MOVW R4,(R3) <=> stw r4,0(r3) + MOVW R4,(R3+R5) <=> stwx r4,r3,r5 + MOVWU R4,4(R3) <=> stwu r4,4(r3) + MOVH R4,2(R3) <=> sth r4,2(r3) + MOVBU R4,(R3)(R5) <=> stbux r4,r3,r5 + +4. 
Compares + +When an instruction does a compare or other operation that might +result in a condition code, then the resulting condition is set +in a field of the condition register. The condition register consists +of 8 4-bit fields named CR0 - CR7. When a compare instruction +identifies a CR then the resulting condition is set in that field +to be read by a later branch or isel instruction. Within these fields, +bits are set to indicate less than, greater than, or equal conditions. + +Once an instruction sets a condition, then a subsequent branch, isel or +other instruction can read the condition field and operate based on the +bit settings. + +Examples: + + CMP R3, R4 <=> cmp r3, r4 (CR0 assumed) + CMP R3, R4, CR1 <=> cmp cr1, r3, r4 + +Note that the condition register is the target operand of compare opcodes, so +the remaining operands are in the same order for Go asm and PPC64 asm. +When CR0 is used then it is implicit and does not need to be specified. + +5. Branches + +Many branches are represented as a form of the BC instruction. There are +other extended opcodes to make it easier to see what type of branch is being +used. + +The following is a brief description of the BC instruction and its commonly +used operands. + +BC op1, op2, op3 + + op1: type of branch + 16 -> bctr (branch on ctr) + 12 -> bcr (branch if cr bit is set) + 8 -> bcr+bctr (branch on ctr and cr values) + 4 -> bcr != 0 (branch if specified cr bit is not set) + + There are more combinations but these are the most common. + + op2: condition register field and condition bit + + This contains an immediate value indicating which condition field + to read and what bits to test. Each field is 4 bits long with CR0 + at bit 0, CR1 at bit 4, etc. The value is computed as 4*CR+condition + with these condition values: + + 0 -> LT + 1 -> GT + 2 -> EQ + 3 -> OVG + + Thus 0 means test CR0 for LT, 5 means CR1 for GT, 30 means CR7 for EQ. 
+ + op3: branch target + +Examples: + + BC 12, 0, target <=> blt cr0, target + BC 12, 2, target <=> beq cr0, target + BC 12, 5, target <=> bgt cr1, target + BC 12, 30, target <=> beq cr7, target + BC 4, 6, target <=> bne cr1, target + BC 4, 1, target <=> ble cr0, target + + The following extended opcodes are available for ease of use and readability: + + BNE CR2, target <=> bne cr2, target + BEQ CR4, target <=> beq cr4, target + BLT target <=> blt target (cr0 default) + BGE CR7, target <=> bge cr7, target + +Refer to the ISA for more information on additional values for the BC instruction, +how to handle OVG information, and much more. + +5. Align directive + +Starting with Go 1.12, Go asm supports the PCALIGN directive, which indicates +that the next instruction should be aligned to the specified value. Currently +8 and 16 are the only supported values, and a maximum of 2 NOPs will be added +to align the code. That means in the case where the code is aligned to 4 but +PCALIGN $16 is at that location, the code will only be aligned to 8 to avoid +adding 3 NOPs. + +The purpose of this directive is to improve performance for cases like loops +where better alignment (8 or 16 instead of 4) might be helpful. This directive +exists in PPC64 assembler and is frequently used by PPC64 assembler writers. + +PCALIGN $16 +PCALIGN $8 + +Functions in Go are aligned to 16 bytes, as is the case in all other compilers +for PPC64. + +6. Shift instructions + +The simple scalar shifts on PPC64 expect a shift count that fits in 5 bits for +32-bit values or 6 bits for 64-bit values. If the shift count is a constant value +greater than the max then the assembler sets it to the max for that size (31 for +32 bit values, 63 for 64 bit values). If the shift count is in a register, then +only the low 5 or 6 bits of the register will be used as the shift count.
The +Go compiler will add appropriate code to compare the shift value to achieve the +correct result, and the assembler does not add extra checking. + +Examples: + + SRAD $8,R3,R4 => sradi r4,r3,8 + SRD $8,R3,R4 => rldicl r4,r3,56,8 + SLD $8,R3,R4 => rldicr r4,r3,8,55 + SRAW $16,R4,R5 => srawi r5,r4,16 + SRW $40,R4,R5 => rlwinm r5,r4,0,0,31 + SLW $12,R4,R5 => rlwinm r5,r4,12,0,19 + +Some non-simple shifts have operands in the Go assembly which don't map directly +onto operands in the PPC64 assembly. When an operand in a shift instruction in the +Go assembly is a bit mask, that mask is represented as a start and end bit in the +PPC64 assembly instead of a mask. See the ISA for more detail on these types of shifts. +Here are a few examples: + + RLWMI $7,R3,$65535,R6 => rlwimi r6,r3,7,16,31 + RLDMI $0,R4,$7,R6 => rldimi r6,r4,0,61 + +More recently, Go opcodes were added which map directly onto the PPC64 opcodes. It is +recommended to use the newer opcodes to avoid confusion. + + RLDICL $0,R4,$15,R6 => rldicl r6,r4,0,15 + RLDICR $0,R4,$15,R6 => rldicr r6,r4,0,15 + +# Register naming + +1. Special register usage in Go asm + +The following registers should not be modified by user Go assembler code. + + R0: Go code expects this register to contain the value 0. + R1: Stack pointer + R2: TOC pointer when compiled with -shared or -dynlink (a.k.a. position independent code) + R13: TLS pointer + R30: g (goroutine) + +Register names: + + Rn is used for general purpose registers. (0-31) + Fn is used for floating point registers. (0-31) + Vn is used for vector registers. Slot 0 of Vn overlaps with Fn. (0-31) + VSn is used for vector-scalar registers. V0-V31 overlap with VS32-VS63. (0-63) + CTR represents the count register. + LR represents the link register. + CR represents the condition register. + CRn represents a condition register field. (0-7) + CRnLT represents CR bit 0 of CR field n. (0-7) + CRnGT represents CR bit 1 of CR field n.
(0-7) + CRnEQ represents CR bit 2 of CR field n. (0-7) + CRnSO represents CR bit 3 of CR field n. (0-7) +*/ +package ppc64 diff --git a/src/cmd/internal/obj/ppc64/list9.go b/src/cmd/internal/obj/ppc64/list9.go new file mode 100644 index 0000000..451c162 --- /dev/null +++ b/src/cmd/internal/obj/ppc64/list9.go @@ -0,0 +1,117 @@ +// cmd/9l/list.c from Vita Nuova. +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2008 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2008 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +package ppc64 + +import ( + "cmd/internal/obj" + "fmt" +) + +func init() { + obj.RegisterRegister(obj.RBasePPC64, REG_SPR0+1024, rconv) + // Note, the last entry in Anames is "LASTAOUT", it is not a real opcode. + obj.RegisterOpcode(obj.ABasePPC64, Anames[:len(Anames)-1]) + obj.RegisterOpcode(AFIRSTGEN, GenAnames) +} + +func rconv(r int) string { + if r == 0 { + return "NONE" + } + if r == REGG { + // Special case. + return "g" + } + if REG_R0 <= r && r <= REG_R31 { + return fmt.Sprintf("R%d", r-REG_R0) + } + if REG_F0 <= r && r <= REG_F31 { + return fmt.Sprintf("F%d", r-REG_F0) + } + if REG_V0 <= r && r <= REG_V31 { + return fmt.Sprintf("V%d", r-REG_V0) + } + if REG_VS0 <= r && r <= REG_VS63 { + return fmt.Sprintf("VS%d", r-REG_VS0) + } + if REG_CR0 <= r && r <= REG_CR7 { + return fmt.Sprintf("CR%d", r-REG_CR0) + } + if REG_CR0LT <= r && r <= REG_CR7SO { + bits := [4]string{"LT", "GT", "EQ", "SO"} + crf := (r - REG_CR0LT) / 4 + return fmt.Sprintf("CR%d%s", crf, bits[r%4]) + } + if REG_A0 <= r && r <= REG_A7 { + return fmt.Sprintf("A%d", r-REG_A0) + } + if r == REG_CR { + return "CR" + } + if REG_SPR0 <= r && r <= REG_SPR0+1023 { + switch r { + case REG_XER: + return "XER" + + case REG_LR: + return "LR" + + case REG_CTR: + return "CTR" + } + + return fmt.Sprintf("SPR(%d)", r-REG_SPR0) + } + + if r == REG_FPSCR { + return "FPSCR" + } + if r == REG_MSR { + return "MSR" + } + + return fmt.Sprintf("Rgok(%d)", r-obj.RBasePPC64) +} + +func DRconv(a int) string { + s := "C_??" 
+ if a >= C_NONE && a <= C_NCLASS { + s = cnames9[a] + } + var fp string + fp += s + return fp +} + +func ConstantToCRbit(c int64) (int16, bool) { + reg64 := REG_CRBIT0 + c + success := reg64 >= REG_CR0LT && reg64 <= REG_CR7SO + return int16(reg64), success +} diff --git a/src/cmd/internal/obj/ppc64/obj9.go b/src/cmd/internal/obj/ppc64/obj9.go new file mode 100644 index 0000000..47ad85e --- /dev/null +++ b/src/cmd/internal/obj/ppc64/obj9.go @@ -0,0 +1,1414 @@ +// cmd/9l/noop.c, cmd/9l/pass.c, cmd/9l/span.c from Vita Nuova. +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2008 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2008 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package ppc64 + +import ( + "cmd/internal/obj" + "cmd/internal/objabi" + "cmd/internal/src" + "cmd/internal/sys" + "log" +) + +func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { + p.From.Class = 0 + p.To.Class = 0 + + c := ctxt9{ctxt: ctxt, newprog: newprog} + + // Rewrite BR/BL to symbol as TYPE_BRANCH. + switch p.As { + case ABR, + ABL, + obj.ARET, + obj.ADUFFZERO, + obj.ADUFFCOPY: + if p.To.Sym != nil { + p.To.Type = obj.TYPE_BRANCH + } + } + + // Rewrite float constants to values stored in memory. + switch p.As { + case AFMOVS: + if p.From.Type == obj.TYPE_FCONST { + f32 := float32(p.From.Val.(float64)) + p.From.Type = obj.TYPE_MEM + p.From.Sym = ctxt.Float32Sym(f32) + p.From.Name = obj.NAME_EXTERN + p.From.Offset = 0 + } + + case AFMOVD: + if p.From.Type == obj.TYPE_FCONST { + f64 := p.From.Val.(float64) + // Constant not needed in memory for float +/- 0 + if f64 != 0 { + p.From.Type = obj.TYPE_MEM + p.From.Sym = ctxt.Float64Sym(f64) + p.From.Name = obj.NAME_EXTERN + p.From.Offset = 0 + } + } + + case AMOVD: + // 32b constants (signed and unsigned) can be generated via 1 or 2 instructions. + // All others must be placed in memory and loaded. + isS32 := int64(int32(p.From.Offset)) == p.From.Offset + isU32 := uint64(uint32(p.From.Offset)) == uint64(p.From.Offset) + if p.From.Type == obj.TYPE_CONST && p.From.Name == obj.NAME_NONE && p.From.Reg == 0 && !isS32 && !isU32 { + p.From.Type = obj.TYPE_MEM + p.From.Sym = ctxt.Int64Sym(p.From.Offset) + p.From.Name = obj.NAME_EXTERN + p.From.Offset = 0 + } + } + + switch p.As { + // Rewrite SUB constants into ADD. 
+ case ASUBC: + if p.From.Type == obj.TYPE_CONST { + p.From.Offset = -p.From.Offset + p.As = AADDC + } + + case ASUBCCC: + if p.From.Type == obj.TYPE_CONST { + p.From.Offset = -p.From.Offset + p.As = AADDCCC + } + + case ASUB: + if p.From.Type == obj.TYPE_CONST { + p.From.Offset = -p.From.Offset + p.As = AADD + } + + // To maintain backwards compatibility, we accept some 4 argument usage of + // several opcodes which was likely not intended, but did work. These are not + // added to optab to avoid the chance this behavior might be used with newer + // instructions. + // + // Rewrite argument ordering like "ADDEX R3, $3, R4, R5" into + // "ADDEX R3, R4, $3, R5" + case AVSHASIGMAW, AVSHASIGMAD, AADDEX, AXXSLDWI, AXXPERMDI: + if len(p.RestArgs) == 2 && p.Reg == 0 && p.RestArgs[0].Addr.Type == obj.TYPE_CONST && p.RestArgs[1].Addr.Type == obj.TYPE_REG { + p.Reg = p.RestArgs[1].Addr.Reg + p.RestArgs = p.RestArgs[:1] + } + } + + if c.ctxt.Headtype == objabi.Haix { + c.rewriteToUseTOC(p) + } else if c.ctxt.Flag_dynlink { + c.rewriteToUseGot(p) + } +} + +// Rewrite p, if necessary, to access a symbol using its TOC anchor. +// This code is for AIX only. +func (c *ctxt9) rewriteToUseTOC(p *obj.Prog) { + if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ACALL || p.As == obj.ARET || p.As == obj.AJMP { + return + } + + if p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO { + // ADUFFZERO/ADUFFCOPY is considered as an ABL except in dynamic + // link where it should be an indirect call. + if !c.ctxt.Flag_dynlink { + return + } + // ADUFFxxx $offset + // becomes + // MOVD runtime.duffxxx@TOC, R12 + // ADD $offset, R12 + // MOVD R12, LR + // BL (LR) + var sym *obj.LSym + if p.As == obj.ADUFFZERO { + sym = c.ctxt.Lookup("runtime.duffzero") + } else { + sym = c.ctxt.Lookup("runtime.duffcopy") + } + // Retrieve or create the TOC anchor. 
+ symtoc := c.ctxt.LookupInit("TOC."+sym.Name, func(s *obj.LSym) { + s.Type = objabi.SDATA + s.Set(obj.AttrDuplicateOK, true) + s.Set(obj.AttrStatic, true) + c.ctxt.Data = append(c.ctxt.Data, s) + s.WriteAddr(c.ctxt, 0, 8, sym, 0) + }) + + offset := p.To.Offset + p.As = AMOVD + p.From.Type = obj.TYPE_MEM + p.From.Name = obj.NAME_TOCREF + p.From.Sym = symtoc + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_R12 + p.To.Name = obj.NAME_NONE + p.To.Offset = 0 + p.To.Sym = nil + p1 := obj.Appendp(p, c.newprog) + p1.As = AADD + p1.From.Type = obj.TYPE_CONST + p1.From.Offset = offset + p1.To.Type = obj.TYPE_REG + p1.To.Reg = REG_R12 + p2 := obj.Appendp(p1, c.newprog) + p2.As = AMOVD + p2.From.Type = obj.TYPE_REG + p2.From.Reg = REG_R12 + p2.To.Type = obj.TYPE_REG + p2.To.Reg = REG_LR + p3 := obj.Appendp(p2, c.newprog) + p3.As = obj.ACALL + p3.To.Type = obj.TYPE_REG + p3.To.Reg = REG_LR + } + + var source *obj.Addr + if p.From.Name == obj.NAME_EXTERN || p.From.Name == obj.NAME_STATIC { + if p.From.Type == obj.TYPE_ADDR { + if p.As == ADWORD { + // ADWORD $sym doesn't need TOC anchor + return + } + if p.As != AMOVD { + c.ctxt.Diag("do not know how to handle TYPE_ADDR in %v", p) + return + } + if p.To.Type != obj.TYPE_REG { + c.ctxt.Diag("do not know how to handle LEAQ-type insn to non-register in %v", p) + return + } + } else if p.From.Type != obj.TYPE_MEM { + c.ctxt.Diag("do not know how to handle %v without TYPE_MEM", p) + return + } + source = &p.From + + } else if p.To.Name == obj.NAME_EXTERN || p.To.Name == obj.NAME_STATIC { + if p.To.Type != obj.TYPE_MEM { + c.ctxt.Diag("do not know how to handle %v without TYPE_MEM", p) + return + } + if source != nil { + c.ctxt.Diag("cannot handle symbols on both sides in %v", p) + return + } + source = &p.To + } else { + return + + } + + if source.Sym == nil { + c.ctxt.Diag("do not know how to handle nil symbol in %v", p) + return + } + + if source.Sym.Type == objabi.STLSBSS { + return + } + + // Retrieve or create the TOC anchor. 
+ symtoc := c.ctxt.LookupInit("TOC."+source.Sym.Name, func(s *obj.LSym) { + s.Type = objabi.SDATA + s.Set(obj.AttrDuplicateOK, true) + s.Set(obj.AttrStatic, true) + c.ctxt.Data = append(c.ctxt.Data, s) + s.WriteAddr(c.ctxt, 0, 8, source.Sym, 0) + }) + + if source.Type == obj.TYPE_ADDR { + // MOVD $sym, Rx becomes MOVD symtoc, Rx + // MOVD $sym+<off>, Rx becomes MOVD symtoc, Rx; ADD <off>, Rx + p.From.Type = obj.TYPE_MEM + p.From.Sym = symtoc + p.From.Name = obj.NAME_TOCREF + + if p.From.Offset != 0 { + q := obj.Appendp(p, c.newprog) + q.As = AADD + q.From.Type = obj.TYPE_CONST + q.From.Offset = p.From.Offset + p.From.Offset = 0 + q.To = p.To + } + return + + } + + // MOVx sym, Ry becomes MOVD symtoc, REGTMP; MOVx (REGTMP), Ry + // MOVx Ry, sym becomes MOVD symtoc, REGTMP; MOVx Ry, (REGTMP) + // An addition may be inserted between the two MOVs if there is an offset. + + q := obj.Appendp(p, c.newprog) + q.As = AMOVD + q.From.Type = obj.TYPE_MEM + q.From.Sym = symtoc + q.From.Name = obj.NAME_TOCREF + q.To.Type = obj.TYPE_REG + q.To.Reg = REGTMP + + q = obj.Appendp(q, c.newprog) + q.As = p.As + q.From = p.From + q.To = p.To + if p.From.Name != obj.NAME_NONE { + q.From.Type = obj.TYPE_MEM + q.From.Reg = REGTMP + q.From.Name = obj.NAME_NONE + q.From.Sym = nil + } else if p.To.Name != obj.NAME_NONE { + q.To.Type = obj.TYPE_MEM + q.To.Reg = REGTMP + q.To.Name = obj.NAME_NONE + q.To.Sym = nil + } else { + c.ctxt.Diag("unreachable case in rewriteToUseTOC with %v", p) + } + + obj.Nopout(p) +} + +// Rewrite p, if necessary, to access global data via the global offset table. 
+func (c *ctxt9) rewriteToUseGot(p *obj.Prog) { + if p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO { + // ADUFFxxx $offset + // becomes + // MOVD runtime.duffxxx@GOT, R12 + // ADD $offset, R12 + // MOVD R12, LR + // BL (LR) + var sym *obj.LSym + if p.As == obj.ADUFFZERO { + sym = c.ctxt.LookupABI("runtime.duffzero", obj.ABIInternal) + } else { + sym = c.ctxt.LookupABI("runtime.duffcopy", obj.ABIInternal) + } + offset := p.To.Offset + p.As = AMOVD + p.From.Type = obj.TYPE_MEM + p.From.Name = obj.NAME_GOTREF + p.From.Sym = sym + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_R12 + p.To.Name = obj.NAME_NONE + p.To.Offset = 0 + p.To.Sym = nil + p1 := obj.Appendp(p, c.newprog) + p1.As = AADD + p1.From.Type = obj.TYPE_CONST + p1.From.Offset = offset + p1.To.Type = obj.TYPE_REG + p1.To.Reg = REG_R12 + p2 := obj.Appendp(p1, c.newprog) + p2.As = AMOVD + p2.From.Type = obj.TYPE_REG + p2.From.Reg = REG_R12 + p2.To.Type = obj.TYPE_REG + p2.To.Reg = REG_LR + p3 := obj.Appendp(p2, c.newprog) + p3.As = obj.ACALL + p3.To.Type = obj.TYPE_REG + p3.To.Reg = REG_LR + } + + // We only care about global data: NAME_EXTERN means a global + // symbol in the Go sense, and p.Sym.Local is true for a few + // internally defined symbols. 
+ if p.From.Type == obj.TYPE_ADDR && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() { + // MOVD $sym, Rx becomes MOVD sym@GOT, Rx + // MOVD $sym+<off>, Rx becomes MOVD sym@GOT, Rx; ADD <off>, Rx + if p.As != AMOVD { + c.ctxt.Diag("do not know how to handle TYPE_ADDR in %v with -dynlink", p) + } + if p.To.Type != obj.TYPE_REG { + c.ctxt.Diag("do not know how to handle LEAQ-type insn to non-register in %v with -dynlink", p) + } + p.From.Type = obj.TYPE_MEM + p.From.Name = obj.NAME_GOTREF + if p.From.Offset != 0 { + q := obj.Appendp(p, c.newprog) + q.As = AADD + q.From.Type = obj.TYPE_CONST + q.From.Offset = p.From.Offset + q.To = p.To + p.From.Offset = 0 + } + } + if p.GetFrom3() != nil && p.GetFrom3().Name == obj.NAME_EXTERN { + c.ctxt.Diag("don't know how to handle %v with -dynlink", p) + } + var source *obj.Addr + // MOVx sym, Ry becomes MOVD sym@GOT, REGTMP; MOVx (REGTMP), Ry + // MOVx Ry, sym becomes MOVD sym@GOT, REGTMP; MOVx Ry, (REGTMP) + // An addition may be inserted between the two MOVs if there is an offset. 
+ if p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() { + if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() { + c.ctxt.Diag("cannot handle NAME_EXTERN on both sides in %v with -dynlink", p) + } + source = &p.From + } else if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() { + source = &p.To + } else { + return + } + if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ACALL || p.As == obj.ARET || p.As == obj.AJMP { + return + } + if source.Sym.Type == objabi.STLSBSS { + return + } + if source.Type != obj.TYPE_MEM { + c.ctxt.Diag("don't know how to handle %v with -dynlink", p) + } + p1 := obj.Appendp(p, c.newprog) + p2 := obj.Appendp(p1, c.newprog) + + p1.As = AMOVD + p1.From.Type = obj.TYPE_MEM + p1.From.Sym = source.Sym + p1.From.Name = obj.NAME_GOTREF + p1.To.Type = obj.TYPE_REG + p1.To.Reg = REGTMP + + p2.As = p.As + p2.From = p.From + p2.To = p.To + if p.From.Name == obj.NAME_EXTERN { + p2.From.Reg = REGTMP + p2.From.Name = obj.NAME_NONE + p2.From.Sym = nil + } else if p.To.Name == obj.NAME_EXTERN { + p2.To.Reg = REGTMP + p2.To.Name = obj.NAME_NONE + p2.To.Sym = nil + } else { + return + } + obj.Nopout(p) +} + +func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { + // TODO(minux): add morestack short-cuts with small fixed frame-size. + if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { + return + } + + c := ctxt9{ctxt: ctxt, cursym: cursym, newprog: newprog} + + p := c.cursym.Func().Text + textstksiz := p.To.Offset + if textstksiz == -8 { + // Compatibility hack. 
+ p.From.Sym.Set(obj.AttrNoFrame, true) + textstksiz = 0 + } + if textstksiz%8 != 0 { + c.ctxt.Diag("frame size %d not a multiple of 8", textstksiz) + } + if p.From.Sym.NoFrame() { + if textstksiz != 0 { + c.ctxt.Diag("NOFRAME functions must have a frame size of 0, not %d", textstksiz) + } + } + + c.cursym.Func().Args = p.To.Val.(int32) + c.cursym.Func().Locals = int32(textstksiz) + + /* + * find leaf subroutines + * expand RET + * expand BECOME pseudo + */ + + var q *obj.Prog + var q1 *obj.Prog + for p := c.cursym.Func().Text; p != nil; p = p.Link { + switch p.As { + /* too hard, just leave alone */ + case obj.ATEXT: + q = p + + p.Mark |= LABEL | LEAF | SYNC + if p.Link != nil { + p.Link.Mark |= LABEL + } + + case ANOR: + q = p + if p.To.Type == obj.TYPE_REG { + if p.To.Reg == REGZERO { + p.Mark |= LABEL | SYNC + } + } + + case ALWAR, + ALBAR, + ASTBCCC, + ASTWCCC, + AEIEIO, + AICBI, + AISYNC, + ATLBIE, + ATLBIEL, + ASLBIA, + ASLBIE, + ASLBMFEE, + ASLBMFEV, + ASLBMTE, + ADCBF, + ADCBI, + ADCBST, + ADCBT, + ADCBTST, + ADCBZ, + ASYNC, + ATLBSYNC, + APTESYNC, + ALWSYNC, + ATW, + AWORD, + ARFI, + ARFCI, + ARFID, + AHRFID: + q = p + p.Mark |= LABEL | SYNC + continue + + case AMOVW, AMOVWZ, AMOVD: + q = p + if p.From.Reg >= REG_SPECIAL || p.To.Reg >= REG_SPECIAL { + p.Mark |= LABEL | SYNC + } + continue + + case AFABS, + AFABSCC, + AFADD, + AFADDCC, + AFCTIW, + AFCTIWCC, + AFCTIWZ, + AFCTIWZCC, + AFDIV, + AFDIVCC, + AFMADD, + AFMADDCC, + AFMOVD, + AFMOVDU, + /* case AFMOVDS: */ + AFMOVS, + AFMOVSU, + + /* case AFMOVSD: */ + AFMSUB, + AFMSUBCC, + AFMUL, + AFMULCC, + AFNABS, + AFNABSCC, + AFNEG, + AFNEGCC, + AFNMADD, + AFNMADDCC, + AFNMSUB, + AFNMSUBCC, + AFRSP, + AFRSPCC, + AFSUB, + AFSUBCC: + q = p + + p.Mark |= FLOAT + continue + + case ABL, + ABCL, + obj.ADUFFZERO, + obj.ADUFFCOPY: + c.cursym.Func().Text.Mark &^= LEAF + fallthrough + + case ABC, + ABEQ, + ABGE, + ABGT, + ABLE, + ABLT, + ABNE, + ABR, + ABVC, + ABVS: + p.Mark |= BRANCH + q = p + q1 = p.To.Target() + if 
q1 != nil { + // NOPs are not removed due to #40689. + + if q1.Mark&LEAF == 0 { + q1.Mark |= LABEL + } + } else { + p.Mark |= LABEL + } + q1 = p.Link + if q1 != nil { + q1.Mark |= LABEL + } + continue + + case AFCMPO, AFCMPU: + q = p + p.Mark |= FCMP | FLOAT + continue + + case obj.ARET: + q = p + if p.Link != nil { + p.Link.Mark |= LABEL + } + continue + + case obj.ANOP: + // NOPs are not removed due to + // #40689 + continue + + default: + q = p + continue + } + } + + autosize := int32(0) + var p1 *obj.Prog + var p2 *obj.Prog + for p := c.cursym.Func().Text; p != nil; p = p.Link { + o := p.As + switch o { + case obj.ATEXT: + autosize = int32(textstksiz) + + if p.Mark&LEAF != 0 && autosize == 0 { + // A leaf function with no locals has no frame. + p.From.Sym.Set(obj.AttrNoFrame, true) + } + + if !p.From.Sym.NoFrame() { + // If there is a stack frame at all, it includes + // space to save the LR. + autosize += int32(c.ctxt.Arch.FixedFrameSize) + } + + if p.Mark&LEAF != 0 && autosize < objabi.StackSmall { + // A leaf function with a small stack can be marked + // NOSPLIT, avoiding a stack check. + p.From.Sym.Set(obj.AttrNoSplit, true) + } + + p.To.Offset = int64(autosize) + + q = p + + if c.ctxt.Flag_shared && c.cursym.Name != "runtime.duffzero" && c.cursym.Name != "runtime.duffcopy" { + // When compiling Go into PIC, all functions must start + // with instructions to load the TOC pointer into r2: + // + // addis r2, r12, .TOC.-func@ha + // addi r2, r2, .TOC.-func@l+4 + // + // We could probably skip this prologue in some situations + // but it's a bit subtle. However, it is both safe and + // necessary to leave the prologue off duffzero and + // duffcopy as we rely on being able to jump to a specific + // instruction offset for them. + // + // These are AWORDS because there is no (afaict) way to + // generate the addis instruction except as part of the + // load of a large constant, and in that case there is no + // way to use r12 as the source. 
+ // + // Note that the same condition is tested in + // putelfsym in cmd/link/internal/ld/symtab.go + // where we set the st_other field to indicate + // the presence of these instructions. + q = obj.Appendp(q, c.newprog) + q.As = AWORD + q.Pos = p.Pos + q.From.Type = obj.TYPE_CONST + q.From.Offset = 0x3c4c0000 + q = obj.Appendp(q, c.newprog) + q.As = AWORD + q.Pos = p.Pos + q.From.Type = obj.TYPE_CONST + q.From.Offset = 0x38420000 + rel := obj.Addrel(c.cursym) + rel.Off = 0 + rel.Siz = 8 + rel.Sym = c.ctxt.Lookup(".TOC.") + rel.Type = objabi.R_ADDRPOWER_PCREL + } + + if !c.cursym.Func().Text.From.Sym.NoSplit() { + q = c.stacksplit(q, autosize) // emit split check + } + + // Special handling of the racecall thunk. Assume that its asm code will + // save the link register and update the stack, since that code is + // called directly from C/C++ and can't clobber REGTMP (R31). + if autosize != 0 && c.cursym.Name != "runtime.racecallbackthunk" { + var prologueEnd *obj.Prog + // Save the link register and update the SP. MOVDU is used unless + // the frame size is too large. The link register must be saved + // even for non-empty leaf functions so that traceback works. + if autosize >= -BIG && autosize <= BIG { + // Use MOVDU to adjust R1 when saving R31, if autosize is small. + q = obj.Appendp(q, c.newprog) + q.As = AMOVD + q.Pos = p.Pos + q.From.Type = obj.TYPE_REG + q.From.Reg = REG_LR + q.To.Type = obj.TYPE_REG + q.To.Reg = REGTMP + prologueEnd = q + + q = obj.Appendp(q, c.newprog) + q.As = AMOVDU + q.Pos = p.Pos + q.From.Type = obj.TYPE_REG + q.From.Reg = REGTMP + q.To.Type = obj.TYPE_MEM + q.To.Offset = int64(-autosize) + q.To.Reg = REGSP + q.Spadj = autosize + } else { + // Frame size is too large for a MOVDU instruction. + // Store link register before decrementing SP, so if a signal comes + // during the execution of the function prologue, the traceback + // code will not see a half-updated stack frame. 
+ // This sequence is not async preemptible, as if we open a frame + // at the current SP, it will clobber the saved LR. + q = obj.Appendp(q, c.newprog) + q.As = AMOVD + q.Pos = p.Pos + q.From.Type = obj.TYPE_REG + q.From.Reg = REG_LR + q.To.Type = obj.TYPE_REG + q.To.Reg = REG_R29 // REGTMP may be used to synthesize large offset in the next instruction + + q = c.ctxt.StartUnsafePoint(q, c.newprog) + + q = obj.Appendp(q, c.newprog) + q.As = AMOVD + q.Pos = p.Pos + q.From.Type = obj.TYPE_REG + q.From.Reg = REG_R29 + q.To.Type = obj.TYPE_MEM + q.To.Offset = int64(-autosize) + q.To.Reg = REGSP + + prologueEnd = q + + q = obj.Appendp(q, c.newprog) + q.As = AADD + q.Pos = p.Pos + q.From.Type = obj.TYPE_CONST + q.From.Offset = int64(-autosize) + q.To.Type = obj.TYPE_REG + q.To.Reg = REGSP + q.Spadj = +autosize + + q = c.ctxt.EndUnsafePoint(q, c.newprog, -1) + } + prologueEnd.Pos = prologueEnd.Pos.WithXlogue(src.PosPrologueEnd) + } else if c.cursym.Func().Text.Mark&LEAF == 0 { + // A very few functions that do not return to their caller + // (e.g. gogo) are not identified as leaves but still have + // no frame. + c.cursym.Func().Text.Mark |= LEAF + } + + if c.cursym.Func().Text.Mark&LEAF != 0 { + c.cursym.Set(obj.AttrLeaf, true) + break + } + + if c.ctxt.Flag_shared { + q = obj.Appendp(q, c.newprog) + q.As = AMOVD + q.Pos = p.Pos + q.From.Type = obj.TYPE_REG + q.From.Reg = REG_R2 + q.To.Type = obj.TYPE_MEM + q.To.Reg = REGSP + q.To.Offset = 24 + } + + if c.cursym.Func().Text.From.Sym.Wrapper() { + // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame + // + // MOVD g_panic(g), R3 + // CMP R0, R3 + // BEQ end + // MOVD panic_argp(R3), R4 + // ADD $(autosize+8), R1, R5 + // CMP R4, R5 + // BNE end + // ADD $8, R1, R6 + // MOVD R6, panic_argp(R3) + // end: + // NOP + // + // The NOP is needed to give the jumps somewhere to land. + // It is a liblink NOP, not a ppc64 NOP: it encodes to 0 instruction bytes. 
+ + q = obj.Appendp(q, c.newprog) + + q.As = AMOVD + q.From.Type = obj.TYPE_MEM + q.From.Reg = REGG + q.From.Offset = 4 * int64(c.ctxt.Arch.PtrSize) // G.panic + q.To.Type = obj.TYPE_REG + q.To.Reg = REG_R22 + + q = obj.Appendp(q, c.newprog) + q.As = ACMP + q.From.Type = obj.TYPE_REG + q.From.Reg = REG_R0 + q.To.Type = obj.TYPE_REG + q.To.Reg = REG_R22 + + q = obj.Appendp(q, c.newprog) + q.As = ABEQ + q.To.Type = obj.TYPE_BRANCH + p1 = q + + q = obj.Appendp(q, c.newprog) + q.As = AMOVD + q.From.Type = obj.TYPE_MEM + q.From.Reg = REG_R22 + q.From.Offset = 0 // Panic.argp + q.To.Type = obj.TYPE_REG + q.To.Reg = REG_R23 + + q = obj.Appendp(q, c.newprog) + q.As = AADD + q.From.Type = obj.TYPE_CONST + q.From.Offset = int64(autosize) + c.ctxt.Arch.FixedFrameSize + q.Reg = REGSP + q.To.Type = obj.TYPE_REG + q.To.Reg = REG_R24 + + q = obj.Appendp(q, c.newprog) + q.As = ACMP + q.From.Type = obj.TYPE_REG + q.From.Reg = REG_R23 + q.To.Type = obj.TYPE_REG + q.To.Reg = REG_R24 + + q = obj.Appendp(q, c.newprog) + q.As = ABNE + q.To.Type = obj.TYPE_BRANCH + p2 = q + + q = obj.Appendp(q, c.newprog) + q.As = AADD + q.From.Type = obj.TYPE_CONST + q.From.Offset = c.ctxt.Arch.FixedFrameSize + q.Reg = REGSP + q.To.Type = obj.TYPE_REG + q.To.Reg = REG_R25 + + q = obj.Appendp(q, c.newprog) + q.As = AMOVD + q.From.Type = obj.TYPE_REG + q.From.Reg = REG_R25 + q.To.Type = obj.TYPE_MEM + q.To.Reg = REG_R22 + q.To.Offset = 0 // Panic.argp + + q = obj.Appendp(q, c.newprog) + + q.As = obj.ANOP + p1.To.SetTarget(q) + p2.To.SetTarget(q) + } + + case obj.ARET: + if p.From.Type == obj.TYPE_CONST { + c.ctxt.Diag("using BECOME (%v) is not supported!", p) + break + } + + retTarget := p.To.Sym + + if c.cursym.Func().Text.Mark&LEAF != 0 { + if autosize == 0 || c.cursym.Name == "runtime.racecallbackthunk" { + p.As = ABR + p.From = obj.Addr{} + if retTarget == nil { + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_LR + } else { + p.To.Type = obj.TYPE_BRANCH + p.To.Sym = retTarget + } + p.Mark |= BRANCH + break 
+ } + + p.As = AADD + p.From.Type = obj.TYPE_CONST + p.From.Offset = int64(autosize) + p.To.Type = obj.TYPE_REG + p.To.Reg = REGSP + p.Spadj = -autosize + + q = c.newprog() + q.As = ABR + q.Pos = p.Pos + if retTarget == nil { + q.To.Type = obj.TYPE_REG + q.To.Reg = REG_LR + } else { + q.To.Type = obj.TYPE_BRANCH + q.To.Sym = retTarget + } + q.Mark |= BRANCH + q.Spadj = +autosize + + q.Link = p.Link + p.Link = q + break + } + + p.As = AMOVD + p.From.Type = obj.TYPE_MEM + p.From.Offset = 0 + p.From.Reg = REGSP + p.To.Type = obj.TYPE_REG + p.To.Reg = REGTMP + + q = c.newprog() + q.As = AMOVD + q.Pos = p.Pos + q.From.Type = obj.TYPE_REG + q.From.Reg = REGTMP + q.To.Type = obj.TYPE_REG + q.To.Reg = REG_LR + + q.Link = p.Link + p.Link = q + p = q + + if false { + // Debug bad returns + q = c.newprog() + + q.As = AMOVD + q.Pos = p.Pos + q.From.Type = obj.TYPE_MEM + q.From.Offset = 0 + q.From.Reg = REGTMP + q.To.Type = obj.TYPE_REG + q.To.Reg = REGTMP + + q.Link = p.Link + p.Link = q + p = q + } + prev := p + if autosize != 0 && c.cursym.Name != "runtime.racecallbackthunk" { + q = c.newprog() + q.As = AADD + q.Pos = p.Pos + q.From.Type = obj.TYPE_CONST + q.From.Offset = int64(autosize) + q.To.Type = obj.TYPE_REG + q.To.Reg = REGSP + q.Spadj = -autosize + + q.Link = p.Link + prev.Link = q + prev = q + } + + q1 = c.newprog() + q1.As = ABR + q1.Pos = p.Pos + if retTarget == nil { + q1.To.Type = obj.TYPE_REG + q1.To.Reg = REG_LR + } else { + q1.To.Type = obj.TYPE_BRANCH + q1.To.Sym = retTarget + } + q1.Mark |= BRANCH + q1.Spadj = +autosize + + q1.Link = q.Link + prev.Link = q1 + case AADD: + if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.From.Type == obj.TYPE_CONST { + p.Spadj = int32(-p.From.Offset) + } + case AMOVDU: + if p.To.Type == obj.TYPE_MEM && p.To.Reg == REGSP { + p.Spadj = int32(-p.To.Offset) + } + if p.From.Type == obj.TYPE_MEM && p.From.Reg == REGSP { + p.Spadj = int32(-p.From.Offset) + } + case obj.AGETCALLERPC: + if cursym.Leaf() { + /* MOVD LR, Rd */ + 
p.As = AMOVD + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_LR + } else { + /* MOVD (RSP), Rd */ + p.As = AMOVD + p.From.Type = obj.TYPE_MEM + p.From.Reg = REGSP + } + } + + if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.Spadj == 0 && p.As != ACMPU { + f := c.cursym.Func() + if f.FuncFlag&objabi.FuncFlag_SPWRITE == 0 { + c.cursym.Func().FuncFlag |= objabi.FuncFlag_SPWRITE + if ctxt.Debugvlog || !ctxt.IsAsm { + ctxt.Logf("auto-SPWRITE: %s %v\n", c.cursym.Name, p) + if !ctxt.IsAsm { + ctxt.Diag("invalid auto-SPWRITE in non-assembly") + ctxt.DiagFlush() + log.Fatalf("bad SPWRITE") + } + } + } + } + } +} + +/* +// instruction scheduling + + if(debug['Q'] == 0) + return; + + curtext = nil; + q = nil; // p - 1 + q1 = firstp; // top of block + o = 0; // count of instructions + for(p = firstp; p != nil; p = p1) { + p1 = p->link; + o++; + if(p->mark & NOSCHED){ + if(q1 != p){ + sched(q1, q); + } + for(; p != nil; p = p->link){ + if(!(p->mark & NOSCHED)) + break; + q = p; + } + p1 = p; + q1 = p; + o = 0; + continue; + } + if(p->mark & (LABEL|SYNC)) { + if(q1 != p) + sched(q1, q); + q1 = p; + o = 1; + } + if(p->mark & (BRANCH|SYNC)) { + sched(q1, p); + q1 = p1; + o = 0; + } + if(o >= NSCHED) { + sched(q1, p); + q1 = p1; + o = 0; + } + q = p; + } +*/ +func (c *ctxt9) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { + if c.ctxt.Flag_maymorestack != "" { + if c.ctxt.Flag_shared || c.ctxt.Flag_dynlink { + // See the call to morestack for why these are + // complicated to support. + c.ctxt.Diag("maymorestack with -shared or -dynlink is not supported") + } + + // Spill arguments. This has to happen before we open + // any more frame space. 
+ p = c.cursym.Func().SpillRegisterArgs(p, c.newprog) + + // Save LR and REGCTXT + frameSize := 8 + c.ctxt.Arch.FixedFrameSize + + // MOVD LR, REGTMP + p = obj.Appendp(p, c.newprog) + p.As = AMOVD + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_LR + p.To.Type = obj.TYPE_REG + p.To.Reg = REGTMP + // MOVDU REGTMP, -16(SP) + p = obj.Appendp(p, c.newprog) + p.As = AMOVDU + p.From.Type = obj.TYPE_REG + p.From.Reg = REGTMP + p.To.Type = obj.TYPE_MEM + p.To.Offset = -frameSize + p.To.Reg = REGSP + p.Spadj = int32(frameSize) + + // MOVD REGCTXT, 8(SP) + p = obj.Appendp(p, c.newprog) + p.As = AMOVD + p.From.Type = obj.TYPE_REG + p.From.Reg = REGCTXT + p.To.Type = obj.TYPE_MEM + p.To.Offset = 8 + p.To.Reg = REGSP + + // BL maymorestack + p = obj.Appendp(p, c.newprog) + p.As = ABL + p.To.Type = obj.TYPE_BRANCH + // See ../x86/obj6.go + p.To.Sym = c.ctxt.LookupABI(c.ctxt.Flag_maymorestack, c.cursym.ABI()) + + // Restore LR and REGCTXT + + // MOVD 8(SP), REGCTXT + p = obj.Appendp(p, c.newprog) + p.As = AMOVD + p.From.Type = obj.TYPE_MEM + p.From.Offset = 8 + p.From.Reg = REGSP + p.To.Type = obj.TYPE_REG + p.To.Reg = REGCTXT + + // MOVD 0(SP), REGTMP + p = obj.Appendp(p, c.newprog) + p.As = AMOVD + p.From.Type = obj.TYPE_MEM + p.From.Offset = 0 + p.From.Reg = REGSP + p.To.Type = obj.TYPE_REG + p.To.Reg = REGTMP + + // MOVD REGTMP, LR + p = obj.Appendp(p, c.newprog) + p.As = AMOVD + p.From.Type = obj.TYPE_REG + p.From.Reg = REGTMP + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_LR + + // ADD $16, SP + p = obj.Appendp(p, c.newprog) + p.As = AADD + p.From.Type = obj.TYPE_CONST + p.From.Offset = frameSize + p.To.Type = obj.TYPE_REG + p.To.Reg = REGSP + p.Spadj = -int32(frameSize) + + // Unspill arguments. 
+ p = c.cursym.Func().UnspillRegisterArgs(p, c.newprog) + } + + // save entry point, but skipping the two instructions setting R2 in shared mode and maymorestack + startPred := p + + // MOVD g_stackguard(g), R22 + p = obj.Appendp(p, c.newprog) + + p.As = AMOVD + p.From.Type = obj.TYPE_MEM + p.From.Reg = REGG + p.From.Offset = 2 * int64(c.ctxt.Arch.PtrSize) // G.stackguard0 + if c.cursym.CFunc() { + p.From.Offset = 3 * int64(c.ctxt.Arch.PtrSize) // G.stackguard1 + } + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_R22 + + // Mark the stack bound check and morestack call async nonpreemptible. + // If we get preempted here, when resumed the preemption request is + // cleared, but we'll still call morestack, which will double the stack + // unnecessarily. See issue #35470. + p = c.ctxt.StartUnsafePoint(p, c.newprog) + + var q *obj.Prog + if framesize <= objabi.StackSmall { + // small stack: SP < stackguard + // CMP stackguard, SP + p = obj.Appendp(p, c.newprog) + + p.As = ACMPU + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_R22 + p.To.Type = obj.TYPE_REG + p.To.Reg = REGSP + } else { + // large stack: SP-framesize < stackguard-StackSmall + offset := int64(framesize) - objabi.StackSmall + if framesize > objabi.StackBig { + // Such a large stack we need to protect against underflow. + // The runtime guarantees SP > objabi.StackBig, but + // framesize is large enough that SP-framesize may + // underflow, causing a direct comparison with the + // stack guard to incorrectly succeed. We explicitly + // guard against underflow. + // + // CMPU SP, $(framesize-StackSmall) + // BLT label-of-call-to-morestack + if offset <= 0xffff { + p = obj.Appendp(p, c.newprog) + p.As = ACMPU + p.From.Type = obj.TYPE_REG + p.From.Reg = REGSP + p.To.Type = obj.TYPE_CONST + p.To.Offset = offset + } else { + // Constant is too big for CMPU. 
+ p = obj.Appendp(p, c.newprog) + p.As = AMOVD + p.From.Type = obj.TYPE_CONST + p.From.Offset = offset + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_R23 + + p = obj.Appendp(p, c.newprog) + p.As = ACMPU + p.From.Type = obj.TYPE_REG + p.From.Reg = REGSP + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_R23 + } + + p = obj.Appendp(p, c.newprog) + q = p + p.As = ABLT + p.To.Type = obj.TYPE_BRANCH + } + + // Check against the stack guard. We've ensured this won't underflow. + // ADD $-(framesize-StackSmall), SP, R4 + // CMPU stackguard, R4 + p = obj.Appendp(p, c.newprog) + + p.As = AADD + p.From.Type = obj.TYPE_CONST + p.From.Offset = -offset + p.Reg = REGSP + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_R23 + + p = obj.Appendp(p, c.newprog) + p.As = ACMPU + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_R22 + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_R23 + } + + // q1: BLT done + p = obj.Appendp(p, c.newprog) + q1 := p + + p.As = ABLT + p.To.Type = obj.TYPE_BRANCH + + p = obj.Appendp(p, c.newprog) + p.As = obj.ANOP // zero-width place holder + + if q != nil { + q.To.SetTarget(p) + } + + // Spill the register args that could be clobbered by the + // morestack code. + + spill := c.cursym.Func().SpillRegisterArgs(p, c.newprog) + + // MOVD LR, R5 + p = obj.Appendp(spill, c.newprog) + p.As = AMOVD + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_LR + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_R5 + + p = c.ctxt.EmitEntryStackMap(c.cursym, p, c.newprog) + + var morestacksym *obj.LSym + if c.cursym.CFunc() { + morestacksym = c.ctxt.Lookup("runtime.morestackc") + } else if !c.cursym.Func().Text.From.Sym.NeedCtxt() { + morestacksym = c.ctxt.Lookup("runtime.morestack_noctxt") + } else { + morestacksym = c.ctxt.Lookup("runtime.morestack") + } + + if c.ctxt.Flag_shared { + // In PPC64 PIC code, R2 is used as TOC pointer derived from R12 + // which is the address of function entry point when entering + // the function. We need to preserve R2 across call to morestack. 
+ // Fortunately, in shared mode, 8(SP) and 16(SP) are reserved in + // the caller's frame, but not used (0(SP) is caller's saved LR, + // 24(SP) is caller's saved R2). Use 8(SP) to save this function's R2. + // MOVD R2, 8(SP) + p = obj.Appendp(p, c.newprog) + p.As = AMOVD + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_R2 + p.To.Type = obj.TYPE_MEM + p.To.Reg = REGSP + p.To.Offset = 8 + } + + if c.ctxt.Flag_dynlink { + // Avoid calling morestack via a PLT when dynamically linking. The + // PLT stubs generated by the system linker on ppc64le when "std r2, + // 24(r1)" to save the TOC pointer in their callers stack + // frame. Unfortunately (and necessarily) morestack is called before + // the function that calls it sets up its frame and so the PLT ends + // up smashing the saved TOC pointer for its caller's caller. + // + // According to the ABI documentation there is a mechanism to avoid + // the TOC save that the PLT stub does (put a R_PPC64_TOCSAVE + // relocation on the nop after the call to morestack) but at the time + // of writing it is not supported at all by gold and my attempt to + // use it with ld.bfd caused an internal linker error. So this hack + // seems preferable. 
+ + // MOVD $runtime.morestack(SB), R12 + p = obj.Appendp(p, c.newprog) + p.As = AMOVD + p.From.Type = obj.TYPE_MEM + p.From.Sym = morestacksym + p.From.Name = obj.NAME_GOTREF + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_R12 + + // MOVD R12, LR + p = obj.Appendp(p, c.newprog) + p.As = AMOVD + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_R12 + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_LR + + // BL LR + p = obj.Appendp(p, c.newprog) + p.As = obj.ACALL + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_LR + } else { + // BL runtime.morestack(SB) + p = obj.Appendp(p, c.newprog) + + p.As = ABL + p.To.Type = obj.TYPE_BRANCH + p.To.Sym = morestacksym + } + + if c.ctxt.Flag_shared { + // MOVD 8(SP), R2 + p = obj.Appendp(p, c.newprog) + p.As = AMOVD + p.From.Type = obj.TYPE_MEM + p.From.Reg = REGSP + p.From.Offset = 8 + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_R2 + } + + unspill := c.cursym.Func().UnspillRegisterArgs(p, c.newprog) + p = c.ctxt.EndUnsafePoint(unspill, c.newprog, -1) + + // BR start + p = obj.Appendp(p, c.newprog) + p.As = ABR + p.To.Type = obj.TYPE_BRANCH + p.To.SetTarget(startPred.Link) + + // placeholder for q1's jump target + p = obj.Appendp(p, c.newprog) + + p.As = obj.ANOP // zero-width place holder + q1.To.SetTarget(p) + + return p +} + +// MMA accumulator to/from instructions are slightly ambiguous since +// the argument represents both source and destination, specified as +// an accumulator. It is treated as a unary destination to simplify +// the code generation in ppc64map. 
+var unaryDst = map[obj.As]bool{ + AXXSETACCZ: true, + AXXMTACC: true, + AXXMFACC: true, +} + +var Linkppc64 = obj.LinkArch{ + Arch: sys.ArchPPC64, + Init: buildop, + Preprocess: preprocess, + Assemble: span9, + Progedit: progedit, + UnaryDst: unaryDst, + DWARFRegisters: PPC64DWARFRegisters, +} + +var Linkppc64le = obj.LinkArch{ + Arch: sys.ArchPPC64LE, + Init: buildop, + Preprocess: preprocess, + Assemble: span9, + Progedit: progedit, + UnaryDst: unaryDst, + DWARFRegisters: PPC64DWARFRegisters, +} diff --git a/src/cmd/internal/obj/riscv/anames.go b/src/cmd/internal/obj/riscv/anames.go new file mode 100644 index 0000000..d2a3674 --- /dev/null +++ b/src/cmd/internal/obj/riscv/anames.go @@ -0,0 +1,260 @@ +// Code generated by stringer -i cpu.go -o anames.go -p riscv; DO NOT EDIT. + +package riscv + +import "cmd/internal/obj" + +var Anames = []string{ + obj.A_ARCHSPECIFIC: "ADDI", + "SLTI", + "SLTIU", + "ANDI", + "ORI", + "XORI", + "SLLI", + "SRLI", + "SRAI", + "LUI", + "AUIPC", + "ADD", + "SLT", + "SLTU", + "AND", + "OR", + "XOR", + "SLL", + "SRL", + "SUB", + "SRA", + "SLLIRV32", + "SRLIRV32", + "SRAIRV32", + "JAL", + "JALR", + "BEQ", + "BNE", + "BLT", + "BLTU", + "BGE", + "BGEU", + "LW", + "LWU", + "LH", + "LHU", + "LB", + "LBU", + "SW", + "SH", + "SB", + "FENCE", + "FENCEI", + "FENCETSO", + "ADDIW", + "SLLIW", + "SRLIW", + "SRAIW", + "ADDW", + "SLLW", + "SRLW", + "SUBW", + "SRAW", + "LD", + "SD", + "MUL", + "MULH", + "MULHU", + "MULHSU", + "MULW", + "DIV", + "DIVU", + "REM", + "REMU", + "DIVW", + "DIVUW", + "REMW", + "REMUW", + "LRD", + "SCD", + "LRW", + "SCW", + "AMOSWAPD", + "AMOADDD", + "AMOANDD", + "AMOORD", + "AMOXORD", + "AMOMAXD", + "AMOMAXUD", + "AMOMIND", + "AMOMINUD", + "AMOSWAPW", + "AMOADDW", + "AMOANDW", + "AMOORW", + "AMOXORW", + "AMOMAXW", + "AMOMAXUW", + "AMOMINW", + "AMOMINUW", + "RDCYCLE", + "RDCYCLEH", + "RDTIME", + "RDTIMEH", + "RDINSTRET", + "RDINSTRETH", + "FRCSR", + "FSCSR", + "FRRM", + "FSRM", + "FRFLAGS", + "FSFLAGS", + "FSRMI", + 
"FSFLAGSI", + "FLW", + "FSW", + "FADDS", + "FSUBS", + "FMULS", + "FDIVS", + "FMINS", + "FMAXS", + "FSQRTS", + "FMADDS", + "FMSUBS", + "FNMADDS", + "FNMSUBS", + "FCVTWS", + "FCVTLS", + "FCVTSW", + "FCVTSL", + "FCVTWUS", + "FCVTLUS", + "FCVTSWU", + "FCVTSLU", + "FSGNJS", + "FSGNJNS", + "FSGNJXS", + "FMVXS", + "FMVSX", + "FMVXW", + "FMVWX", + "FEQS", + "FLTS", + "FLES", + "FCLASSS", + "FLD", + "FSD", + "FADDD", + "FSUBD", + "FMULD", + "FDIVD", + "FMIND", + "FMAXD", + "FSQRTD", + "FMADDD", + "FMSUBD", + "FNMADDD", + "FNMSUBD", + "FCVTWD", + "FCVTLD", + "FCVTDW", + "FCVTDL", + "FCVTWUD", + "FCVTLUD", + "FCVTDWU", + "FCVTDLU", + "FCVTSD", + "FCVTDS", + "FSGNJD", + "FSGNJND", + "FSGNJXD", + "FMVXD", + "FMVDX", + "FEQD", + "FLTD", + "FLED", + "FCLASSD", + "FLQ", + "FSQ", + "FADDQ", + "FSUBQ", + "FMULQ", + "FDIVQ", + "FMINQ", + "FMAXQ", + "FSQRTQ", + "FMADDQ", + "FMSUBQ", + "FNMADDQ", + "FNMSUBQ", + "FCVTWQ", + "FCVTLQ", + "FCVTSQ", + "FCVTDQ", + "FCVTQW", + "FCVTQL", + "FCVTQS", + "FCVTQD", + "FCVTWUQ", + "FCVTLUQ", + "FCVTQWU", + "FCVTQLU", + "FSGNJQ", + "FSGNJNQ", + "FSGNJXQ", + "FMVXQ", + "FMVQX", + "FEQQ", + "FLEQ", + "FLTQ", + "FCLASSQ", + "CSRRW", + "CSRRS", + "CSRRC", + "CSRRWI", + "CSRRSI", + "CSRRCI", + "ECALL", + "SCALL", + "EBREAK", + "SBREAK", + "MRET", + "SRET", + "URET", + "DRET", + "WFI", + "SFENCEVMA", + "HFENCEGVMA", + "HFENCEVVMA", + "WORD", + "BEQZ", + "BGEZ", + "BGT", + "BGTU", + "BGTZ", + "BLE", + "BLEU", + "BLEZ", + "BLTZ", + "BNEZ", + "FABSD", + "FABSS", + "FNEGD", + "FNEGS", + "FNED", + "FNES", + "MOV", + "MOVB", + "MOVBU", + "MOVF", + "MOVD", + "MOVH", + "MOVHU", + "MOVW", + "MOVWU", + "NEG", + "NEGW", + "NOT", + "SEQZ", + "SNEZ", + "LAST", +} diff --git a/src/cmd/internal/obj/riscv/asm_test.go b/src/cmd/internal/obj/riscv/asm_test.go new file mode 100644 index 0000000..c22428c --- /dev/null +++ b/src/cmd/internal/obj/riscv/asm_test.go @@ -0,0 +1,213 @@ +// Copyright 2019 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package riscv + +import ( + "bytes" + "fmt" + "internal/testenv" + "os" + "path/filepath" + "runtime" + "testing" +) + +// TestLargeBranch generates a large function with a very far conditional +// branch, in order to ensure that it assembles successfully. +func TestLargeBranch(t *testing.T) { + if testing.Short() { + t.Skip("Skipping test in short mode") + } + testenv.MustHaveGoBuild(t) + + dir, err := os.MkdirTemp("", "testlargebranch") + if err != nil { + t.Fatalf("Could not create directory: %v", err) + } + defer os.RemoveAll(dir) + + // Generate a very large function. + buf := bytes.NewBuffer(make([]byte, 0, 7000000)) + genLargeBranch(buf) + + tmpfile := filepath.Join(dir, "x.s") + if err := os.WriteFile(tmpfile, buf.Bytes(), 0644); err != nil { + t.Fatalf("Failed to write file: %v", err) + } + + // Assemble generated file. + cmd := testenv.Command(t, testenv.GoToolPath(t), "tool", "asm", "-o", filepath.Join(dir, "x.o"), tmpfile) + cmd.Env = append(os.Environ(), "GOARCH=riscv64", "GOOS=linux") + out, err := cmd.CombinedOutput() + if err != nil { + t.Errorf("Build failed: %v, output: %s", err, out) + } +} + +func genLargeBranch(buf *bytes.Buffer) { + fmt.Fprintln(buf, "TEXT f(SB),0,$0-0") + fmt.Fprintln(buf, "BEQ X0, X0, label") + for i := 0; i < 1<<19; i++ { + fmt.Fprintln(buf, "ADD $0, X0, X0") + } + fmt.Fprintln(buf, "label:") + fmt.Fprintln(buf, "ADD $0, X0, X0") +} + +// TestLargeCall generates a large function (>1MB of text) with a call to +// a following function, in order to ensure that it assembles and links +// correctly. 
+func TestLargeCall(t *testing.T) { + if testing.Short() { + t.Skip("Skipping test in short mode") + } + testenv.MustHaveGoBuild(t) + + dir, err := os.MkdirTemp("", "testlargecall") + if err != nil { + t.Fatalf("could not create directory: %v", err) + } + defer os.RemoveAll(dir) + + if err := os.WriteFile(filepath.Join(dir, "go.mod"), []byte("module largecall"), 0644); err != nil { + t.Fatalf("Failed to write file: %v\n", err) + } + main := `package main +func main() { + x() +} + +func x() +func y() +` + if err := os.WriteFile(filepath.Join(dir, "x.go"), []byte(main), 0644); err != nil { + t.Fatalf("failed to write main: %v\n", err) + } + + // Generate a very large function with call. + buf := bytes.NewBuffer(make([]byte, 0, 7000000)) + genLargeCall(buf) + + if err := os.WriteFile(filepath.Join(dir, "x.s"), buf.Bytes(), 0644); err != nil { + t.Fatalf("Failed to write file: %v\n", err) + } + + // Build generated files. + cmd := testenv.Command(t, testenv.GoToolPath(t), "build", "-ldflags=-linkmode=internal") + cmd.Dir = dir + cmd.Env = append(os.Environ(), "GOARCH=riscv64", "GOOS=linux") + out, err := cmd.CombinedOutput() + if err != nil { + t.Errorf("Build failed: %v, output: %s", err, out) + } + + if runtime.GOARCH == "riscv64" && testenv.HasCGO() { + cmd := testenv.Command(t, testenv.GoToolPath(t), "build", "-ldflags=-linkmode=external") + cmd.Dir = dir + cmd.Env = append(os.Environ(), "GOARCH=riscv64", "GOOS=linux") + out, err := cmd.CombinedOutput() + if err != nil { + t.Errorf("Build failed: %v, output: %s", err, out) + } + } +} + +func genLargeCall(buf *bytes.Buffer) { + fmt.Fprintln(buf, "TEXT ·x(SB),0,$0-0") + fmt.Fprintln(buf, "CALL ·y(SB)") + for i := 0; i < 1<<19; i++ { + fmt.Fprintln(buf, "ADD $0, X0, X0") + } + fmt.Fprintln(buf, "RET") + fmt.Fprintln(buf, "TEXT ·y(SB),0,$0-0") + fmt.Fprintln(buf, "ADD $0, X0, X0") + fmt.Fprintln(buf, "RET") +} + +// Issue 20348. 
+func TestNoRet(t *testing.T) { + dir, err := os.MkdirTemp("", "testnoret") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(dir) + tmpfile := filepath.Join(dir, "x.s") + if err := os.WriteFile(tmpfile, []byte("TEXT ·stub(SB),$0-0\nNOP\n"), 0644); err != nil { + t.Fatal(err) + } + cmd := testenv.Command(t, testenv.GoToolPath(t), "tool", "asm", "-o", filepath.Join(dir, "x.o"), tmpfile) + cmd.Env = append(os.Environ(), "GOARCH=riscv64", "GOOS=linux") + if out, err := cmd.CombinedOutput(); err != nil { + t.Errorf("%v\n%s", err, out) + } +} + +func TestImmediateSplitting(t *testing.T) { + dir, err := os.MkdirTemp("", "testimmsplit") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(dir) + tmpfile := filepath.Join(dir, "x.s") + asm := ` +TEXT _stub(SB),$0-0 + LB 4096(X5), X6 + LH 4096(X5), X6 + LW 4096(X5), X6 + LD 4096(X5), X6 + LBU 4096(X5), X6 + LHU 4096(X5), X6 + LWU 4096(X5), X6 + SB X6, 4096(X5) + SH X6, 4096(X5) + SW X6, 4096(X5) + SD X6, 4096(X5) + + FLW 4096(X5), F6 + FLD 4096(X5), F6 + FSW F6, 4096(X5) + FSD F6, 4096(X5) + + MOVB 4096(X5), X6 + MOVH 4096(X5), X6 + MOVW 4096(X5), X6 + MOV 4096(X5), X6 + MOVBU 4096(X5), X6 + MOVHU 4096(X5), X6 + MOVWU 4096(X5), X6 + + MOVB X6, 4096(X5) + MOVH X6, 4096(X5) + MOVW X6, 4096(X5) + MOV X6, 4096(X5) + + MOVF 4096(X5), F6 + MOVD 4096(X5), F6 + MOVF F6, 4096(X5) + MOVD F6, 4096(X5) +` + if err := os.WriteFile(tmpfile, []byte(asm), 0644); err != nil { + t.Fatal(err) + } + cmd := testenv.Command(t, testenv.GoToolPath(t), "tool", "asm", "-o", filepath.Join(dir, "x.o"), tmpfile) + cmd.Env = append(os.Environ(), "GOARCH=riscv64", "GOOS=linux") + if out, err := cmd.CombinedOutput(); err != nil { + t.Errorf("%v\n%s", err, out) + } +} + +func TestBranch(t *testing.T) { + if runtime.GOARCH != "riscv64" { + t.Skip("Requires riscv64 to run") + } + + testenv.MustHaveGoBuild(t) + + cmd := testenv.Command(t, testenv.GoToolPath(t), "test") + cmd.Dir = "testdata/testbranch" + if out, err := 
testenv.CleanCmdEnv(cmd).CombinedOutput(); err != nil { + t.Errorf("Branch test failed: %v\n%s", err, out) + } +} diff --git a/src/cmd/internal/obj/riscv/cpu.go b/src/cmd/internal/obj/riscv/cpu.go new file mode 100644 index 0000000..594f8ea --- /dev/null +++ b/src/cmd/internal/obj/riscv/cpu.go @@ -0,0 +1,650 @@ +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2008 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2008 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. All rights reserved. +// Portions Copyright © 2019 The Go Authors. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package riscv + +import "cmd/internal/obj" + +//go:generate go run ../stringer.go -i $GOFILE -o anames.go -p riscv + +const ( + // Base register numberings. + REG_X0 = obj.RBaseRISCV + iota + REG_X1 + REG_X2 + REG_X3 + REG_X4 + REG_X5 + REG_X6 + REG_X7 + REG_X8 + REG_X9 + REG_X10 + REG_X11 + REG_X12 + REG_X13 + REG_X14 + REG_X15 + REG_X16 + REG_X17 + REG_X18 + REG_X19 + REG_X20 + REG_X21 + REG_X22 + REG_X23 + REG_X24 + REG_X25 + REG_X26 + REG_X27 + REG_X28 + REG_X29 + REG_X30 + REG_X31 + + // FP register numberings. + REG_F0 + REG_F1 + REG_F2 + REG_F3 + REG_F4 + REG_F5 + REG_F6 + REG_F7 + REG_F8 + REG_F9 + REG_F10 + REG_F11 + REG_F12 + REG_F13 + REG_F14 + REG_F15 + REG_F16 + REG_F17 + REG_F18 + REG_F19 + REG_F20 + REG_F21 + REG_F22 + REG_F23 + REG_F24 + REG_F25 + REG_F26 + REG_F27 + REG_F28 + REG_F29 + REG_F30 + REG_F31 + + // This marks the end of the register numbering. + REG_END + + // General registers reassigned to ABI names. + REG_ZERO = REG_X0 + REG_RA = REG_X1 // aka REG_LR + REG_SP = REG_X2 + REG_GP = REG_X3 // aka REG_SB + REG_TP = REG_X4 + REG_T0 = REG_X5 + REG_T1 = REG_X6 + REG_T2 = REG_X7 + REG_S0 = REG_X8 + REG_S1 = REG_X9 + REG_A0 = REG_X10 + REG_A1 = REG_X11 + REG_A2 = REG_X12 + REG_A3 = REG_X13 + REG_A4 = REG_X14 + REG_A5 = REG_X15 + REG_A6 = REG_X16 + REG_A7 = REG_X17 + REG_S2 = REG_X18 + REG_S3 = REG_X19 + REG_S4 = REG_X20 + REG_S5 = REG_X21 + REG_S6 = REG_X22 + REG_S7 = REG_X23 + REG_S8 = REG_X24 + REG_S9 = REG_X25 + REG_S10 = REG_X26 // aka REG_CTXT + REG_S11 = REG_X27 // aka REG_G + REG_T3 = REG_X28 + REG_T4 = REG_X29 + REG_T5 = REG_X30 + REG_T6 = REG_X31 // aka REG_TMP + + // Go runtime register names. + REG_CTXT = REG_S10 // Context for closures. 
+ REG_G = REG_S11 // G pointer. + REG_LR = REG_RA // Link register. + REG_TMP = REG_T6 // Reserved for assembler use. + + // ABI names for floating point registers. + REG_FT0 = REG_F0 + REG_FT1 = REG_F1 + REG_FT2 = REG_F2 + REG_FT3 = REG_F3 + REG_FT4 = REG_F4 + REG_FT5 = REG_F5 + REG_FT6 = REG_F6 + REG_FT7 = REG_F7 + REG_FS0 = REG_F8 + REG_FS1 = REG_F9 + REG_FA0 = REG_F10 + REG_FA1 = REG_F11 + REG_FA2 = REG_F12 + REG_FA3 = REG_F13 + REG_FA4 = REG_F14 + REG_FA5 = REG_F15 + REG_FA6 = REG_F16 + REG_FA7 = REG_F17 + REG_FS2 = REG_F18 + REG_FS3 = REG_F19 + REG_FS4 = REG_F20 + REG_FS5 = REG_F21 + REG_FS6 = REG_F22 + REG_FS7 = REG_F23 + REG_FS8 = REG_F24 + REG_FS9 = REG_F25 + REG_FS10 = REG_F26 + REG_FS11 = REG_F27 + REG_FT8 = REG_F28 + REG_FT9 = REG_F29 + REG_FT10 = REG_F30 + REG_FT11 = REG_F31 + + // Names generated by the SSA compiler. + REGSP = REG_SP + REGG = REG_G +) + +// https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-dwarf.adoc#dwarf-register-numbers +var RISCV64DWARFRegisters = map[int16]int16{ + // Integer Registers. + REG_X0: 0, + REG_X1: 1, + REG_X2: 2, + REG_X3: 3, + REG_X4: 4, + REG_X5: 5, + REG_X6: 6, + REG_X7: 7, + REG_X8: 8, + REG_X9: 9, + REG_X10: 10, + REG_X11: 11, + REG_X12: 12, + REG_X13: 13, + REG_X14: 14, + REG_X15: 15, + REG_X16: 16, + REG_X17: 17, + REG_X18: 18, + REG_X19: 19, + REG_X20: 20, + REG_X21: 21, + REG_X22: 22, + REG_X23: 23, + REG_X24: 24, + REG_X25: 25, + REG_X26: 26, + REG_X27: 27, + REG_X28: 28, + REG_X29: 29, + REG_X30: 30, + REG_X31: 31, + + // Floating-Point Registers. 
+ REG_F0: 32, + REG_F1: 33, + REG_F2: 34, + REG_F3: 35, + REG_F4: 36, + REG_F5: 37, + REG_F6: 38, + REG_F7: 39, + REG_F8: 40, + REG_F9: 41, + REG_F10: 42, + REG_F11: 43, + REG_F12: 44, + REG_F13: 45, + REG_F14: 46, + REG_F15: 47, + REG_F16: 48, + REG_F17: 49, + REG_F18: 50, + REG_F19: 51, + REG_F20: 52, + REG_F21: 53, + REG_F22: 54, + REG_F23: 55, + REG_F24: 56, + REG_F25: 57, + REG_F26: 58, + REG_F27: 59, + REG_F28: 60, + REG_F29: 61, + REG_F30: 62, + REG_F31: 63, +} + +// Prog.Mark flags. +const ( + // USES_REG_TMP indicates that a machine instruction generated from the + // corresponding *obj.Prog uses the temporary register. + USES_REG_TMP = 1 << iota + + // NEED_CALL_RELOC is set on JAL instructions to indicate that a + // R_RISCV_CALL relocation is needed. + NEED_CALL_RELOC + + // NEED_PCREL_ITYPE_RELOC is set on AUIPC instructions to indicate that + // it is the first instruction in an AUIPC + I-type pair that needs a + // R_RISCV_PCREL_ITYPE relocation. + NEED_PCREL_ITYPE_RELOC + + // NEED_PCREL_STYPE_RELOC is set on AUIPC instructions to indicate that + // it is the first instruction in an AUIPC + S-type pair that needs a + // R_RISCV_PCREL_STYPE relocation. + NEED_PCREL_STYPE_RELOC +) + +// RISC-V mnemonics, as defined in the "opcodes" and "opcodes-pseudo" files +// at https://github.com/riscv/riscv-opcodes. +// +// As well as some pseudo-mnemonics (e.g. MOV) used only in the assembler. +// +// See also "The RISC-V Instruction Set Manual" at https://riscv.org/specifications/. +// +// If you modify this table, you MUST run 'go generate' to regenerate anames.go! 
+const ( + // Unprivileged ISA (Document Version 20190608-Base-Ratified) + + // 2.4: Integer Computational Instructions + AADDI = obj.ABaseRISCV + obj.A_ARCHSPECIFIC + iota + ASLTI + ASLTIU + AANDI + AORI + AXORI + ASLLI + ASRLI + ASRAI + ALUI + AAUIPC + AADD + ASLT + ASLTU + AAND + AOR + AXOR + ASLL + ASRL + ASUB + ASRA + + // The SLL/SRL/SRA instructions differ slightly between RV32 and RV64, + // hence there are pseudo-opcodes for the RV32 specific versions. + ASLLIRV32 + ASRLIRV32 + ASRAIRV32 + + // 2.5: Control Transfer Instructions + AJAL + AJALR + ABEQ + ABNE + ABLT + ABLTU + ABGE + ABGEU + + // 2.6: Load and Store Instructions + ALW + ALWU + ALH + ALHU + ALB + ALBU + ASW + ASH + ASB + + // 2.7: Memory Ordering Instructions + AFENCE + AFENCEI + AFENCETSO + + // 5.2: Integer Computational Instructions (RV64I) + AADDIW + ASLLIW + ASRLIW + ASRAIW + AADDW + ASLLW + ASRLW + ASUBW + ASRAW + + // 5.3: Load and Store Instructions (RV64I) + ALD + ASD + + // 7.1: Multiplication Operations + AMUL + AMULH + AMULHU + AMULHSU + AMULW + ADIV + ADIVU + AREM + AREMU + ADIVW + ADIVUW + AREMW + AREMUW + + // 8.2: Load-Reserved/Store-Conditional Instructions + ALRD + ASCD + ALRW + ASCW + + // 8.3: Atomic Memory Operations + AAMOSWAPD + AAMOADDD + AAMOANDD + AAMOORD + AAMOXORD + AAMOMAXD + AAMOMAXUD + AAMOMIND + AAMOMINUD + AAMOSWAPW + AAMOADDW + AAMOANDW + AAMOORW + AAMOXORW + AAMOMAXW + AAMOMAXUW + AAMOMINW + AAMOMINUW + + // 10.1: Base Counters and Timers + ARDCYCLE + ARDCYCLEH + ARDTIME + ARDTIMEH + ARDINSTRET + ARDINSTRETH + + // 11.2: Floating-Point Control and Status Register + AFRCSR + AFSCSR + AFRRM + AFSRM + AFRFLAGS + AFSFLAGS + AFSRMI + AFSFLAGSI + + // 11.5: Single-Precision Load and Store Instructions + AFLW + AFSW + + // 11.6: Single-Precision Floating-Point Computational Instructions + AFADDS + AFSUBS + AFMULS + AFDIVS + AFMINS + AFMAXS + AFSQRTS + AFMADDS + AFMSUBS + AFNMADDS + AFNMSUBS + + // 11.7: Single-Precision Floating-Point Conversion and Move 
Instructions + AFCVTWS + AFCVTLS + AFCVTSW + AFCVTSL + AFCVTWUS + AFCVTLUS + AFCVTSWU + AFCVTSLU + AFSGNJS + AFSGNJNS + AFSGNJXS + AFMVXS + AFMVSX + AFMVXW + AFMVWX + + // 11.8: Single-Precision Floating-Point Compare Instructions + AFEQS + AFLTS + AFLES + + // 11.9: Single-Precision Floating-Point Classify Instruction + AFCLASSS + + // 12.3: Double-Precision Load and Store Instructions + AFLD + AFSD + + // 12.4: Double-Precision Floating-Point Computational Instructions + AFADDD + AFSUBD + AFMULD + AFDIVD + AFMIND + AFMAXD + AFSQRTD + AFMADDD + AFMSUBD + AFNMADDD + AFNMSUBD + + // 12.5: Double-Precision Floating-Point Conversion and Move Instructions + AFCVTWD + AFCVTLD + AFCVTDW + AFCVTDL + AFCVTWUD + AFCVTLUD + AFCVTDWU + AFCVTDLU + AFCVTSD + AFCVTDS + AFSGNJD + AFSGNJND + AFSGNJXD + AFMVXD + AFMVDX + + // 12.6: Double-Precision Floating-Point Compare Instructions + AFEQD + AFLTD + AFLED + + // 12.7: Double-Precision Floating-Point Classify Instruction + AFCLASSD + + // 13.1 Quad-Precision Load and Store Instructions + AFLQ + AFSQ + + // 13.2: Quad-Precision Computational Instructions + AFADDQ + AFSUBQ + AFMULQ + AFDIVQ + AFMINQ + AFMAXQ + AFSQRTQ + AFMADDQ + AFMSUBQ + AFNMADDQ + AFNMSUBQ + + // 13.3 Quad-Precision Convert and Move Instructions + AFCVTWQ + AFCVTLQ + AFCVTSQ + AFCVTDQ + AFCVTQW + AFCVTQL + AFCVTQS + AFCVTQD + AFCVTWUQ + AFCVTLUQ + AFCVTQWU + AFCVTQLU + AFSGNJQ + AFSGNJNQ + AFSGNJXQ + AFMVXQ + AFMVQX + + // 13.4 Quad-Precision Floating-Point Compare Instructions + AFEQQ + AFLEQ + AFLTQ + + // 13.5 Quad-Precision Floating-Point Classify Instruction + AFCLASSQ + + // Privileged ISA (Version 20190608-Priv-MSU-Ratified) + + // 3.1.9: Instructions to Access CSRs + ACSRRW + ACSRRS + ACSRRC + ACSRRWI + ACSRRSI + ACSRRCI + + // 3.2.1: Environment Call and Breakpoint + AECALL + ASCALL + AEBREAK + ASBREAK + + // 3.2.2: Trap-Return Instructions + AMRET + ASRET + AURET + ADRET + + // 3.2.3: Wait for Interrupt + AWFI + + // 4.2.1: Supervisor Memory-Management 
Fence Instruction + ASFENCEVMA + + // Hypervisor Memory-Management Instructions + AHFENCEGVMA + AHFENCEVVMA + + // The escape hatch. Inserts a single 32-bit word. + AWORD + + // Pseudo-instructions. These get translated by the assembler into other + // instructions, based on their operands. + ABEQZ + ABGEZ + ABGT + ABGTU + ABGTZ + ABLE + ABLEU + ABLEZ + ABLTZ + ABNEZ + AFABSD + AFABSS + AFNEGD + AFNEGS + AFNED + AFNES + AMOV + AMOVB + AMOVBU + AMOVF + AMOVD + AMOVH + AMOVHU + AMOVW + AMOVWU + ANEG + ANEGW + ANOT + ASEQZ + ASNEZ + + // End marker + ALAST +) + +// All unary instructions which write to their arguments (as opposed to reading +// from them) go here. The assembly parser uses this information to populate +// its AST in a semantically reasonable way. +// +// Any instructions not listed here are assumed to either be non-unary or to read +// from its argument. +var unaryDst = map[obj.As]bool{ + ARDCYCLE: true, + ARDCYCLEH: true, + ARDTIME: true, + ARDTIMEH: true, + ARDINSTRET: true, + ARDINSTRETH: true, +} + +// Instruction encoding masks. +const ( + // JTypeImmMask is a mask including only the immediate portion of + // J-type instructions. + JTypeImmMask = 0xfffff000 + + // ITypeImmMask is a mask including only the immediate portion of + // I-type instructions. + ITypeImmMask = 0xfff00000 + + // STypeImmMask is a mask including only the immediate portion of + // S-type instructions. + STypeImmMask = 0xfe000f80 + + // UTypeImmMask is a mask including only the immediate portion of + // U-type instructions. + UTypeImmMask = 0xfffff000 +) diff --git a/src/cmd/internal/obj/riscv/inst.go b/src/cmd/internal/obj/riscv/inst.go new file mode 100644 index 0000000..891199e --- /dev/null +++ b/src/cmd/internal/obj/riscv/inst.go @@ -0,0 +1,459 @@ +// Code generated by parse_opcodes -go; DO NOT EDIT. 
+ +package riscv + +import "cmd/internal/obj" + +type inst struct { + opcode uint32 + funct3 uint32 + rs2 uint32 + csr int64 + funct7 uint32 +} + +func encode(a obj.As) *inst { + switch a { + case ABEQ: + return &inst{0x63, 0x0, 0x0, 0, 0x0} + case ABNE: + return &inst{0x63, 0x1, 0x0, 0, 0x0} + case ABLT: + return &inst{0x63, 0x4, 0x0, 0, 0x0} + case ABGE: + return &inst{0x63, 0x5, 0x0, 0, 0x0} + case ABLTU: + return &inst{0x63, 0x6, 0x0, 0, 0x0} + case ABGEU: + return &inst{0x63, 0x7, 0x0, 0, 0x0} + case AJALR: + return &inst{0x67, 0x0, 0x0, 0, 0x0} + case AJAL: + return &inst{0x6f, 0x0, 0x0, 0, 0x0} + case ALUI: + return &inst{0x37, 0x0, 0x0, 0, 0x0} + case AAUIPC: + return &inst{0x17, 0x0, 0x0, 0, 0x0} + case AADDI: + return &inst{0x13, 0x0, 0x0, 0, 0x0} + case ASLLI: + return &inst{0x13, 0x1, 0x0, 0, 0x0} + case ASLTI: + return &inst{0x13, 0x2, 0x0, 0, 0x0} + case ASLTIU: + return &inst{0x13, 0x3, 0x0, 0, 0x0} + case AXORI: + return &inst{0x13, 0x4, 0x0, 0, 0x0} + case ASRLI: + return &inst{0x13, 0x5, 0x0, 0, 0x0} + case ASRAI: + return &inst{0x13, 0x5, 0x0, 1024, 0x20} + case AORI: + return &inst{0x13, 0x6, 0x0, 0, 0x0} + case AANDI: + return &inst{0x13, 0x7, 0x0, 0, 0x0} + case AADD: + return &inst{0x33, 0x0, 0x0, 0, 0x0} + case ASUB: + return &inst{0x33, 0x0, 0x0, 1024, 0x20} + case ASLL: + return &inst{0x33, 0x1, 0x0, 0, 0x0} + case ASLT: + return &inst{0x33, 0x2, 0x0, 0, 0x0} + case ASLTU: + return &inst{0x33, 0x3, 0x0, 0, 0x0} + case AXOR: + return &inst{0x33, 0x4, 0x0, 0, 0x0} + case ASRL: + return &inst{0x33, 0x5, 0x0, 0, 0x0} + case ASRA: + return &inst{0x33, 0x5, 0x0, 1024, 0x20} + case AOR: + return &inst{0x33, 0x6, 0x0, 0, 0x0} + case AAND: + return &inst{0x33, 0x7, 0x0, 0, 0x0} + case AADDIW: + return &inst{0x1b, 0x0, 0x0, 0, 0x0} + case ASLLIW: + return &inst{0x1b, 0x1, 0x0, 0, 0x0} + case ASRLIW: + return &inst{0x1b, 0x5, 0x0, 0, 0x0} + case ASRAIW: + return &inst{0x1b, 0x5, 0x0, 1024, 0x20} + case AADDW: + return &inst{0x3b, 0x0, 0x0, 0, 0x0} + 
case ASUBW: + return &inst{0x3b, 0x0, 0x0, 1024, 0x20} + case ASLLW: + return &inst{0x3b, 0x1, 0x0, 0, 0x0} + case ASRLW: + return &inst{0x3b, 0x5, 0x0, 0, 0x0} + case ASRAW: + return &inst{0x3b, 0x5, 0x0, 1024, 0x20} + case ALB: + return &inst{0x3, 0x0, 0x0, 0, 0x0} + case ALH: + return &inst{0x3, 0x1, 0x0, 0, 0x0} + case ALW: + return &inst{0x3, 0x2, 0x0, 0, 0x0} + case ALD: + return &inst{0x3, 0x3, 0x0, 0, 0x0} + case ALBU: + return &inst{0x3, 0x4, 0x0, 0, 0x0} + case ALHU: + return &inst{0x3, 0x5, 0x0, 0, 0x0} + case ALWU: + return &inst{0x3, 0x6, 0x0, 0, 0x0} + case ASB: + return &inst{0x23, 0x0, 0x0, 0, 0x0} + case ASH: + return &inst{0x23, 0x1, 0x0, 0, 0x0} + case ASW: + return &inst{0x23, 0x2, 0x0, 0, 0x0} + case ASD: + return &inst{0x23, 0x3, 0x0, 0, 0x0} + case AFENCE: + return &inst{0xf, 0x0, 0x0, 0, 0x0} + case AFENCEI: + return &inst{0xf, 0x1, 0x0, 0, 0x0} + case AMUL: + return &inst{0x33, 0x0, 0x0, 32, 0x1} + case AMULH: + return &inst{0x33, 0x1, 0x0, 32, 0x1} + case AMULHSU: + return &inst{0x33, 0x2, 0x0, 32, 0x1} + case AMULHU: + return &inst{0x33, 0x3, 0x0, 32, 0x1} + case ADIV: + return &inst{0x33, 0x4, 0x0, 32, 0x1} + case ADIVU: + return &inst{0x33, 0x5, 0x0, 32, 0x1} + case AREM: + return &inst{0x33, 0x6, 0x0, 32, 0x1} + case AREMU: + return &inst{0x33, 0x7, 0x0, 32, 0x1} + case AMULW: + return &inst{0x3b, 0x0, 0x0, 32, 0x1} + case ADIVW: + return &inst{0x3b, 0x4, 0x0, 32, 0x1} + case ADIVUW: + return &inst{0x3b, 0x5, 0x0, 32, 0x1} + case AREMW: + return &inst{0x3b, 0x6, 0x0, 32, 0x1} + case AREMUW: + return &inst{0x3b, 0x7, 0x0, 32, 0x1} + case AAMOADDW: + return &inst{0x2f, 0x2, 0x0, 0, 0x0} + case AAMOXORW: + return &inst{0x2f, 0x2, 0x0, 512, 0x10} + case AAMOORW: + return &inst{0x2f, 0x2, 0x0, 1024, 0x20} + case AAMOANDW: + return &inst{0x2f, 0x2, 0x0, 1536, 0x30} + case AAMOMINW: + return &inst{0x2f, 0x2, 0x0, -2048, 0x40} + case AAMOMAXW: + return &inst{0x2f, 0x2, 0x0, -1536, 0x50} + case AAMOMINUW: + return &inst{0x2f, 0x2, 0x0, -1024, 
0x60} + case AAMOMAXUW: + return &inst{0x2f, 0x2, 0x0, -512, 0x70} + case AAMOSWAPW: + return &inst{0x2f, 0x2, 0x0, 128, 0x4} + case ALRW: + return &inst{0x2f, 0x2, 0x0, 256, 0x8} + case ASCW: + return &inst{0x2f, 0x2, 0x0, 384, 0xc} + case AAMOADDD: + return &inst{0x2f, 0x3, 0x0, 0, 0x0} + case AAMOXORD: + return &inst{0x2f, 0x3, 0x0, 512, 0x10} + case AAMOORD: + return &inst{0x2f, 0x3, 0x0, 1024, 0x20} + case AAMOANDD: + return &inst{0x2f, 0x3, 0x0, 1536, 0x30} + case AAMOMIND: + return &inst{0x2f, 0x3, 0x0, -2048, 0x40} + case AAMOMAXD: + return &inst{0x2f, 0x3, 0x0, -1536, 0x50} + case AAMOMINUD: + return &inst{0x2f, 0x3, 0x0, -1024, 0x60} + case AAMOMAXUD: + return &inst{0x2f, 0x3, 0x0, -512, 0x70} + case AAMOSWAPD: + return &inst{0x2f, 0x3, 0x0, 128, 0x4} + case ALRD: + return &inst{0x2f, 0x3, 0x0, 256, 0x8} + case ASCD: + return &inst{0x2f, 0x3, 0x0, 384, 0xc} + case AECALL: + return &inst{0x73, 0x0, 0x0, 0, 0x0} + case AEBREAK: + return &inst{0x73, 0x0, 0x1, 1, 0x0} + case AURET: + return &inst{0x73, 0x0, 0x2, 2, 0x0} + case ASRET: + return &inst{0x73, 0x0, 0x2, 258, 0x8} + case AMRET: + return &inst{0x73, 0x0, 0x2, 770, 0x18} + case ADRET: + return &inst{0x73, 0x0, 0x12, 1970, 0x3d} + case ASFENCEVMA: + return &inst{0x73, 0x0, 0x0, 288, 0x9} + case AWFI: + return &inst{0x73, 0x0, 0x5, 261, 0x8} + case ACSRRW: + return &inst{0x73, 0x1, 0x0, 0, 0x0} + case ACSRRS: + return &inst{0x73, 0x2, 0x0, 0, 0x0} + case ACSRRC: + return &inst{0x73, 0x3, 0x0, 0, 0x0} + case ACSRRWI: + return &inst{0x73, 0x5, 0x0, 0, 0x0} + case ACSRRSI: + return &inst{0x73, 0x6, 0x0, 0, 0x0} + case ACSRRCI: + return &inst{0x73, 0x7, 0x0, 0, 0x0} + case AHFENCEVVMA: + return &inst{0x73, 0x0, 0x0, 544, 0x11} + case AHFENCEGVMA: + return &inst{0x73, 0x0, 0x0, 1568, 0x31} + case AFADDS: + return &inst{0x53, 0x0, 0x0, 0, 0x0} + case AFSUBS: + return &inst{0x53, 0x0, 0x0, 128, 0x4} + case AFMULS: + return &inst{0x53, 0x0, 0x0, 256, 0x8} + case AFDIVS: + return &inst{0x53, 0x0, 0x0, 384, 0xc} 
+ case AFSGNJS: + return &inst{0x53, 0x0, 0x0, 512, 0x10} + case AFSGNJNS: + return &inst{0x53, 0x1, 0x0, 512, 0x10} + case AFSGNJXS: + return &inst{0x53, 0x2, 0x0, 512, 0x10} + case AFMINS: + return &inst{0x53, 0x0, 0x0, 640, 0x14} + case AFMAXS: + return &inst{0x53, 0x1, 0x0, 640, 0x14} + case AFSQRTS: + return &inst{0x53, 0x0, 0x0, 1408, 0x2c} + case AFADDD: + return &inst{0x53, 0x0, 0x0, 32, 0x1} + case AFSUBD: + return &inst{0x53, 0x0, 0x0, 160, 0x5} + case AFMULD: + return &inst{0x53, 0x0, 0x0, 288, 0x9} + case AFDIVD: + return &inst{0x53, 0x0, 0x0, 416, 0xd} + case AFSGNJD: + return &inst{0x53, 0x0, 0x0, 544, 0x11} + case AFSGNJND: + return &inst{0x53, 0x1, 0x0, 544, 0x11} + case AFSGNJXD: + return &inst{0x53, 0x2, 0x0, 544, 0x11} + case AFMIND: + return &inst{0x53, 0x0, 0x0, 672, 0x15} + case AFMAXD: + return &inst{0x53, 0x1, 0x0, 672, 0x15} + case AFCVTSD: + return &inst{0x53, 0x0, 0x1, 1025, 0x20} + case AFCVTDS: + return &inst{0x53, 0x0, 0x0, 1056, 0x21} + case AFSQRTD: + return &inst{0x53, 0x0, 0x0, 1440, 0x2d} + case AFADDQ: + return &inst{0x53, 0x0, 0x0, 96, 0x3} + case AFSUBQ: + return &inst{0x53, 0x0, 0x0, 224, 0x7} + case AFMULQ: + return &inst{0x53, 0x0, 0x0, 352, 0xb} + case AFDIVQ: + return &inst{0x53, 0x0, 0x0, 480, 0xf} + case AFSGNJQ: + return &inst{0x53, 0x0, 0x0, 608, 0x13} + case AFSGNJNQ: + return &inst{0x53, 0x1, 0x0, 608, 0x13} + case AFSGNJXQ: + return &inst{0x53, 0x2, 0x0, 608, 0x13} + case AFMINQ: + return &inst{0x53, 0x0, 0x0, 736, 0x17} + case AFMAXQ: + return &inst{0x53, 0x1, 0x0, 736, 0x17} + case AFCVTSQ: + return &inst{0x53, 0x0, 0x3, 1027, 0x20} + case AFCVTQS: + return &inst{0x53, 0x0, 0x0, 1120, 0x23} + case AFCVTDQ: + return &inst{0x53, 0x0, 0x3, 1059, 0x21} + case AFCVTQD: + return &inst{0x53, 0x0, 0x1, 1121, 0x23} + case AFSQRTQ: + return &inst{0x53, 0x0, 0x0, 1504, 0x2f} + case AFLES: + return &inst{0x53, 0x0, 0x0, -1536, 0x50} + case AFLTS: + return &inst{0x53, 0x1, 0x0, -1536, 0x50} + case AFEQS: + return &inst{0x53, 
0x2, 0x0, -1536, 0x50} + case AFLED: + return &inst{0x53, 0x0, 0x0, -1504, 0x51} + case AFLTD: + return &inst{0x53, 0x1, 0x0, -1504, 0x51} + case AFEQD: + return &inst{0x53, 0x2, 0x0, -1504, 0x51} + case AFLEQ: + return &inst{0x53, 0x0, 0x0, -1440, 0x53} + case AFLTQ: + return &inst{0x53, 0x1, 0x0, -1440, 0x53} + case AFEQQ: + return &inst{0x53, 0x2, 0x0, -1440, 0x53} + case AFCVTWS: + return &inst{0x53, 0x0, 0x0, -1024, 0x60} + case AFCVTWUS: + return &inst{0x53, 0x0, 0x1, -1023, 0x60} + case AFCVTLS: + return &inst{0x53, 0x0, 0x2, -1022, 0x60} + case AFCVTLUS: + return &inst{0x53, 0x0, 0x3, -1021, 0x60} + case AFMVXW: + return &inst{0x53, 0x0, 0x0, -512, 0x70} + case AFCLASSS: + return &inst{0x53, 0x1, 0x0, -512, 0x70} + case AFCVTWD: + return &inst{0x53, 0x0, 0x0, -992, 0x61} + case AFCVTWUD: + return &inst{0x53, 0x0, 0x1, -991, 0x61} + case AFCVTLD: + return &inst{0x53, 0x0, 0x2, -990, 0x61} + case AFCVTLUD: + return &inst{0x53, 0x0, 0x3, -989, 0x61} + case AFMVXD: + return &inst{0x53, 0x0, 0x0, -480, 0x71} + case AFCLASSD: + return &inst{0x53, 0x1, 0x0, -480, 0x71} + case AFCVTWQ: + return &inst{0x53, 0x0, 0x0, -928, 0x63} + case AFCVTWUQ: + return &inst{0x53, 0x0, 0x1, -927, 0x63} + case AFCVTLQ: + return &inst{0x53, 0x0, 0x2, -926, 0x63} + case AFCVTLUQ: + return &inst{0x53, 0x0, 0x3, -925, 0x63} + case AFMVXQ: + return &inst{0x53, 0x0, 0x0, -416, 0x73} + case AFCLASSQ: + return &inst{0x53, 0x1, 0x0, -416, 0x73} + case AFCVTSW: + return &inst{0x53, 0x0, 0x0, -768, 0x68} + case AFCVTSWU: + return &inst{0x53, 0x0, 0x1, -767, 0x68} + case AFCVTSL: + return &inst{0x53, 0x0, 0x2, -766, 0x68} + case AFCVTSLU: + return &inst{0x53, 0x0, 0x3, -765, 0x68} + case AFMVWX: + return &inst{0x53, 0x0, 0x0, -256, 0x78} + case AFCVTDW: + return &inst{0x53, 0x0, 0x0, -736, 0x69} + case AFCVTDWU: + return &inst{0x53, 0x0, 0x1, -735, 0x69} + case AFCVTDL: + return &inst{0x53, 0x0, 0x2, -734, 0x69} + case AFCVTDLU: + return &inst{0x53, 0x0, 0x3, -733, 0x69} + case AFMVDX: + 
return &inst{0x53, 0x0, 0x0, -224, 0x79} + case AFCVTQW: + return &inst{0x53, 0x0, 0x0, -672, 0x6b} + case AFCVTQWU: + return &inst{0x53, 0x0, 0x1, -671, 0x6b} + case AFCVTQL: + return &inst{0x53, 0x0, 0x2, -670, 0x6b} + case AFCVTQLU: + return &inst{0x53, 0x0, 0x3, -669, 0x6b} + case AFMVQX: + return &inst{0x53, 0x0, 0x0, -160, 0x7b} + case AFLW: + return &inst{0x7, 0x2, 0x0, 0, 0x0} + case AFLD: + return &inst{0x7, 0x3, 0x0, 0, 0x0} + case AFLQ: + return &inst{0x7, 0x4, 0x0, 0, 0x0} + case AFSW: + return &inst{0x27, 0x2, 0x0, 0, 0x0} + case AFSD: + return &inst{0x27, 0x3, 0x0, 0, 0x0} + case AFSQ: + return &inst{0x27, 0x4, 0x0, 0, 0x0} + case AFMADDS: + return &inst{0x43, 0x0, 0x0, 0, 0x0} + case AFMSUBS: + return &inst{0x47, 0x0, 0x0, 0, 0x0} + case AFNMSUBS: + return &inst{0x4b, 0x0, 0x0, 0, 0x0} + case AFNMADDS: + return &inst{0x4f, 0x0, 0x0, 0, 0x0} + case AFMADDD: + return &inst{0x43, 0x0, 0x0, 32, 0x1} + case AFMSUBD: + return &inst{0x47, 0x0, 0x0, 32, 0x1} + case AFNMSUBD: + return &inst{0x4b, 0x0, 0x0, 32, 0x1} + case AFNMADDD: + return &inst{0x4f, 0x0, 0x0, 32, 0x1} + case AFMADDQ: + return &inst{0x43, 0x0, 0x0, 96, 0x3} + case AFMSUBQ: + return &inst{0x47, 0x0, 0x0, 96, 0x3} + case AFNMSUBQ: + return &inst{0x4b, 0x0, 0x0, 96, 0x3} + case AFNMADDQ: + return &inst{0x4f, 0x0, 0x0, 96, 0x3} + case ASLLIRV32: + return &inst{0x13, 0x1, 0x0, 0, 0x0} + case ASRLIRV32: + return &inst{0x13, 0x5, 0x0, 0, 0x0} + case ASRAIRV32: + return &inst{0x13, 0x5, 0x0, 1024, 0x20} + case AFRFLAGS: + return &inst{0x73, 0x2, 0x1, 1, 0x0} + case AFSFLAGS: + return &inst{0x73, 0x1, 0x1, 1, 0x0} + case AFSFLAGSI: + return &inst{0x73, 0x5, 0x1, 1, 0x0} + case AFRRM: + return &inst{0x73, 0x2, 0x2, 2, 0x0} + case AFSRM: + return &inst{0x73, 0x1, 0x2, 2, 0x0} + case AFSRMI: + return &inst{0x73, 0x5, 0x2, 2, 0x0} + case AFSCSR: + return &inst{0x73, 0x1, 0x3, 3, 0x0} + case AFRCSR: + return &inst{0x73, 0x2, 0x3, 3, 0x0} + case ARDCYCLE: + return &inst{0x73, 0x2, 0x0, -1024, 0x60} + case 
ARDTIME: + return &inst{0x73, 0x2, 0x1, -1023, 0x60} + case ARDINSTRET: + return &inst{0x73, 0x2, 0x2, -1022, 0x60} + case ARDCYCLEH: + return &inst{0x73, 0x2, 0x0, -896, 0x64} + case ARDTIMEH: + return &inst{0x73, 0x2, 0x1, -895, 0x64} + case ARDINSTRETH: + return &inst{0x73, 0x2, 0x2, -894, 0x64} + case ASCALL: + return &inst{0x73, 0x0, 0x0, 0, 0x0} + case ASBREAK: + return &inst{0x73, 0x0, 0x1, 1, 0x0} + case AFMVXS: + return &inst{0x53, 0x0, 0x0, -512, 0x70} + case AFMVSX: + return &inst{0x53, 0x0, 0x0, -256, 0x78} + case AFENCETSO: + return &inst{0xf, 0x0, 0x13, -1997, 0x41} + } + return nil +} diff --git a/src/cmd/internal/obj/riscv/list.go b/src/cmd/internal/obj/riscv/list.go new file mode 100644 index 0000000..de90961 --- /dev/null +++ b/src/cmd/internal/obj/riscv/list.go @@ -0,0 +1,33 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package riscv + +import ( + "fmt" + + "cmd/internal/obj" +) + +func init() { + obj.RegisterRegister(obj.RBaseRISCV, REG_END, RegName) + obj.RegisterOpcode(obj.ABaseRISCV, Anames) +} + +func RegName(r int) string { + switch { + case r == 0: + return "NONE" + case r == REG_G: + return "g" + case r == REG_SP: + return "SP" + case REG_X0 <= r && r <= REG_X31: + return fmt.Sprintf("X%d", r-REG_X0) + case REG_F0 <= r && r <= REG_F31: + return fmt.Sprintf("F%d", r-REG_F0) + default: + return fmt.Sprintf("Rgok(%d)", r-obj.RBaseRISCV) + } +} diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go new file mode 100644 index 0000000..da322a7 --- /dev/null +++ b/src/cmd/internal/obj/riscv/obj.go @@ -0,0 +1,2248 @@ +// Copyright © 2015 The Go Authors. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package riscv + +import ( + "cmd/internal/obj" + "cmd/internal/objabi" + "cmd/internal/sys" + "fmt" + "log" +) + +func buildop(ctxt *obj.Link) {} + +func jalToSym(ctxt *obj.Link, p *obj.Prog, lr int16) { + switch p.As { + case obj.ACALL, obj.AJMP, obj.ARET, obj.ADUFFZERO, obj.ADUFFCOPY: + default: + ctxt.Diag("unexpected Prog in jalToSym: %v", p) + return + } + + p.As = AJAL + p.Mark |= NEED_CALL_RELOC + p.From.Type = obj.TYPE_REG + p.From.Reg = lr + p.Reg = obj.REG_NONE +} + +// progedit is called individually for each *obj.Prog. It normalizes instruction +// formats and eliminates as many pseudo-instructions as possible. +func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { + + // Expand binary instructions to ternary ones. 
+ if p.Reg == obj.REG_NONE { + switch p.As { + case AADDI, ASLTI, ASLTIU, AANDI, AORI, AXORI, ASLLI, ASRLI, ASRAI, + AADDIW, ASLLIW, ASRLIW, ASRAIW, AADDW, ASUBW, ASLLW, ASRLW, ASRAW, + AADD, AAND, AOR, AXOR, ASLL, ASRL, ASUB, ASRA, + AMUL, AMULH, AMULHU, AMULHSU, AMULW, ADIV, ADIVU, ADIVW, ADIVUW, + AREM, AREMU, AREMW, AREMUW: + p.Reg = p.To.Reg + } + } + + // Rewrite instructions with constant operands to refer to the immediate + // form of the instruction. + if p.From.Type == obj.TYPE_CONST { + switch p.As { + case AADD: + p.As = AADDI + case ASLT: + p.As = ASLTI + case ASLTU: + p.As = ASLTIU + case AAND: + p.As = AANDI + case AOR: + p.As = AORI + case AXOR: + p.As = AXORI + case ASLL: + p.As = ASLLI + case ASRL: + p.As = ASRLI + case ASRA: + p.As = ASRAI + case AADDW: + p.As = AADDIW + case ASLLW: + p.As = ASLLIW + case ASRLW: + p.As = ASRLIW + case ASRAW: + p.As = ASRAIW + } + } + + switch p.As { + case obj.AJMP: + // Turn JMP into JAL ZERO or JALR ZERO. + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_ZERO + + switch p.To.Type { + case obj.TYPE_BRANCH: + p.As = AJAL + case obj.TYPE_MEM: + switch p.To.Name { + case obj.NAME_NONE: + p.As = AJALR + case obj.NAME_EXTERN, obj.NAME_STATIC: + // Handled in preprocess. + default: + ctxt.Diag("unsupported name %d for %v", p.To.Name, p) + } + default: + panic(fmt.Sprintf("unhandled type %+v", p.To.Type)) + } + + case obj.ACALL: + switch p.To.Type { + case obj.TYPE_MEM: + // Handled in preprocess. + case obj.TYPE_REG: + p.As = AJALR + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_LR + default: + ctxt.Diag("unknown destination type %+v in CALL: %v", p.To.Type, p) + } + + case obj.AUNDEF: + p.As = AEBREAK + + case ASCALL: + // SCALL is the old name for ECALL. + p.As = AECALL + + case ASBREAK: + // SBREAK is the old name for EBREAK. + p.As = AEBREAK + + case AMOV: + // Put >32-bit constants in memory and load them. 
+ if p.From.Type == obj.TYPE_CONST && p.From.Name == obj.NAME_NONE && p.From.Reg == obj.REG_NONE && int64(int32(p.From.Offset)) != p.From.Offset { + p.From.Type = obj.TYPE_MEM + p.From.Sym = ctxt.Int64Sym(p.From.Offset) + p.From.Name = obj.NAME_EXTERN + p.From.Offset = 0 + } + } +} + +// addrToReg extracts the register from an Addr, handling special Addr.Names. +func addrToReg(a obj.Addr) int16 { + switch a.Name { + case obj.NAME_PARAM, obj.NAME_AUTO: + return REG_SP + } + return a.Reg +} + +// movToLoad converts a MOV mnemonic into the corresponding load instruction. +func movToLoad(mnemonic obj.As) obj.As { + switch mnemonic { + case AMOV: + return ALD + case AMOVB: + return ALB + case AMOVH: + return ALH + case AMOVW: + return ALW + case AMOVBU: + return ALBU + case AMOVHU: + return ALHU + case AMOVWU: + return ALWU + case AMOVF: + return AFLW + case AMOVD: + return AFLD + default: + panic(fmt.Sprintf("%+v is not a MOV", mnemonic)) + } +} + +// movToStore converts a MOV mnemonic into the corresponding store instruction. +func movToStore(mnemonic obj.As) obj.As { + switch mnemonic { + case AMOV: + return ASD + case AMOVB: + return ASB + case AMOVH: + return ASH + case AMOVW: + return ASW + case AMOVF: + return AFSW + case AMOVD: + return AFSD + default: + panic(fmt.Sprintf("%+v is not a MOV", mnemonic)) + } +} + +// markRelocs marks an obj.Prog that specifies a MOV pseudo-instruction and +// requires relocation. 
+func markRelocs(p *obj.Prog) { + switch p.As { + case AMOV, AMOVB, AMOVH, AMOVW, AMOVBU, AMOVHU, AMOVWU, AMOVF, AMOVD: + switch { + case p.From.Type == obj.TYPE_ADDR && p.To.Type == obj.TYPE_REG: + switch p.From.Name { + case obj.NAME_EXTERN, obj.NAME_STATIC: + p.Mark |= NEED_PCREL_ITYPE_RELOC + } + case p.From.Type == obj.TYPE_MEM && p.To.Type == obj.TYPE_REG: + switch p.From.Name { + case obj.NAME_EXTERN, obj.NAME_STATIC: + p.Mark |= NEED_PCREL_ITYPE_RELOC + } + case p.From.Type == obj.TYPE_REG && p.To.Type == obj.TYPE_MEM: + switch p.To.Name { + case obj.NAME_EXTERN, obj.NAME_STATIC: + p.Mark |= NEED_PCREL_STYPE_RELOC + } + } + } +} + +// InvertBranch inverts the condition of a conditional branch. +func InvertBranch(as obj.As) obj.As { + switch as { + case ABEQ: + return ABNE + case ABEQZ: + return ABNEZ + case ABGE: + return ABLT + case ABGEU: + return ABLTU + case ABGEZ: + return ABLTZ + case ABGT: + return ABLE + case ABGTU: + return ABLEU + case ABGTZ: + return ABLEZ + case ABLE: + return ABGT + case ABLEU: + return ABGTU + case ABLEZ: + return ABGTZ + case ABLT: + return ABGE + case ABLTU: + return ABGEU + case ABLTZ: + return ABGEZ + case ABNE: + return ABEQ + case ABNEZ: + return ABEQZ + default: + panic("InvertBranch: not a branch") + } +} + +// containsCall reports whether the symbol contains a CALL (or equivalent) +// instruction. Must be called after progedit. +func containsCall(sym *obj.LSym) bool { + // CALLs are CALL or JAL(R) with link register LR. + for p := sym.Func().Text; p != nil; p = p.Link { + switch p.As { + case obj.ACALL, obj.ADUFFZERO, obj.ADUFFCOPY: + return true + case AJAL, AJALR: + if p.From.Type == obj.TYPE_REG && p.From.Reg == REG_LR { + return true + } + } + } + + return false +} + +// setPCs sets the Pc field in all instructions reachable from p. +// It uses pc as the initial value and returns the next available pc. 
+func setPCs(p *obj.Prog, pc int64) int64 { + for ; p != nil; p = p.Link { + p.Pc = pc + for _, ins := range instructionsForProg(p) { + pc += int64(ins.length()) + } + } + return pc +} + +// stackOffset updates Addr offsets based on the current stack size. +// +// The stack looks like: +// ------------------- +// | | +// | PARAMs | +// | | +// | | +// ------------------- +// | Parent RA | SP on function entry +// ------------------- +// | | +// | | +// | AUTOs | +// | | +// | | +// ------------------- +// | RA | SP during function execution +// ------------------- +// +// FixedFrameSize makes other packages aware of the space allocated for RA. +// +// A nicer version of this diagram can be found on slide 21 of the presentation +// attached to https://golang.org/issue/16922#issuecomment-243748180. +func stackOffset(a *obj.Addr, stacksize int64) { + switch a.Name { + case obj.NAME_AUTO: + // Adjust to the top of AUTOs. + a.Offset += stacksize + case obj.NAME_PARAM: + // Adjust to the bottom of PARAMs. + a.Offset += stacksize + 8 + } +} + +// preprocess generates prologue and epilogue code, computes PC-relative branch +// and jump offsets, and resolves pseudo-registers. +// +// preprocess is called once per linker symbol. +// +// When preprocess finishes, all instructions in the symbol are either +// concrete, real RISC-V instructions or directive pseudo-ops like TEXT, +// PCDATA, and FUNCDATA. +func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { + if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { + return + } + + // Generate the prologue. + text := cursym.Func().Text + if text.As != obj.ATEXT { + ctxt.Diag("preprocess: found symbol that does not start with TEXT directive") + return + } + + stacksize := text.To.Offset + if stacksize == -8 { + // Historical way to mark NOFRAME. 
+ text.From.Sym.Set(obj.AttrNoFrame, true) + stacksize = 0 + } + if stacksize < 0 { + ctxt.Diag("negative frame size %d - did you mean NOFRAME?", stacksize) + } + if text.From.Sym.NoFrame() { + if stacksize != 0 { + ctxt.Diag("NOFRAME functions must have a frame size of 0, not %d", stacksize) + } + } + + if !containsCall(cursym) { + text.From.Sym.Set(obj.AttrLeaf, true) + if stacksize == 0 { + // A leaf function with no locals has no frame. + text.From.Sym.Set(obj.AttrNoFrame, true) + } + } + + // Save LR unless there is no frame. + if !text.From.Sym.NoFrame() { + stacksize += ctxt.Arch.FixedFrameSize + } + + cursym.Func().Args = text.To.Val.(int32) + cursym.Func().Locals = int32(stacksize) + + prologue := text + + if !cursym.Func().Text.From.Sym.NoSplit() { + prologue = stacksplit(ctxt, prologue, cursym, newprog, stacksize) // emit split check + } + + if stacksize != 0 { + prologue = ctxt.StartUnsafePoint(prologue, newprog) + + // Actually save LR. + prologue = obj.Appendp(prologue, newprog) + prologue.As = AMOV + prologue.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR} + prologue.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: -stacksize} + + // Insert stack adjustment. + prologue = obj.Appendp(prologue, newprog) + prologue.As = AADDI + prologue.From = obj.Addr{Type: obj.TYPE_CONST, Offset: -stacksize} + prologue.Reg = REG_SP + prologue.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_SP} + prologue.Spadj = int32(stacksize) + + prologue = ctxt.EndUnsafePoint(prologue, newprog, -1) + + // On Linux, in a cgo binary we may get a SIGSETXID signal early on + // before the signal stack is set, as glibc doesn't allow us to block + // SIGSETXID. So a signal may land on the current stack and clobber + // the content below the SP. We store the LR again after the SP is + // decremented. 
+ prologue = obj.Appendp(prologue, newprog) + prologue.As = AMOV + prologue.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR} + prologue.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: 0} + } + + if cursym.Func().Text.From.Sym.Wrapper() { + // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame + // + // MOV g_panic(g), X5 + // BNE X5, ZERO, adjust + // end: + // NOP + // ...rest of function.. + // adjust: + // MOV panic_argp(X5), X6 + // ADD $(autosize+FIXED_FRAME), SP, X7 + // BNE X6, X7, end + // ADD $FIXED_FRAME, SP, X6 + // MOV X6, panic_argp(X5) + // JMP end + // + // The NOP is needed to give the jumps somewhere to land. + + ldpanic := obj.Appendp(prologue, newprog) + + ldpanic.As = AMOV + ldpanic.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REGG, Offset: 4 * int64(ctxt.Arch.PtrSize)} // G.panic + ldpanic.Reg = obj.REG_NONE + ldpanic.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X5} + + bneadj := obj.Appendp(ldpanic, newprog) + bneadj.As = ABNE + bneadj.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X5} + bneadj.Reg = REG_ZERO + bneadj.To.Type = obj.TYPE_BRANCH + + endadj := obj.Appendp(bneadj, newprog) + endadj.As = obj.ANOP + + last := endadj + for last.Link != nil { + last = last.Link + } + + getargp := obj.Appendp(last, newprog) + getargp.As = AMOV + getargp.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_X5, Offset: 0} // Panic.argp + getargp.Reg = obj.REG_NONE + getargp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X6} + + bneadj.To.SetTarget(getargp) + + calcargp := obj.Appendp(getargp, newprog) + calcargp.As = AADDI + calcargp.From = obj.Addr{Type: obj.TYPE_CONST, Offset: stacksize + ctxt.Arch.FixedFrameSize} + calcargp.Reg = REG_SP + calcargp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X7} + + testargp := obj.Appendp(calcargp, newprog) + testargp.As = ABNE + testargp.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X6} + testargp.Reg = REG_X7 + testargp.To.Type = obj.TYPE_BRANCH + testargp.To.SetTarget(endadj) + + adjargp := 
obj.Appendp(testargp, newprog) + adjargp.As = AADDI + adjargp.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(ctxt.Arch.PtrSize)} + adjargp.Reg = REG_SP + adjargp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X6} + + setargp := obj.Appendp(adjargp, newprog) + setargp.As = AMOV + setargp.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X6} + setargp.Reg = obj.REG_NONE + setargp.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_X5, Offset: 0} // Panic.argp + + godone := obj.Appendp(setargp, newprog) + godone.As = AJAL + godone.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO} + godone.To.Type = obj.TYPE_BRANCH + godone.To.SetTarget(endadj) + } + + // Update stack-based offsets. + for p := cursym.Func().Text; p != nil; p = p.Link { + stackOffset(&p.From, stacksize) + stackOffset(&p.To, stacksize) + } + + // Additional instruction rewriting. + for p := cursym.Func().Text; p != nil; p = p.Link { + switch p.As { + case obj.AGETCALLERPC: + if cursym.Leaf() { + // MOV LR, Rd + p.As = AMOV + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_LR + } else { + // MOV (RSP), Rd + p.As = AMOV + p.From.Type = obj.TYPE_MEM + p.From.Reg = REG_SP + } + + case obj.ACALL, obj.ADUFFZERO, obj.ADUFFCOPY: + switch p.To.Type { + case obj.TYPE_MEM: + jalToSym(ctxt, p, REG_LR) + } + + case obj.AJMP: + switch p.To.Type { + case obj.TYPE_MEM: + switch p.To.Name { + case obj.NAME_EXTERN, obj.NAME_STATIC: + jalToSym(ctxt, p, REG_ZERO) + } + } + + case obj.ARET: + // Replace RET with epilogue. + retJMP := p.To.Sym + + if stacksize != 0 { + // Restore LR. 
+ p.As = AMOV + p.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: 0} + p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR} + p = obj.Appendp(p, newprog) + + p.As = AADDI + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: stacksize} + p.Reg = REG_SP + p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_SP} + p.Spadj = int32(-stacksize) + p = obj.Appendp(p, newprog) + } + + if retJMP != nil { + p.As = obj.ARET + p.To.Sym = retJMP + jalToSym(ctxt, p, REG_ZERO) + } else { + p.As = AJALR + p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO} + p.Reg = obj.REG_NONE + p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR} + } + + // "Add back" the stack removed in the previous instruction. + // + // This is to avoid confusing pctospadj, which sums + // Spadj from function entry to each PC, and shouldn't + // count adjustments from earlier epilogues, since they + // won't affect later PCs. + p.Spadj = int32(stacksize) + + case AADDI: + // Refine Spadjs account for adjustment via ADDI instruction. + if p.To.Type == obj.TYPE_REG && p.To.Reg == REG_SP && p.From.Type == obj.TYPE_CONST { + p.Spadj = int32(-p.From.Offset) + } + } + + if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.Spadj == 0 { + f := cursym.Func() + if f.FuncFlag&objabi.FuncFlag_SPWRITE == 0 { + f.FuncFlag |= objabi.FuncFlag_SPWRITE + if ctxt.Debugvlog || !ctxt.IsAsm { + ctxt.Logf("auto-SPWRITE: %s %v\n", cursym.Name, p) + if !ctxt.IsAsm { + ctxt.Diag("invalid auto-SPWRITE in non-assembly") + ctxt.DiagFlush() + log.Fatalf("bad SPWRITE") + } + } + } + } + } + + var callCount int + for p := cursym.Func().Text; p != nil; p = p.Link { + markRelocs(p) + if p.Mark&NEED_CALL_RELOC == NEED_CALL_RELOC { + callCount++ + } + } + const callTrampSize = 8 // 2 machine instructions. + maxTrampSize := int64(callCount * callTrampSize) + + // Compute instruction addresses. Once we do that, we need to check for + // overextended jumps and branches. 
Within each iteration, Pc differences + // are always lower bounds (since the program gets monotonically longer, + // a fixed point will be reached). No attempt to handle functions > 2GiB. + for { + big, rescan := false, false + maxPC := setPCs(cursym.Func().Text, 0) + if maxPC+maxTrampSize > (1 << 20) { + big = true + } + + for p := cursym.Func().Text; p != nil; p = p.Link { + switch p.As { + case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ: + if p.To.Type != obj.TYPE_BRANCH { + panic("assemble: instruction with branch-like opcode lacks destination") + } + offset := p.To.Target().Pc - p.Pc + if offset < -4096 || 4096 <= offset { + // Branch is long. Replace it with a jump. + jmp := obj.Appendp(p, newprog) + jmp.As = AJAL + jmp.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO} + jmp.To = obj.Addr{Type: obj.TYPE_BRANCH} + jmp.To.SetTarget(p.To.Target()) + + p.As = InvertBranch(p.As) + p.To.SetTarget(jmp.Link) + + // We may have made previous branches too long, + // so recheck them. + rescan = true + } + case AJAL: + // Linker will handle the intersymbol case and trampolines. + if p.To.Target() == nil { + if !big { + break + } + // This function is going to be too large for JALs + // to reach trampolines. Replace with AUIPC+JALR. + jmp := obj.Appendp(p, newprog) + jmp.As = AJALR + jmp.From = p.From + jmp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP} + + p.As = AAUIPC + p.Mark = (p.Mark &^ NEED_CALL_RELOC) | NEED_PCREL_ITYPE_RELOC + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: p.To.Offset, Sym: p.To.Sym}) + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: 0} + p.Reg = obj.REG_NONE + p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP} + + rescan = true + break + } + offset := p.To.Target().Pc - p.Pc + if offset < -(1<<20) || (1<<20) <= offset { + // Replace with 2-instruction sequence. This assumes + // that TMP is not live across J instructions, since + // it is reserved by SSA. 
+ jmp := obj.Appendp(p, newprog) + jmp.As = AJALR + jmp.From = p.From + jmp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP} + + // p.From is not generally valid, however will be + // fixed up in the next loop. + p.As = AAUIPC + p.From = obj.Addr{Type: obj.TYPE_BRANCH, Sym: p.From.Sym} + p.From.SetTarget(p.To.Target()) + p.Reg = obj.REG_NONE + p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP} + + rescan = true + } + } + } + + if !rescan { + break + } + } + + // Now that there are no long branches, resolve branch and jump targets. + // At this point, instruction rewriting which changes the number of + // instructions will break everything--don't do it! + for p := cursym.Func().Text; p != nil; p = p.Link { + switch p.As { + case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ: + switch p.To.Type { + case obj.TYPE_BRANCH: + p.To.Type, p.To.Offset = obj.TYPE_CONST, p.To.Target().Pc-p.Pc + case obj.TYPE_MEM: + panic("unhandled type") + } + + case AJAL: + // Linker will handle the intersymbol case and trampolines. + if p.To.Target() != nil { + p.To.Type, p.To.Offset = obj.TYPE_CONST, p.To.Target().Pc-p.Pc + } + + case AAUIPC: + if p.From.Type == obj.TYPE_BRANCH { + low, high, err := Split32BitImmediate(p.From.Target().Pc - p.Pc) + if err != nil { + ctxt.Diag("%v: jump displacement %d too large", p, p.To.Target().Pc-p.Pc) + } + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: high, Sym: cursym} + p.Link.From.Offset = low + } + } + } + + // Validate all instructions - this provides nice error messages. + for p := cursym.Func().Text; p != nil; p = p.Link { + for _, ins := range instructionsForProg(p) { + ins.validate(ctxt) + } + } +} + +func stacksplit(ctxt *obj.Link, p *obj.Prog, cursym *obj.LSym, newprog obj.ProgAlloc, framesize int64) *obj.Prog { + // Leaf function with no frame is effectively NOSPLIT. 
+ if framesize == 0 { + return p + } + + if ctxt.Flag_maymorestack != "" { + // Save LR and REGCTXT + const frameSize = 16 + p = ctxt.StartUnsafePoint(p, newprog) + + // Spill Arguments. This has to happen before we open + // any more frame space. + p = cursym.Func().SpillRegisterArgs(p, newprog) + + // MOV LR, -16(SP) + p = obj.Appendp(p, newprog) + p.As = AMOV + p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR} + p.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: -frameSize} + // ADDI $-16, SP + p = obj.Appendp(p, newprog) + p.As = AADDI + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: -frameSize} + p.Reg = REG_SP + p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_SP} + p.Spadj = frameSize + // MOV REGCTXT, 8(SP) + p = obj.Appendp(p, newprog) + p.As = AMOV + p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_CTXT} + p.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: 8} + + // CALL maymorestack + p = obj.Appendp(p, newprog) + p.As = obj.ACALL + p.To.Type = obj.TYPE_BRANCH + // See ../x86/obj6.go + p.To.Sym = ctxt.LookupABI(ctxt.Flag_maymorestack, cursym.ABI()) + jalToSym(ctxt, p, REG_X5) + + // Restore LR and REGCTXT + + // MOV 8(SP), REGCTXT + p = obj.Appendp(p, newprog) + p.As = AMOV + p.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: 8} + p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_CTXT} + // MOV (SP), LR + p = obj.Appendp(p, newprog) + p.As = AMOV + p.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: 0} + p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR} + // ADDI $16, SP + p = obj.Appendp(p, newprog) + p.As = AADDI + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: frameSize} + p.Reg = REG_SP + p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_SP} + p.Spadj = -frameSize + + // Unspill arguments + p = cursym.Func().UnspillRegisterArgs(p, newprog) + p = ctxt.EndUnsafePoint(p, newprog, -1) + } + + // Jump back to here after morestack returns. 
+ startPred := p + + // MOV g_stackguard(g), X6 + p = obj.Appendp(p, newprog) + p.As = AMOV + p.From.Type = obj.TYPE_MEM + p.From.Reg = REGG + p.From.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0 + if cursym.CFunc() { + p.From.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1 + } + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_X6 + + // Mark the stack bound check and morestack call async nonpreemptible. + // If we get preempted here, when resumed the preemption request is + // cleared, but we'll still call morestack, which will double the stack + // unnecessarily. See issue #35470. + p = ctxt.StartUnsafePoint(p, newprog) + + var to_done, to_more *obj.Prog + + if framesize <= objabi.StackSmall { + // small stack + // // if SP > stackguard { goto done } + // BLTU stackguard, SP, done + p = obj.Appendp(p, newprog) + p.As = ABLTU + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_X6 + p.Reg = REG_SP + p.To.Type = obj.TYPE_BRANCH + to_done = p + } else { + // large stack: SP-framesize < stackguard-StackSmall + offset := int64(framesize) - objabi.StackSmall + if framesize > objabi.StackBig { + // Such a large stack we need to protect against underflow. + // The runtime guarantees SP > objabi.StackBig, but + // framesize is large enough that SP-framesize may + // underflow, causing a direct comparison with the + // stack guard to incorrectly succeed. We explicitly + // guard against underflow. + // + // MOV $(framesize-StackSmall), X7 + // BLTU SP, X7, label-of-call-to-morestack + + p = obj.Appendp(p, newprog) + p.As = AMOV + p.From.Type = obj.TYPE_CONST + p.From.Offset = offset + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_X7 + + p = obj.Appendp(p, newprog) + p.As = ABLTU + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_SP + p.Reg = REG_X7 + p.To.Type = obj.TYPE_BRANCH + to_more = p + } + + // Check against the stack guard. We've ensured this won't underflow. 
+ // ADD $-(framesize-StackSmall), SP, X7 + // // if X7 > stackguard { goto done } + // BLTU stackguard, X7, done + p = obj.Appendp(p, newprog) + p.As = AADDI + p.From.Type = obj.TYPE_CONST + p.From.Offset = -offset + p.Reg = REG_SP + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_X7 + + p = obj.Appendp(p, newprog) + p.As = ABLTU + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_X6 + p.Reg = REG_X7 + p.To.Type = obj.TYPE_BRANCH + to_done = p + } + + // Spill the register args that could be clobbered by the + // morestack code + p = ctxt.EmitEntryStackMap(cursym, p, newprog) + p = cursym.Func().SpillRegisterArgs(p, newprog) + + // CALL runtime.morestack(SB) + p = obj.Appendp(p, newprog) + p.As = obj.ACALL + p.To.Type = obj.TYPE_BRANCH + + if cursym.CFunc() { + p.To.Sym = ctxt.Lookup("runtime.morestackc") + } else if !cursym.Func().Text.From.Sym.NeedCtxt() { + p.To.Sym = ctxt.Lookup("runtime.morestack_noctxt") + } else { + p.To.Sym = ctxt.Lookup("runtime.morestack") + } + if to_more != nil { + to_more.To.SetTarget(p) + } + jalToSym(ctxt, p, REG_X5) + + p = cursym.Func().UnspillRegisterArgs(p, newprog) + p = ctxt.EndUnsafePoint(p, newprog, -1) + + // JMP start + p = obj.Appendp(p, newprog) + p.As = AJAL + p.To = obj.Addr{Type: obj.TYPE_BRANCH} + p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO} + p.To.SetTarget(startPred.Link) + + // placeholder for to_done's jump target + p = obj.Appendp(p, newprog) + p.As = obj.ANOP // zero-width place holder + to_done.To.SetTarget(p) + + return p +} + +// signExtend sign extends val starting at bit bit. +func signExtend(val int64, bit uint) int64 { + return val << (64 - bit) >> (64 - bit) +} + +// Split32BitImmediate splits a signed 32-bit immediate into a signed 20-bit +// upper immediate and a signed 12-bit lower immediate to be added to the upper +// result. For example, high may be used in LUI and low in a following ADDI to +// generate a full 32-bit constant. 
+func Split32BitImmediate(imm int64) (low, high int64, err error) { + if !immIFits(imm, 32) { + return 0, 0, fmt.Errorf("immediate does not fit in 32 bits: %d", imm) + } + + // Nothing special needs to be done if the immediate fits in 12 bits. + if immIFits(imm, 12) { + return imm, 0, nil + } + + high = imm >> 12 + + // The bottom 12 bits will be treated as signed. + // + // If that will result in a negative 12 bit number, add 1 to + // our upper bits to adjust for the borrow. + // + // It is not possible for this increment to overflow. To + // overflow, the 20 top bits would be 1, and the sign bit for + // the low 12 bits would be set, in which case the entire 32 + // bit pattern fits in a 12 bit signed value. + if imm&(1<<11) != 0 { + high++ + } + + low = signExtend(imm, 12) + high = signExtend(high, 20) + + return low, high, nil +} + +func regVal(r, min, max uint32) uint32 { + if r < min || r > max { + panic(fmt.Sprintf("register out of range, want %d < %d < %d", min, r, max)) + } + return r - min +} + +// regI returns an integer register. +func regI(r uint32) uint32 { + return regVal(r, REG_X0, REG_X31) +} + +// regF returns a float register. +func regF(r uint32) uint32 { + return regVal(r, REG_F0, REG_F31) +} + +// regAddr extracts a register from an Addr. +func regAddr(a obj.Addr, min, max uint32) uint32 { + if a.Type != obj.TYPE_REG { + panic(fmt.Sprintf("ill typed: %+v", a)) + } + return regVal(uint32(a.Reg), min, max) +} + +// regIAddr extracts the integer register from an Addr. +func regIAddr(a obj.Addr) uint32 { + return regAddr(a, REG_X0, REG_X31) +} + +// regFAddr extracts the float register from an Addr. +func regFAddr(a obj.Addr) uint32 { + return regAddr(a, REG_F0, REG_F31) +} + +// immIFits reports whether immediate value x fits in nbits bits +// as a signed integer. 
+func immIFits(x int64, nbits uint) bool { + nbits-- + var min int64 = -1 << nbits + var max int64 = 1<<nbits - 1 + return min <= x && x <= max +} + +// immI extracts the signed integer of the specified size from an immediate. +func immI(as obj.As, imm int64, nbits uint) uint32 { + if !immIFits(imm, nbits) { + panic(fmt.Sprintf("%v: signed immediate %d cannot fit in %d bits", as, imm, nbits)) + } + return uint32(imm) +} + +func wantImmI(ctxt *obj.Link, as obj.As, imm int64, nbits uint) { + if !immIFits(imm, nbits) { + ctxt.Diag("%v: signed immediate %d cannot be larger than %d bits", as, imm, nbits) + } +} + +func wantReg(ctxt *obj.Link, as obj.As, pos string, descr string, r, min, max uint32) { + if r < min || r > max { + var suffix string + if r != obj.REG_NONE { + suffix = fmt.Sprintf(" but got non-%s register %s", descr, RegName(int(r))) + } + ctxt.Diag("%v: expected %s register in %s position%s", as, descr, pos, suffix) + } +} + +func wantNoneReg(ctxt *obj.Link, as obj.As, pos string, r uint32) { + if r != obj.REG_NONE { + ctxt.Diag("%v: expected no register in %s but got register %s", as, pos, RegName(int(r))) + } +} + +// wantIntReg checks that r is an integer register. +func wantIntReg(ctxt *obj.Link, as obj.As, pos string, r uint32) { + wantReg(ctxt, as, pos, "integer", r, REG_X0, REG_X31) +} + +// wantFloatReg checks that r is a floating-point register. +func wantFloatReg(ctxt *obj.Link, as obj.As, pos string, r uint32) { + wantReg(ctxt, as, pos, "float", r, REG_F0, REG_F31) +} + +// wantEvenOffset checks that the offset is a multiple of two. 
+func wantEvenOffset(ctxt *obj.Link, as obj.As, offset int64) { + if offset%1 != 0 { + ctxt.Diag("%v: jump offset %d must be a multiple of two", as, offset) + } +} + +func validateRIII(ctxt *obj.Link, ins *instruction) { + wantIntReg(ctxt, ins.as, "rd", ins.rd) + wantIntReg(ctxt, ins.as, "rs1", ins.rs1) + wantIntReg(ctxt, ins.as, "rs2", ins.rs2) + wantNoneReg(ctxt, ins.as, "rs3", ins.rs3) +} + +func validateRFFF(ctxt *obj.Link, ins *instruction) { + wantFloatReg(ctxt, ins.as, "rd", ins.rd) + wantFloatReg(ctxt, ins.as, "rs1", ins.rs1) + wantFloatReg(ctxt, ins.as, "rs2", ins.rs2) + wantNoneReg(ctxt, ins.as, "rs3", ins.rs3) +} + +func validateRFFFF(ctxt *obj.Link, ins *instruction) { + wantFloatReg(ctxt, ins.as, "rd", ins.rd) + wantFloatReg(ctxt, ins.as, "rs1", ins.rs1) + wantFloatReg(ctxt, ins.as, "rs2", ins.rs2) + wantFloatReg(ctxt, ins.as, "rs3", ins.rs3) +} + +func validateRFFI(ctxt *obj.Link, ins *instruction) { + wantIntReg(ctxt, ins.as, "rd", ins.rd) + wantFloatReg(ctxt, ins.as, "rs1", ins.rs1) + wantFloatReg(ctxt, ins.as, "rs2", ins.rs2) + wantNoneReg(ctxt, ins.as, "rs3", ins.rs3) +} + +func validateRFI(ctxt *obj.Link, ins *instruction) { + wantIntReg(ctxt, ins.as, "rd", ins.rd) + wantNoneReg(ctxt, ins.as, "rs1", ins.rs1) + wantFloatReg(ctxt, ins.as, "rs2", ins.rs2) + wantNoneReg(ctxt, ins.as, "rs3", ins.rs3) +} + +func validateRIF(ctxt *obj.Link, ins *instruction) { + wantFloatReg(ctxt, ins.as, "rd", ins.rd) + wantNoneReg(ctxt, ins.as, "rs1", ins.rs1) + wantIntReg(ctxt, ins.as, "rs2", ins.rs2) + wantNoneReg(ctxt, ins.as, "rs3", ins.rs3) +} + +func validateRFF(ctxt *obj.Link, ins *instruction) { + wantFloatReg(ctxt, ins.as, "rd", ins.rd) + wantNoneReg(ctxt, ins.as, "rs1", ins.rs1) + wantFloatReg(ctxt, ins.as, "rs2", ins.rs2) + wantNoneReg(ctxt, ins.as, "rs3", ins.rs3) +} + +func validateII(ctxt *obj.Link, ins *instruction) { + wantImmI(ctxt, ins.as, ins.imm, 12) + wantIntReg(ctxt, ins.as, "rd", ins.rd) + wantIntReg(ctxt, ins.as, "rs1", ins.rs1) + 
wantNoneReg(ctxt, ins.as, "rs2", ins.rs2) + wantNoneReg(ctxt, ins.as, "rs3", ins.rs3) +} + +func validateIF(ctxt *obj.Link, ins *instruction) { + wantImmI(ctxt, ins.as, ins.imm, 12) + wantFloatReg(ctxt, ins.as, "rd", ins.rd) + wantIntReg(ctxt, ins.as, "rs1", ins.rs1) + wantNoneReg(ctxt, ins.as, "rs2", ins.rs2) + wantNoneReg(ctxt, ins.as, "rs3", ins.rs3) +} + +func validateSI(ctxt *obj.Link, ins *instruction) { + wantImmI(ctxt, ins.as, ins.imm, 12) + wantIntReg(ctxt, ins.as, "rd", ins.rd) + wantIntReg(ctxt, ins.as, "rs1", ins.rs1) + wantNoneReg(ctxt, ins.as, "rs2", ins.rs2) + wantNoneReg(ctxt, ins.as, "rs3", ins.rs3) +} + +func validateSF(ctxt *obj.Link, ins *instruction) { + wantImmI(ctxt, ins.as, ins.imm, 12) + wantIntReg(ctxt, ins.as, "rd", ins.rd) + wantFloatReg(ctxt, ins.as, "rs1", ins.rs1) + wantNoneReg(ctxt, ins.as, "rs2", ins.rs2) + wantNoneReg(ctxt, ins.as, "rs3", ins.rs3) +} + +func validateB(ctxt *obj.Link, ins *instruction) { + // Offsets are multiples of two, so accept 13 bit immediates for the + // 12 bit slot. We implicitly drop the least significant bit in encodeB. + wantEvenOffset(ctxt, ins.as, ins.imm) + wantImmI(ctxt, ins.as, ins.imm, 13) + wantNoneReg(ctxt, ins.as, "rd", ins.rd) + wantIntReg(ctxt, ins.as, "rs1", ins.rs1) + wantIntReg(ctxt, ins.as, "rs2", ins.rs2) + wantNoneReg(ctxt, ins.as, "rs3", ins.rs3) +} + +func validateU(ctxt *obj.Link, ins *instruction) { + wantImmI(ctxt, ins.as, ins.imm, 20) + wantIntReg(ctxt, ins.as, "rd", ins.rd) + wantNoneReg(ctxt, ins.as, "rs1", ins.rs1) + wantNoneReg(ctxt, ins.as, "rs2", ins.rs2) + wantNoneReg(ctxt, ins.as, "rs3", ins.rs3) +} + +func validateJ(ctxt *obj.Link, ins *instruction) { + // Offsets are multiples of two, so accept 21 bit immediates for the + // 20 bit slot. We implicitly drop the least significant bit in encodeJ. 
+ wantEvenOffset(ctxt, ins.as, ins.imm) + wantImmI(ctxt, ins.as, ins.imm, 21) + wantIntReg(ctxt, ins.as, "rd", ins.rd) + wantNoneReg(ctxt, ins.as, "rs1", ins.rs1) + wantNoneReg(ctxt, ins.as, "rs2", ins.rs2) + wantNoneReg(ctxt, ins.as, "rs3", ins.rs3) +} + +func validateRaw(ctxt *obj.Link, ins *instruction) { + // Treat the raw value specially as a 32-bit unsigned integer. + // Nobody wants to enter negative machine code. + if ins.imm < 0 || 1<<32 <= ins.imm { + ctxt.Diag("%v: immediate %d in raw position cannot be larger than 32 bits", ins.as, ins.imm) + } +} + +// encodeR encodes an R-type RISC-V instruction. +func encodeR(as obj.As, rs1, rs2, rd, funct3, funct7 uint32) uint32 { + enc := encode(as) + if enc == nil { + panic("encodeR: could not encode instruction") + } + if enc.rs2 != 0 && rs2 != 0 { + panic("encodeR: instruction uses rs2, but rs2 was nonzero") + } + return funct7<<25 | enc.funct7<<25 | enc.rs2<<20 | rs2<<20 | rs1<<15 | enc.funct3<<12 | funct3<<12 | rd<<7 | enc.opcode +} + +// encodeR4 encodes an R4-type RISC-V instruction. 
+func encodeR4(as obj.As, rs1, rs2, rs3, rd, funct3, funct2 uint32) uint32 { + enc := encode(as) + if enc == nil { + panic("encodeR4: could not encode instruction") + } + if enc.rs2 != 0 { + panic("encodeR4: instruction uses rs2") + } + funct2 |= enc.funct7 + if funct2&^3 != 0 { + panic("encodeR4: funct2 requires more than 2 bits") + } + return rs3<<27 | funct2<<25 | rs2<<20 | rs1<<15 | enc.funct3<<12 | funct3<<12 | rd<<7 | enc.opcode +} + +func encodeRIII(ins *instruction) uint32 { + return encodeR(ins.as, regI(ins.rs1), regI(ins.rs2), regI(ins.rd), ins.funct3, ins.funct7) +} + +func encodeRFFF(ins *instruction) uint32 { + return encodeR(ins.as, regF(ins.rs1), regF(ins.rs2), regF(ins.rd), ins.funct3, ins.funct7) +} + +func encodeRFFFF(ins *instruction) uint32 { + return encodeR4(ins.as, regF(ins.rs1), regF(ins.rs2), regF(ins.rs3), regF(ins.rd), ins.funct3, ins.funct7) +} + +func encodeRFFI(ins *instruction) uint32 { + return encodeR(ins.as, regF(ins.rs1), regF(ins.rs2), regI(ins.rd), ins.funct3, ins.funct7) +} + +func encodeRFI(ins *instruction) uint32 { + return encodeR(ins.as, regF(ins.rs2), 0, regI(ins.rd), ins.funct3, ins.funct7) +} + +func encodeRIF(ins *instruction) uint32 { + return encodeR(ins.as, regI(ins.rs2), 0, regF(ins.rd), ins.funct3, ins.funct7) +} + +func encodeRFF(ins *instruction) uint32 { + return encodeR(ins.as, regF(ins.rs2), 0, regF(ins.rd), ins.funct3, ins.funct7) +} + +// encodeI encodes an I-type RISC-V instruction. +func encodeI(as obj.As, rs1, rd, imm uint32) uint32 { + enc := encode(as) + if enc == nil { + panic("encodeI: could not encode instruction") + } + imm |= uint32(enc.csr) + return imm<<20 | rs1<<15 | enc.funct3<<12 | rd<<7 | enc.opcode +} + +func encodeII(ins *instruction) uint32 { + return encodeI(ins.as, regI(ins.rs1), regI(ins.rd), uint32(ins.imm)) +} + +func encodeIF(ins *instruction) uint32 { + return encodeI(ins.as, regI(ins.rs1), regF(ins.rd), uint32(ins.imm)) +} + +// encodeS encodes an S-type RISC-V instruction. 
+func encodeS(as obj.As, rs1, rs2, imm uint32) uint32 { + enc := encode(as) + if enc == nil { + panic("encodeS: could not encode instruction") + } + return (imm>>5)<<25 | rs2<<20 | rs1<<15 | enc.funct3<<12 | (imm&0x1f)<<7 | enc.opcode +} + +func encodeSI(ins *instruction) uint32 { + return encodeS(ins.as, regI(ins.rd), regI(ins.rs1), uint32(ins.imm)) +} + +func encodeSF(ins *instruction) uint32 { + return encodeS(ins.as, regI(ins.rd), regF(ins.rs1), uint32(ins.imm)) +} + +// encodeB encodes a B-type RISC-V instruction. +func encodeB(ins *instruction) uint32 { + imm := immI(ins.as, ins.imm, 13) + rs2 := regI(ins.rs1) + rs1 := regI(ins.rs2) + enc := encode(ins.as) + if enc == nil { + panic("encodeB: could not encode instruction") + } + return (imm>>12)<<31 | ((imm>>5)&0x3f)<<25 | rs2<<20 | rs1<<15 | enc.funct3<<12 | ((imm>>1)&0xf)<<8 | ((imm>>11)&0x1)<<7 | enc.opcode +} + +// encodeU encodes a U-type RISC-V instruction. +func encodeU(ins *instruction) uint32 { + // The immediates for encodeU are the upper 20 bits of a 32 bit value. + // Rather than have the user/compiler generate a 32 bit constant, the + // bottommost bits of which must all be zero, instead accept just the + // top bits. + imm := immI(ins.as, ins.imm, 20) + rd := regI(ins.rd) + enc := encode(ins.as) + if enc == nil { + panic("encodeU: could not encode instruction") + } + return imm<<12 | rd<<7 | enc.opcode +} + +// encodeJImmediate encodes an immediate for a J-type RISC-V instruction. +func encodeJImmediate(imm uint32) uint32 { + return (imm>>20)<<31 | ((imm>>1)&0x3ff)<<21 | ((imm>>11)&0x1)<<20 | ((imm>>12)&0xff)<<12 +} + +// encodeJ encodes a J-type RISC-V instruction. 
+func encodeJ(ins *instruction) uint32 { + imm := immI(ins.as, ins.imm, 21) + rd := regI(ins.rd) + enc := encode(ins.as) + if enc == nil { + panic("encodeJ: could not encode instruction") + } + return encodeJImmediate(imm) | rd<<7 | enc.opcode +} + +func encodeRawIns(ins *instruction) uint32 { + // Treat the raw value specially as a 32-bit unsigned integer. + // Nobody wants to enter negative machine code. + if ins.imm < 0 || 1<<32 <= ins.imm { + panic(fmt.Sprintf("immediate %d cannot fit in 32 bits", ins.imm)) + } + return uint32(ins.imm) +} + +func EncodeJImmediate(imm int64) (int64, error) { + if !immIFits(imm, 21) { + return 0, fmt.Errorf("immediate %#x does not fit in 21 bits", imm) + } + if imm&1 != 0 { + return 0, fmt.Errorf("immediate %#x is not a multiple of two", imm) + } + return int64(encodeJImmediate(uint32(imm))), nil +} + +func EncodeIImmediate(imm int64) (int64, error) { + if !immIFits(imm, 12) { + return 0, fmt.Errorf("immediate %#x does not fit in 12 bits", imm) + } + return imm << 20, nil +} + +func EncodeSImmediate(imm int64) (int64, error) { + if !immIFits(imm, 12) { + return 0, fmt.Errorf("immediate %#x does not fit in 12 bits", imm) + } + return ((imm >> 5) << 25) | ((imm & 0x1f) << 7), nil +} + +func EncodeUImmediate(imm int64) (int64, error) { + if !immIFits(imm, 20) { + return 0, fmt.Errorf("immediate %#x does not fit in 20 bits", imm) + } + return imm << 12, nil +} + +type encoding struct { + encode func(*instruction) uint32 // encode returns the machine code for an instruction + validate func(*obj.Link, *instruction) // validate validates an instruction + length int // length of encoded instruction; 0 for pseudo-ops, 4 otherwise +} + +var ( + // Encodings have the following naming convention: + // + // 1. the instruction encoding (R/I/S/B/U/J), in lowercase + // 2. zero or more register operand identifiers (I = integer + // register, F = float register), in uppercase + // 3. 
the word "Encoding" + // + // For example, rIIIEncoding indicates an R-type instruction with two + // integer register inputs and an integer register output; sFEncoding + // indicates an S-type instruction with rs2 being a float register. + + rIIIEncoding = encoding{encode: encodeRIII, validate: validateRIII, length: 4} + rFFFEncoding = encoding{encode: encodeRFFF, validate: validateRFFF, length: 4} + rFFFFEncoding = encoding{encode: encodeRFFFF, validate: validateRFFFF, length: 4} + rFFIEncoding = encoding{encode: encodeRFFI, validate: validateRFFI, length: 4} + rFIEncoding = encoding{encode: encodeRFI, validate: validateRFI, length: 4} + rIFEncoding = encoding{encode: encodeRIF, validate: validateRIF, length: 4} + rFFEncoding = encoding{encode: encodeRFF, validate: validateRFF, length: 4} + + iIEncoding = encoding{encode: encodeII, validate: validateII, length: 4} + iFEncoding = encoding{encode: encodeIF, validate: validateIF, length: 4} + + sIEncoding = encoding{encode: encodeSI, validate: validateSI, length: 4} + sFEncoding = encoding{encode: encodeSF, validate: validateSF, length: 4} + + bEncoding = encoding{encode: encodeB, validate: validateB, length: 4} + uEncoding = encoding{encode: encodeU, validate: validateU, length: 4} + jEncoding = encoding{encode: encodeJ, validate: validateJ, length: 4} + + // rawEncoding encodes a raw instruction byte sequence. + rawEncoding = encoding{encode: encodeRawIns, validate: validateRaw, length: 4} + + // pseudoOpEncoding panics if encoding is attempted, but does no validation. + pseudoOpEncoding = encoding{encode: nil, validate: func(*obj.Link, *instruction) {}, length: 0} + + // badEncoding is used when an invalid op is encountered. + // An error has already been generated, so let anything else through. + badEncoding = encoding{encode: func(*instruction) uint32 { return 0 }, validate: func(*obj.Link, *instruction) {}, length: 0} +) + +// encodings contains the encodings for RISC-V instructions. 
+// Instructions are masked with obj.AMask to keep indices small. +var encodings = [ALAST & obj.AMask]encoding{ + + // Unprivileged ISA + + // 2.4: Integer Computational Instructions + AADDI & obj.AMask: iIEncoding, + ASLTI & obj.AMask: iIEncoding, + ASLTIU & obj.AMask: iIEncoding, + AANDI & obj.AMask: iIEncoding, + AORI & obj.AMask: iIEncoding, + AXORI & obj.AMask: iIEncoding, + ASLLI & obj.AMask: iIEncoding, + ASRLI & obj.AMask: iIEncoding, + ASRAI & obj.AMask: iIEncoding, + ALUI & obj.AMask: uEncoding, + AAUIPC & obj.AMask: uEncoding, + AADD & obj.AMask: rIIIEncoding, + ASLT & obj.AMask: rIIIEncoding, + ASLTU & obj.AMask: rIIIEncoding, + AAND & obj.AMask: rIIIEncoding, + AOR & obj.AMask: rIIIEncoding, + AXOR & obj.AMask: rIIIEncoding, + ASLL & obj.AMask: rIIIEncoding, + ASRL & obj.AMask: rIIIEncoding, + ASUB & obj.AMask: rIIIEncoding, + ASRA & obj.AMask: rIIIEncoding, + + // 2.5: Control Transfer Instructions + AJAL & obj.AMask: jEncoding, + AJALR & obj.AMask: iIEncoding, + ABEQ & obj.AMask: bEncoding, + ABNE & obj.AMask: bEncoding, + ABLT & obj.AMask: bEncoding, + ABLTU & obj.AMask: bEncoding, + ABGE & obj.AMask: bEncoding, + ABGEU & obj.AMask: bEncoding, + + // 2.6: Load and Store Instructions + ALW & obj.AMask: iIEncoding, + ALWU & obj.AMask: iIEncoding, + ALH & obj.AMask: iIEncoding, + ALHU & obj.AMask: iIEncoding, + ALB & obj.AMask: iIEncoding, + ALBU & obj.AMask: iIEncoding, + ASW & obj.AMask: sIEncoding, + ASH & obj.AMask: sIEncoding, + ASB & obj.AMask: sIEncoding, + + // 2.7: Memory Ordering + AFENCE & obj.AMask: iIEncoding, + + // 5.2: Integer Computational Instructions (RV64I) + AADDIW & obj.AMask: iIEncoding, + ASLLIW & obj.AMask: iIEncoding, + ASRLIW & obj.AMask: iIEncoding, + ASRAIW & obj.AMask: iIEncoding, + AADDW & obj.AMask: rIIIEncoding, + ASLLW & obj.AMask: rIIIEncoding, + ASRLW & obj.AMask: rIIIEncoding, + ASUBW & obj.AMask: rIIIEncoding, + ASRAW & obj.AMask: rIIIEncoding, + + // 5.3: Load and Store Instructions (RV64I) + ALD & obj.AMask: 
iIEncoding, + ASD & obj.AMask: sIEncoding, + + // 7.1: Multiplication Operations + AMUL & obj.AMask: rIIIEncoding, + AMULH & obj.AMask: rIIIEncoding, + AMULHU & obj.AMask: rIIIEncoding, + AMULHSU & obj.AMask: rIIIEncoding, + AMULW & obj.AMask: rIIIEncoding, + ADIV & obj.AMask: rIIIEncoding, + ADIVU & obj.AMask: rIIIEncoding, + AREM & obj.AMask: rIIIEncoding, + AREMU & obj.AMask: rIIIEncoding, + ADIVW & obj.AMask: rIIIEncoding, + ADIVUW & obj.AMask: rIIIEncoding, + AREMW & obj.AMask: rIIIEncoding, + AREMUW & obj.AMask: rIIIEncoding, + + // 8.2: Load-Reserved/Store-Conditional + ALRW & obj.AMask: rIIIEncoding, + ALRD & obj.AMask: rIIIEncoding, + ASCW & obj.AMask: rIIIEncoding, + ASCD & obj.AMask: rIIIEncoding, + + // 8.3: Atomic Memory Operations + AAMOSWAPW & obj.AMask: rIIIEncoding, + AAMOSWAPD & obj.AMask: rIIIEncoding, + AAMOADDW & obj.AMask: rIIIEncoding, + AAMOADDD & obj.AMask: rIIIEncoding, + AAMOANDW & obj.AMask: rIIIEncoding, + AAMOANDD & obj.AMask: rIIIEncoding, + AAMOORW & obj.AMask: rIIIEncoding, + AAMOORD & obj.AMask: rIIIEncoding, + AAMOXORW & obj.AMask: rIIIEncoding, + AAMOXORD & obj.AMask: rIIIEncoding, + AAMOMAXW & obj.AMask: rIIIEncoding, + AAMOMAXD & obj.AMask: rIIIEncoding, + AAMOMAXUW & obj.AMask: rIIIEncoding, + AAMOMAXUD & obj.AMask: rIIIEncoding, + AAMOMINW & obj.AMask: rIIIEncoding, + AAMOMIND & obj.AMask: rIIIEncoding, + AAMOMINUW & obj.AMask: rIIIEncoding, + AAMOMINUD & obj.AMask: rIIIEncoding, + + // 10.1: Base Counters and Timers + ARDCYCLE & obj.AMask: iIEncoding, + ARDTIME & obj.AMask: iIEncoding, + ARDINSTRET & obj.AMask: iIEncoding, + + // 11.5: Single-Precision Load and Store Instructions + AFLW & obj.AMask: iFEncoding, + AFSW & obj.AMask: sFEncoding, + + // 11.6: Single-Precision Floating-Point Computational Instructions + AFADDS & obj.AMask: rFFFEncoding, + AFSUBS & obj.AMask: rFFFEncoding, + AFMULS & obj.AMask: rFFFEncoding, + AFDIVS & obj.AMask: rFFFEncoding, + AFMINS & obj.AMask: rFFFEncoding, + AFMAXS & obj.AMask: rFFFEncoding, 
+ AFSQRTS & obj.AMask: rFFFEncoding, + AFMADDS & obj.AMask: rFFFFEncoding, + AFMSUBS & obj.AMask: rFFFFEncoding, + AFNMSUBS & obj.AMask: rFFFFEncoding, + AFNMADDS & obj.AMask: rFFFFEncoding, + + // 11.7: Single-Precision Floating-Point Conversion and Move Instructions + AFCVTWS & obj.AMask: rFIEncoding, + AFCVTLS & obj.AMask: rFIEncoding, + AFCVTSW & obj.AMask: rIFEncoding, + AFCVTSL & obj.AMask: rIFEncoding, + AFCVTWUS & obj.AMask: rFIEncoding, + AFCVTLUS & obj.AMask: rFIEncoding, + AFCVTSWU & obj.AMask: rIFEncoding, + AFCVTSLU & obj.AMask: rIFEncoding, + AFSGNJS & obj.AMask: rFFFEncoding, + AFSGNJNS & obj.AMask: rFFFEncoding, + AFSGNJXS & obj.AMask: rFFFEncoding, + AFMVXS & obj.AMask: rFIEncoding, + AFMVSX & obj.AMask: rIFEncoding, + AFMVXW & obj.AMask: rFIEncoding, + AFMVWX & obj.AMask: rIFEncoding, + + // 11.8: Single-Precision Floating-Point Compare Instructions + AFEQS & obj.AMask: rFFIEncoding, + AFLTS & obj.AMask: rFFIEncoding, + AFLES & obj.AMask: rFFIEncoding, + + // 11.9: Single-Precision Floating-Point Classify Instruction + AFCLASSS & obj.AMask: rFIEncoding, + + // 12.3: Double-Precision Load and Store Instructions + AFLD & obj.AMask: iFEncoding, + AFSD & obj.AMask: sFEncoding, + + // 12.4: Double-Precision Floating-Point Computational Instructions + AFADDD & obj.AMask: rFFFEncoding, + AFSUBD & obj.AMask: rFFFEncoding, + AFMULD & obj.AMask: rFFFEncoding, + AFDIVD & obj.AMask: rFFFEncoding, + AFMIND & obj.AMask: rFFFEncoding, + AFMAXD & obj.AMask: rFFFEncoding, + AFSQRTD & obj.AMask: rFFFEncoding, + AFMADDD & obj.AMask: rFFFFEncoding, + AFMSUBD & obj.AMask: rFFFFEncoding, + AFNMSUBD & obj.AMask: rFFFFEncoding, + AFNMADDD & obj.AMask: rFFFFEncoding, + + // 12.5: Double-Precision Floating-Point Conversion and Move Instructions + AFCVTWD & obj.AMask: rFIEncoding, + AFCVTLD & obj.AMask: rFIEncoding, + AFCVTDW & obj.AMask: rIFEncoding, + AFCVTDL & obj.AMask: rIFEncoding, + AFCVTWUD & obj.AMask: rFIEncoding, + AFCVTLUD & obj.AMask: rFIEncoding, + AFCVTDWU & 
obj.AMask: rIFEncoding, + AFCVTDLU & obj.AMask: rIFEncoding, + AFCVTSD & obj.AMask: rFFEncoding, + AFCVTDS & obj.AMask: rFFEncoding, + AFSGNJD & obj.AMask: rFFFEncoding, + AFSGNJND & obj.AMask: rFFFEncoding, + AFSGNJXD & obj.AMask: rFFFEncoding, + AFMVXD & obj.AMask: rFIEncoding, + AFMVDX & obj.AMask: rIFEncoding, + + // 12.6: Double-Precision Floating-Point Compare Instructions + AFEQD & obj.AMask: rFFIEncoding, + AFLTD & obj.AMask: rFFIEncoding, + AFLED & obj.AMask: rFFIEncoding, + + // 12.7: Double-Precision Floating-Point Classify Instruction + AFCLASSD & obj.AMask: rFIEncoding, + + // Privileged ISA + + // 3.2.1: Environment Call and Breakpoint + AECALL & obj.AMask: iIEncoding, + AEBREAK & obj.AMask: iIEncoding, + + // Escape hatch + AWORD & obj.AMask: rawEncoding, + + // Pseudo-operations + obj.AFUNCDATA: pseudoOpEncoding, + obj.APCDATA: pseudoOpEncoding, + obj.ATEXT: pseudoOpEncoding, + obj.ANOP: pseudoOpEncoding, + obj.ADUFFZERO: pseudoOpEncoding, + obj.ADUFFCOPY: pseudoOpEncoding, +} + +// encodingForAs returns the encoding for an obj.As. 
+func encodingForAs(as obj.As) (encoding, error) { + if base := as &^ obj.AMask; base != obj.ABaseRISCV && base != 0 { + return badEncoding, fmt.Errorf("encodingForAs: not a RISC-V instruction %s", as) + } + asi := as & obj.AMask + if int(asi) >= len(encodings) { + return badEncoding, fmt.Errorf("encodingForAs: bad RISC-V instruction %s", as) + } + enc := encodings[asi] + if enc.validate == nil { + return badEncoding, fmt.Errorf("encodingForAs: no encoding for instruction %s", as) + } + return enc, nil +} + +type instruction struct { + as obj.As // Assembler opcode + rd uint32 // Destination register + rs1 uint32 // Source register 1 + rs2 uint32 // Source register 2 + rs3 uint32 // Source register 3 + imm int64 // Immediate + funct3 uint32 // Function 3 + funct7 uint32 // Function 7 (or Function 2) +} + +func (ins *instruction) encode() (uint32, error) { + enc, err := encodingForAs(ins.as) + if err != nil { + return 0, err + } + if enc.length > 0 { + return enc.encode(ins), nil + } + return 0, fmt.Errorf("fixme") +} + +func (ins *instruction) length() int { + enc, err := encodingForAs(ins.as) + if err != nil { + return 0 + } + return enc.length +} + +func (ins *instruction) validate(ctxt *obj.Link) { + enc, err := encodingForAs(ins.as) + if err != nil { + ctxt.Diag(err.Error()) + return + } + enc.validate(ctxt, ins) +} + +func (ins *instruction) usesRegTmp() bool { + return ins.rd == REG_TMP || ins.rs1 == REG_TMP || ins.rs2 == REG_TMP +} + +// instructionForProg returns the default *obj.Prog to instruction mapping. +func instructionForProg(p *obj.Prog) *instruction { + ins := &instruction{ + as: p.As, + rd: uint32(p.To.Reg), + rs1: uint32(p.Reg), + rs2: uint32(p.From.Reg), + imm: p.From.Offset, + } + if len(p.RestArgs) == 1 { + ins.rs3 = uint32(p.RestArgs[0].Reg) + } + return ins +} + +// instructionsForOpImmediate returns the machine instructions for a immedate +// operand. 
The instruction is specified by as and the source register is +// specified by rs, instead of the obj.Prog. +func instructionsForOpImmediate(p *obj.Prog, as obj.As, rs int16) []*instruction { + // <opi> $imm, REG, TO + ins := instructionForProg(p) + ins.as, ins.rs1, ins.rs2 = as, uint32(rs), obj.REG_NONE + + low, high, err := Split32BitImmediate(ins.imm) + if err != nil { + p.Ctxt.Diag("%v: constant %d too large", p, ins.imm, err) + return nil + } + if high == 0 { + return []*instruction{ins} + } + + // Split into two additions, if possible. + // Do not split SP-writing instructions, as otherwise the recorded SP delta may be wrong. + if p.Spadj == 0 && ins.as == AADDI && ins.imm >= -(1<<12) && ins.imm < 1<<12-1 { + imm0 := ins.imm / 2 + imm1 := ins.imm - imm0 + + // ADDI $(imm/2), REG, TO + // ADDI $(imm-imm/2), TO, TO + ins.imm = imm0 + insADDI := &instruction{as: AADDI, rd: ins.rd, rs1: ins.rd, imm: imm1} + return []*instruction{ins, insADDI} + } + + // LUI $high, TMP + // ADDIW $low, TMP, TMP + // <op> TMP, REG, TO + insLUI := &instruction{as: ALUI, rd: REG_TMP, imm: high} + insADDIW := &instruction{as: AADDIW, rd: REG_TMP, rs1: REG_TMP, imm: low} + switch ins.as { + case AADDI: + ins.as = AADD + case AANDI: + ins.as = AAND + case AORI: + ins.as = AOR + case AXORI: + ins.as = AXOR + default: + p.Ctxt.Diag("unsupported immediate instruction %v for splitting", p) + return nil + } + ins.rs2 = REG_TMP + if low == 0 { + return []*instruction{insLUI, ins} + } + return []*instruction{insLUI, insADDIW, ins} +} + +// instructionsForLoad returns the machine instructions for a load. The load +// instruction is specified by as and the base/source register is specified +// by rs, instead of the obj.Prog. 
+func instructionsForLoad(p *obj.Prog, as obj.As, rs int16) []*instruction { + if p.From.Type != obj.TYPE_MEM { + p.Ctxt.Diag("%v requires memory for source", p) + return nil + } + + switch as { + case ALD, ALB, ALH, ALW, ALBU, ALHU, ALWU, AFLW, AFLD: + default: + p.Ctxt.Diag("%v: unknown load instruction %v", p, as) + return nil + } + + // <load> $imm, REG, TO (load $imm+(REG), TO) + ins := instructionForProg(p) + ins.as, ins.rs1, ins.rs2 = as, uint32(rs), obj.REG_NONE + ins.imm = p.From.Offset + + low, high, err := Split32BitImmediate(ins.imm) + if err != nil { + p.Ctxt.Diag("%v: constant %d too large", p, ins.imm) + return nil + } + if high == 0 { + return []*instruction{ins} + } + + // LUI $high, TMP + // ADD TMP, REG, TMP + // <load> $low, TMP, TO + insLUI := &instruction{as: ALUI, rd: REG_TMP, imm: high} + insADD := &instruction{as: AADD, rd: REG_TMP, rs1: REG_TMP, rs2: ins.rs1} + ins.rs1, ins.imm = REG_TMP, low + + return []*instruction{insLUI, insADD, ins} +} + +// instructionsForStore returns the machine instructions for a store. The store +// instruction is specified by as and the target/source register is specified +// by rd, instead of the obj.Prog. 
+func instructionsForStore(p *obj.Prog, as obj.As, rd int16) []*instruction { + if p.To.Type != obj.TYPE_MEM { + p.Ctxt.Diag("%v requires memory for destination", p) + return nil + } + + switch as { + case ASW, ASH, ASB, ASD, AFSW, AFSD: + default: + p.Ctxt.Diag("%v: unknown store instruction %v", p, as) + return nil + } + + // <store> $imm, REG, TO (store $imm+(TO), REG) + ins := instructionForProg(p) + ins.as, ins.rd, ins.rs1, ins.rs2 = as, uint32(rd), uint32(p.From.Reg), obj.REG_NONE + ins.imm = p.To.Offset + + low, high, err := Split32BitImmediate(ins.imm) + if err != nil { + p.Ctxt.Diag("%v: constant %d too large", p, ins.imm) + return nil + } + if high == 0 { + return []*instruction{ins} + } + + // LUI $high, TMP + // ADD TMP, TO, TMP + // <store> $low, REG, TMP + insLUI := &instruction{as: ALUI, rd: REG_TMP, imm: high} + insADD := &instruction{as: AADD, rd: REG_TMP, rs1: REG_TMP, rs2: ins.rd} + ins.rd, ins.imm = REG_TMP, low + + return []*instruction{insLUI, insADD, ins} +} + +// instructionsForMOV returns the machine instructions for an *obj.Prog that +// uses a MOV pseudo-instruction. +func instructionsForMOV(p *obj.Prog) []*instruction { + ins := instructionForProg(p) + inss := []*instruction{ins} + + if p.Reg != 0 { + p.Ctxt.Diag("%v: illegal MOV instruction", p) + return nil + } + + switch { + case p.From.Type == obj.TYPE_CONST && p.To.Type == obj.TYPE_REG: + // Handle constant to register moves. + if p.As != AMOV { + p.Ctxt.Diag("%v: unsupported constant load", p) + return nil + } + + low, high, err := Split32BitImmediate(ins.imm) + if err != nil { + p.Ctxt.Diag("%v: constant %d too large: %v", p, ins.imm, err) + return nil + } + + // MOV $c, R -> ADD $c, ZERO, R + ins.as, ins.rs1, ins.rs2, ins.imm = AADDI, REG_ZERO, obj.REG_NONE, low + + // LUI is only necessary if the constant does not fit in 12 bits. 
+ if high == 0 { + break + } + + // LUI top20bits(c), R + // ADD bottom12bits(c), R, R + insLUI := &instruction{as: ALUI, rd: ins.rd, imm: high} + inss = []*instruction{insLUI} + if low != 0 { + ins.as, ins.rs1 = AADDIW, ins.rd + inss = append(inss, ins) + } + + case p.From.Type == obj.TYPE_CONST && p.To.Type != obj.TYPE_REG: + p.Ctxt.Diag("%v: constant load must target register", p) + return nil + + case p.From.Type == obj.TYPE_REG && p.To.Type == obj.TYPE_REG: + // Handle register to register moves. + switch p.As { + case AMOV: // MOV Ra, Rb -> ADDI $0, Ra, Rb + ins.as, ins.rs1, ins.rs2, ins.imm = AADDI, uint32(p.From.Reg), obj.REG_NONE, 0 + case AMOVW: // MOVW Ra, Rb -> ADDIW $0, Ra, Rb + ins.as, ins.rs1, ins.rs2, ins.imm = AADDIW, uint32(p.From.Reg), obj.REG_NONE, 0 + case AMOVBU: // MOVBU Ra, Rb -> ANDI $255, Ra, Rb + ins.as, ins.rs1, ins.rs2, ins.imm = AANDI, uint32(p.From.Reg), obj.REG_NONE, 255 + case AMOVF: // MOVF Ra, Rb -> FSGNJS Ra, Ra, Rb + ins.as, ins.rs1 = AFSGNJS, uint32(p.From.Reg) + case AMOVD: // MOVD Ra, Rb -> FSGNJD Ra, Ra, Rb + ins.as, ins.rs1 = AFSGNJD, uint32(p.From.Reg) + case AMOVB, AMOVH: + // Use SLLI/SRAI to extend. + ins.as, ins.rs1, ins.rs2 = ASLLI, uint32(p.From.Reg), obj.REG_NONE + if p.As == AMOVB { + ins.imm = 56 + } else if p.As == AMOVH { + ins.imm = 48 + } + ins2 := &instruction{as: ASRAI, rd: ins.rd, rs1: ins.rd, imm: ins.imm} + inss = append(inss, ins2) + case AMOVHU, AMOVWU: + // Use SLLI/SRLI to extend. + ins.as, ins.rs1, ins.rs2 = ASLLI, uint32(p.From.Reg), obj.REG_NONE + if p.As == AMOVHU { + ins.imm = 48 + } else if p.As == AMOVWU { + ins.imm = 32 + } + ins2 := &instruction{as: ASRLI, rd: ins.rd, rs1: ins.rd, imm: ins.imm} + inss = append(inss, ins2) + } + + case p.From.Type == obj.TYPE_MEM && p.To.Type == obj.TYPE_REG: + // Memory to register loads. 
+ switch p.From.Name { + case obj.NAME_AUTO, obj.NAME_PARAM, obj.NAME_NONE: + // MOV c(Rs), Rd -> L $c, Rs, Rd + inss = instructionsForLoad(p, movToLoad(p.As), addrToReg(p.From)) + + case obj.NAME_EXTERN, obj.NAME_STATIC: + // Note that the values for $off_hi and $off_lo are currently + // zero and will be assigned during relocation. + // + // AUIPC $off_hi, Rd + // L $off_lo, Rd, Rd + insAUIPC := &instruction{as: AAUIPC, rd: ins.rd} + ins.as, ins.rs1, ins.rs2, ins.imm = movToLoad(p.As), ins.rd, obj.REG_NONE, 0 + inss = []*instruction{insAUIPC, ins} + + default: + p.Ctxt.Diag("unsupported name %d for %v", p.From.Name, p) + return nil + } + + case p.From.Type == obj.TYPE_REG && p.To.Type == obj.TYPE_MEM: + // Register to memory stores. + switch p.As { + case AMOVBU, AMOVHU, AMOVWU: + p.Ctxt.Diag("%v: unsupported unsigned store", p) + return nil + } + switch p.To.Name { + case obj.NAME_AUTO, obj.NAME_PARAM, obj.NAME_NONE: + // MOV Rs, c(Rd) -> S $c, Rs, Rd + inss = instructionsForStore(p, movToStore(p.As), addrToReg(p.To)) + + case obj.NAME_EXTERN, obj.NAME_STATIC: + // Note that the values for $off_hi and $off_lo are currently + // zero and will be assigned during relocation. 
+ // + // AUIPC $off_hi, Rtmp + // S $off_lo, Rtmp, Rd + insAUIPC := &instruction{as: AAUIPC, rd: REG_TMP} + ins.as, ins.rd, ins.rs1, ins.rs2, ins.imm = movToStore(p.As), REG_TMP, uint32(p.From.Reg), obj.REG_NONE, 0 + inss = []*instruction{insAUIPC, ins} + + default: + p.Ctxt.Diag("unsupported name %d for %v", p.From.Name, p) + return nil + } + + case p.From.Type == obj.TYPE_ADDR && p.To.Type == obj.TYPE_REG: + // MOV $sym+off(SP/SB), R + if p.As != AMOV { + p.Ctxt.Diag("%v: unsupported address load", p) + return nil + } + switch p.From.Name { + case obj.NAME_AUTO, obj.NAME_PARAM, obj.NAME_NONE: + inss = instructionsForOpImmediate(p, AADDI, addrToReg(p.From)) + + case obj.NAME_EXTERN, obj.NAME_STATIC: + // Note that the values for $off_hi and $off_lo are currently + // zero and will be assigned during relocation. + // + // AUIPC $off_hi, R + // ADDI $off_lo, R + insAUIPC := &instruction{as: AAUIPC, rd: ins.rd} + ins.as, ins.rs1, ins.rs2, ins.imm = AADDI, ins.rd, obj.REG_NONE, 0 + inss = []*instruction{insAUIPC, ins} + + default: + p.Ctxt.Diag("unsupported name %d for %v", p.From.Name, p) + return nil + } + + case p.From.Type == obj.TYPE_ADDR && p.To.Type != obj.TYPE_REG: + p.Ctxt.Diag("%v: address load must target register", p) + return nil + + default: + p.Ctxt.Diag("%v: unsupported MOV", p) + return nil + } + + return inss +} + +// instructionsForProg returns the machine instructions for an *obj.Prog. 
+func instructionsForProg(p *obj.Prog) []*instruction { + ins := instructionForProg(p) + inss := []*instruction{ins} + + if len(p.RestArgs) > 1 { + p.Ctxt.Diag("too many source registers") + return nil + } + + switch ins.as { + case AJAL, AJALR: + ins.rd, ins.rs1, ins.rs2 = uint32(p.From.Reg), uint32(p.To.Reg), obj.REG_NONE + ins.imm = p.To.Offset + + case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ: + switch ins.as { + case ABEQZ: + ins.as, ins.rs1, ins.rs2 = ABEQ, REG_ZERO, uint32(p.From.Reg) + case ABGEZ: + ins.as, ins.rs1, ins.rs2 = ABGE, REG_ZERO, uint32(p.From.Reg) + case ABGT: + ins.as, ins.rs1, ins.rs2 = ABLT, uint32(p.From.Reg), uint32(p.Reg) + case ABGTU: + ins.as, ins.rs1, ins.rs2 = ABLTU, uint32(p.From.Reg), uint32(p.Reg) + case ABGTZ: + ins.as, ins.rs1, ins.rs2 = ABLT, uint32(p.From.Reg), REG_ZERO + case ABLE: + ins.as, ins.rs1, ins.rs2 = ABGE, uint32(p.From.Reg), uint32(p.Reg) + case ABLEU: + ins.as, ins.rs1, ins.rs2 = ABGEU, uint32(p.From.Reg), uint32(p.Reg) + case ABLEZ: + ins.as, ins.rs1, ins.rs2 = ABGE, uint32(p.From.Reg), REG_ZERO + case ABLTZ: + ins.as, ins.rs1, ins.rs2 = ABLT, REG_ZERO, uint32(p.From.Reg) + case ABNEZ: + ins.as, ins.rs1, ins.rs2 = ABNE, REG_ZERO, uint32(p.From.Reg) + } + ins.imm = p.To.Offset + + case AMOV, AMOVB, AMOVH, AMOVW, AMOVBU, AMOVHU, AMOVWU, AMOVF, AMOVD: + return instructionsForMOV(p) + + case ALW, ALWU, ALH, ALHU, ALB, ALBU, ALD, AFLW, AFLD: + return instructionsForLoad(p, ins.as, p.From.Reg) + + case ASW, ASH, ASB, ASD, AFSW, AFSD: + return instructionsForStore(p, ins.as, p.To.Reg) + + case ALRW, ALRD: + // Set aq to use acquire access ordering + ins.funct7 = 2 + ins.rs1, ins.rs2 = uint32(p.From.Reg), REG_ZERO + + case AADDI, AANDI, AORI, AXORI: + inss = instructionsForOpImmediate(p, ins.as, p.Reg) + + case ASCW, ASCD: + // Set release access ordering + ins.funct7 = 1 + ins.rd, ins.rs1, ins.rs2 = uint32(p.RegTo2), uint32(p.To.Reg), uint32(p.From.Reg) + + 
case AAMOSWAPW, AAMOSWAPD, AAMOADDW, AAMOADDD, AAMOANDW, AAMOANDD, AAMOORW, AAMOORD, + AAMOXORW, AAMOXORD, AAMOMINW, AAMOMIND, AAMOMINUW, AAMOMINUD, AAMOMAXW, AAMOMAXD, AAMOMAXUW, AAMOMAXUD: + // Set aqrl to use acquire & release access ordering + ins.funct7 = 3 + ins.rd, ins.rs1, ins.rs2 = uint32(p.RegTo2), uint32(p.To.Reg), uint32(p.From.Reg) + + case AECALL, AEBREAK, ARDCYCLE, ARDTIME, ARDINSTRET: + insEnc := encode(p.As) + if p.To.Type == obj.TYPE_NONE { + ins.rd = REG_ZERO + } + ins.rs1 = REG_ZERO + ins.imm = insEnc.csr + + case AFENCE: + ins.rd, ins.rs1, ins.rs2 = REG_ZERO, REG_ZERO, obj.REG_NONE + ins.imm = 0x0ff + + case AFCVTWS, AFCVTLS, AFCVTWUS, AFCVTLUS, AFCVTWD, AFCVTLD, AFCVTWUD, AFCVTLUD: + // Set the rounding mode in funct3 to round to zero. + ins.funct3 = 1 + + case AFNES, AFNED: + // Replace FNE[SD] with FEQ[SD] and NOT. + if p.To.Type != obj.TYPE_REG { + p.Ctxt.Diag("%v needs an integer register output", ins.as) + return nil + } + if ins.as == AFNES { + ins.as = AFEQS + } else { + ins.as = AFEQD + } + ins2 := &instruction{ + as: AXORI, // [bit] xor 1 = not [bit] + rd: ins.rd, + rs1: ins.rd, + imm: 1, + } + inss = append(inss, ins2) + + case AFSQRTS, AFSQRTD: + // These instructions expect a zero (i.e. float register 0) + // to be the second input operand. + ins.rs1 = uint32(p.From.Reg) + ins.rs2 = REG_F0 + + case AFMADDS, AFMSUBS, AFNMADDS, AFNMSUBS, + AFMADDD, AFMSUBD, AFNMADDD, AFNMSUBD: + // Swap the first two operands so that the operands are in the same + // order as they are in the specification: RS1, RS2, RS3, RD. 
+ ins.rs1, ins.rs2 = ins.rs2, ins.rs1 + + case ANEG, ANEGW: + // NEG rs, rd -> SUB rs, X0, rd + ins.as = ASUB + if p.As == ANEGW { + ins.as = ASUBW + } + ins.rs1 = REG_ZERO + if ins.rd == obj.REG_NONE { + ins.rd = ins.rs2 + } + + case ANOT: + // NOT rs, rd -> XORI $-1, rs, rd + ins.as = AXORI + ins.rs1, ins.rs2 = uint32(p.From.Reg), obj.REG_NONE + if ins.rd == obj.REG_NONE { + ins.rd = ins.rs1 + } + ins.imm = -1 + + case ASEQZ: + // SEQZ rs, rd -> SLTIU $1, rs, rd + ins.as = ASLTIU + ins.rs1, ins.rs2 = uint32(p.From.Reg), obj.REG_NONE + ins.imm = 1 + + case ASNEZ: + // SNEZ rs, rd -> SLTU rs, x0, rd + ins.as = ASLTU + ins.rs1 = REG_ZERO + + case AFABSS: + // FABSS rs, rd -> FSGNJXS rs, rs, rd + ins.as = AFSGNJXS + ins.rs1 = uint32(p.From.Reg) + + case AFABSD: + // FABSD rs, rd -> FSGNJXD rs, rs, rd + ins.as = AFSGNJXD + ins.rs1 = uint32(p.From.Reg) + + case AFNEGS: + // FNEGS rs, rd -> FSGNJNS rs, rs, rd + ins.as = AFSGNJNS + ins.rs1 = uint32(p.From.Reg) + + case AFNEGD: + // FNEGD rs, rd -> FSGNJND rs, rs, rd + ins.as = AFSGNJND + ins.rs1 = uint32(p.From.Reg) + } + return inss +} + +// assemble emits machine code. +// It is called at the very end of the assembly process. 
+func assemble(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { + if ctxt.Retpoline { + ctxt.Diag("-spectre=ret not supported on riscv") + ctxt.Retpoline = false // don't keep printing + } + + for p := cursym.Func().Text; p != nil; p = p.Link { + switch p.As { + case AJAL: + if p.Mark&NEED_CALL_RELOC == NEED_CALL_RELOC { + rel := obj.Addrel(cursym) + rel.Off = int32(p.Pc) + rel.Siz = 4 + rel.Sym = p.To.Sym + rel.Add = p.To.Offset + rel.Type = objabi.R_RISCV_CALL + } + case AJALR: + if p.To.Sym != nil { + ctxt.Diag("%v: unexpected AJALR with to symbol", p) + } + + case AAUIPC, AMOV, AMOVB, AMOVH, AMOVW, AMOVBU, AMOVHU, AMOVWU, AMOVF, AMOVD: + var addr *obj.Addr + var rt objabi.RelocType + if p.Mark&NEED_PCREL_ITYPE_RELOC == NEED_PCREL_ITYPE_RELOC { + rt = objabi.R_RISCV_PCREL_ITYPE + addr = &p.From + } else if p.Mark&NEED_PCREL_STYPE_RELOC == NEED_PCREL_STYPE_RELOC { + rt = objabi.R_RISCV_PCREL_STYPE + addr = &p.To + } else { + break + } + if p.As == AAUIPC { + if p.Link == nil { + ctxt.Diag("AUIPC needing PC-relative reloc missing following instruction") + break + } + addr = &p.RestArgs[0].Addr + } + if addr.Sym == nil { + ctxt.Diag("PC-relative relocation missing symbol") + break + } + if addr.Sym.Type == objabi.STLSBSS { + if rt == objabi.R_RISCV_PCREL_ITYPE { + rt = objabi.R_RISCV_TLS_IE_ITYPE + } else if rt == objabi.R_RISCV_PCREL_STYPE { + rt = objabi.R_RISCV_TLS_IE_STYPE + } + } + + rel := obj.Addrel(cursym) + rel.Off = int32(p.Pc) + rel.Siz = 8 + rel.Sym = addr.Sym + rel.Add = addr.Offset + rel.Type = rt + } + + offset := p.Pc + for _, ins := range instructionsForProg(p) { + if ic, err := ins.encode(); err == nil { + cursym.WriteInt(ctxt, offset, ins.length(), int64(ic)) + offset += int64(ins.length()) + } + if ins.usesRegTmp() { + p.Mark |= USES_REG_TMP + } + } + } + + obj.MarkUnsafePoints(ctxt, cursym.Func().Text, newprog, isUnsafePoint, nil) +} + +func isUnsafePoint(p *obj.Prog) bool { + return p.Mark&USES_REG_TMP == USES_REG_TMP || p.From.Reg 
== REG_TMP || p.To.Reg == REG_TMP || p.Reg == REG_TMP +} + +var LinkRISCV64 = obj.LinkArch{ + Arch: sys.ArchRISCV64, + Init: buildop, + Preprocess: preprocess, + Assemble: assemble, + Progedit: progedit, + UnaryDst: unaryDst, + DWARFRegisters: RISCV64DWARFRegisters, +} diff --git a/src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.go b/src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.go new file mode 100644 index 0000000..843398d --- /dev/null +++ b/src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.go @@ -0,0 +1,133 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build riscv64 +// +build riscv64 + +package testbranch + +import ( + "testing" +) + +func testBEQZ(a int64) (r bool) +func testBGE(a, b int64) (r bool) +func testBGEU(a, b int64) (r bool) +func testBGEZ(a int64) (r bool) +func testBGT(a, b int64) (r bool) +func testBGTU(a, b int64) (r bool) +func testBGTZ(a int64) (r bool) +func testBLE(a, b int64) (r bool) +func testBLEU(a, b int64) (r bool) +func testBLEZ(a int64) (r bool) +func testBLT(a, b int64) (r bool) +func testBLTU(a, b int64) (r bool) +func testBLTZ(a int64) (r bool) +func testBNEZ(a int64) (r bool) + +func testGoBGE(a, b int64) bool { return a >= b } +func testGoBGEU(a, b int64) bool { return uint64(a) >= uint64(b) } +func testGoBGT(a, b int64) bool { return a > b } +func testGoBGTU(a, b int64) bool { return uint64(a) > uint64(b) } +func testGoBLE(a, b int64) bool { return a <= b } +func testGoBLEU(a, b int64) bool { return uint64(a) <= uint64(b) } +func testGoBLT(a, b int64) bool { return a < b } +func testGoBLTU(a, b int64) bool { return uint64(a) < uint64(b) } + +func TestBranchCondition(t *testing.T) { + tests := []struct { + ins string + a int64 + b int64 + fn func(a, b int64) bool + goFn func(a, b int64) bool + want bool + }{ + {"BGE", 0, 1, testBGE, testGoBGE, false}, + {"BGE", 0, 0, 
testBGE, testGoBGE, true}, + {"BGE", 0, -1, testBGE, testGoBGE, true}, + {"BGE", -1, 0, testBGE, testGoBGE, false}, + {"BGE", 1, 0, testBGE, testGoBGE, true}, + {"BGEU", 0, 1, testBGEU, testGoBGEU, false}, + {"BGEU", 0, 0, testBGEU, testGoBGEU, true}, + {"BGEU", 0, -1, testBGEU, testGoBGEU, false}, + {"BGEU", -1, 0, testBGEU, testGoBGEU, true}, + {"BGEU", 1, 0, testBGEU, testGoBGEU, true}, + {"BGT", 0, 1, testBGT, testGoBGT, false}, + {"BGT", 0, 0, testBGT, testGoBGT, false}, + {"BGT", 0, -1, testBGT, testGoBGT, true}, + {"BGT", -1, 0, testBGT, testGoBGT, false}, + {"BGT", 1, 0, testBGT, testGoBGT, true}, + {"BGTU", 0, 1, testBGTU, testGoBGTU, false}, + {"BGTU", 0, 0, testBGTU, testGoBGTU, false}, + {"BGTU", 0, -1, testBGTU, testGoBGTU, false}, + {"BGTU", -1, 0, testBGTU, testGoBGTU, true}, + {"BGTU", 1, 0, testBGTU, testGoBGTU, true}, + {"BLE", 0, 1, testBLE, testGoBLE, true}, + {"BLE", 0, 0, testBLE, testGoBLE, true}, + {"BLE", 0, -1, testBLE, testGoBLE, false}, + {"BLE", -1, 0, testBLE, testGoBLE, true}, + {"BLE", 1, 0, testBLE, testGoBLE, false}, + {"BLEU", 0, 1, testBLEU, testGoBLEU, true}, + {"BLEU", 0, 0, testBLEU, testGoBLEU, true}, + {"BLEU", 0, -1, testBLEU, testGoBLEU, true}, + {"BLEU", -1, 0, testBLEU, testGoBLEU, false}, + {"BLEU", 1, 0, testBLEU, testGoBLEU, false}, + {"BLT", 0, 1, testBLT, testGoBLT, true}, + {"BLT", 0, 0, testBLT, testGoBLT, false}, + {"BLT", 0, -1, testBLT, testGoBLT, false}, + {"BLT", -1, 0, testBLT, testGoBLT, true}, + {"BLT", 1, 0, testBLT, testGoBLT, false}, + {"BLTU", 0, 1, testBLTU, testGoBLTU, true}, + {"BLTU", 0, 0, testBLTU, testGoBLTU, false}, + {"BLTU", 0, -1, testBLTU, testGoBLTU, true}, + {"BLTU", -1, 0, testBLTU, testGoBLTU, false}, + {"BLTU", 1, 0, testBLTU, testGoBLTU, false}, + } + for _, test := range tests { + t.Run(test.ins, func(t *testing.T) { + if got := test.fn(test.a, test.b); got != test.want { + t.Errorf("Assembly %v %v, %v = %v, want %v", test.ins, test.a, test.b, got, test.want) + } + if got := 
test.goFn(test.a, test.b); got != test.want { + t.Errorf("Go %v %v, %v = %v, want %v", test.ins, test.a, test.b, got, test.want) + } + }) + } +} + +func TestBranchZero(t *testing.T) { + tests := []struct { + ins string + a int64 + fn func(a int64) bool + want bool + }{ + {"BEQZ", -1, testBEQZ, false}, + {"BEQZ", 0, testBEQZ, true}, + {"BEQZ", 1, testBEQZ, false}, + {"BGEZ", -1, testBGEZ, false}, + {"BGEZ", 0, testBGEZ, true}, + {"BGEZ", 1, testBGEZ, true}, + {"BGTZ", -1, testBGTZ, false}, + {"BGTZ", 0, testBGTZ, false}, + {"BGTZ", 1, testBGTZ, true}, + {"BLEZ", -1, testBLEZ, true}, + {"BLEZ", 0, testBLEZ, true}, + {"BLEZ", 1, testBLEZ, false}, + {"BLTZ", -1, testBLTZ, true}, + {"BLTZ", 0, testBLTZ, false}, + {"BLTZ", 1, testBLTZ, false}, + {"BNEZ", -1, testBNEZ, true}, + {"BNEZ", 0, testBNEZ, false}, + {"BNEZ", 1, testBNEZ, true}, + } + for _, test := range tests { + t.Run(test.ins, func(t *testing.T) { + if got := test.fn(test.a); got != test.want { + t.Errorf("%v %v = %v, want %v", test.ins, test.a, got, test.want) + } + }) + } +} diff --git a/src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.s b/src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.s new file mode 100644 index 0000000..d7141e3 --- /dev/null +++ b/src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.s @@ -0,0 +1,156 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 

//go:build riscv64
// +build riscv64

#include "textflag.h"

// Every stub follows the same pattern: load the argument(s), preset the
// result register to 1, execute the branch under test, and clear the result
// register on the fall-through path. The result is therefore 1 exactly when
// the branch was taken.

// func testBEQZ(a int64) (r bool)
TEXT ·testBEQZ(SB),NOSPLIT,$0-9
	MOV	a+0(FP), X5
	MOV	$1, X6		// assume taken
	BEQZ	X5, taken
	MOV	$0, X6		// fell through: not taken
taken:
	MOV	X6, r+8(FP)
	RET

// func testBGE(a, b int64) (r bool)
TEXT ·testBGE(SB),NOSPLIT,$0-17
	MOV	a+0(FP), X5
	MOV	b+8(FP), X6
	MOV	$1, X7		// assume taken
	BGE	X5, X6, taken
	MOV	$0, X7		// fell through: not taken
taken:
	MOV	X7, r+16(FP)
	RET

// func testBGEU(a, b int64) (r bool)
TEXT ·testBGEU(SB),NOSPLIT,$0-17
	MOV	a+0(FP), X5
	MOV	b+8(FP), X6
	MOV	$1, X7		// assume taken
	BGEU	X5, X6, taken
	MOV	$0, X7		// fell through: not taken
taken:
	MOV	X7, r+16(FP)
	RET

// func testBGEZ(a int64) (r bool)
TEXT ·testBGEZ(SB),NOSPLIT,$0-9
	MOV	a+0(FP), X5
	MOV	$1, X6		// assume taken
	BGEZ	X5, taken
	MOV	$0, X6		// fell through: not taken
taken:
	MOV	X6, r+8(FP)
	RET

// func testBGT(a, b int64) (r bool)
TEXT ·testBGT(SB),NOSPLIT,$0-17
	MOV	a+0(FP), X5
	MOV	b+8(FP), X6
	MOV	$1, X7		// assume taken
	BGT	X5, X6, taken
	MOV	$0, X7		// fell through: not taken
taken:
	MOV	X7, r+16(FP)
	RET

// func testBGTU(a, b int64) (r bool)
TEXT ·testBGTU(SB),NOSPLIT,$0-17
	MOV	a+0(FP), X5
	MOV	b+8(FP), X6
	MOV	$1, X7		// assume taken
	BGTU	X5, X6, taken
	MOV	$0, X7		// fell through: not taken
taken:
	MOV	X7, r+16(FP)
	RET

// func testBGTZ(a int64) (r bool)
TEXT ·testBGTZ(SB),NOSPLIT,$0-9
	MOV	a+0(FP), X5
	MOV	$1, X6		// assume taken
	BGTZ	X5, taken
	MOV	$0, X6		// fell through: not taken
taken:
	MOV	X6, r+8(FP)
	RET

// func testBLE(a, b int64) (r bool)
TEXT ·testBLE(SB),NOSPLIT,$0-17
	MOV	a+0(FP), X5
	MOV	b+8(FP), X6
	MOV	$1, X7		// assume taken
	BLE	X5, X6, taken
	MOV	$0, X7		// fell through: not taken
taken:
	MOV	X7, r+16(FP)
	RET

// func testBLEU(a, b int64) (r bool)
TEXT ·testBLEU(SB),NOSPLIT,$0-17
	MOV	a+0(FP), X5
	MOV	b+8(FP), X6
	MOV	$1, X7		// assume taken
	BLEU	X5, X6, taken
	MOV	$0, X7		// fell through: not taken
taken:
	MOV	X7, r+16(FP)
	RET

// func testBLEZ(a int64) (r bool)
TEXT ·testBLEZ(SB),NOSPLIT,$0-9
	MOV	a+0(FP), X5
	MOV	$1, X6		// assume taken
	BLEZ	X5, taken
	MOV	$0, X6		// fell through: not taken
taken:
	MOV	X6, r+8(FP)
	RET

// func testBLT(a, b int64) (r bool)
TEXT ·testBLT(SB),NOSPLIT,$0-17
	MOV	a+0(FP), X5
	MOV	b+8(FP), X6
	MOV	$1, X7		// assume taken
	BLT	X5, X6, taken
	MOV	$0, X7		// fell through: not taken
taken:
	MOV	X7, r+16(FP)
	RET

// func testBLTU(a, b int64) (r bool)
TEXT ·testBLTU(SB),NOSPLIT,$0-17
	MOV	a+0(FP), X5
	MOV	b+8(FP), X6
	MOV	$1, X7		// assume taken
	BLTU	X5, X6, taken
	MOV	$0, X7		// fell through: not taken
taken:
	MOV	X7, r+16(FP)
	RET

// func testBLTZ(a int64) (r bool)
TEXT ·testBLTZ(SB),NOSPLIT,$0-9
	MOV	a+0(FP), X5
	MOV	$1, X6		// assume taken
	BLTZ	X5, taken
	MOV	$0, X6		// fell through: not taken
taken:
	MOV	X6, r+8(FP)
	RET

// func testBNEZ(a int64) (r bool)
TEXT ·testBNEZ(SB),NOSPLIT,$0-9
	MOV	a+0(FP), X5
	MOV	$1, X6		// assume taken
	BNEZ	X5, taken
	MOV	$0, X6		// fell through: not taken
taken:
	MOV	X6, r+8(FP)
	RET
diff --git a/src/cmd/internal/obj/s390x/a.out.go b/src/cmd/internal/obj/s390x/a.out.go new file mode 100644 index 0000000..c1bda1f --- /dev/null +++ b/src/cmd/internal/obj/s390x/a.out.go @@ -0,0 +1,1003 @@
// Based on cmd/internal/obj/ppc64/a.out.go.
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
// Portions Copyright © 1997-1999 Vita Nuova Limited
// Portions Copyright © 2000-2008 Vita Nuova Holdings Limited (www.vitanuova.com)
// Portions Copyright © 2004,2006 Bruce Ellis
// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
// Revisions Copyright © 2000-2008 Lucent Technologies Inc. and others
// Portions Copyright © 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package s390x + +import "cmd/internal/obj" + +//go:generate go run ../stringer.go -i $GOFILE -o anames.go -p s390x + +const ( + NSNAME = 8 + NSYM = 50 + NREG = 16 // number of general purpose registers + NFREG = 16 // number of floating point registers +) + +const ( + // General purpose registers (GPRs). + REG_R0 = obj.RBaseS390X + iota + REG_R1 + REG_R2 + REG_R3 + REG_R4 + REG_R5 + REG_R6 + REG_R7 + REG_R8 + REG_R9 + REG_R10 + REG_R11 + REG_R12 + REG_R13 + REG_R14 + REG_R15 + + // Floating point registers (FPRs). + REG_F0 + REG_F1 + REG_F2 + REG_F3 + REG_F4 + REG_F5 + REG_F6 + REG_F7 + REG_F8 + REG_F9 + REG_F10 + REG_F11 + REG_F12 + REG_F13 + REG_F14 + REG_F15 + + // Vector registers (VRs) - only available when the vector + // facility is installed. + // V0-V15 are aliases for F0-F15. + // We keep them in a separate space to make printing etc. easier + // If the code generator ever emits vector instructions it will + // need to take into account the aliasing. + REG_V0 + REG_V1 + REG_V2 + REG_V3 + REG_V4 + REG_V5 + REG_V6 + REG_V7 + REG_V8 + REG_V9 + REG_V10 + REG_V11 + REG_V12 + REG_V13 + REG_V14 + REG_V15 + REG_V16 + REG_V17 + REG_V18 + REG_V19 + REG_V20 + REG_V21 + REG_V22 + REG_V23 + REG_V24 + REG_V25 + REG_V26 + REG_V27 + REG_V28 + REG_V29 + REG_V30 + REG_V31 + + // Access registers (ARs). + // The thread pointer is typically stored in the register pair + // AR0 and AR1. 
+ REG_AR0 + REG_AR1 + REG_AR2 + REG_AR3 + REG_AR4 + REG_AR5 + REG_AR6 + REG_AR7 + REG_AR8 + REG_AR9 + REG_AR10 + REG_AR11 + REG_AR12 + REG_AR13 + REG_AR14 + REG_AR15 + + REG_RESERVED // end of allocated registers + + REGARG = -1 // -1 disables passing the first argument in register + REGRT1 = REG_R3 // used during zeroing of the stack - not reserved + REGRT2 = REG_R4 // used during zeroing of the stack - not reserved + REGTMP = REG_R10 // scratch register used in the assembler and linker + REGTMP2 = REG_R11 // scratch register used in the assembler and linker + REGCTXT = REG_R12 // context for closures + REGG = REG_R13 // G + REG_LR = REG_R14 // link register + REGSP = REG_R15 // stack pointer +) + +// LINUX for zSeries ELF Application Binary Interface Supplement +// https://refspecs.linuxfoundation.org/ELF/zSeries/lzsabi0_zSeries/x1472.html +var S390XDWARFRegisters = map[int16]int16{} + +func init() { + // f assigns dwarfregisters[from:to by step] = (base):((to-from)/step+base) + f := func(from, step, to, base int16) { + for r := int16(from); r <= to; r += step { + S390XDWARFRegisters[r] = (r-from)/step + base + } + } + f(REG_R0, 1, REG_R15, 0) + + f(REG_F0, 2, REG_F6, 16) + f(REG_F1, 2, REG_F7, 20) + f(REG_F8, 2, REG_F14, 24) + f(REG_F9, 2, REG_F15, 28) + + f(REG_V0, 2, REG_V6, 16) // V0:15 aliased to F0:15 + f(REG_V1, 2, REG_V7, 20) // TODO what about V16:31? 
+ f(REG_V8, 2, REG_V14, 24) + f(REG_V9, 2, REG_V15, 28) + + f(REG_AR0, 1, REG_AR15, 48) +} + +const ( + BIG = 32768 - 8 + DISP12 = 4096 + DISP16 = 65536 + DISP20 = 1048576 +) + +const ( + // mark flags + LEAF = 1 << iota + BRANCH + USETMP // generated code of this Prog uses REGTMP +) + +const ( // comments from func aclass in asmz.go + C_NONE = iota + C_REG // general-purpose register (64-bit) + C_FREG // floating-point register (64-bit) + C_VREG // vector register (128-bit) + C_AREG // access register (32-bit) + C_ZCON // constant == 0 + C_SCON // 0 <= constant <= 0x7fff (positive int16) + C_UCON // constant & 0xffff == 0 (int16 or uint16) + C_ADDCON // 0 > constant >= -0x8000 (negative int16) + C_ANDCON // constant <= 0xffff + C_LCON // constant (int32 or uint32) + C_DCON // constant (int64 or uint64) + C_SACON // computed address, 16-bit displacement, possibly SP-relative + C_LACON // computed address, 32-bit displacement, possibly SP-relative + C_DACON // computed address, 64-bit displacement? 
+ C_SBRA // short branch + C_LBRA // long branch + C_SAUTO // short auto + C_LAUTO // long auto + C_ZOREG // heap address, register-based, displacement == 0 + C_SOREG // heap address, register-based, int16 displacement + C_LOREG // heap address, register-based, int32 displacement + C_TLS_LE // TLS - local exec model (for executables) + C_TLS_IE // TLS - initial exec model (for shared libraries loaded at program startup) + C_GOK // general address + C_ADDR // relocation for extern or static symbols (loads and stores) + C_SYMADDR // relocation for extern or static symbols (address taking) + C_GOTADDR // GOT slot for a symbol in -dynlink mode + C_TEXTSIZE // text size + C_ANY + C_NCLASS // must be the last +) + +const ( + // integer arithmetic + AADD = obj.ABaseS390X + obj.A_ARCHSPECIFIC + iota + AADDC + AADDE + AADDW + ADIVW + ADIVWU + ADIVD + ADIVDU + AMODW + AMODWU + AMODD + AMODDU + AMULLW + AMULLD + AMULHD + AMULHDU + AMLGR + ASUB + ASUBC + ASUBV + ASUBE + ASUBW + ANEG + ANEGW + + // integer moves + AMOVWBR + AMOVB + AMOVBZ + AMOVH + AMOVHBR + AMOVHZ + AMOVW + AMOVWZ + AMOVD + AMOVDBR + + // conditional moves + AMOVDEQ + AMOVDGE + AMOVDGT + AMOVDLE + AMOVDLT + AMOVDNE + ALOCR + ALOCGR + + // find leftmost one + AFLOGR + + // population count + APOPCNT + + // integer bitwise + AAND + AANDW + AOR + AORW + AXOR + AXORW + ASLW + ASLD + ASRW + ASRAW + ASRD + ASRAD + ARLL + ARLLG + ARNSBG + ARXSBG + AROSBG + ARNSBGT + ARXSBGT + AROSBGT + ARISBG + ARISBGN + ARISBGZ + ARISBGNZ + ARISBHG + ARISBLG + ARISBHGZ + ARISBLGZ + + // floating point + AFABS + AFADD + AFADDS + AFCMPO + AFCMPU + ACEBR + AFDIV + AFDIVS + AFMADD + AFMADDS + AFMOVD + AFMOVS + AFMSUB + AFMSUBS + AFMUL + AFMULS + AFNABS + AFNEG + AFNEGS + ALEDBR + ALDEBR + ALPDFR + ALNDFR + AFSUB + AFSUBS + AFSQRT + AFSQRTS + AFIEBR + AFIDBR + ACPSDR + ALTEBR + ALTDBR + ATCEB + ATCDB + + // move from GPR to FPR and vice versa + ALDGR + ALGDR + + // convert from int32/int64 to float/float64 + ACEFBRA + ACDFBRA + ACEGBRA + 
ACDGBRA + + // convert from float/float64 to int32/int64 + ACFEBRA + ACFDBRA + ACGEBRA + ACGDBRA + + // convert from uint32/uint64 to float/float64 + ACELFBR + ACDLFBR + ACELGBR + ACDLGBR + + // convert from float/float64 to uint32/uint64 + ACLFEBR + ACLFDBR + ACLGEBR + ACLGDBR + + // compare + ACMP + ACMPU + ACMPW + ACMPWU + + // test under mask + ATMHH + ATMHL + ATMLH + ATMLL + + // insert program mask + AIPM + + // set program mask + ASPM + + // compare and swap + ACS + ACSG + + // serialize + ASYNC + + // branch + ABC + ABCL + ABRC + ABEQ + ABGE + ABGT + ABLE + ABLT + ABLEU + ABLTU + ABNE + ABVC + ABVS + ASYSCALL + + // branch on count + ABRCT + ABRCTG + + // compare and branch + ACRJ + ACGRJ + ACLRJ + ACLGRJ + ACIJ + ACGIJ + ACLIJ + ACLGIJ + ACMPBEQ + ACMPBGE + ACMPBGT + ACMPBLE + ACMPBLT + ACMPBNE + ACMPUBEQ + ACMPUBGE + ACMPUBGT + ACMPUBLE + ACMPUBLT + ACMPUBNE + + // storage-and-storage + AMVC + AMVCIN + ACLC + AXC + AOC + ANC + + // load + AEXRL + ALARL + ALA + ALAY + + // interlocked load and op + ALAA + ALAAG + ALAAL + ALAALG + ALAN + ALANG + ALAX + ALAXG + ALAO + ALAOG + + // load/store multiple + ALMY + ALMG + ASTMY + ASTMG + + // store clock + ASTCK + ASTCKC + ASTCKE + ASTCKF + + // macros + ACLEAR + + // vector + AVA + AVAB + AVAH + AVAF + AVAG + AVAQ + AVACC + AVACCB + AVACCH + AVACCF + AVACCG + AVACCQ + AVAC + AVACQ + AVACCC + AVACCCQ + AVN + AVNC + AVAVG + AVAVGB + AVAVGH + AVAVGF + AVAVGG + AVAVGL + AVAVGLB + AVAVGLH + AVAVGLF + AVAVGLG + AVCKSM + AVCEQ + AVCEQB + AVCEQH + AVCEQF + AVCEQG + AVCEQBS + AVCEQHS + AVCEQFS + AVCEQGS + AVCH + AVCHB + AVCHH + AVCHF + AVCHG + AVCHBS + AVCHHS + AVCHFS + AVCHGS + AVCHL + AVCHLB + AVCHLH + AVCHLF + AVCHLG + AVCHLBS + AVCHLHS + AVCHLFS + AVCHLGS + AVCLZ + AVCLZB + AVCLZH + AVCLZF + AVCLZG + AVCTZ + AVCTZB + AVCTZH + AVCTZF + AVCTZG + AVEC + AVECB + AVECH + AVECF + AVECG + AVECL + AVECLB + AVECLH + AVECLF + AVECLG + AVERIM + AVERIMB + AVERIMH + AVERIMF + AVERIMG + AVERLL + AVERLLB + AVERLLH + AVERLLF + 
AVERLLG + AVERLLV + AVERLLVB + AVERLLVH + AVERLLVF + AVERLLVG + AVESLV + AVESLVB + AVESLVH + AVESLVF + AVESLVG + AVESL + AVESLB + AVESLH + AVESLF + AVESLG + AVESRA + AVESRAB + AVESRAH + AVESRAF + AVESRAG + AVESRAV + AVESRAVB + AVESRAVH + AVESRAVF + AVESRAVG + AVESRL + AVESRLB + AVESRLH + AVESRLF + AVESRLG + AVESRLV + AVESRLVB + AVESRLVH + AVESRLVF + AVESRLVG + AVX + AVFAE + AVFAEB + AVFAEH + AVFAEF + AVFAEBS + AVFAEHS + AVFAEFS + AVFAEZB + AVFAEZH + AVFAEZF + AVFAEZBS + AVFAEZHS + AVFAEZFS + AVFEE + AVFEEB + AVFEEH + AVFEEF + AVFEEBS + AVFEEHS + AVFEEFS + AVFEEZB + AVFEEZH + AVFEEZF + AVFEEZBS + AVFEEZHS + AVFEEZFS + AVFENE + AVFENEB + AVFENEH + AVFENEF + AVFENEBS + AVFENEHS + AVFENEFS + AVFENEZB + AVFENEZH + AVFENEZF + AVFENEZBS + AVFENEZHS + AVFENEZFS + AVFA + AVFADB + AWFADB + AWFK + AWFKDB + AVFCE + AVFCEDB + AVFCEDBS + AWFCEDB + AWFCEDBS + AVFCH + AVFCHDB + AVFCHDBS + AWFCHDB + AWFCHDBS + AVFCHE + AVFCHEDB + AVFCHEDBS + AWFCHEDB + AWFCHEDBS + AWFC + AWFCDB + AVCDG + AVCDGB + AWCDGB + AVCDLG + AVCDLGB + AWCDLGB + AVCGD + AVCGDB + AWCGDB + AVCLGD + AVCLGDB + AWCLGDB + AVFD + AVFDDB + AWFDDB + AVLDE + AVLDEB + AWLDEB + AVLED + AVLEDB + AWLEDB + AVFM + AVFMDB + AWFMDB + AVFMA + AVFMADB + AWFMADB + AVFMS + AVFMSDB + AWFMSDB + AVFPSO + AVFPSODB + AWFPSODB + AVFLCDB + AWFLCDB + AVFLNDB + AWFLNDB + AVFLPDB + AWFLPDB + AVFSQ + AVFSQDB + AWFSQDB + AVFS + AVFSDB + AWFSDB + AVFTCI + AVFTCIDB + AWFTCIDB + AVGFM + AVGFMB + AVGFMH + AVGFMF + AVGFMG + AVGFMA + AVGFMAB + AVGFMAH + AVGFMAF + AVGFMAG + AVGEF + AVGEG + AVGBM + AVZERO + AVONE + AVGM + AVGMB + AVGMH + AVGMF + AVGMG + AVISTR + AVISTRB + AVISTRH + AVISTRF + AVISTRBS + AVISTRHS + AVISTRFS + AVL + AVLR + AVLREP + AVLREPB + AVLREPH + AVLREPF + AVLREPG + AVLC + AVLCB + AVLCH + AVLCF + AVLCG + AVLEH + AVLEF + AVLEG + AVLEB + AVLEIH + AVLEIF + AVLEIG + AVLEIB + AVFI + AVFIDB + AWFIDB + AVLGV + AVLGVB + AVLGVH + AVLGVF + AVLGVG + AVLLEZ + AVLLEZB + AVLLEZH + AVLLEZF + AVLLEZG + AVLM + AVLP + AVLPB + AVLPH + AVLPF + AVLPG + 
AVLBB + AVLVG + AVLVGB + AVLVGH + AVLVGF + AVLVGG + AVLVGP + AVLL + AVMX + AVMXB + AVMXH + AVMXF + AVMXG + AVMXL + AVMXLB + AVMXLH + AVMXLF + AVMXLG + AVMRH + AVMRHB + AVMRHH + AVMRHF + AVMRHG + AVMRL + AVMRLB + AVMRLH + AVMRLF + AVMRLG + AVMN + AVMNB + AVMNH + AVMNF + AVMNG + AVMNL + AVMNLB + AVMNLH + AVMNLF + AVMNLG + AVMAE + AVMAEB + AVMAEH + AVMAEF + AVMAH + AVMAHB + AVMAHH + AVMAHF + AVMALE + AVMALEB + AVMALEH + AVMALEF + AVMALH + AVMALHB + AVMALHH + AVMALHF + AVMALO + AVMALOB + AVMALOH + AVMALOF + AVMAL + AVMALB + AVMALHW + AVMALF + AVMAO + AVMAOB + AVMAOH + AVMAOF + AVME + AVMEB + AVMEH + AVMEF + AVMH + AVMHB + AVMHH + AVMHF + AVMLE + AVMLEB + AVMLEH + AVMLEF + AVMLH + AVMLHB + AVMLHH + AVMLHF + AVMLO + AVMLOB + AVMLOH + AVMLOF + AVML + AVMLB + AVMLHW + AVMLF + AVMO + AVMOB + AVMOH + AVMOF + AVNO + AVNOT + AVO + AVPK + AVPKH + AVPKF + AVPKG + AVPKLS + AVPKLSH + AVPKLSF + AVPKLSG + AVPKLSHS + AVPKLSFS + AVPKLSGS + AVPKS + AVPKSH + AVPKSF + AVPKSG + AVPKSHS + AVPKSFS + AVPKSGS + AVPERM + AVPDI + AVPOPCT + AVREP + AVREPB + AVREPH + AVREPF + AVREPG + AVREPI + AVREPIB + AVREPIH + AVREPIF + AVREPIG + AVSCEF + AVSCEG + AVSEL + AVSL + AVSLB + AVSLDB + AVSRA + AVSRAB + AVSRL + AVSRLB + AVSEG + AVSEGB + AVSEGH + AVSEGF + AVST + AVSTEH + AVSTEF + AVSTEG + AVSTEB + AVSTM + AVSTL + AVSTRC + AVSTRCB + AVSTRCH + AVSTRCF + AVSTRCBS + AVSTRCHS + AVSTRCFS + AVSTRCZB + AVSTRCZH + AVSTRCZF + AVSTRCZBS + AVSTRCZHS + AVSTRCZFS + AVS + AVSB + AVSH + AVSF + AVSG + AVSQ + AVSCBI + AVSCBIB + AVSCBIH + AVSCBIF + AVSCBIG + AVSCBIQ + AVSBCBI + AVSBCBIQ + AVSBI + AVSBIQ + AVSUMG + AVSUMGH + AVSUMGF + AVSUMQ + AVSUMQF + AVSUMQG + AVSUM + AVSUMB + AVSUMH + AVTM + AVUPH + AVUPHB + AVUPHH + AVUPHF + AVUPLH + AVUPLHB + AVUPLHH + AVUPLHF + AVUPLL + AVUPLLB + AVUPLLH + AVUPLLF + AVUPL + AVUPLB + AVUPLHW + AVUPLF + AVMSLG + AVMSLEG + AVMSLOG + AVMSLEOG + + ANOPH // NOP + + // binary + ABYTE + AWORD + ADWORD + + // end marker + ALAST + + // aliases + ABR = obj.AJMP + ABL = obj.ACALL +) diff --git 
a/src/cmd/internal/obj/s390x/anames.go b/src/cmd/internal/obj/s390x/anames.go new file mode 100644 index 0000000..8b2a76b --- /dev/null +++ b/src/cmd/internal/obj/s390x/anames.go @@ -0,0 +1,720 @@ +// Code generated by stringer -i a.out.go -o anames.go -p s390x; DO NOT EDIT. + +package s390x + +import "cmd/internal/obj" + +var Anames = []string{ + obj.A_ARCHSPECIFIC: "ADD", + "ADDC", + "ADDE", + "ADDW", + "DIVW", + "DIVWU", + "DIVD", + "DIVDU", + "MODW", + "MODWU", + "MODD", + "MODDU", + "MULLW", + "MULLD", + "MULHD", + "MULHDU", + "MLGR", + "SUB", + "SUBC", + "SUBV", + "SUBE", + "SUBW", + "NEG", + "NEGW", + "MOVWBR", + "MOVB", + "MOVBZ", + "MOVH", + "MOVHBR", + "MOVHZ", + "MOVW", + "MOVWZ", + "MOVD", + "MOVDBR", + "MOVDEQ", + "MOVDGE", + "MOVDGT", + "MOVDLE", + "MOVDLT", + "MOVDNE", + "LOCR", + "LOCGR", + "FLOGR", + "POPCNT", + "AND", + "ANDW", + "OR", + "ORW", + "XOR", + "XORW", + "SLW", + "SLD", + "SRW", + "SRAW", + "SRD", + "SRAD", + "RLL", + "RLLG", + "RNSBG", + "RXSBG", + "ROSBG", + "RNSBGT", + "RXSBGT", + "ROSBGT", + "RISBG", + "RISBGN", + "RISBGZ", + "RISBGNZ", + "RISBHG", + "RISBLG", + "RISBHGZ", + "RISBLGZ", + "FABS", + "FADD", + "FADDS", + "FCMPO", + "FCMPU", + "CEBR", + "FDIV", + "FDIVS", + "FMADD", + "FMADDS", + "FMOVD", + "FMOVS", + "FMSUB", + "FMSUBS", + "FMUL", + "FMULS", + "FNABS", + "FNEG", + "FNEGS", + "LEDBR", + "LDEBR", + "LPDFR", + "LNDFR", + "FSUB", + "FSUBS", + "FSQRT", + "FSQRTS", + "FIEBR", + "FIDBR", + "CPSDR", + "LTEBR", + "LTDBR", + "TCEB", + "TCDB", + "LDGR", + "LGDR", + "CEFBRA", + "CDFBRA", + "CEGBRA", + "CDGBRA", + "CFEBRA", + "CFDBRA", + "CGEBRA", + "CGDBRA", + "CELFBR", + "CDLFBR", + "CELGBR", + "CDLGBR", + "CLFEBR", + "CLFDBR", + "CLGEBR", + "CLGDBR", + "CMP", + "CMPU", + "CMPW", + "CMPWU", + "TMHH", + "TMHL", + "TMLH", + "TMLL", + "IPM", + "SPM", + "CS", + "CSG", + "SYNC", + "BC", + "BCL", + "BRC", + "BEQ", + "BGE", + "BGT", + "BLE", + "BLT", + "BLEU", + "BLTU", + "BNE", + "BVC", + "BVS", + "SYSCALL", + "BRCT", + "BRCTG", + 
"CRJ", + "CGRJ", + "CLRJ", + "CLGRJ", + "CIJ", + "CGIJ", + "CLIJ", + "CLGIJ", + "CMPBEQ", + "CMPBGE", + "CMPBGT", + "CMPBLE", + "CMPBLT", + "CMPBNE", + "CMPUBEQ", + "CMPUBGE", + "CMPUBGT", + "CMPUBLE", + "CMPUBLT", + "CMPUBNE", + "MVC", + "MVCIN", + "CLC", + "XC", + "OC", + "NC", + "EXRL", + "LARL", + "LA", + "LAY", + "LAA", + "LAAG", + "LAAL", + "LAALG", + "LAN", + "LANG", + "LAX", + "LAXG", + "LAO", + "LAOG", + "LMY", + "LMG", + "STMY", + "STMG", + "STCK", + "STCKC", + "STCKE", + "STCKF", + "CLEAR", + "VA", + "VAB", + "VAH", + "VAF", + "VAG", + "VAQ", + "VACC", + "VACCB", + "VACCH", + "VACCF", + "VACCG", + "VACCQ", + "VAC", + "VACQ", + "VACCC", + "VACCCQ", + "VN", + "VNC", + "VAVG", + "VAVGB", + "VAVGH", + "VAVGF", + "VAVGG", + "VAVGL", + "VAVGLB", + "VAVGLH", + "VAVGLF", + "VAVGLG", + "VCKSM", + "VCEQ", + "VCEQB", + "VCEQH", + "VCEQF", + "VCEQG", + "VCEQBS", + "VCEQHS", + "VCEQFS", + "VCEQGS", + "VCH", + "VCHB", + "VCHH", + "VCHF", + "VCHG", + "VCHBS", + "VCHHS", + "VCHFS", + "VCHGS", + "VCHL", + "VCHLB", + "VCHLH", + "VCHLF", + "VCHLG", + "VCHLBS", + "VCHLHS", + "VCHLFS", + "VCHLGS", + "VCLZ", + "VCLZB", + "VCLZH", + "VCLZF", + "VCLZG", + "VCTZ", + "VCTZB", + "VCTZH", + "VCTZF", + "VCTZG", + "VEC", + "VECB", + "VECH", + "VECF", + "VECG", + "VECL", + "VECLB", + "VECLH", + "VECLF", + "VECLG", + "VERIM", + "VERIMB", + "VERIMH", + "VERIMF", + "VERIMG", + "VERLL", + "VERLLB", + "VERLLH", + "VERLLF", + "VERLLG", + "VERLLV", + "VERLLVB", + "VERLLVH", + "VERLLVF", + "VERLLVG", + "VESLV", + "VESLVB", + "VESLVH", + "VESLVF", + "VESLVG", + "VESL", + "VESLB", + "VESLH", + "VESLF", + "VESLG", + "VESRA", + "VESRAB", + "VESRAH", + "VESRAF", + "VESRAG", + "VESRAV", + "VESRAVB", + "VESRAVH", + "VESRAVF", + "VESRAVG", + "VESRL", + "VESRLB", + "VESRLH", + "VESRLF", + "VESRLG", + "VESRLV", + "VESRLVB", + "VESRLVH", + "VESRLVF", + "VESRLVG", + "VX", + "VFAE", + "VFAEB", + "VFAEH", + "VFAEF", + "VFAEBS", + "VFAEHS", + "VFAEFS", + "VFAEZB", + "VFAEZH", + "VFAEZF", + "VFAEZBS", + 
"VFAEZHS", + "VFAEZFS", + "VFEE", + "VFEEB", + "VFEEH", + "VFEEF", + "VFEEBS", + "VFEEHS", + "VFEEFS", + "VFEEZB", + "VFEEZH", + "VFEEZF", + "VFEEZBS", + "VFEEZHS", + "VFEEZFS", + "VFENE", + "VFENEB", + "VFENEH", + "VFENEF", + "VFENEBS", + "VFENEHS", + "VFENEFS", + "VFENEZB", + "VFENEZH", + "VFENEZF", + "VFENEZBS", + "VFENEZHS", + "VFENEZFS", + "VFA", + "VFADB", + "WFADB", + "WFK", + "WFKDB", + "VFCE", + "VFCEDB", + "VFCEDBS", + "WFCEDB", + "WFCEDBS", + "VFCH", + "VFCHDB", + "VFCHDBS", + "WFCHDB", + "WFCHDBS", + "VFCHE", + "VFCHEDB", + "VFCHEDBS", + "WFCHEDB", + "WFCHEDBS", + "WFC", + "WFCDB", + "VCDG", + "VCDGB", + "WCDGB", + "VCDLG", + "VCDLGB", + "WCDLGB", + "VCGD", + "VCGDB", + "WCGDB", + "VCLGD", + "VCLGDB", + "WCLGDB", + "VFD", + "VFDDB", + "WFDDB", + "VLDE", + "VLDEB", + "WLDEB", + "VLED", + "VLEDB", + "WLEDB", + "VFM", + "VFMDB", + "WFMDB", + "VFMA", + "VFMADB", + "WFMADB", + "VFMS", + "VFMSDB", + "WFMSDB", + "VFPSO", + "VFPSODB", + "WFPSODB", + "VFLCDB", + "WFLCDB", + "VFLNDB", + "WFLNDB", + "VFLPDB", + "WFLPDB", + "VFSQ", + "VFSQDB", + "WFSQDB", + "VFS", + "VFSDB", + "WFSDB", + "VFTCI", + "VFTCIDB", + "WFTCIDB", + "VGFM", + "VGFMB", + "VGFMH", + "VGFMF", + "VGFMG", + "VGFMA", + "VGFMAB", + "VGFMAH", + "VGFMAF", + "VGFMAG", + "VGEF", + "VGEG", + "VGBM", + "VZERO", + "VONE", + "VGM", + "VGMB", + "VGMH", + "VGMF", + "VGMG", + "VISTR", + "VISTRB", + "VISTRH", + "VISTRF", + "VISTRBS", + "VISTRHS", + "VISTRFS", + "VL", + "VLR", + "VLREP", + "VLREPB", + "VLREPH", + "VLREPF", + "VLREPG", + "VLC", + "VLCB", + "VLCH", + "VLCF", + "VLCG", + "VLEH", + "VLEF", + "VLEG", + "VLEB", + "VLEIH", + "VLEIF", + "VLEIG", + "VLEIB", + "VFI", + "VFIDB", + "WFIDB", + "VLGV", + "VLGVB", + "VLGVH", + "VLGVF", + "VLGVG", + "VLLEZ", + "VLLEZB", + "VLLEZH", + "VLLEZF", + "VLLEZG", + "VLM", + "VLP", + "VLPB", + "VLPH", + "VLPF", + "VLPG", + "VLBB", + "VLVG", + "VLVGB", + "VLVGH", + "VLVGF", + "VLVGG", + "VLVGP", + "VLL", + "VMX", + "VMXB", + "VMXH", + "VMXF", + "VMXG", + "VMXL", + 
"VMXLB", + "VMXLH", + "VMXLF", + "VMXLG", + "VMRH", + "VMRHB", + "VMRHH", + "VMRHF", + "VMRHG", + "VMRL", + "VMRLB", + "VMRLH", + "VMRLF", + "VMRLG", + "VMN", + "VMNB", + "VMNH", + "VMNF", + "VMNG", + "VMNL", + "VMNLB", + "VMNLH", + "VMNLF", + "VMNLG", + "VMAE", + "VMAEB", + "VMAEH", + "VMAEF", + "VMAH", + "VMAHB", + "VMAHH", + "VMAHF", + "VMALE", + "VMALEB", + "VMALEH", + "VMALEF", + "VMALH", + "VMALHB", + "VMALHH", + "VMALHF", + "VMALO", + "VMALOB", + "VMALOH", + "VMALOF", + "VMAL", + "VMALB", + "VMALHW", + "VMALF", + "VMAO", + "VMAOB", + "VMAOH", + "VMAOF", + "VME", + "VMEB", + "VMEH", + "VMEF", + "VMH", + "VMHB", + "VMHH", + "VMHF", + "VMLE", + "VMLEB", + "VMLEH", + "VMLEF", + "VMLH", + "VMLHB", + "VMLHH", + "VMLHF", + "VMLO", + "VMLOB", + "VMLOH", + "VMLOF", + "VML", + "VMLB", + "VMLHW", + "VMLF", + "VMO", + "VMOB", + "VMOH", + "VMOF", + "VNO", + "VNOT", + "VO", + "VPK", + "VPKH", + "VPKF", + "VPKG", + "VPKLS", + "VPKLSH", + "VPKLSF", + "VPKLSG", + "VPKLSHS", + "VPKLSFS", + "VPKLSGS", + "VPKS", + "VPKSH", + "VPKSF", + "VPKSG", + "VPKSHS", + "VPKSFS", + "VPKSGS", + "VPERM", + "VPDI", + "VPOPCT", + "VREP", + "VREPB", + "VREPH", + "VREPF", + "VREPG", + "VREPI", + "VREPIB", + "VREPIH", + "VREPIF", + "VREPIG", + "VSCEF", + "VSCEG", + "VSEL", + "VSL", + "VSLB", + "VSLDB", + "VSRA", + "VSRAB", + "VSRL", + "VSRLB", + "VSEG", + "VSEGB", + "VSEGH", + "VSEGF", + "VST", + "VSTEH", + "VSTEF", + "VSTEG", + "VSTEB", + "VSTM", + "VSTL", + "VSTRC", + "VSTRCB", + "VSTRCH", + "VSTRCF", + "VSTRCBS", + "VSTRCHS", + "VSTRCFS", + "VSTRCZB", + "VSTRCZH", + "VSTRCZF", + "VSTRCZBS", + "VSTRCZHS", + "VSTRCZFS", + "VS", + "VSB", + "VSH", + "VSF", + "VSG", + "VSQ", + "VSCBI", + "VSCBIB", + "VSCBIH", + "VSCBIF", + "VSCBIG", + "VSCBIQ", + "VSBCBI", + "VSBCBIQ", + "VSBI", + "VSBIQ", + "VSUMG", + "VSUMGH", + "VSUMGF", + "VSUMQ", + "VSUMQF", + "VSUMQG", + "VSUM", + "VSUMB", + "VSUMH", + "VTM", + "VUPH", + "VUPHB", + "VUPHH", + "VUPHF", + "VUPLH", + "VUPLHB", + "VUPLHH", + "VUPLHF", + "VUPLL", 
+ "VUPLLB", + "VUPLLH", + "VUPLLF", + "VUPL", + "VUPLB", + "VUPLHW", + "VUPLF", + "VMSLG", + "VMSLEG", + "VMSLOG", + "VMSLEOG", + "NOPH", + "BYTE", + "WORD", + "DWORD", + "LAST", +} diff --git a/src/cmd/internal/obj/s390x/anamesz.go b/src/cmd/internal/obj/s390x/anamesz.go new file mode 100644 index 0000000..9c9b4d5 --- /dev/null +++ b/src/cmd/internal/obj/s390x/anamesz.go @@ -0,0 +1,39 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package s390x + +var cnamesz = []string{ + "NONE", + "REG", + "FREG", + "VREG", + "AREG", + "ZCON", + "SCON", + "UCON", + "ADDCON", + "ANDCON", + "LCON", + "DCON", + "SACON", + "LACON", + "DACON", + "SBRA", + "LBRA", + "SAUTO", + "LAUTO", + "ZOREG", + "SOREG", + "LOREG", + "TLS_LE", + "TLS_IE", + "GOK", + "ADDR", + "SYMADDR", + "GOTADDR", + "TEXTSIZE", + "ANY", + "NCLASS", +} diff --git a/src/cmd/internal/obj/s390x/asmz.go b/src/cmd/internal/obj/s390x/asmz.go new file mode 100644 index 0000000..d8a36c4 --- /dev/null +++ b/src/cmd/internal/obj/s390x/asmz.go @@ -0,0 +1,5047 @@ +// Based on cmd/internal/obj/ppc64/asm9.go. +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2008 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2008 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package s390x + +import ( + "cmd/internal/obj" + "cmd/internal/objabi" + "fmt" + "log" + "math" + "sort" +) + +// ctxtz holds state while assembling a single function. +// Each function gets a fresh ctxtz, so that multiple functions +// can safely be assembled concurrently. +type ctxtz struct { + ctxt *obj.Link // link context: used for diagnostics, flags and architecture data + newprog obj.ProgAlloc // Prog allocator passed to spanz; forwarded to obj.MarkUnsafePoints + cursym *obj.LSym // symbol of the function being assembled + autosize int32 // set by spanz from To.Offset of the function's first Prog; aclass adds it to NAME_AUTO and NAME_PARAM offsets + instoffset int64 // offset recorded by the most recent aclass call + pc int64 // Pc of the Prog currently being assembled (set by spanz before each asmout call) +} + +// instruction layout. 
+const ( + funcAlign = 16 // spanz rounds each function's size up to a multiple of funcAlign +) + +// Optab is one row of the instruction table. It names an opcode, the operand +// class (a C_* value) accepted in each operand position, and a handler index. +// The a1..a6 fields follow the same order as the argument list built in oplook. +type Optab struct { + as obj.As // opcode + i uint8 // handler index + a1 uint8 // operand class of From + a2 uint8 // operand class of Reg + a3 uint8 // operand class of RestArgs[0] + a4 uint8 // operand class of RestArgs[1] + a5 uint8 // operand class of RestArgs[2] + a6 uint8 // operand class of To +} + +// optab lists opcode/operand-class combinations, grouped by instruction kind. +// oplook caches a 1-based index into this slice in Prog.Optab. +var optab = []Optab{ + // zero-length instructions + {i: 0, as: obj.ATEXT, a1: C_ADDR, a6: C_TEXTSIZE}, + {i: 0, as: obj.ATEXT, a1: C_ADDR, a3: C_LCON, a6: C_TEXTSIZE}, + {i: 0, as: obj.APCDATA, a1: C_LCON, a6: C_LCON}, + {i: 0, as: obj.AFUNCDATA, a1: C_SCON, a6: C_ADDR}, + {i: 0, as: obj.ANOP}, + {i: 0, as: obj.ANOP, a1: C_SAUTO}, + + // move register + {i: 1, as: AMOVD, a1: C_REG, a6: C_REG}, + {i: 1, as: AMOVB, a1: C_REG, a6: C_REG}, + {i: 1, as: AMOVBZ, a1: C_REG, a6: C_REG}, + {i: 1, as: AMOVW, a1: C_REG, a6: C_REG}, + {i: 1, as: AMOVWZ, a1: C_REG, a6: C_REG}, + {i: 1, as: AFMOVD, a1: C_FREG, a6: C_FREG}, + {i: 1, as: AMOVDBR, a1: C_REG, a6: C_REG}, + + // load constant + {i: 26, as: AMOVD, a1: C_LACON, a6: C_REG}, + {i: 26, as: AMOVW, a1: C_LACON, a6: C_REG}, + {i: 26, as: AMOVWZ, a1: C_LACON, a6: C_REG}, + {i: 3, as: AMOVD, a1: C_DCON, a6: C_REG}, + {i: 3, as: AMOVW, a1: C_DCON, a6: C_REG}, + {i: 3, as: AMOVWZ, a1: C_DCON, a6: C_REG}, + {i: 3, as: AMOVB, a1: C_DCON, a6: C_REG}, + {i: 3, as: AMOVBZ, a1: C_DCON, a6: C_REG}, + + // store constant + {i: 72, as: AMOVD, a1: C_SCON, a6: C_LAUTO}, + {i: 72, as: AMOVD, a1: C_ADDCON, a6: C_LAUTO}, + {i: 72, as: AMOVW, a1: C_SCON, a6: C_LAUTO}, + {i: 72, as: AMOVW, a1: C_ADDCON, a6: C_LAUTO}, + {i: 72, as: AMOVWZ, a1: C_SCON, a6: C_LAUTO}, + {i: 72, as: AMOVWZ, a1: C_ADDCON, a6: C_LAUTO}, + {i: 72, as: AMOVB, a1: C_SCON, a6: C_LAUTO}, + {i: 72, as: AMOVB, a1: C_ADDCON, a6: C_LAUTO}, + {i: 72, as: AMOVBZ, a1: C_SCON, a6: C_LAUTO}, + {i: 72, as: AMOVBZ, a1: C_ADDCON, a6: C_LAUTO}, + {i: 72, as: AMOVD, a1: C_SCON, a6: C_LOREG}, + {i: 72, as: AMOVD, a1: C_ADDCON, a6: C_LOREG}, + {i: 72, as: AMOVW, a1: C_SCON, a6: C_LOREG}, + {i: 72, as: AMOVW, a1: C_ADDCON, a6: C_LOREG}, + {i: 72, as: 
AMOVWZ, a1: C_SCON, a6: C_LOREG}, + {i: 72, as: AMOVWZ, a1: C_ADDCON, a6: C_LOREG}, + {i: 72, as: AMOVB, a1: C_SCON, a6: C_LOREG}, + {i: 72, as: AMOVB, a1: C_ADDCON, a6: C_LOREG}, + {i: 72, as: AMOVBZ, a1: C_SCON, a6: C_LOREG}, + {i: 72, as: AMOVBZ, a1: C_ADDCON, a6: C_LOREG}, + + // store + {i: 35, as: AMOVD, a1: C_REG, a6: C_LAUTO}, + {i: 35, as: AMOVW, a1: C_REG, a6: C_LAUTO}, + {i: 35, as: AMOVWZ, a1: C_REG, a6: C_LAUTO}, + {i: 35, as: AMOVBZ, a1: C_REG, a6: C_LAUTO}, + {i: 35, as: AMOVB, a1: C_REG, a6: C_LAUTO}, + {i: 35, as: AMOVDBR, a1: C_REG, a6: C_LAUTO}, + {i: 35, as: AMOVHBR, a1: C_REG, a6: C_LAUTO}, + {i: 35, as: AMOVD, a1: C_REG, a6: C_LOREG}, + {i: 35, as: AMOVW, a1: C_REG, a6: C_LOREG}, + {i: 35, as: AMOVWZ, a1: C_REG, a6: C_LOREG}, + {i: 35, as: AMOVBZ, a1: C_REG, a6: C_LOREG}, + {i: 35, as: AMOVB, a1: C_REG, a6: C_LOREG}, + {i: 35, as: AMOVDBR, a1: C_REG, a6: C_LOREG}, + {i: 35, as: AMOVHBR, a1: C_REG, a6: C_LOREG}, + {i: 74, as: AMOVD, a1: C_REG, a6: C_ADDR}, + {i: 74, as: AMOVW, a1: C_REG, a6: C_ADDR}, + {i: 74, as: AMOVWZ, a1: C_REG, a6: C_ADDR}, + {i: 74, as: AMOVBZ, a1: C_REG, a6: C_ADDR}, + {i: 74, as: AMOVB, a1: C_REG, a6: C_ADDR}, + + // load + {i: 36, as: AMOVD, a1: C_LAUTO, a6: C_REG}, + {i: 36, as: AMOVW, a1: C_LAUTO, a6: C_REG}, + {i: 36, as: AMOVWZ, a1: C_LAUTO, a6: C_REG}, + {i: 36, as: AMOVBZ, a1: C_LAUTO, a6: C_REG}, + {i: 36, as: AMOVB, a1: C_LAUTO, a6: C_REG}, + {i: 36, as: AMOVDBR, a1: C_LAUTO, a6: C_REG}, + {i: 36, as: AMOVHBR, a1: C_LAUTO, a6: C_REG}, + {i: 36, as: AMOVD, a1: C_LOREG, a6: C_REG}, + {i: 36, as: AMOVW, a1: C_LOREG, a6: C_REG}, + {i: 36, as: AMOVWZ, a1: C_LOREG, a6: C_REG}, + {i: 36, as: AMOVBZ, a1: C_LOREG, a6: C_REG}, + {i: 36, as: AMOVB, a1: C_LOREG, a6: C_REG}, + {i: 36, as: AMOVDBR, a1: C_LOREG, a6: C_REG}, + {i: 36, as: AMOVHBR, a1: C_LOREG, a6: C_REG}, + {i: 75, as: AMOVD, a1: C_ADDR, a6: C_REG}, + {i: 75, as: AMOVW, a1: C_ADDR, a6: C_REG}, + {i: 75, as: AMOVWZ, a1: C_ADDR, a6: C_REG}, + {i: 75, as: AMOVBZ, 
a1: C_ADDR, a6: C_REG}, + {i: 75, as: AMOVB, a1: C_ADDR, a6: C_REG}, + + // interlocked load and op + {i: 99, as: ALAAG, a1: C_REG, a2: C_REG, a6: C_LOREG}, + + // integer arithmetic + {i: 2, as: AADD, a1: C_REG, a2: C_REG, a6: C_REG}, + {i: 2, as: AADD, a1: C_REG, a6: C_REG}, + {i: 22, as: AADD, a1: C_LCON, a2: C_REG, a6: C_REG}, + {i: 22, as: AADD, a1: C_LCON, a6: C_REG}, + {i: 12, as: AADD, a1: C_LOREG, a6: C_REG}, + {i: 12, as: AADD, a1: C_LAUTO, a6: C_REG}, + {i: 21, as: ASUB, a1: C_LCON, a2: C_REG, a6: C_REG}, + {i: 21, as: ASUB, a1: C_LCON, a6: C_REG}, + {i: 12, as: ASUB, a1: C_LOREG, a6: C_REG}, + {i: 12, as: ASUB, a1: C_LAUTO, a6: C_REG}, + {i: 4, as: AMULHD, a1: C_REG, a6: C_REG}, + {i: 4, as: AMULHD, a1: C_REG, a2: C_REG, a6: C_REG}, + {i: 62, as: AMLGR, a1: C_REG, a6: C_REG}, + {i: 2, as: ADIVW, a1: C_REG, a2: C_REG, a6: C_REG}, + {i: 2, as: ADIVW, a1: C_REG, a6: C_REG}, + {i: 10, as: ASUB, a1: C_REG, a2: C_REG, a6: C_REG}, + {i: 10, as: ASUB, a1: C_REG, a6: C_REG}, + {i: 47, as: ANEG, a1: C_REG, a6: C_REG}, + {i: 47, as: ANEG, a6: C_REG}, + + // integer logical + {i: 6, as: AAND, a1: C_REG, a2: C_REG, a6: C_REG}, + {i: 6, as: AAND, a1: C_REG, a6: C_REG}, + {i: 23, as: AAND, a1: C_LCON, a6: C_REG}, + {i: 12, as: AAND, a1: C_LOREG, a6: C_REG}, + {i: 12, as: AAND, a1: C_LAUTO, a6: C_REG}, + {i: 6, as: AANDW, a1: C_REG, a2: C_REG, a6: C_REG}, + {i: 6, as: AANDW, a1: C_REG, a6: C_REG}, + {i: 24, as: AANDW, a1: C_LCON, a6: C_REG}, + {i: 12, as: AANDW, a1: C_LOREG, a6: C_REG}, + {i: 12, as: AANDW, a1: C_LAUTO, a6: C_REG}, + {i: 7, as: ASLD, a1: C_REG, a6: C_REG}, + {i: 7, as: ASLD, a1: C_REG, a2: C_REG, a6: C_REG}, + {i: 7, as: ASLD, a1: C_SCON, a2: C_REG, a6: C_REG}, + {i: 7, as: ASLD, a1: C_SCON, a6: C_REG}, + {i: 13, as: ARNSBG, a1: C_SCON, a3: C_SCON, a4: C_SCON, a5: C_REG, a6: C_REG}, + + // compare and swap + {i: 79, as: ACSG, a1: C_REG, a2: C_REG, a6: C_SOREG}, + + // floating point + {i: 32, as: AFADD, a1: C_FREG, a6: C_FREG}, + {i: 33, as: AFABS, a1: 
C_FREG, a6: C_FREG}, + {i: 33, as: AFABS, a6: C_FREG}, + {i: 34, as: AFMADD, a1: C_FREG, a2: C_FREG, a6: C_FREG}, + {i: 32, as: AFMUL, a1: C_FREG, a6: C_FREG}, + {i: 36, as: AFMOVD, a1: C_LAUTO, a6: C_FREG}, + {i: 36, as: AFMOVD, a1: C_LOREG, a6: C_FREG}, + {i: 75, as: AFMOVD, a1: C_ADDR, a6: C_FREG}, + {i: 35, as: AFMOVD, a1: C_FREG, a6: C_LAUTO}, + {i: 35, as: AFMOVD, a1: C_FREG, a6: C_LOREG}, + {i: 74, as: AFMOVD, a1: C_FREG, a6: C_ADDR}, + {i: 67, as: AFMOVD, a1: C_ZCON, a6: C_FREG}, + {i: 81, as: ALDGR, a1: C_REG, a6: C_FREG}, + {i: 81, as: ALGDR, a1: C_FREG, a6: C_REG}, + {i: 82, as: ACEFBRA, a1: C_REG, a6: C_FREG}, + {i: 83, as: ACFEBRA, a1: C_FREG, a6: C_REG}, + {i: 48, as: AFIEBR, a1: C_SCON, a2: C_FREG, a6: C_FREG}, + {i: 49, as: ACPSDR, a1: C_FREG, a2: C_FREG, a6: C_FREG}, + {i: 50, as: ALTDBR, a1: C_FREG, a6: C_FREG}, + {i: 51, as: ATCDB, a1: C_FREG, a6: C_SCON}, + + // load symbol address (plus offset) + {i: 19, as: AMOVD, a1: C_SYMADDR, a6: C_REG}, + {i: 93, as: AMOVD, a1: C_GOTADDR, a6: C_REG}, + {i: 94, as: AMOVD, a1: C_TLS_LE, a6: C_REG}, + {i: 95, as: AMOVD, a1: C_TLS_IE, a6: C_REG}, + + // system call + {i: 5, as: ASYSCALL}, + {i: 77, as: ASYSCALL, a1: C_SCON}, + + // branch + {i: 16, as: ABEQ, a6: C_SBRA}, + {i: 16, as: ABRC, a1: C_SCON, a6: C_SBRA}, + {i: 11, as: ABR, a6: C_LBRA}, + {i: 16, as: ABC, a1: C_SCON, a2: C_REG, a6: C_LBRA}, + {i: 18, as: ABR, a6: C_REG}, + {i: 18, as: ABR, a1: C_REG, a6: C_REG}, + {i: 15, as: ABR, a6: C_ZOREG}, + {i: 15, as: ABC, a6: C_ZOREG}, + + // compare and branch + {i: 89, as: ACGRJ, a1: C_SCON, a2: C_REG, a3: C_REG, a6: C_SBRA}, + {i: 89, as: ACMPBEQ, a1: C_REG, a2: C_REG, a6: C_SBRA}, + {i: 89, as: ACLGRJ, a1: C_SCON, a2: C_REG, a3: C_REG, a6: C_SBRA}, + {i: 89, as: ACMPUBEQ, a1: C_REG, a2: C_REG, a6: C_SBRA}, + {i: 90, as: ACGIJ, a1: C_SCON, a2: C_REG, a3: C_ADDCON, a6: C_SBRA}, + {i: 90, as: ACGIJ, a1: C_SCON, a2: C_REG, a3: C_SCON, a6: C_SBRA}, + {i: 90, as: ACMPBEQ, a1: C_REG, a3: C_ADDCON, a6: C_SBRA}, + 
{i: 90, as: ACMPBEQ, a1: C_REG, a3: C_SCON, a6: C_SBRA}, + {i: 90, as: ACLGIJ, a1: C_SCON, a2: C_REG, a3: C_ADDCON, a6: C_SBRA}, + {i: 90, as: ACMPUBEQ, a1: C_REG, a3: C_ANDCON, a6: C_SBRA}, + + // branch on count + {i: 41, as: ABRCT, a1: C_REG, a6: C_SBRA}, + {i: 41, as: ABRCTG, a1: C_REG, a6: C_SBRA}, + + // move on condition + {i: 17, as: AMOVDEQ, a1: C_REG, a6: C_REG}, + + // load on condition + {i: 25, as: ALOCGR, a1: C_SCON, a2: C_REG, a6: C_REG}, + + // find leftmost one + {i: 8, as: AFLOGR, a1: C_REG, a6: C_REG}, + + // population count + {i: 9, as: APOPCNT, a1: C_REG, a6: C_REG}, + + // compare + {i: 70, as: ACMP, a1: C_REG, a6: C_REG}, + {i: 71, as: ACMP, a1: C_REG, a6: C_LCON}, + {i: 70, as: ACMPU, a1: C_REG, a6: C_REG}, + {i: 71, as: ACMPU, a1: C_REG, a6: C_LCON}, + {i: 70, as: AFCMPO, a1: C_FREG, a6: C_FREG}, + {i: 70, as: AFCMPO, a1: C_FREG, a2: C_REG, a6: C_FREG}, + + // test under mask + {i: 91, as: ATMHH, a1: C_REG, a6: C_ANDCON}, + + // insert program mask + {i: 92, as: AIPM, a1: C_REG}, + + // set program mask + {i: 76, as: ASPM, a1: C_REG}, + + // 32-bit access registers + {i: 68, as: AMOVW, a1: C_AREG, a6: C_REG}, + {i: 68, as: AMOVWZ, a1: C_AREG, a6: C_REG}, + {i: 69, as: AMOVW, a1: C_REG, a6: C_AREG}, + {i: 69, as: AMOVWZ, a1: C_REG, a6: C_AREG}, + + // macros + {i: 96, as: ACLEAR, a1: C_LCON, a6: C_LOREG}, + {i: 96, as: ACLEAR, a1: C_LCON, a6: C_LAUTO}, + + // load/store multiple + {i: 97, as: ASTMG, a1: C_REG, a2: C_REG, a6: C_LOREG}, + {i: 97, as: ASTMG, a1: C_REG, a2: C_REG, a6: C_LAUTO}, + {i: 98, as: ALMG, a1: C_LOREG, a2: C_REG, a6: C_REG}, + {i: 98, as: ALMG, a1: C_LAUTO, a2: C_REG, a6: C_REG}, + + // bytes + {i: 40, as: ABYTE, a1: C_SCON}, + {i: 40, as: AWORD, a1: C_LCON}, + {i: 31, as: ADWORD, a1: C_LCON}, + {i: 31, as: ADWORD, a1: C_DCON}, + + // fast synchronization + {i: 80, as: ASYNC}, + + // store clock + {i: 88, as: ASTCK, a6: C_SAUTO}, + {i: 88, as: ASTCK, a6: C_SOREG}, + + // storage and storage + {i: 84, as: AMVC, a1: 
C_SCON, a3: C_LOREG, a6: C_LOREG}, + {i: 84, as: AMVC, a1: C_SCON, a3: C_LOREG, a6: C_LAUTO}, + {i: 84, as: AMVC, a1: C_SCON, a3: C_LAUTO, a6: C_LAUTO}, + + // address + {i: 85, as: ALARL, a1: C_LCON, a6: C_REG}, + {i: 85, as: ALARL, a1: C_SYMADDR, a6: C_REG}, + {i: 86, as: ALA, a1: C_SOREG, a6: C_REG}, + {i: 86, as: ALA, a1: C_SAUTO, a6: C_REG}, + {i: 87, as: AEXRL, a1: C_SYMADDR, a6: C_REG}, + + // undefined (deliberate illegal instruction) + {i: 78, as: obj.AUNDEF}, + + // 2 byte no-operation + {i: 66, as: ANOPH}, + + // vector instructions + + // VRX store + {i: 100, as: AVST, a1: C_VREG, a6: C_SOREG}, + {i: 100, as: AVST, a1: C_VREG, a6: C_SAUTO}, + {i: 100, as: AVSTEG, a1: C_SCON, a2: C_VREG, a6: C_SOREG}, + {i: 100, as: AVSTEG, a1: C_SCON, a2: C_VREG, a6: C_SAUTO}, + + // VRX load + {i: 101, as: AVL, a1: C_SOREG, a6: C_VREG}, + {i: 101, as: AVL, a1: C_SAUTO, a6: C_VREG}, + {i: 101, as: AVLEG, a1: C_SCON, a3: C_SOREG, a6: C_VREG}, + {i: 101, as: AVLEG, a1: C_SCON, a3: C_SAUTO, a6: C_VREG}, + + // VRV scatter + {i: 102, as: AVSCEG, a1: C_SCON, a2: C_VREG, a6: C_SOREG}, + {i: 102, as: AVSCEG, a1: C_SCON, a2: C_VREG, a6: C_SAUTO}, + + // VRV gather + {i: 103, as: AVGEG, a1: C_SCON, a3: C_SOREG, a6: C_VREG}, + {i: 103, as: AVGEG, a1: C_SCON, a3: C_SAUTO, a6: C_VREG}, + + // VRS element shift/rotate and load gr to/from vr element + {i: 104, as: AVESLG, a1: C_SCON, a2: C_VREG, a6: C_VREG}, + {i: 104, as: AVESLG, a1: C_REG, a2: C_VREG, a6: C_VREG}, + {i: 104, as: AVESLG, a1: C_SCON, a6: C_VREG}, + {i: 104, as: AVESLG, a1: C_REG, a6: C_VREG}, + {i: 104, as: AVLGVG, a1: C_SCON, a2: C_VREG, a6: C_REG}, + {i: 104, as: AVLGVG, a1: C_REG, a2: C_VREG, a6: C_REG}, + {i: 104, as: AVLVGG, a1: C_SCON, a2: C_REG, a6: C_VREG}, + {i: 104, as: AVLVGG, a1: C_REG, a2: C_REG, a6: C_VREG}, + + // VRS store multiple + {i: 105, as: AVSTM, a1: C_VREG, a2: C_VREG, a6: C_SOREG}, + {i: 105, as: AVSTM, a1: C_VREG, a2: C_VREG, a6: C_SAUTO}, + + // VRS load multiple + {i: 106, as: AVLM, a1: 
C_SOREG, a2: C_VREG, a6: C_VREG}, + {i: 106, as: AVLM, a1: C_SAUTO, a2: C_VREG, a6: C_VREG}, + + // VRS store with length + {i: 107, as: AVSTL, a1: C_REG, a2: C_VREG, a6: C_SOREG}, + {i: 107, as: AVSTL, a1: C_REG, a2: C_VREG, a6: C_SAUTO}, + + // VRS load with length + {i: 108, as: AVLL, a1: C_REG, a3: C_SOREG, a6: C_VREG}, + {i: 108, as: AVLL, a1: C_REG, a3: C_SAUTO, a6: C_VREG}, + + // VRI-a + {i: 109, as: AVGBM, a1: C_ANDCON, a6: C_VREG}, + {i: 109, as: AVZERO, a6: C_VREG}, + {i: 109, as: AVREPIG, a1: C_ADDCON, a6: C_VREG}, + {i: 109, as: AVREPIG, a1: C_SCON, a6: C_VREG}, + {i: 109, as: AVLEIG, a1: C_SCON, a3: C_ADDCON, a6: C_VREG}, + {i: 109, as: AVLEIG, a1: C_SCON, a3: C_SCON, a6: C_VREG}, + + // VRI-b generate mask + {i: 110, as: AVGMG, a1: C_SCON, a3: C_SCON, a6: C_VREG}, + + // VRI-c replicate + {i: 111, as: AVREPG, a1: C_UCON, a2: C_VREG, a6: C_VREG}, + + // VRI-d element rotate and insert under mask and + // shift left double by byte + {i: 112, as: AVERIMG, a1: C_SCON, a2: C_VREG, a3: C_VREG, a6: C_VREG}, + {i: 112, as: AVSLDB, a1: C_SCON, a2: C_VREG, a3: C_VREG, a6: C_VREG}, + + // VRI-d fp test data class immediate + {i: 113, as: AVFTCIDB, a1: C_SCON, a2: C_VREG, a6: C_VREG}, + + // VRR-a load reg + {i: 114, as: AVLR, a1: C_VREG, a6: C_VREG}, + + // VRR-a compare + {i: 115, as: AVECG, a1: C_VREG, a6: C_VREG}, + + // VRR-b + {i: 117, as: AVCEQG, a1: C_VREG, a2: C_VREG, a6: C_VREG}, + {i: 117, as: AVFAEF, a1: C_VREG, a2: C_VREG, a6: C_VREG}, + {i: 117, as: AVPKSG, a1: C_VREG, a2: C_VREG, a6: C_VREG}, + + // VRR-c + {i: 118, as: AVAQ, a1: C_VREG, a2: C_VREG, a6: C_VREG}, + {i: 118, as: AVAQ, a1: C_VREG, a6: C_VREG}, + {i: 118, as: AVNOT, a1: C_VREG, a6: C_VREG}, + {i: 123, as: AVPDI, a1: C_SCON, a2: C_VREG, a3: C_VREG, a6: C_VREG}, + + // VRR-c shifts + {i: 119, as: AVERLLVG, a1: C_VREG, a2: C_VREG, a6: C_VREG}, + {i: 119, as: AVERLLVG, a1: C_VREG, a6: C_VREG}, + + // VRR-d + {i: 120, as: AVACQ, a1: C_VREG, a2: C_VREG, a3: C_VREG, a6: C_VREG}, + + // VRR-e 
+ {i: 121, as: AVSEL, a1: C_VREG, a2: C_VREG, a3: C_VREG, a6: C_VREG}, + + // VRR-f + {i: 122, as: AVLVGP, a1: C_REG, a2: C_REG, a6: C_VREG}, +} + +// oprange is indexed by opcode&obj.AMask. spanz treats a nil entry for AORW +// as evidence that buildop has not been called yet. +// NOTE(review): presumably buildop fills each entry with the optab rows for that opcode; confirm. +var oprange [ALAST & obj.AMask][]Optab + +// NOTE(review): xcmp is neither written nor read in this part of the file; +// presumably an operand-class compatibility matrix, confirm against its users. +var xcmp [C_NCLASS][C_NCLASS]bool + +// spanz assembles the function cursym into machine code. +// It runs asmout over every Prog of the function, assigning each Prog its Pc, +// and repeats until a pass leaves every Pc unchanged (a diagnostic is reported +// and the loop abandoned once more than 100 passes have completed). The bytes +// are then copied into cursym.P, with cursym.Size rounded up to a multiple of +// funcAlign, and Progs that use REGTMP are marked as unsafe points. +// Functions whose Text Prog is nil or has no successor are left untouched. +func spanz(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { + if ctxt.Retpoline { + ctxt.Diag("-spectre=ret not supported on s390x") + ctxt.Retpoline = false // don't keep printing + } + + p := cursym.Func().Text + if p == nil || p.Link == nil { // handle external functions and ELF section symbols + return + } + + if oprange[AORW&obj.AMask] == nil { + ctxt.Diag("s390x ops not initialized, call s390x.buildop first") + } + + c := ctxtz{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset)} + + buffer := make([]byte, 0) + changed := true + loop := 0 + // Relocations already present before assembly are kept; anything past + // this index is discarded and regenerated on every pass. + nrelocs0 := len(c.cursym.R) + // Re-assemble until no Prog's Pc differs from the previous pass. + for changed { + if loop > 100 { + c.ctxt.Diag("stuck in spanz loop") + break + } + changed = false + buffer = buffer[:0] + // Zero, then drop, the relocations emitted by the previous pass. + for i := range c.cursym.R[nrelocs0:] { + c.cursym.R[nrelocs0+i] = obj.Reloc{} + } + c.cursym.R = c.cursym.R[:nrelocs0] // preserve marker relocations generated by the compiler + for p := c.cursym.Func().Text; p != nil; p = p.Link { + pc := int64(len(buffer)) + if pc != p.Pc { + changed = true + } + p.Pc = pc + c.pc = p.Pc + c.asmout(p, &buffer) + // asmout appended nothing: only the pseudo-instructions below may be empty. + if pc == int64(len(buffer)) { + switch p.As { + case obj.ANOP, obj.AFUNCDATA, obj.APCDATA, obj.ATEXT: + // ok + default: + c.ctxt.Diag("zero-width instruction\n%v", p) + } + } + } + loop++ + } + + // Round the symbol size up to funcAlign before growing and filling cursym.P. + c.cursym.Size = int64(len(buffer)) + if c.cursym.Size%funcAlign != 0 { + c.cursym.Size += funcAlign - (c.cursym.Size % funcAlign) + } + c.cursym.Grow(c.cursym.Size) + copy(c.cursym.P, buffer) + + // Mark nonpreemptible instruction sequences. + // We use REGTMP as a scratch register during call injection, + // so instruction sequences that use REGTMP are unsafe to + // preempt asynchronously. 
+ obj.MarkUnsafePoints(c.ctxt, c.cursym.Func().Text, c.newprog, c.isUnsafePoint, nil) +} + +// isUnsafePoint reports whether p is an unsafe point, i.e. whether any of its +// operands (From, To, Reg or an entry of RestArgs) names REGTMP or p carries +// the USETMP mark. +func (c *ctxtz) isUnsafePoint(p *obj.Prog) bool { + if p.From.Reg == REGTMP || p.To.Reg == REGTMP || p.Reg == REGTMP { + return true + } + for _, a := range p.RestArgs { + if a.Reg == REGTMP { + return true + } + } + return p.Mark&USETMP != 0 +} + +// isint32 reports whether v is representable as a signed 32-bit integer. +func isint32(v int64) bool { + return int64(int32(v)) == v +} + +// isuint32 reports whether v is representable as an unsigned 32-bit integer. +func isuint32(v uint64) bool { + return uint64(uint32(v)) == v +} + +// aclass returns the operand class (a C_* constant) of a. +// For most memory and constant operands it also records the resolved offset +// in c.instoffset, and for NAME_AUTO and NAME_PARAM operands based on REGSP it +// clears a.Reg. Operands that match no case are classified as C_GOK. +func (c *ctxtz) aclass(a *obj.Addr) int { + switch a.Type { + case obj.TYPE_NONE: + return C_NONE + + case obj.TYPE_REG: + // Classify by the register range a.Reg falls in. + if REG_R0 <= a.Reg && a.Reg <= REG_R15 { + return C_REG + } + if REG_F0 <= a.Reg && a.Reg <= REG_F15 { + return C_FREG + } + if REG_AR0 <= a.Reg && a.Reg <= REG_AR15 { + return C_AREG + } + if REG_V0 <= a.Reg && a.Reg <= REG_V31 { + return C_VREG + } + return C_GOK + + case obj.TYPE_MEM: + switch a.Name { + case obj.NAME_EXTERN, + obj.NAME_STATIC: + if a.Sym == nil { + // must have a symbol + break + } + c.instoffset = a.Offset + if a.Sym.Type == objabi.STLSBSS { + if c.ctxt.Flag_shared { + return C_TLS_IE // initial exec model + } + return C_TLS_LE // local exec model + } + return C_ADDR + + case obj.NAME_GOTREF: + return C_GOTADDR + + case obj.NAME_AUTO: + if a.Reg == REGSP { + // unset base register for better printing, since + // a.Offset is still relative to pseudo-SP. + a.Reg = obj.REG_NONE + } + // The frame size is folded into the offset; offsets in [-BIG, BIG) get the short class. + c.instoffset = int64(c.autosize) + a.Offset + if c.instoffset >= -BIG && c.instoffset < BIG { + return C_SAUTO + } + return C_LAUTO + + case obj.NAME_PARAM: + if a.Reg == REGSP { + // unset base register for better printing, since + // a.Offset is still relative to pseudo-FP. 
+ a.Reg = obj.REG_NONE + } + c.instoffset = int64(c.autosize) + a.Offset + c.ctxt.Arch.FixedFrameSize + if c.instoffset >= -BIG && c.instoffset < BIG { + return C_SAUTO + } + return C_LAUTO + + case obj.NAME_NONE: + c.instoffset = a.Offset + if c.instoffset == 0 { + return C_ZOREG + } + if c.instoffset >= -BIG && c.instoffset < BIG { + return C_SOREG + } + return C_LOREG + } + + return C_GOK + + case obj.TYPE_TEXTSIZE: + return C_TEXTSIZE + + case obj.TYPE_FCONST: + if f64, ok := a.Val.(float64); ok && math.Float64bits(f64) == 0 { + return C_ZCON + } + c.ctxt.Diag("cannot handle the floating point constant %v", a.Val) + + case obj.TYPE_CONST, + obj.TYPE_ADDR: + switch a.Name { + case obj.NAME_NONE: + c.instoffset = a.Offset + if a.Reg != 0 { + if -BIG <= c.instoffset && c.instoffset <= BIG { + return C_SACON + } + if isint32(c.instoffset) { + return C_LACON + } + return C_DACON + } + + case obj.NAME_EXTERN, + obj.NAME_STATIC: + s := a.Sym + if s == nil { + return C_GOK + } + c.instoffset = a.Offset + + return C_SYMADDR + + case obj.NAME_AUTO: + if a.Reg == REGSP { + // unset base register for better printing, since + // a.Offset is still relative to pseudo-SP. + a.Reg = obj.REG_NONE + } + c.instoffset = int64(c.autosize) + a.Offset + if c.instoffset >= -BIG && c.instoffset < BIG { + return C_SACON + } + return C_LACON + + case obj.NAME_PARAM: + if a.Reg == REGSP { + // unset base register for better printing, since + // a.Offset is still relative to pseudo-FP. 
+ a.Reg = obj.REG_NONE + } + c.instoffset = int64(c.autosize) + a.Offset + c.ctxt.Arch.FixedFrameSize + if c.instoffset >= -BIG && c.instoffset < BIG { + return C_SACON + } + return C_LACON + + default: + return C_GOK + } + + if c.instoffset == 0 { + return C_ZCON + } + if c.instoffset >= 0 { + if c.instoffset <= 0x7fff { + return C_SCON + } + if c.instoffset <= 0xffff { + return C_ANDCON + } + if c.instoffset&0xffff == 0 && isuint32(uint64(c.instoffset)) { /* && (instoffset & (1<<31)) == 0) */ + return C_UCON + } + if isint32(c.instoffset) || isuint32(uint64(c.instoffset)) { + return C_LCON + } + return C_DCON + } + + if c.instoffset >= -0x8000 { + return C_ADDCON + } + if c.instoffset&0xffff == 0 && isint32(c.instoffset) { + return C_UCON + } + if isint32(c.instoffset) { + return C_LCON + } + return C_DCON + + case obj.TYPE_BRANCH: + return C_SBRA + } + + return C_GOK +} + +func (c *ctxtz) oplook(p *obj.Prog) *Optab { + // Return cached optab entry if available. + if p.Optab != 0 { + return &optab[p.Optab-1] + } + if len(p.RestArgs) > 3 { + c.ctxt.Diag("too many RestArgs: got %v, maximum is 3\n", len(p.RestArgs)) + return nil + } + + // Initialize classes for all arguments. + p.From.Class = int8(c.aclass(&p.From) + 1) + p.To.Class = int8(c.aclass(&p.To) + 1) + for i := range p.RestArgs { + p.RestArgs[i].Addr.Class = int8(c.aclass(&p.RestArgs[i].Addr) + 1) + } + + // Mirrors the argument list in Optab. + args := [...]int8{ + p.From.Class - 1, + C_NONE, // p.Reg + C_NONE, // p.RestArgs[0] + C_NONE, // p.RestArgs[1] + C_NONE, // p.RestArgs[2] + p.To.Class - 1, + } + // Fill in argument class for p.Reg. + switch { + case REG_R0 <= p.Reg && p.Reg <= REG_R15: + args[1] = C_REG + case REG_V0 <= p.Reg && p.Reg <= REG_V31: + args[1] = C_VREG + case REG_F0 <= p.Reg && p.Reg <= REG_F15: + args[1] = C_FREG + case REG_AR0 <= p.Reg && p.Reg <= REG_AR15: + args[1] = C_AREG + } + // Fill in argument classes for p.RestArgs. 
+ for i, a := range p.RestArgs { + args[2+i] = a.Class - 1 + } + + // Lookup op in optab. + ops := oprange[p.As&obj.AMask] + cmp := [len(args)]*[C_NCLASS]bool{} + for i := range cmp { + cmp[i] = &xcmp[args[i]] + } + for i := range ops { + op := &ops[i] + if cmp[0][op.a1] && cmp[1][op.a2] && + cmp[2][op.a3] && cmp[3][op.a4] && + cmp[4][op.a5] && cmp[5][op.a6] { + p.Optab = uint16(cap(optab) - cap(ops) + i + 1) + return op + } + } + + // Cannot find a case; abort. + s := "" + for _, a := range args { + s += fmt.Sprintf(" %v", DRconv(int(a))) + } + c.ctxt.Diag("illegal combination %v%v\n", p.As, s) + c.ctxt.Diag("prog: %v\n", p) + return nil +} + +func cmp(a int, b int) bool { + if a == b { + return true + } + switch a { + case C_DCON: + if b == C_LCON { + return true + } + fallthrough + case C_LCON: + if b == C_ZCON || b == C_SCON || b == C_UCON || b == C_ADDCON || b == C_ANDCON { + return true + } + + case C_ADDCON: + if b == C_ZCON || b == C_SCON { + return true + } + + case C_ANDCON: + if b == C_ZCON || b == C_SCON { + return true + } + + case C_UCON: + if b == C_ZCON || b == C_SCON { + return true + } + + case C_SCON: + if b == C_ZCON { + return true + } + + case C_LACON: + if b == C_SACON { + return true + } + + case C_LBRA: + if b == C_SBRA { + return true + } + + case C_LAUTO: + if b == C_SAUTO { + return true + } + + case C_LOREG: + if b == C_ZOREG || b == C_SOREG { + return true + } + + case C_SOREG: + if b == C_ZOREG { + return true + } + + case C_ANY: + return true + } + + return false +} + +type ocmp []Optab + +func (x ocmp) Len() int { + return len(x) +} + +func (x ocmp) Swap(i, j int) { + x[i], x[j] = x[j], x[i] +} + +func (x ocmp) Less(i, j int) bool { + p1 := &x[i] + p2 := &x[j] + n := int(p1.as) - int(p2.as) + if n != 0 { + return n < 0 + } + n = int(p1.a1) - int(p2.a1) + if n != 0 { + return n < 0 + } + n = int(p1.a2) - int(p2.a2) + if n != 0 { + return n < 0 + } + n = int(p1.a3) - int(p2.a3) + if n != 0 { + return n < 0 + } + n = int(p1.a4) - 
int(p2.a4) + if n != 0 { + return n < 0 + } + return false +} +func opset(a, b obj.As) { + oprange[a&obj.AMask] = oprange[b&obj.AMask] +} + +func buildop(ctxt *obj.Link) { + if oprange[AORW&obj.AMask] != nil { + // Already initialized; stop now. + // This happens in the cmd/asm tests, + // each of which re-initializes the arch. + return + } + + for i := 0; i < C_NCLASS; i++ { + for n := 0; n < C_NCLASS; n++ { + if cmp(n, i) { + xcmp[i][n] = true + } + } + } + sort.Sort(ocmp(optab)) + for i := 0; i < len(optab); i++ { + r := optab[i].as + start := i + for ; i+1 < len(optab); i++ { + if optab[i+1].as != r { + break + } + } + oprange[r&obj.AMask] = optab[start : i+1] + + // opset() aliases optab ranges for similar instructions, to reduce the number of optabs in the array. + // oprange[] is used by oplook() to find the Optab entry that applies to a given Prog. + switch r { + case AADD: + opset(AADDC, r) + opset(AADDW, r) + opset(AADDE, r) + opset(AMULLD, r) + opset(AMULLW, r) + case ADIVW: + opset(ADIVD, r) + opset(ADIVDU, r) + opset(ADIVWU, r) + opset(AMODD, r) + opset(AMODDU, r) + opset(AMODW, r) + opset(AMODWU, r) + case AMULHD: + opset(AMULHDU, r) + case AMOVBZ: + opset(AMOVH, r) + opset(AMOVHZ, r) + case ALA: + opset(ALAY, r) + case AMVC: + opset(AMVCIN, r) + opset(ACLC, r) + opset(AXC, r) + opset(AOC, r) + opset(ANC, r) + case ASTCK: + opset(ASTCKC, r) + opset(ASTCKE, r) + opset(ASTCKF, r) + case ALAAG: + opset(ALAA, r) + opset(ALAAL, r) + opset(ALAALG, r) + opset(ALAN, r) + opset(ALANG, r) + opset(ALAX, r) + opset(ALAXG, r) + opset(ALAO, r) + opset(ALAOG, r) + case ASTMG: + opset(ASTMY, r) + case ALMG: + opset(ALMY, r) + case ABEQ: + opset(ABGE, r) + opset(ABGT, r) + opset(ABLE, r) + opset(ABLT, r) + opset(ABNE, r) + opset(ABVC, r) + opset(ABVS, r) + opset(ABLEU, r) + opset(ABLTU, r) + case ABR: + opset(ABL, r) + case ABC: + opset(ABCL, r) + case AFABS: + opset(AFNABS, r) + opset(ALPDFR, r) + opset(ALNDFR, r) + opset(AFNEG, r) + opset(AFNEGS, r) + opset(ALEDBR, 
r) + opset(ALDEBR, r) + opset(AFSQRT, r) + opset(AFSQRTS, r) + case AFADD: + opset(AFADDS, r) + opset(AFDIV, r) + opset(AFDIVS, r) + opset(AFSUB, r) + opset(AFSUBS, r) + case AFMADD: + opset(AFMADDS, r) + opset(AFMSUB, r) + opset(AFMSUBS, r) + case AFMUL: + opset(AFMULS, r) + case AFCMPO: + opset(AFCMPU, r) + opset(ACEBR, r) + case AAND: + opset(AOR, r) + opset(AXOR, r) + case AANDW: + opset(AORW, r) + opset(AXORW, r) + case ASLD: + opset(ASRD, r) + opset(ASLW, r) + opset(ASRW, r) + opset(ASRAD, r) + opset(ASRAW, r) + opset(ARLL, r) + opset(ARLLG, r) + case ARNSBG: + opset(ARXSBG, r) + opset(AROSBG, r) + opset(ARNSBGT, r) + opset(ARXSBGT, r) + opset(AROSBGT, r) + opset(ARISBG, r) + opset(ARISBGN, r) + opset(ARISBGZ, r) + opset(ARISBGNZ, r) + opset(ARISBHG, r) + opset(ARISBLG, r) + opset(ARISBHGZ, r) + opset(ARISBLGZ, r) + case ACSG: + opset(ACS, r) + case ASUB: + opset(ASUBC, r) + opset(ASUBE, r) + opset(ASUBW, r) + case ANEG: + opset(ANEGW, r) + case AFMOVD: + opset(AFMOVS, r) + case AMOVDBR: + opset(AMOVWBR, r) + case ACMP: + opset(ACMPW, r) + case ACMPU: + opset(ACMPWU, r) + case ATMHH: + opset(ATMHL, r) + opset(ATMLH, r) + opset(ATMLL, r) + case ACEFBRA: + opset(ACDFBRA, r) + opset(ACEGBRA, r) + opset(ACDGBRA, r) + opset(ACELFBR, r) + opset(ACDLFBR, r) + opset(ACELGBR, r) + opset(ACDLGBR, r) + case ACFEBRA: + opset(ACFDBRA, r) + opset(ACGEBRA, r) + opset(ACGDBRA, r) + opset(ACLFEBR, r) + opset(ACLFDBR, r) + opset(ACLGEBR, r) + opset(ACLGDBR, r) + case AFIEBR: + opset(AFIDBR, r) + case ACMPBEQ: + opset(ACMPBGE, r) + opset(ACMPBGT, r) + opset(ACMPBLE, r) + opset(ACMPBLT, r) + opset(ACMPBNE, r) + case ACMPUBEQ: + opset(ACMPUBGE, r) + opset(ACMPUBGT, r) + opset(ACMPUBLE, r) + opset(ACMPUBLT, r) + opset(ACMPUBNE, r) + case ACGRJ: + opset(ACRJ, r) + case ACLGRJ: + opset(ACLRJ, r) + case ACGIJ: + opset(ACIJ, r) + case ACLGIJ: + opset(ACLIJ, r) + case AMOVDEQ: + opset(AMOVDGE, r) + opset(AMOVDGT, r) + opset(AMOVDLE, r) + opset(AMOVDLT, r) + opset(AMOVDNE, r) + case 
ALOCGR: + opset(ALOCR, r) + case ALTDBR: + opset(ALTEBR, r) + case ATCDB: + opset(ATCEB, r) + case AVL: + opset(AVLLEZB, r) + opset(AVLLEZH, r) + opset(AVLLEZF, r) + opset(AVLLEZG, r) + opset(AVLREPB, r) + opset(AVLREPH, r) + opset(AVLREPF, r) + opset(AVLREPG, r) + case AVLEG: + opset(AVLBB, r) + opset(AVLEB, r) + opset(AVLEH, r) + opset(AVLEF, r) + opset(AVLEG, r) + opset(AVLREP, r) + case AVSTEG: + opset(AVSTEB, r) + opset(AVSTEH, r) + opset(AVSTEF, r) + case AVSCEG: + opset(AVSCEF, r) + case AVGEG: + opset(AVGEF, r) + case AVESLG: + opset(AVESLB, r) + opset(AVESLH, r) + opset(AVESLF, r) + opset(AVERLLB, r) + opset(AVERLLH, r) + opset(AVERLLF, r) + opset(AVERLLG, r) + opset(AVESRAB, r) + opset(AVESRAH, r) + opset(AVESRAF, r) + opset(AVESRAG, r) + opset(AVESRLB, r) + opset(AVESRLH, r) + opset(AVESRLF, r) + opset(AVESRLG, r) + case AVLGVG: + opset(AVLGVB, r) + opset(AVLGVH, r) + opset(AVLGVF, r) + case AVLVGG: + opset(AVLVGB, r) + opset(AVLVGH, r) + opset(AVLVGF, r) + case AVZERO: + opset(AVONE, r) + case AVREPIG: + opset(AVREPIB, r) + opset(AVREPIH, r) + opset(AVREPIF, r) + case AVLEIG: + opset(AVLEIB, r) + opset(AVLEIH, r) + opset(AVLEIF, r) + case AVGMG: + opset(AVGMB, r) + opset(AVGMH, r) + opset(AVGMF, r) + case AVREPG: + opset(AVREPB, r) + opset(AVREPH, r) + opset(AVREPF, r) + case AVERIMG: + opset(AVERIMB, r) + opset(AVERIMH, r) + opset(AVERIMF, r) + case AVFTCIDB: + opset(AWFTCIDB, r) + case AVLR: + opset(AVUPHB, r) + opset(AVUPHH, r) + opset(AVUPHF, r) + opset(AVUPLHB, r) + opset(AVUPLHH, r) + opset(AVUPLHF, r) + opset(AVUPLB, r) + opset(AVUPLHW, r) + opset(AVUPLF, r) + opset(AVUPLLB, r) + opset(AVUPLLH, r) + opset(AVUPLLF, r) + opset(AVCLZB, r) + opset(AVCLZH, r) + opset(AVCLZF, r) + opset(AVCLZG, r) + opset(AVCTZB, r) + opset(AVCTZH, r) + opset(AVCTZF, r) + opset(AVCTZG, r) + opset(AVLDEB, r) + opset(AWLDEB, r) + opset(AVFLCDB, r) + opset(AWFLCDB, r) + opset(AVFLNDB, r) + opset(AWFLNDB, r) + opset(AVFLPDB, r) + opset(AWFLPDB, r) + opset(AVFSQDB, r) + 
opset(AWFSQDB, r) + opset(AVISTRB, r) + opset(AVISTRH, r) + opset(AVISTRF, r) + opset(AVISTRBS, r) + opset(AVISTRHS, r) + opset(AVISTRFS, r) + opset(AVLCB, r) + opset(AVLCH, r) + opset(AVLCF, r) + opset(AVLCG, r) + opset(AVLPB, r) + opset(AVLPH, r) + opset(AVLPF, r) + opset(AVLPG, r) + opset(AVPOPCT, r) + opset(AVSEGB, r) + opset(AVSEGH, r) + opset(AVSEGF, r) + case AVECG: + opset(AVECB, r) + opset(AVECH, r) + opset(AVECF, r) + opset(AVECLB, r) + opset(AVECLH, r) + opset(AVECLF, r) + opset(AVECLG, r) + opset(AWFCDB, r) + opset(AWFKDB, r) + case AVCEQG: + opset(AVCEQB, r) + opset(AVCEQH, r) + opset(AVCEQF, r) + opset(AVCEQBS, r) + opset(AVCEQHS, r) + opset(AVCEQFS, r) + opset(AVCEQGS, r) + opset(AVCHB, r) + opset(AVCHH, r) + opset(AVCHF, r) + opset(AVCHG, r) + opset(AVCHBS, r) + opset(AVCHHS, r) + opset(AVCHFS, r) + opset(AVCHGS, r) + opset(AVCHLB, r) + opset(AVCHLH, r) + opset(AVCHLF, r) + opset(AVCHLG, r) + opset(AVCHLBS, r) + opset(AVCHLHS, r) + opset(AVCHLFS, r) + opset(AVCHLGS, r) + case AVFAEF: + opset(AVFAEB, r) + opset(AVFAEH, r) + opset(AVFAEBS, r) + opset(AVFAEHS, r) + opset(AVFAEFS, r) + opset(AVFAEZB, r) + opset(AVFAEZH, r) + opset(AVFAEZF, r) + opset(AVFAEZBS, r) + opset(AVFAEZHS, r) + opset(AVFAEZFS, r) + opset(AVFEEB, r) + opset(AVFEEH, r) + opset(AVFEEF, r) + opset(AVFEEBS, r) + opset(AVFEEHS, r) + opset(AVFEEFS, r) + opset(AVFEEZB, r) + opset(AVFEEZH, r) + opset(AVFEEZF, r) + opset(AVFEEZBS, r) + opset(AVFEEZHS, r) + opset(AVFEEZFS, r) + opset(AVFENEB, r) + opset(AVFENEH, r) + opset(AVFENEF, r) + opset(AVFENEBS, r) + opset(AVFENEHS, r) + opset(AVFENEFS, r) + opset(AVFENEZB, r) + opset(AVFENEZH, r) + opset(AVFENEZF, r) + opset(AVFENEZBS, r) + opset(AVFENEZHS, r) + opset(AVFENEZFS, r) + case AVPKSG: + opset(AVPKSH, r) + opset(AVPKSF, r) + opset(AVPKSHS, r) + opset(AVPKSFS, r) + opset(AVPKSGS, r) + opset(AVPKLSH, r) + opset(AVPKLSF, r) + opset(AVPKLSG, r) + opset(AVPKLSHS, r) + opset(AVPKLSFS, r) + opset(AVPKLSGS, r) + case AVAQ: + opset(AVAB, r) + 
opset(AVAH, r) + opset(AVAF, r) + opset(AVAG, r) + opset(AVACCB, r) + opset(AVACCH, r) + opset(AVACCF, r) + opset(AVACCG, r) + opset(AVACCQ, r) + opset(AVN, r) + opset(AVNC, r) + opset(AVAVGB, r) + opset(AVAVGH, r) + opset(AVAVGF, r) + opset(AVAVGG, r) + opset(AVAVGLB, r) + opset(AVAVGLH, r) + opset(AVAVGLF, r) + opset(AVAVGLG, r) + opset(AVCKSM, r) + opset(AVX, r) + opset(AVFADB, r) + opset(AWFADB, r) + opset(AVFCEDB, r) + opset(AVFCEDBS, r) + opset(AWFCEDB, r) + opset(AWFCEDBS, r) + opset(AVFCHDB, r) + opset(AVFCHDBS, r) + opset(AWFCHDB, r) + opset(AWFCHDBS, r) + opset(AVFCHEDB, r) + opset(AVFCHEDBS, r) + opset(AWFCHEDB, r) + opset(AWFCHEDBS, r) + opset(AVFMDB, r) + opset(AWFMDB, r) + opset(AVGFMB, r) + opset(AVGFMH, r) + opset(AVGFMF, r) + opset(AVGFMG, r) + opset(AVMXB, r) + opset(AVMXH, r) + opset(AVMXF, r) + opset(AVMXG, r) + opset(AVMXLB, r) + opset(AVMXLH, r) + opset(AVMXLF, r) + opset(AVMXLG, r) + opset(AVMNB, r) + opset(AVMNH, r) + opset(AVMNF, r) + opset(AVMNG, r) + opset(AVMNLB, r) + opset(AVMNLH, r) + opset(AVMNLF, r) + opset(AVMNLG, r) + opset(AVMRHB, r) + opset(AVMRHH, r) + opset(AVMRHF, r) + opset(AVMRHG, r) + opset(AVMRLB, r) + opset(AVMRLH, r) + opset(AVMRLF, r) + opset(AVMRLG, r) + opset(AVMEB, r) + opset(AVMEH, r) + opset(AVMEF, r) + opset(AVMLEB, r) + opset(AVMLEH, r) + opset(AVMLEF, r) + opset(AVMOB, r) + opset(AVMOH, r) + opset(AVMOF, r) + opset(AVMLOB, r) + opset(AVMLOH, r) + opset(AVMLOF, r) + opset(AVMHB, r) + opset(AVMHH, r) + opset(AVMHF, r) + opset(AVMLHB, r) + opset(AVMLHH, r) + opset(AVMLHF, r) + opset(AVMLH, r) + opset(AVMLHW, r) + opset(AVMLF, r) + opset(AVNO, r) + opset(AVO, r) + opset(AVPKH, r) + opset(AVPKF, r) + opset(AVPKG, r) + opset(AVSUMGH, r) + opset(AVSUMGF, r) + opset(AVSUMQF, r) + opset(AVSUMQG, r) + opset(AVSUMB, r) + opset(AVSUMH, r) + case AVERLLVG: + opset(AVERLLVB, r) + opset(AVERLLVH, r) + opset(AVERLLVF, r) + opset(AVESLVB, r) + opset(AVESLVH, r) + opset(AVESLVF, r) + opset(AVESLVG, r) + opset(AVESRAVB, r) + 
opset(AVESRAVH, r) + opset(AVESRAVF, r) + opset(AVESRAVG, r) + opset(AVESRLVB, r) + opset(AVESRLVH, r) + opset(AVESRLVF, r) + opset(AVESRLVG, r) + opset(AVFDDB, r) + opset(AWFDDB, r) + opset(AVFSDB, r) + opset(AWFSDB, r) + opset(AVSL, r) + opset(AVSLB, r) + opset(AVSRA, r) + opset(AVSRAB, r) + opset(AVSRL, r) + opset(AVSRLB, r) + opset(AVSB, r) + opset(AVSH, r) + opset(AVSF, r) + opset(AVSG, r) + opset(AVSQ, r) + opset(AVSCBIB, r) + opset(AVSCBIH, r) + opset(AVSCBIF, r) + opset(AVSCBIG, r) + opset(AVSCBIQ, r) + case AVACQ: + opset(AVACCCQ, r) + opset(AVGFMAB, r) + opset(AVGFMAH, r) + opset(AVGFMAF, r) + opset(AVGFMAG, r) + opset(AVMALB, r) + opset(AVMALHW, r) + opset(AVMALF, r) + opset(AVMAHB, r) + opset(AVMAHH, r) + opset(AVMAHF, r) + opset(AVMALHB, r) + opset(AVMALHH, r) + opset(AVMALHF, r) + opset(AVMAEB, r) + opset(AVMAEH, r) + opset(AVMAEF, r) + opset(AVMALEB, r) + opset(AVMALEH, r) + opset(AVMALEF, r) + opset(AVMAOB, r) + opset(AVMAOH, r) + opset(AVMAOF, r) + opset(AVMALOB, r) + opset(AVMALOH, r) + opset(AVMALOF, r) + opset(AVSTRCB, r) + opset(AVSTRCH, r) + opset(AVSTRCF, r) + opset(AVSTRCBS, r) + opset(AVSTRCHS, r) + opset(AVSTRCFS, r) + opset(AVSTRCZB, r) + opset(AVSTRCZH, r) + opset(AVSTRCZF, r) + opset(AVSTRCZBS, r) + opset(AVSTRCZHS, r) + opset(AVSTRCZFS, r) + opset(AVSBCBIQ, r) + opset(AVSBIQ, r) + opset(AVMSLG, r) + opset(AVMSLEG, r) + opset(AVMSLOG, r) + opset(AVMSLEOG, r) + case AVSEL: + opset(AVFMADB, r) + opset(AWFMADB, r) + opset(AVFMSDB, r) + opset(AWFMSDB, r) + opset(AVPERM, r) + } + } +} + +const ( + op_A uint32 = 0x5A00 // FORMAT_RX1 ADD (32) + op_AD uint32 = 0x6A00 // FORMAT_RX1 ADD NORMALIZED (long HFP) + op_ADB uint32 = 0xED1A // FORMAT_RXE ADD (long BFP) + op_ADBR uint32 = 0xB31A // FORMAT_RRE ADD (long BFP) + op_ADR uint32 = 0x2A00 // FORMAT_RR ADD NORMALIZED (long HFP) + op_ADTR uint32 = 0xB3D2 // FORMAT_RRF1 ADD (long DFP) + op_ADTRA uint32 = 0xB3D2 // FORMAT_RRF1 ADD (long DFP) + op_AE uint32 = 0x7A00 // FORMAT_RX1 ADD NORMALIZED 
(short HFP) + op_AEB uint32 = 0xED0A // FORMAT_RXE ADD (short BFP) + op_AEBR uint32 = 0xB30A // FORMAT_RRE ADD (short BFP) + op_AER uint32 = 0x3A00 // FORMAT_RR ADD NORMALIZED (short HFP) + op_AFI uint32 = 0xC209 // FORMAT_RIL1 ADD IMMEDIATE (32) + op_AG uint32 = 0xE308 // FORMAT_RXY1 ADD (64) + op_AGF uint32 = 0xE318 // FORMAT_RXY1 ADD (64<-32) + op_AGFI uint32 = 0xC208 // FORMAT_RIL1 ADD IMMEDIATE (64<-32) + op_AGFR uint32 = 0xB918 // FORMAT_RRE ADD (64<-32) + op_AGHI uint32 = 0xA70B // FORMAT_RI1 ADD HALFWORD IMMEDIATE (64) + op_AGHIK uint32 = 0xECD9 // FORMAT_RIE4 ADD IMMEDIATE (64<-16) + op_AGR uint32 = 0xB908 // FORMAT_RRE ADD (64) + op_AGRK uint32 = 0xB9E8 // FORMAT_RRF1 ADD (64) + op_AGSI uint32 = 0xEB7A // FORMAT_SIY ADD IMMEDIATE (64<-8) + op_AH uint32 = 0x4A00 // FORMAT_RX1 ADD HALFWORD + op_AHHHR uint32 = 0xB9C8 // FORMAT_RRF1 ADD HIGH (32) + op_AHHLR uint32 = 0xB9D8 // FORMAT_RRF1 ADD HIGH (32) + op_AHI uint32 = 0xA70A // FORMAT_RI1 ADD HALFWORD IMMEDIATE (32) + op_AHIK uint32 = 0xECD8 // FORMAT_RIE4 ADD IMMEDIATE (32<-16) + op_AHY uint32 = 0xE37A // FORMAT_RXY1 ADD HALFWORD + op_AIH uint32 = 0xCC08 // FORMAT_RIL1 ADD IMMEDIATE HIGH (32) + op_AL uint32 = 0x5E00 // FORMAT_RX1 ADD LOGICAL (32) + op_ALC uint32 = 0xE398 // FORMAT_RXY1 ADD LOGICAL WITH CARRY (32) + op_ALCG uint32 = 0xE388 // FORMAT_RXY1 ADD LOGICAL WITH CARRY (64) + op_ALCGR uint32 = 0xB988 // FORMAT_RRE ADD LOGICAL WITH CARRY (64) + op_ALCR uint32 = 0xB998 // FORMAT_RRE ADD LOGICAL WITH CARRY (32) + op_ALFI uint32 = 0xC20B // FORMAT_RIL1 ADD LOGICAL IMMEDIATE (32) + op_ALG uint32 = 0xE30A // FORMAT_RXY1 ADD LOGICAL (64) + op_ALGF uint32 = 0xE31A // FORMAT_RXY1 ADD LOGICAL (64<-32) + op_ALGFI uint32 = 0xC20A // FORMAT_RIL1 ADD LOGICAL IMMEDIATE (64<-32) + op_ALGFR uint32 = 0xB91A // FORMAT_RRE ADD LOGICAL (64<-32) + op_ALGHSIK uint32 = 0xECDB // FORMAT_RIE4 ADD LOGICAL WITH SIGNED IMMEDIATE (64<-16) + op_ALGR uint32 = 0xB90A // FORMAT_RRE ADD LOGICAL (64) + op_ALGRK uint32 = 0xB9EA // 
FORMAT_RRF1 ADD LOGICAL (64) + op_ALGSI uint32 = 0xEB7E // FORMAT_SIY ADD LOGICAL WITH SIGNED IMMEDIATE (64<-8) + op_ALHHHR uint32 = 0xB9CA // FORMAT_RRF1 ADD LOGICAL HIGH (32) + op_ALHHLR uint32 = 0xB9DA // FORMAT_RRF1 ADD LOGICAL HIGH (32) + op_ALHSIK uint32 = 0xECDA // FORMAT_RIE4 ADD LOGICAL WITH SIGNED IMMEDIATE (32<-16) + op_ALR uint32 = 0x1E00 // FORMAT_RR ADD LOGICAL (32) + op_ALRK uint32 = 0xB9FA // FORMAT_RRF1 ADD LOGICAL (32) + op_ALSI uint32 = 0xEB6E // FORMAT_SIY ADD LOGICAL WITH SIGNED IMMEDIATE (32<-8) + op_ALSIH uint32 = 0xCC0A // FORMAT_RIL1 ADD LOGICAL WITH SIGNED IMMEDIATE HIGH (32) + op_ALSIHN uint32 = 0xCC0B // FORMAT_RIL1 ADD LOGICAL WITH SIGNED IMMEDIATE HIGH (32) + op_ALY uint32 = 0xE35E // FORMAT_RXY1 ADD LOGICAL (32) + op_AP uint32 = 0xFA00 // FORMAT_SS2 ADD DECIMAL + op_AR uint32 = 0x1A00 // FORMAT_RR ADD (32) + op_ARK uint32 = 0xB9F8 // FORMAT_RRF1 ADD (32) + op_ASI uint32 = 0xEB6A // FORMAT_SIY ADD IMMEDIATE (32<-8) + op_AU uint32 = 0x7E00 // FORMAT_RX1 ADD UNNORMALIZED (short HFP) + op_AUR uint32 = 0x3E00 // FORMAT_RR ADD UNNORMALIZED (short HFP) + op_AW uint32 = 0x6E00 // FORMAT_RX1 ADD UNNORMALIZED (long HFP) + op_AWR uint32 = 0x2E00 // FORMAT_RR ADD UNNORMALIZED (long HFP) + op_AXBR uint32 = 0xB34A // FORMAT_RRE ADD (extended BFP) + op_AXR uint32 = 0x3600 // FORMAT_RR ADD NORMALIZED (extended HFP) + op_AXTR uint32 = 0xB3DA // FORMAT_RRF1 ADD (extended DFP) + op_AXTRA uint32 = 0xB3DA // FORMAT_RRF1 ADD (extended DFP) + op_AY uint32 = 0xE35A // FORMAT_RXY1 ADD (32) + op_BAKR uint32 = 0xB240 // FORMAT_RRE BRANCH AND STACK + op_BAL uint32 = 0x4500 // FORMAT_RX1 BRANCH AND LINK + op_BALR uint32 = 0x0500 // FORMAT_RR BRANCH AND LINK + op_BAS uint32 = 0x4D00 // FORMAT_RX1 BRANCH AND SAVE + op_BASR uint32 = 0x0D00 // FORMAT_RR BRANCH AND SAVE + op_BASSM uint32 = 0x0C00 // FORMAT_RR BRANCH AND SAVE AND SET MODE + op_BC uint32 = 0x4700 // FORMAT_RX2 BRANCH ON CONDITION + op_BCR uint32 = 0x0700 // FORMAT_RR BRANCH ON CONDITION + op_BCT uint32 
= 0x4600 // FORMAT_RX1 BRANCH ON COUNT (32) + op_BCTG uint32 = 0xE346 // FORMAT_RXY1 BRANCH ON COUNT (64) + op_BCTGR uint32 = 0xB946 // FORMAT_RRE BRANCH ON COUNT (64) + op_BCTR uint32 = 0x0600 // FORMAT_RR BRANCH ON COUNT (32) + op_BPP uint32 = 0xC700 // FORMAT_SMI BRANCH PREDICTION PRELOAD + op_BPRP uint32 = 0xC500 // FORMAT_MII BRANCH PREDICTION RELATIVE PRELOAD + op_BRAS uint32 = 0xA705 // FORMAT_RI2 BRANCH RELATIVE AND SAVE + op_BRASL uint32 = 0xC005 // FORMAT_RIL2 BRANCH RELATIVE AND SAVE LONG + op_BRC uint32 = 0xA704 // FORMAT_RI3 BRANCH RELATIVE ON CONDITION + op_BRCL uint32 = 0xC004 // FORMAT_RIL3 BRANCH RELATIVE ON CONDITION LONG + op_BRCT uint32 = 0xA706 // FORMAT_RI2 BRANCH RELATIVE ON COUNT (32) + op_BRCTG uint32 = 0xA707 // FORMAT_RI2 BRANCH RELATIVE ON COUNT (64) + op_BRCTH uint32 = 0xCC06 // FORMAT_RIL2 BRANCH RELATIVE ON COUNT HIGH (32) + op_BRXH uint32 = 0x8400 // FORMAT_RSI BRANCH RELATIVE ON INDEX HIGH (32) + op_BRXHG uint32 = 0xEC44 // FORMAT_RIE5 BRANCH RELATIVE ON INDEX HIGH (64) + op_BRXLE uint32 = 0x8500 // FORMAT_RSI BRANCH RELATIVE ON INDEX LOW OR EQ. (32) + op_BRXLG uint32 = 0xEC45 // FORMAT_RIE5 BRANCH RELATIVE ON INDEX LOW OR EQ. 
(64) + op_BSA uint32 = 0xB25A // FORMAT_RRE BRANCH AND SET AUTHORITY + op_BSG uint32 = 0xB258 // FORMAT_RRE BRANCH IN SUBSPACE GROUP + op_BSM uint32 = 0x0B00 // FORMAT_RR BRANCH AND SET MODE + op_BXH uint32 = 0x8600 // FORMAT_RS1 BRANCH ON INDEX HIGH (32) + op_BXHG uint32 = 0xEB44 // FORMAT_RSY1 BRANCH ON INDEX HIGH (64) + op_BXLE uint32 = 0x8700 // FORMAT_RS1 BRANCH ON INDEX LOW OR EQUAL (32) + op_BXLEG uint32 = 0xEB45 // FORMAT_RSY1 BRANCH ON INDEX LOW OR EQUAL (64) + op_C uint32 = 0x5900 // FORMAT_RX1 COMPARE (32) + op_CD uint32 = 0x6900 // FORMAT_RX1 COMPARE (long HFP) + op_CDB uint32 = 0xED19 // FORMAT_RXE COMPARE (long BFP) + op_CDBR uint32 = 0xB319 // FORMAT_RRE COMPARE (long BFP) + op_CDFBR uint32 = 0xB395 // FORMAT_RRE CONVERT FROM FIXED (32 to long BFP) + op_CDFBRA uint32 = 0xB395 // FORMAT_RRF5 CONVERT FROM FIXED (32 to long BFP) + op_CDFR uint32 = 0xB3B5 // FORMAT_RRE CONVERT FROM FIXED (32 to long HFP) + op_CDFTR uint32 = 0xB951 // FORMAT_RRE CONVERT FROM FIXED (32 to long DFP) + op_CDGBR uint32 = 0xB3A5 // FORMAT_RRE CONVERT FROM FIXED (64 to long BFP) + op_CDGBRA uint32 = 0xB3A5 // FORMAT_RRF5 CONVERT FROM FIXED (64 to long BFP) + op_CDGR uint32 = 0xB3C5 // FORMAT_RRE CONVERT FROM FIXED (64 to long HFP) + op_CDGTR uint32 = 0xB3F1 // FORMAT_RRE CONVERT FROM FIXED (64 to long DFP) + op_CDGTRA uint32 = 0xB3F1 // FORMAT_RRF5 CONVERT FROM FIXED (64 to long DFP) + op_CDLFBR uint32 = 0xB391 // FORMAT_RRF5 CONVERT FROM LOGICAL (32 to long BFP) + op_CDLFTR uint32 = 0xB953 // FORMAT_RRF5 CONVERT FROM LOGICAL (32 to long DFP) + op_CDLGBR uint32 = 0xB3A1 // FORMAT_RRF5 CONVERT FROM LOGICAL (64 to long BFP) + op_CDLGTR uint32 = 0xB952 // FORMAT_RRF5 CONVERT FROM LOGICAL (64 to long DFP) + op_CDR uint32 = 0x2900 // FORMAT_RR COMPARE (long HFP) + op_CDS uint32 = 0xBB00 // FORMAT_RS1 COMPARE DOUBLE AND SWAP (32) + op_CDSG uint32 = 0xEB3E // FORMAT_RSY1 COMPARE DOUBLE AND SWAP (64) + op_CDSTR uint32 = 0xB3F3 // FORMAT_RRE CONVERT FROM SIGNED PACKED (64 to long DFP) + 
op_CDSY uint32 = 0xEB31 // FORMAT_RSY1 COMPARE DOUBLE AND SWAP (32) + op_CDTR uint32 = 0xB3E4 // FORMAT_RRE COMPARE (long DFP) + op_CDUTR uint32 = 0xB3F2 // FORMAT_RRE CONVERT FROM UNSIGNED PACKED (64 to long DFP) + op_CDZT uint32 = 0xEDAA // FORMAT_RSL CONVERT FROM ZONED (to long DFP) + op_CE uint32 = 0x7900 // FORMAT_RX1 COMPARE (short HFP) + op_CEB uint32 = 0xED09 // FORMAT_RXE COMPARE (short BFP) + op_CEBR uint32 = 0xB309 // FORMAT_RRE COMPARE (short BFP) + op_CEDTR uint32 = 0xB3F4 // FORMAT_RRE COMPARE BIASED EXPONENT (long DFP) + op_CEFBR uint32 = 0xB394 // FORMAT_RRE CONVERT FROM FIXED (32 to short BFP) + op_CEFBRA uint32 = 0xB394 // FORMAT_RRF5 CONVERT FROM FIXED (32 to short BFP) + op_CEFR uint32 = 0xB3B4 // FORMAT_RRE CONVERT FROM FIXED (32 to short HFP) + op_CEGBR uint32 = 0xB3A4 // FORMAT_RRE CONVERT FROM FIXED (64 to short BFP) + op_CEGBRA uint32 = 0xB3A4 // FORMAT_RRF5 CONVERT FROM FIXED (64 to short BFP) + op_CEGR uint32 = 0xB3C4 // FORMAT_RRE CONVERT FROM FIXED (64 to short HFP) + op_CELFBR uint32 = 0xB390 // FORMAT_RRF5 CONVERT FROM LOGICAL (32 to short BFP) + op_CELGBR uint32 = 0xB3A0 // FORMAT_RRF5 CONVERT FROM LOGICAL (64 to short BFP) + op_CER uint32 = 0x3900 // FORMAT_RR COMPARE (short HFP) + op_CEXTR uint32 = 0xB3FC // FORMAT_RRE COMPARE BIASED EXPONENT (extended DFP) + op_CFC uint32 = 0xB21A // FORMAT_S COMPARE AND FORM CODEWORD + op_CFDBR uint32 = 0xB399 // FORMAT_RRF5 CONVERT TO FIXED (long BFP to 32) + op_CFDBRA uint32 = 0xB399 // FORMAT_RRF5 CONVERT TO FIXED (long BFP to 32) + op_CFDR uint32 = 0xB3B9 // FORMAT_RRF5 CONVERT TO FIXED (long HFP to 32) + op_CFDTR uint32 = 0xB941 // FORMAT_RRF5 CONVERT TO FIXED (long DFP to 32) + op_CFEBR uint32 = 0xB398 // FORMAT_RRF5 CONVERT TO FIXED (short BFP to 32) + op_CFEBRA uint32 = 0xB398 // FORMAT_RRF5 CONVERT TO FIXED (short BFP to 32) + op_CFER uint32 = 0xB3B8 // FORMAT_RRF5 CONVERT TO FIXED (short HFP to 32) + op_CFI uint32 = 0xC20D // FORMAT_RIL1 COMPARE IMMEDIATE (32) + op_CFXBR uint32 = 0xB39A 
// FORMAT_RRF5 CONVERT TO FIXED (extended BFP to 32) + op_CFXBRA uint32 = 0xB39A // FORMAT_RRF5 CONVERT TO FIXED (extended BFP to 32) + op_CFXR uint32 = 0xB3BA // FORMAT_RRF5 CONVERT TO FIXED (extended HFP to 32) + op_CFXTR uint32 = 0xB949 // FORMAT_RRF5 CONVERT TO FIXED (extended DFP to 32) + op_CG uint32 = 0xE320 // FORMAT_RXY1 COMPARE (64) + op_CGDBR uint32 = 0xB3A9 // FORMAT_RRF5 CONVERT TO FIXED (long BFP to 64) + op_CGDBRA uint32 = 0xB3A9 // FORMAT_RRF5 CONVERT TO FIXED (long BFP to 64) + op_CGDR uint32 = 0xB3C9 // FORMAT_RRF5 CONVERT TO FIXED (long HFP to 64) + op_CGDTR uint32 = 0xB3E1 // FORMAT_RRF5 CONVERT TO FIXED (long DFP to 64) + op_CGDTRA uint32 = 0xB3E1 // FORMAT_RRF5 CONVERT TO FIXED (long DFP to 64) + op_CGEBR uint32 = 0xB3A8 // FORMAT_RRF5 CONVERT TO FIXED (short BFP to 64) + op_CGEBRA uint32 = 0xB3A8 // FORMAT_RRF5 CONVERT TO FIXED (short BFP to 64) + op_CGER uint32 = 0xB3C8 // FORMAT_RRF5 CONVERT TO FIXED (short HFP to 64) + op_CGF uint32 = 0xE330 // FORMAT_RXY1 COMPARE (64<-32) + op_CGFI uint32 = 0xC20C // FORMAT_RIL1 COMPARE IMMEDIATE (64<-32) + op_CGFR uint32 = 0xB930 // FORMAT_RRE COMPARE (64<-32) + op_CGFRL uint32 = 0xC60C // FORMAT_RIL2 COMPARE RELATIVE LONG (64<-32) + op_CGH uint32 = 0xE334 // FORMAT_RXY1 COMPARE HALFWORD (64<-16) + op_CGHI uint32 = 0xA70F // FORMAT_RI1 COMPARE HALFWORD IMMEDIATE (64<-16) + op_CGHRL uint32 = 0xC604 // FORMAT_RIL2 COMPARE HALFWORD RELATIVE LONG (64<-16) + op_CGHSI uint32 = 0xE558 // FORMAT_SIL COMPARE HALFWORD IMMEDIATE (64<-16) + op_CGIB uint32 = 0xECFC // FORMAT_RIS COMPARE IMMEDIATE AND BRANCH (64<-8) + op_CGIJ uint32 = 0xEC7C // FORMAT_RIE3 COMPARE IMMEDIATE AND BRANCH RELATIVE (64<-8) + op_CGIT uint32 = 0xEC70 // FORMAT_RIE1 COMPARE IMMEDIATE AND TRAP (64<-16) + op_CGR uint32 = 0xB920 // FORMAT_RRE COMPARE (64) + op_CGRB uint32 = 0xECE4 // FORMAT_RRS COMPARE AND BRANCH (64) + op_CGRJ uint32 = 0xEC64 // FORMAT_RIE2 COMPARE AND BRANCH RELATIVE (64) + op_CGRL uint32 = 0xC608 // FORMAT_RIL2 COMPARE 
RELATIVE LONG (64) + op_CGRT uint32 = 0xB960 // FORMAT_RRF3 COMPARE AND TRAP (64) + op_CGXBR uint32 = 0xB3AA // FORMAT_RRF5 CONVERT TO FIXED (extended BFP to 64) + op_CGXBRA uint32 = 0xB3AA // FORMAT_RRF5 CONVERT TO FIXED (extended BFP to 64) + op_CGXR uint32 = 0xB3CA // FORMAT_RRF5 CONVERT TO FIXED (extended HFP to 64) + op_CGXTR uint32 = 0xB3E9 // FORMAT_RRF5 CONVERT TO FIXED (extended DFP to 64) + op_CGXTRA uint32 = 0xB3E9 // FORMAT_RRF5 CONVERT TO FIXED (extended DFP to 64) + op_CH uint32 = 0x4900 // FORMAT_RX1 COMPARE HALFWORD (32<-16) + op_CHF uint32 = 0xE3CD // FORMAT_RXY1 COMPARE HIGH (32) + op_CHHR uint32 = 0xB9CD // FORMAT_RRE COMPARE HIGH (32) + op_CHHSI uint32 = 0xE554 // FORMAT_SIL COMPARE HALFWORD IMMEDIATE (16) + op_CHI uint32 = 0xA70E // FORMAT_RI1 COMPARE HALFWORD IMMEDIATE (32<-16) + op_CHLR uint32 = 0xB9DD // FORMAT_RRE COMPARE HIGH (32) + op_CHRL uint32 = 0xC605 // FORMAT_RIL2 COMPARE HALFWORD RELATIVE LONG (32<-16) + op_CHSI uint32 = 0xE55C // FORMAT_SIL COMPARE HALFWORD IMMEDIATE (32<-16) + op_CHY uint32 = 0xE379 // FORMAT_RXY1 COMPARE HALFWORD (32<-16) + op_CIB uint32 = 0xECFE // FORMAT_RIS COMPARE IMMEDIATE AND BRANCH (32<-8) + op_CIH uint32 = 0xCC0D // FORMAT_RIL1 COMPARE IMMEDIATE HIGH (32) + op_CIJ uint32 = 0xEC7E // FORMAT_RIE3 COMPARE IMMEDIATE AND BRANCH RELATIVE (32<-8) + op_CIT uint32 = 0xEC72 // FORMAT_RIE1 COMPARE IMMEDIATE AND TRAP (32<-16) + op_CKSM uint32 = 0xB241 // FORMAT_RRE CHECKSUM + op_CL uint32 = 0x5500 // FORMAT_RX1 COMPARE LOGICAL (32) + op_CLC uint32 = 0xD500 // FORMAT_SS1 COMPARE LOGICAL (character) + op_CLCL uint32 = 0x0F00 // FORMAT_RR COMPARE LOGICAL LONG + op_CLCLE uint32 = 0xA900 // FORMAT_RS1 COMPARE LOGICAL LONG EXTENDED + op_CLCLU uint32 = 0xEB8F // FORMAT_RSY1 COMPARE LOGICAL LONG UNICODE + op_CLFDBR uint32 = 0xB39D // FORMAT_RRF5 CONVERT TO LOGICAL (long BFP to 32) + op_CLFDTR uint32 = 0xB943 // FORMAT_RRF5 CONVERT TO LOGICAL (long DFP to 32) + op_CLFEBR uint32 = 0xB39C // FORMAT_RRF5 CONVERT TO LOGICAL 
(short BFP to 32) + op_CLFHSI uint32 = 0xE55D // FORMAT_SIL COMPARE LOGICAL IMMEDIATE (32<-16) + op_CLFI uint32 = 0xC20F // FORMAT_RIL1 COMPARE LOGICAL IMMEDIATE (32) + op_CLFIT uint32 = 0xEC73 // FORMAT_RIE1 COMPARE LOGICAL IMMEDIATE AND TRAP (32<-16) + op_CLFXBR uint32 = 0xB39E // FORMAT_RRF5 CONVERT TO LOGICAL (extended BFP to 32) + op_CLFXTR uint32 = 0xB94B // FORMAT_RRF5 CONVERT TO LOGICAL (extended DFP to 32) + op_CLG uint32 = 0xE321 // FORMAT_RXY1 COMPARE LOGICAL (64) + op_CLGDBR uint32 = 0xB3AD // FORMAT_RRF5 CONVERT TO LOGICAL (long BFP to 64) + op_CLGDTR uint32 = 0xB942 // FORMAT_RRF5 CONVERT TO LOGICAL (long DFP to 64) + op_CLGEBR uint32 = 0xB3AC // FORMAT_RRF5 CONVERT TO LOGICAL (short BFP to 64) + op_CLGF uint32 = 0xE331 // FORMAT_RXY1 COMPARE LOGICAL (64<-32) + op_CLGFI uint32 = 0xC20E // FORMAT_RIL1 COMPARE LOGICAL IMMEDIATE (64<-32) + op_CLGFR uint32 = 0xB931 // FORMAT_RRE COMPARE LOGICAL (64<-32) + op_CLGFRL uint32 = 0xC60E // FORMAT_RIL2 COMPARE LOGICAL RELATIVE LONG (64<-32) + op_CLGHRL uint32 = 0xC606 // FORMAT_RIL2 COMPARE LOGICAL RELATIVE LONG (64<-16) + op_CLGHSI uint32 = 0xE559 // FORMAT_SIL COMPARE LOGICAL IMMEDIATE (64<-16) + op_CLGIB uint32 = 0xECFD // FORMAT_RIS COMPARE LOGICAL IMMEDIATE AND BRANCH (64<-8) + op_CLGIJ uint32 = 0xEC7D // FORMAT_RIE3 COMPARE LOGICAL IMMEDIATE AND BRANCH RELATIVE (64<-8) + op_CLGIT uint32 = 0xEC71 // FORMAT_RIE1 COMPARE LOGICAL IMMEDIATE AND TRAP (64<-16) + op_CLGR uint32 = 0xB921 // FORMAT_RRE COMPARE LOGICAL (64) + op_CLGRB uint32 = 0xECE5 // FORMAT_RRS COMPARE LOGICAL AND BRANCH (64) + op_CLGRJ uint32 = 0xEC65 // FORMAT_RIE2 COMPARE LOGICAL AND BRANCH RELATIVE (64) + op_CLGRL uint32 = 0xC60A // FORMAT_RIL2 COMPARE LOGICAL RELATIVE LONG (64) + op_CLGRT uint32 = 0xB961 // FORMAT_RRF3 COMPARE LOGICAL AND TRAP (64) + op_CLGT uint32 = 0xEB2B // FORMAT_RSY2 COMPARE LOGICAL AND TRAP (64) + op_CLGXBR uint32 = 0xB3AE // FORMAT_RRF5 CONVERT TO LOGICAL (extended BFP to 64) + op_CLGXTR uint32 = 0xB94A // FORMAT_RRF5 
CONVERT TO LOGICAL (extended DFP to 64) + op_CLHF uint32 = 0xE3CF // FORMAT_RXY1 COMPARE LOGICAL HIGH (32) + op_CLHHR uint32 = 0xB9CF // FORMAT_RRE COMPARE LOGICAL HIGH (32) + op_CLHHSI uint32 = 0xE555 // FORMAT_SIL COMPARE LOGICAL IMMEDIATE (16) + op_CLHLR uint32 = 0xB9DF // FORMAT_RRE COMPARE LOGICAL HIGH (32) + op_CLHRL uint32 = 0xC607 // FORMAT_RIL2 COMPARE LOGICAL RELATIVE LONG (32<-16) + op_CLI uint32 = 0x9500 // FORMAT_SI COMPARE LOGICAL (immediate) + op_CLIB uint32 = 0xECFF // FORMAT_RIS COMPARE LOGICAL IMMEDIATE AND BRANCH (32<-8) + op_CLIH uint32 = 0xCC0F // FORMAT_RIL1 COMPARE LOGICAL IMMEDIATE HIGH (32) + op_CLIJ uint32 = 0xEC7F // FORMAT_RIE3 COMPARE LOGICAL IMMEDIATE AND BRANCH RELATIVE (32<-8) + op_CLIY uint32 = 0xEB55 // FORMAT_SIY COMPARE LOGICAL (immediate) + op_CLM uint32 = 0xBD00 // FORMAT_RS2 COMPARE LOGICAL CHAR. UNDER MASK (low) + op_CLMH uint32 = 0xEB20 // FORMAT_RSY2 COMPARE LOGICAL CHAR. UNDER MASK (high) + op_CLMY uint32 = 0xEB21 // FORMAT_RSY2 COMPARE LOGICAL CHAR. 
UNDER MASK (low) + op_CLR uint32 = 0x1500 // FORMAT_RR COMPARE LOGICAL (32) + op_CLRB uint32 = 0xECF7 // FORMAT_RRS COMPARE LOGICAL AND BRANCH (32) + op_CLRJ uint32 = 0xEC77 // FORMAT_RIE2 COMPARE LOGICAL AND BRANCH RELATIVE (32) + op_CLRL uint32 = 0xC60F // FORMAT_RIL2 COMPARE LOGICAL RELATIVE LONG (32) + op_CLRT uint32 = 0xB973 // FORMAT_RRF3 COMPARE LOGICAL AND TRAP (32) + op_CLST uint32 = 0xB25D // FORMAT_RRE COMPARE LOGICAL STRING + op_CLT uint32 = 0xEB23 // FORMAT_RSY2 COMPARE LOGICAL AND TRAP (32) + op_CLY uint32 = 0xE355 // FORMAT_RXY1 COMPARE LOGICAL (32) + op_CMPSC uint32 = 0xB263 // FORMAT_RRE COMPRESSION CALL + op_CP uint32 = 0xF900 // FORMAT_SS2 COMPARE DECIMAL + op_CPSDR uint32 = 0xB372 // FORMAT_RRF2 COPY SIGN (long) + op_CPYA uint32 = 0xB24D // FORMAT_RRE COPY ACCESS + op_CR uint32 = 0x1900 // FORMAT_RR COMPARE (32) + op_CRB uint32 = 0xECF6 // FORMAT_RRS COMPARE AND BRANCH (32) + op_CRDTE uint32 = 0xB98F // FORMAT_RRF2 COMPARE AND REPLACE DAT TABLE ENTRY + op_CRJ uint32 = 0xEC76 // FORMAT_RIE2 COMPARE AND BRANCH RELATIVE (32) + op_CRL uint32 = 0xC60D // FORMAT_RIL2 COMPARE RELATIVE LONG (32) + op_CRT uint32 = 0xB972 // FORMAT_RRF3 COMPARE AND TRAP (32) + op_CS uint32 = 0xBA00 // FORMAT_RS1 COMPARE AND SWAP (32) + op_CSCH uint32 = 0xB230 // FORMAT_S CLEAR SUBCHANNEL + op_CSDTR uint32 = 0xB3E3 // FORMAT_RRF4 CONVERT TO SIGNED PACKED (long DFP to 64) + op_CSG uint32 = 0xEB30 // FORMAT_RSY1 COMPARE AND SWAP (64) + op_CSP uint32 = 0xB250 // FORMAT_RRE COMPARE AND SWAP AND PURGE + op_CSPG uint32 = 0xB98A // FORMAT_RRE COMPARE AND SWAP AND PURGE + op_CSST uint32 = 0xC802 // FORMAT_SSF COMPARE AND SWAP AND STORE + op_CSXTR uint32 = 0xB3EB // FORMAT_RRF4 CONVERT TO SIGNED PACKED (extended DFP to 128) + op_CSY uint32 = 0xEB14 // FORMAT_RSY1 COMPARE AND SWAP (32) + op_CU12 uint32 = 0xB2A7 // FORMAT_RRF3 CONVERT UTF-8 TO UTF-16 + op_CU14 uint32 = 0xB9B0 // FORMAT_RRF3 CONVERT UTF-8 TO UTF-32 + op_CU21 uint32 = 0xB2A6 // FORMAT_RRF3 CONVERT UTF-16 TO UTF-8 + 
op_CU24 uint32 = 0xB9B1 // FORMAT_RRF3 CONVERT UTF-16 TO UTF-32 + op_CU41 uint32 = 0xB9B2 // FORMAT_RRE CONVERT UTF-32 TO UTF-8 + op_CU42 uint32 = 0xB9B3 // FORMAT_RRE CONVERT UTF-32 TO UTF-16 + op_CUDTR uint32 = 0xB3E2 // FORMAT_RRE CONVERT TO UNSIGNED PACKED (long DFP to 64) + op_CUSE uint32 = 0xB257 // FORMAT_RRE COMPARE UNTIL SUBSTRING EQUAL + op_CUTFU uint32 = 0xB2A7 // FORMAT_RRF3 CONVERT UTF-8 TO UNICODE + op_CUUTF uint32 = 0xB2A6 // FORMAT_RRF3 CONVERT UNICODE TO UTF-8 + op_CUXTR uint32 = 0xB3EA // FORMAT_RRE CONVERT TO UNSIGNED PACKED (extended DFP to 128) + op_CVB uint32 = 0x4F00 // FORMAT_RX1 CONVERT TO BINARY (32) + op_CVBG uint32 = 0xE30E // FORMAT_RXY1 CONVERT TO BINARY (64) + op_CVBY uint32 = 0xE306 // FORMAT_RXY1 CONVERT TO BINARY (32) + op_CVD uint32 = 0x4E00 // FORMAT_RX1 CONVERT TO DECIMAL (32) + op_CVDG uint32 = 0xE32E // FORMAT_RXY1 CONVERT TO DECIMAL (64) + op_CVDY uint32 = 0xE326 // FORMAT_RXY1 CONVERT TO DECIMAL (32) + op_CXBR uint32 = 0xB349 // FORMAT_RRE COMPARE (extended BFP) + op_CXFBR uint32 = 0xB396 // FORMAT_RRE CONVERT FROM FIXED (32 to extended BFP) + op_CXFBRA uint32 = 0xB396 // FORMAT_RRF5 CONVERT FROM FIXED (32 to extended BFP) + op_CXFR uint32 = 0xB3B6 // FORMAT_RRE CONVERT FROM FIXED (32 to extended HFP) + op_CXFTR uint32 = 0xB959 // FORMAT_RRE CONVERT FROM FIXED (32 to extended DFP) + op_CXGBR uint32 = 0xB3A6 // FORMAT_RRE CONVERT FROM FIXED (64 to extended BFP) + op_CXGBRA uint32 = 0xB3A6 // FORMAT_RRF5 CONVERT FROM FIXED (64 to extended BFP) + op_CXGR uint32 = 0xB3C6 // FORMAT_RRE CONVERT FROM FIXED (64 to extended HFP) + op_CXGTR uint32 = 0xB3F9 // FORMAT_RRE CONVERT FROM FIXED (64 to extended DFP) + op_CXGTRA uint32 = 0xB3F9 // FORMAT_RRF5 CONVERT FROM FIXED (64 to extended DFP) + op_CXLFBR uint32 = 0xB392 // FORMAT_RRF5 CONVERT FROM LOGICAL (32 to extended BFP) + op_CXLFTR uint32 = 0xB95B // FORMAT_RRF5 CONVERT FROM LOGICAL (32 to extended DFP) + op_CXLGBR uint32 = 0xB3A2 // FORMAT_RRF5 CONVERT FROM LOGICAL (64 to 
extended BFP) + op_CXLGTR uint32 = 0xB95A // FORMAT_RRF5 CONVERT FROM LOGICAL (64 to extended DFP) + op_CXR uint32 = 0xB369 // FORMAT_RRE COMPARE (extended HFP) + op_CXSTR uint32 = 0xB3FB // FORMAT_RRE CONVERT FROM SIGNED PACKED (128 to extended DFP) + op_CXTR uint32 = 0xB3EC // FORMAT_RRE COMPARE (extended DFP) + op_CXUTR uint32 = 0xB3FA // FORMAT_RRE CONVERT FROM UNSIGNED PACKED (128 to ext. DFP) + op_CXZT uint32 = 0xEDAB // FORMAT_RSL CONVERT FROM ZONED (to extended DFP) + op_CY uint32 = 0xE359 // FORMAT_RXY1 COMPARE (32) + op_CZDT uint32 = 0xEDA8 // FORMAT_RSL CONVERT TO ZONED (from long DFP) + op_CZXT uint32 = 0xEDA9 // FORMAT_RSL CONVERT TO ZONED (from extended DFP) + op_D uint32 = 0x5D00 // FORMAT_RX1 DIVIDE (32<-64) + op_DD uint32 = 0x6D00 // FORMAT_RX1 DIVIDE (long HFP) + op_DDB uint32 = 0xED1D // FORMAT_RXE DIVIDE (long BFP) + op_DDBR uint32 = 0xB31D // FORMAT_RRE DIVIDE (long BFP) + op_DDR uint32 = 0x2D00 // FORMAT_RR DIVIDE (long HFP) + op_DDTR uint32 = 0xB3D1 // FORMAT_RRF1 DIVIDE (long DFP) + op_DDTRA uint32 = 0xB3D1 // FORMAT_RRF1 DIVIDE (long DFP) + op_DE uint32 = 0x7D00 // FORMAT_RX1 DIVIDE (short HFP) + op_DEB uint32 = 0xED0D // FORMAT_RXE DIVIDE (short BFP) + op_DEBR uint32 = 0xB30D // FORMAT_RRE DIVIDE (short BFP) + op_DER uint32 = 0x3D00 // FORMAT_RR DIVIDE (short HFP) + op_DIDBR uint32 = 0xB35B // FORMAT_RRF2 DIVIDE TO INTEGER (long BFP) + op_DIEBR uint32 = 0xB353 // FORMAT_RRF2 DIVIDE TO INTEGER (short BFP) + op_DL uint32 = 0xE397 // FORMAT_RXY1 DIVIDE LOGICAL (32<-64) + op_DLG uint32 = 0xE387 // FORMAT_RXY1 DIVIDE LOGICAL (64<-128) + op_DLGR uint32 = 0xB987 // FORMAT_RRE DIVIDE LOGICAL (64<-128) + op_DLR uint32 = 0xB997 // FORMAT_RRE DIVIDE LOGICAL (32<-64) + op_DP uint32 = 0xFD00 // FORMAT_SS2 DIVIDE DECIMAL + op_DR uint32 = 0x1D00 // FORMAT_RR DIVIDE (32<-64) + op_DSG uint32 = 0xE30D // FORMAT_RXY1 DIVIDE SINGLE (64) + op_DSGF uint32 = 0xE31D // FORMAT_RXY1 DIVIDE SINGLE (64<-32) + op_DSGFR uint32 = 0xB91D // FORMAT_RRE DIVIDE SINGLE 
(64<-32) + op_DSGR uint32 = 0xB90D // FORMAT_RRE DIVIDE SINGLE (64) + op_DXBR uint32 = 0xB34D // FORMAT_RRE DIVIDE (extended BFP) + op_DXR uint32 = 0xB22D // FORMAT_RRE DIVIDE (extended HFP) + op_DXTR uint32 = 0xB3D9 // FORMAT_RRF1 DIVIDE (extended DFP) + op_DXTRA uint32 = 0xB3D9 // FORMAT_RRF1 DIVIDE (extended DFP) + op_EAR uint32 = 0xB24F // FORMAT_RRE EXTRACT ACCESS + op_ECAG uint32 = 0xEB4C // FORMAT_RSY1 EXTRACT CACHE ATTRIBUTE + op_ECTG uint32 = 0xC801 // FORMAT_SSF EXTRACT CPU TIME + op_ED uint32 = 0xDE00 // FORMAT_SS1 EDIT + op_EDMK uint32 = 0xDF00 // FORMAT_SS1 EDIT AND MARK + op_EEDTR uint32 = 0xB3E5 // FORMAT_RRE EXTRACT BIASED EXPONENT (long DFP to 64) + op_EEXTR uint32 = 0xB3ED // FORMAT_RRE EXTRACT BIASED EXPONENT (extended DFP to 64) + op_EFPC uint32 = 0xB38C // FORMAT_RRE EXTRACT FPC + op_EPAIR uint32 = 0xB99A // FORMAT_RRE EXTRACT PRIMARY ASN AND INSTANCE + op_EPAR uint32 = 0xB226 // FORMAT_RRE EXTRACT PRIMARY ASN + op_EPSW uint32 = 0xB98D // FORMAT_RRE EXTRACT PSW + op_EREG uint32 = 0xB249 // FORMAT_RRE EXTRACT STACKED REGISTERS (32) + op_EREGG uint32 = 0xB90E // FORMAT_RRE EXTRACT STACKED REGISTERS (64) + op_ESAIR uint32 = 0xB99B // FORMAT_RRE EXTRACT SECONDARY ASN AND INSTANCE + op_ESAR uint32 = 0xB227 // FORMAT_RRE EXTRACT SECONDARY ASN + op_ESDTR uint32 = 0xB3E7 // FORMAT_RRE EXTRACT SIGNIFICANCE (long DFP) + op_ESEA uint32 = 0xB99D // FORMAT_RRE EXTRACT AND SET EXTENDED AUTHORITY + op_ESTA uint32 = 0xB24A // FORMAT_RRE EXTRACT STACKED STATE + op_ESXTR uint32 = 0xB3EF // FORMAT_RRE EXTRACT SIGNIFICANCE (extended DFP) + op_ETND uint32 = 0xB2EC // FORMAT_RRE EXTRACT TRANSACTION NESTING DEPTH + op_EX uint32 = 0x4400 // FORMAT_RX1 EXECUTE + op_EXRL uint32 = 0xC600 // FORMAT_RIL2 EXECUTE RELATIVE LONG + op_FIDBR uint32 = 0xB35F // FORMAT_RRF5 LOAD FP INTEGER (long BFP) + op_FIDBRA uint32 = 0xB35F // FORMAT_RRF5 LOAD FP INTEGER (long BFP) + op_FIDR uint32 = 0xB37F // FORMAT_RRE LOAD FP INTEGER (long HFP) + op_FIDTR uint32 = 0xB3D7 // FORMAT_RRF5 
LOAD FP INTEGER (long DFP) + op_FIEBR uint32 = 0xB357 // FORMAT_RRF5 LOAD FP INTEGER (short BFP) + op_FIEBRA uint32 = 0xB357 // FORMAT_RRF5 LOAD FP INTEGER (short BFP) + op_FIER uint32 = 0xB377 // FORMAT_RRE LOAD FP INTEGER (short HFP) + op_FIXBR uint32 = 0xB347 // FORMAT_RRF5 LOAD FP INTEGER (extended BFP) + op_FIXBRA uint32 = 0xB347 // FORMAT_RRF5 LOAD FP INTEGER (extended BFP) + op_FIXR uint32 = 0xB367 // FORMAT_RRE LOAD FP INTEGER (extended HFP) + op_FIXTR uint32 = 0xB3DF // FORMAT_RRF5 LOAD FP INTEGER (extended DFP) + op_FLOGR uint32 = 0xB983 // FORMAT_RRE FIND LEFTMOST ONE + op_HDR uint32 = 0x2400 // FORMAT_RR HALVE (long HFP) + op_HER uint32 = 0x3400 // FORMAT_RR HALVE (short HFP) + op_HSCH uint32 = 0xB231 // FORMAT_S HALT SUBCHANNEL + op_IAC uint32 = 0xB224 // FORMAT_RRE INSERT ADDRESS SPACE CONTROL + op_IC uint32 = 0x4300 // FORMAT_RX1 INSERT CHARACTER + op_ICM uint32 = 0xBF00 // FORMAT_RS2 INSERT CHARACTERS UNDER MASK (low) + op_ICMH uint32 = 0xEB80 // FORMAT_RSY2 INSERT CHARACTERS UNDER MASK (high) + op_ICMY uint32 = 0xEB81 // FORMAT_RSY2 INSERT CHARACTERS UNDER MASK (low) + op_ICY uint32 = 0xE373 // FORMAT_RXY1 INSERT CHARACTER + op_IDTE uint32 = 0xB98E // FORMAT_RRF2 INVALIDATE DAT TABLE ENTRY + op_IEDTR uint32 = 0xB3F6 // FORMAT_RRF2 INSERT BIASED EXPONENT (64 to long DFP) + op_IEXTR uint32 = 0xB3FE // FORMAT_RRF2 INSERT BIASED EXPONENT (64 to extended DFP) + op_IIHF uint32 = 0xC008 // FORMAT_RIL1 INSERT IMMEDIATE (high) + op_IIHH uint32 = 0xA500 // FORMAT_RI1 INSERT IMMEDIATE (high high) + op_IIHL uint32 = 0xA501 // FORMAT_RI1 INSERT IMMEDIATE (high low) + op_IILF uint32 = 0xC009 // FORMAT_RIL1 INSERT IMMEDIATE (low) + op_IILH uint32 = 0xA502 // FORMAT_RI1 INSERT IMMEDIATE (low high) + op_IILL uint32 = 0xA503 // FORMAT_RI1 INSERT IMMEDIATE (low low) + op_IPK uint32 = 0xB20B // FORMAT_S INSERT PSW KEY + op_IPM uint32 = 0xB222 // FORMAT_RRE INSERT PROGRAM MASK + op_IPTE uint32 = 0xB221 // FORMAT_RRF1 INVALIDATE PAGE TABLE ENTRY + op_ISKE uint32 = 
0xB229 // FORMAT_RRE INSERT STORAGE KEY EXTENDED + op_IVSK uint32 = 0xB223 // FORMAT_RRE INSERT VIRTUAL STORAGE KEY + op_KDB uint32 = 0xED18 // FORMAT_RXE COMPARE AND SIGNAL (long BFP) + op_KDBR uint32 = 0xB318 // FORMAT_RRE COMPARE AND SIGNAL (long BFP) + op_KDTR uint32 = 0xB3E0 // FORMAT_RRE COMPARE AND SIGNAL (long DFP) + op_KEB uint32 = 0xED08 // FORMAT_RXE COMPARE AND SIGNAL (short BFP) + op_KEBR uint32 = 0xB308 // FORMAT_RRE COMPARE AND SIGNAL (short BFP) + op_KIMD uint32 = 0xB93E // FORMAT_RRE COMPUTE INTERMEDIATE MESSAGE DIGEST + op_KLMD uint32 = 0xB93F // FORMAT_RRE COMPUTE LAST MESSAGE DIGEST + op_KM uint32 = 0xB92E // FORMAT_RRE CIPHER MESSAGE + op_KMAC uint32 = 0xB91E // FORMAT_RRE COMPUTE MESSAGE AUTHENTICATION CODE + op_KMC uint32 = 0xB92F // FORMAT_RRE CIPHER MESSAGE WITH CHAINING + op_KMCTR uint32 = 0xB92D // FORMAT_RRF2 CIPHER MESSAGE WITH COUNTER + op_KMF uint32 = 0xB92A // FORMAT_RRE CIPHER MESSAGE WITH CFB + op_KMO uint32 = 0xB92B // FORMAT_RRE CIPHER MESSAGE WITH OFB + op_KXBR uint32 = 0xB348 // FORMAT_RRE COMPARE AND SIGNAL (extended BFP) + op_KXTR uint32 = 0xB3E8 // FORMAT_RRE COMPARE AND SIGNAL (extended DFP) + op_L uint32 = 0x5800 // FORMAT_RX1 LOAD (32) + op_LA uint32 = 0x4100 // FORMAT_RX1 LOAD ADDRESS + op_LAA uint32 = 0xEBF8 // FORMAT_RSY1 LOAD AND ADD (32) + op_LAAG uint32 = 0xEBE8 // FORMAT_RSY1 LOAD AND ADD (64) + op_LAAL uint32 = 0xEBFA // FORMAT_RSY1 LOAD AND ADD LOGICAL (32) + op_LAALG uint32 = 0xEBEA // FORMAT_RSY1 LOAD AND ADD LOGICAL (64) + op_LAE uint32 = 0x5100 // FORMAT_RX1 LOAD ADDRESS EXTENDED + op_LAEY uint32 = 0xE375 // FORMAT_RXY1 LOAD ADDRESS EXTENDED + op_LAM uint32 = 0x9A00 // FORMAT_RS1 LOAD ACCESS MULTIPLE + op_LAMY uint32 = 0xEB9A // FORMAT_RSY1 LOAD ACCESS MULTIPLE + op_LAN uint32 = 0xEBF4 // FORMAT_RSY1 LOAD AND AND (32) + op_LANG uint32 = 0xEBE4 // FORMAT_RSY1 LOAD AND AND (64) + op_LAO uint32 = 0xEBF6 // FORMAT_RSY1 LOAD AND OR (32) + op_LAOG uint32 = 0xEBE6 // FORMAT_RSY1 LOAD AND OR (64) + op_LARL uint32 = 
0xC000 // FORMAT_RIL2 LOAD ADDRESS RELATIVE LONG + op_LASP uint32 = 0xE500 // FORMAT_SSE LOAD ADDRESS SPACE PARAMETERS + op_LAT uint32 = 0xE39F // FORMAT_RXY1 LOAD AND TRAP (32L<-32) + op_LAX uint32 = 0xEBF7 // FORMAT_RSY1 LOAD AND EXCLUSIVE OR (32) + op_LAXG uint32 = 0xEBE7 // FORMAT_RSY1 LOAD AND EXCLUSIVE OR (64) + op_LAY uint32 = 0xE371 // FORMAT_RXY1 LOAD ADDRESS + op_LB uint32 = 0xE376 // FORMAT_RXY1 LOAD BYTE (32) + op_LBH uint32 = 0xE3C0 // FORMAT_RXY1 LOAD BYTE HIGH (32<-8) + op_LBR uint32 = 0xB926 // FORMAT_RRE LOAD BYTE (32) + op_LCDBR uint32 = 0xB313 // FORMAT_RRE LOAD COMPLEMENT (long BFP) + op_LCDFR uint32 = 0xB373 // FORMAT_RRE LOAD COMPLEMENT (long) + op_LCDR uint32 = 0x2300 // FORMAT_RR LOAD COMPLEMENT (long HFP) + op_LCEBR uint32 = 0xB303 // FORMAT_RRE LOAD COMPLEMENT (short BFP) + op_LCER uint32 = 0x3300 // FORMAT_RR LOAD COMPLEMENT (short HFP) + op_LCGFR uint32 = 0xB913 // FORMAT_RRE LOAD COMPLEMENT (64<-32) + op_LCGR uint32 = 0xB903 // FORMAT_RRE LOAD COMPLEMENT (64) + op_LCR uint32 = 0x1300 // FORMAT_RR LOAD COMPLEMENT (32) + op_LCTL uint32 = 0xB700 // FORMAT_RS1 LOAD CONTROL (32) + op_LCTLG uint32 = 0xEB2F // FORMAT_RSY1 LOAD CONTROL (64) + op_LCXBR uint32 = 0xB343 // FORMAT_RRE LOAD COMPLEMENT (extended BFP) + op_LCXR uint32 = 0xB363 // FORMAT_RRE LOAD COMPLEMENT (extended HFP) + op_LD uint32 = 0x6800 // FORMAT_RX1 LOAD (long) + op_LDE uint32 = 0xED24 // FORMAT_RXE LOAD LENGTHENED (short to long HFP) + op_LDEB uint32 = 0xED04 // FORMAT_RXE LOAD LENGTHENED (short to long BFP) + op_LDEBR uint32 = 0xB304 // FORMAT_RRE LOAD LENGTHENED (short to long BFP) + op_LDER uint32 = 0xB324 // FORMAT_RRE LOAD LENGTHENED (short to long HFP) + op_LDETR uint32 = 0xB3D4 // FORMAT_RRF4 LOAD LENGTHENED (short to long DFP) + op_LDGR uint32 = 0xB3C1 // FORMAT_RRE LOAD FPR FROM GR (64 to long) + op_LDR uint32 = 0x2800 // FORMAT_RR LOAD (long) + op_LDXBR uint32 = 0xB345 // FORMAT_RRE LOAD ROUNDED (extended to long BFP) + op_LDXBRA uint32 = 0xB345 // FORMAT_RRF5 LOAD 
ROUNDED (extended to long BFP) + op_LDXR uint32 = 0x2500 // FORMAT_RR LOAD ROUNDED (extended to long HFP) + op_LDXTR uint32 = 0xB3DD // FORMAT_RRF5 LOAD ROUNDED (extended to long DFP) + op_LDY uint32 = 0xED65 // FORMAT_RXY1 LOAD (long) + op_LE uint32 = 0x7800 // FORMAT_RX1 LOAD (short) + op_LEDBR uint32 = 0xB344 // FORMAT_RRE LOAD ROUNDED (long to short BFP) + op_LEDBRA uint32 = 0xB344 // FORMAT_RRF5 LOAD ROUNDED (long to short BFP) + op_LEDR uint32 = 0x3500 // FORMAT_RR LOAD ROUNDED (long to short HFP) + op_LEDTR uint32 = 0xB3D5 // FORMAT_RRF5 LOAD ROUNDED (long to short DFP) + op_LER uint32 = 0x3800 // FORMAT_RR LOAD (short) + op_LEXBR uint32 = 0xB346 // FORMAT_RRE LOAD ROUNDED (extended to short BFP) + op_LEXBRA uint32 = 0xB346 // FORMAT_RRF5 LOAD ROUNDED (extended to short BFP) + op_LEXR uint32 = 0xB366 // FORMAT_RRE LOAD ROUNDED (extended to short HFP) + op_LEY uint32 = 0xED64 // FORMAT_RXY1 LOAD (short) + op_LFAS uint32 = 0xB2BD // FORMAT_S LOAD FPC AND SIGNAL + op_LFH uint32 = 0xE3CA // FORMAT_RXY1 LOAD HIGH (32) + op_LFHAT uint32 = 0xE3C8 // FORMAT_RXY1 LOAD HIGH AND TRAP (32H<-32) + op_LFPC uint32 = 0xB29D // FORMAT_S LOAD FPC + op_LG uint32 = 0xE304 // FORMAT_RXY1 LOAD (64) + op_LGAT uint32 = 0xE385 // FORMAT_RXY1 LOAD AND TRAP (64) + op_LGB uint32 = 0xE377 // FORMAT_RXY1 LOAD BYTE (64) + op_LGBR uint32 = 0xB906 // FORMAT_RRE LOAD BYTE (64) + op_LGDR uint32 = 0xB3CD // FORMAT_RRE LOAD GR FROM FPR (long to 64) + op_LGF uint32 = 0xE314 // FORMAT_RXY1 LOAD (64<-32) + op_LGFI uint32 = 0xC001 // FORMAT_RIL1 LOAD IMMEDIATE (64<-32) + op_LGFR uint32 = 0xB914 // FORMAT_RRE LOAD (64<-32) + op_LGFRL uint32 = 0xC40C // FORMAT_RIL2 LOAD RELATIVE LONG (64<-32) + op_LGH uint32 = 0xE315 // FORMAT_RXY1 LOAD HALFWORD (64) + op_LGHI uint32 = 0xA709 // FORMAT_RI1 LOAD HALFWORD IMMEDIATE (64) + op_LGHR uint32 = 0xB907 // FORMAT_RRE LOAD HALFWORD (64) + op_LGHRL uint32 = 0xC404 // FORMAT_RIL2 LOAD HALFWORD RELATIVE LONG (64<-16) + op_LGR uint32 = 0xB904 // FORMAT_RRE LOAD 
(64) + op_LGRL uint32 = 0xC408 // FORMAT_RIL2 LOAD RELATIVE LONG (64) + op_LH uint32 = 0x4800 // FORMAT_RX1 LOAD HALFWORD (32) + op_LHH uint32 = 0xE3C4 // FORMAT_RXY1 LOAD HALFWORD HIGH (32<-16) + op_LHI uint32 = 0xA708 // FORMAT_RI1 LOAD HALFWORD IMMEDIATE (32) + op_LHR uint32 = 0xB927 // FORMAT_RRE LOAD HALFWORD (32) + op_LHRL uint32 = 0xC405 // FORMAT_RIL2 LOAD HALFWORD RELATIVE LONG (32<-16) + op_LHY uint32 = 0xE378 // FORMAT_RXY1 LOAD HALFWORD (32) + op_LLC uint32 = 0xE394 // FORMAT_RXY1 LOAD LOGICAL CHARACTER (32) + op_LLCH uint32 = 0xE3C2 // FORMAT_RXY1 LOAD LOGICAL CHARACTER HIGH (32<-8) + op_LLCR uint32 = 0xB994 // FORMAT_RRE LOAD LOGICAL CHARACTER (32) + op_LLGC uint32 = 0xE390 // FORMAT_RXY1 LOAD LOGICAL CHARACTER (64) + op_LLGCR uint32 = 0xB984 // FORMAT_RRE LOAD LOGICAL CHARACTER (64) + op_LLGF uint32 = 0xE316 // FORMAT_RXY1 LOAD LOGICAL (64<-32) + op_LLGFAT uint32 = 0xE39D // FORMAT_RXY1 LOAD LOGICAL AND TRAP (64<-32) + op_LLGFR uint32 = 0xB916 // FORMAT_RRE LOAD LOGICAL (64<-32) + op_LLGFRL uint32 = 0xC40E // FORMAT_RIL2 LOAD LOGICAL RELATIVE LONG (64<-32) + op_LLGH uint32 = 0xE391 // FORMAT_RXY1 LOAD LOGICAL HALFWORD (64) + op_LLGHR uint32 = 0xB985 // FORMAT_RRE LOAD LOGICAL HALFWORD (64) + op_LLGHRL uint32 = 0xC406 // FORMAT_RIL2 LOAD LOGICAL HALFWORD RELATIVE LONG (64<-16) + op_LLGT uint32 = 0xE317 // FORMAT_RXY1 LOAD LOGICAL THIRTY ONE BITS + op_LLGTAT uint32 = 0xE39C // FORMAT_RXY1 LOAD LOGICAL THIRTY ONE BITS AND TRAP (64<-31) + op_LLGTR uint32 = 0xB917 // FORMAT_RRE LOAD LOGICAL THIRTY ONE BITS + op_LLH uint32 = 0xE395 // FORMAT_RXY1 LOAD LOGICAL HALFWORD (32) + op_LLHH uint32 = 0xE3C6 // FORMAT_RXY1 LOAD LOGICAL HALFWORD HIGH (32<-16) + op_LLHR uint32 = 0xB995 // FORMAT_RRE LOAD LOGICAL HALFWORD (32) + op_LLHRL uint32 = 0xC402 // FORMAT_RIL2 LOAD LOGICAL HALFWORD RELATIVE LONG (32<-16) + op_LLIHF uint32 = 0xC00E // FORMAT_RIL1 LOAD LOGICAL IMMEDIATE (high) + op_LLIHH uint32 = 0xA50C // FORMAT_RI1 LOAD LOGICAL IMMEDIATE (high high) + op_LLIHL 
uint32 = 0xA50D // FORMAT_RI1 LOAD LOGICAL IMMEDIATE (high low) + op_LLILF uint32 = 0xC00F // FORMAT_RIL1 LOAD LOGICAL IMMEDIATE (low) + op_LLILH uint32 = 0xA50E // FORMAT_RI1 LOAD LOGICAL IMMEDIATE (low high) + op_LLILL uint32 = 0xA50F // FORMAT_RI1 LOAD LOGICAL IMMEDIATE (low low) + op_LM uint32 = 0x9800 // FORMAT_RS1 LOAD MULTIPLE (32) + op_LMD uint32 = 0xEF00 // FORMAT_SS5 LOAD MULTIPLE DISJOINT + op_LMG uint32 = 0xEB04 // FORMAT_RSY1 LOAD MULTIPLE (64) + op_LMH uint32 = 0xEB96 // FORMAT_RSY1 LOAD MULTIPLE HIGH + op_LMY uint32 = 0xEB98 // FORMAT_RSY1 LOAD MULTIPLE (32) + op_LNDBR uint32 = 0xB311 // FORMAT_RRE LOAD NEGATIVE (long BFP) + op_LNDFR uint32 = 0xB371 // FORMAT_RRE LOAD NEGATIVE (long) + op_LNDR uint32 = 0x2100 // FORMAT_RR LOAD NEGATIVE (long HFP) + op_LNEBR uint32 = 0xB301 // FORMAT_RRE LOAD NEGATIVE (short BFP) + op_LNER uint32 = 0x3100 // FORMAT_RR LOAD NEGATIVE (short HFP) + op_LNGFR uint32 = 0xB911 // FORMAT_RRE LOAD NEGATIVE (64<-32) + op_LNGR uint32 = 0xB901 // FORMAT_RRE LOAD NEGATIVE (64) + op_LNR uint32 = 0x1100 // FORMAT_RR LOAD NEGATIVE (32) + op_LNXBR uint32 = 0xB341 // FORMAT_RRE LOAD NEGATIVE (extended BFP) + op_LNXR uint32 = 0xB361 // FORMAT_RRE LOAD NEGATIVE (extended HFP) + op_LOC uint32 = 0xEBF2 // FORMAT_RSY2 LOAD ON CONDITION (32) + op_LOCG uint32 = 0xEBE2 // FORMAT_RSY2 LOAD ON CONDITION (64) + op_LOCGR uint32 = 0xB9E2 // FORMAT_RRF3 LOAD ON CONDITION (64) + op_LOCR uint32 = 0xB9F2 // FORMAT_RRF3 LOAD ON CONDITION (32) + op_LPD uint32 = 0xC804 // FORMAT_SSF LOAD PAIR DISJOINT (32) + op_LPDBR uint32 = 0xB310 // FORMAT_RRE LOAD POSITIVE (long BFP) + op_LPDFR uint32 = 0xB370 // FORMAT_RRE LOAD POSITIVE (long) + op_LPDG uint32 = 0xC805 // FORMAT_SSF LOAD PAIR DISJOINT (64) + op_LPDR uint32 = 0x2000 // FORMAT_RR LOAD POSITIVE (long HFP) + op_LPEBR uint32 = 0xB300 // FORMAT_RRE LOAD POSITIVE (short BFP) + op_LPER uint32 = 0x3000 // FORMAT_RR LOAD POSITIVE (short HFP) + op_LPGFR uint32 = 0xB910 // FORMAT_RRE LOAD POSITIVE (64<-32) + 
op_LPGR uint32 = 0xB900 // FORMAT_RRE LOAD POSITIVE (64) + op_LPQ uint32 = 0xE38F // FORMAT_RXY1 LOAD PAIR FROM QUADWORD + op_LPR uint32 = 0x1000 // FORMAT_RR LOAD POSITIVE (32) + op_LPSW uint32 = 0x8200 // FORMAT_S LOAD PSW + op_LPSWE uint32 = 0xB2B2 // FORMAT_S LOAD PSW EXTENDED + op_LPTEA uint32 = 0xB9AA // FORMAT_RRF2 LOAD PAGE TABLE ENTRY ADDRESS + op_LPXBR uint32 = 0xB340 // FORMAT_RRE LOAD POSITIVE (extended BFP) + op_LPXR uint32 = 0xB360 // FORMAT_RRE LOAD POSITIVE (extended HFP) + op_LR uint32 = 0x1800 // FORMAT_RR LOAD (32) + op_LRA uint32 = 0xB100 // FORMAT_RX1 LOAD REAL ADDRESS (32) + op_LRAG uint32 = 0xE303 // FORMAT_RXY1 LOAD REAL ADDRESS (64) + op_LRAY uint32 = 0xE313 // FORMAT_RXY1 LOAD REAL ADDRESS (32) + op_LRDR uint32 = 0x2500 // FORMAT_RR LOAD ROUNDED (extended to long HFP) + op_LRER uint32 = 0x3500 // FORMAT_RR LOAD ROUNDED (long to short HFP) + op_LRL uint32 = 0xC40D // FORMAT_RIL2 LOAD RELATIVE LONG (32) + op_LRV uint32 = 0xE31E // FORMAT_RXY1 LOAD REVERSED (32) + op_LRVG uint32 = 0xE30F // FORMAT_RXY1 LOAD REVERSED (64) + op_LRVGR uint32 = 0xB90F // FORMAT_RRE LOAD REVERSED (64) + op_LRVH uint32 = 0xE31F // FORMAT_RXY1 LOAD REVERSED (16) + op_LRVR uint32 = 0xB91F // FORMAT_RRE LOAD REVERSED (32) + op_LT uint32 = 0xE312 // FORMAT_RXY1 LOAD AND TEST (32) + op_LTDBR uint32 = 0xB312 // FORMAT_RRE LOAD AND TEST (long BFP) + op_LTDR uint32 = 0x2200 // FORMAT_RR LOAD AND TEST (long HFP) + op_LTDTR uint32 = 0xB3D6 // FORMAT_RRE LOAD AND TEST (long DFP) + op_LTEBR uint32 = 0xB302 // FORMAT_RRE LOAD AND TEST (short BFP) + op_LTER uint32 = 0x3200 // FORMAT_RR LOAD AND TEST (short HFP) + op_LTG uint32 = 0xE302 // FORMAT_RXY1 LOAD AND TEST (64) + op_LTGF uint32 = 0xE332 // FORMAT_RXY1 LOAD AND TEST (64<-32) + op_LTGFR uint32 = 0xB912 // FORMAT_RRE LOAD AND TEST (64<-32) + op_LTGR uint32 = 0xB902 // FORMAT_RRE LOAD AND TEST (64) + op_LTR uint32 = 0x1200 // FORMAT_RR LOAD AND TEST (32) + op_LTXBR uint32 = 0xB342 // FORMAT_RRE LOAD AND TEST (extended BFP) + 
op_LTXR uint32 = 0xB362 // FORMAT_RRE LOAD AND TEST (extended HFP) + op_LTXTR uint32 = 0xB3DE // FORMAT_RRE LOAD AND TEST (extended DFP) + op_LURA uint32 = 0xB24B // FORMAT_RRE LOAD USING REAL ADDRESS (32) + op_LURAG uint32 = 0xB905 // FORMAT_RRE LOAD USING REAL ADDRESS (64) + op_LXD uint32 = 0xED25 // FORMAT_RXE LOAD LENGTHENED (long to extended HFP) + op_LXDB uint32 = 0xED05 // FORMAT_RXE LOAD LENGTHENED (long to extended BFP) + op_LXDBR uint32 = 0xB305 // FORMAT_RRE LOAD LENGTHENED (long to extended BFP) + op_LXDR uint32 = 0xB325 // FORMAT_RRE LOAD LENGTHENED (long to extended HFP) + op_LXDTR uint32 = 0xB3DC // FORMAT_RRF4 LOAD LENGTHENED (long to extended DFP) + op_LXE uint32 = 0xED26 // FORMAT_RXE LOAD LENGTHENED (short to extended HFP) + op_LXEB uint32 = 0xED06 // FORMAT_RXE LOAD LENGTHENED (short to extended BFP) + op_LXEBR uint32 = 0xB306 // FORMAT_RRE LOAD LENGTHENED (short to extended BFP) + op_LXER uint32 = 0xB326 // FORMAT_RRE LOAD LENGTHENED (short to extended HFP) + op_LXR uint32 = 0xB365 // FORMAT_RRE LOAD (extended) + op_LY uint32 = 0xE358 // FORMAT_RXY1 LOAD (32) + op_LZDR uint32 = 0xB375 // FORMAT_RRE LOAD ZERO (long) + op_LZER uint32 = 0xB374 // FORMAT_RRE LOAD ZERO (short) + op_LZXR uint32 = 0xB376 // FORMAT_RRE LOAD ZERO (extended) + op_M uint32 = 0x5C00 // FORMAT_RX1 MULTIPLY (64<-32) + op_MAD uint32 = 0xED3E // FORMAT_RXF MULTIPLY AND ADD (long HFP) + op_MADB uint32 = 0xED1E // FORMAT_RXF MULTIPLY AND ADD (long BFP) + op_MADBR uint32 = 0xB31E // FORMAT_RRD MULTIPLY AND ADD (long BFP) + op_MADR uint32 = 0xB33E // FORMAT_RRD MULTIPLY AND ADD (long HFP) + op_MAE uint32 = 0xED2E // FORMAT_RXF MULTIPLY AND ADD (short HFP) + op_MAEB uint32 = 0xED0E // FORMAT_RXF MULTIPLY AND ADD (short BFP) + op_MAEBR uint32 = 0xB30E // FORMAT_RRD MULTIPLY AND ADD (short BFP) + op_MAER uint32 = 0xB32E // FORMAT_RRD MULTIPLY AND ADD (short HFP) + op_MAY uint32 = 0xED3A // FORMAT_RXF MULTIPLY & ADD UNNORMALIZED (long to ext. 
HFP) + op_MAYH uint32 = 0xED3C // FORMAT_RXF MULTIPLY AND ADD UNNRM. (long to ext. high HFP) + op_MAYHR uint32 = 0xB33C // FORMAT_RRD MULTIPLY AND ADD UNNRM. (long to ext. high HFP) + op_MAYL uint32 = 0xED38 // FORMAT_RXF MULTIPLY AND ADD UNNRM. (long to ext. low HFP) + op_MAYLR uint32 = 0xB338 // FORMAT_RRD MULTIPLY AND ADD UNNRM. (long to ext. low HFP) + op_MAYR uint32 = 0xB33A // FORMAT_RRD MULTIPLY & ADD UNNORMALIZED (long to ext. HFP) + op_MC uint32 = 0xAF00 // FORMAT_SI MONITOR CALL + op_MD uint32 = 0x6C00 // FORMAT_RX1 MULTIPLY (long HFP) + op_MDB uint32 = 0xED1C // FORMAT_RXE MULTIPLY (long BFP) + op_MDBR uint32 = 0xB31C // FORMAT_RRE MULTIPLY (long BFP) + op_MDE uint32 = 0x7C00 // FORMAT_RX1 MULTIPLY (short to long HFP) + op_MDEB uint32 = 0xED0C // FORMAT_RXE MULTIPLY (short to long BFP) + op_MDEBR uint32 = 0xB30C // FORMAT_RRE MULTIPLY (short to long BFP) + op_MDER uint32 = 0x3C00 // FORMAT_RR MULTIPLY (short to long HFP) + op_MDR uint32 = 0x2C00 // FORMAT_RR MULTIPLY (long HFP) + op_MDTR uint32 = 0xB3D0 // FORMAT_RRF1 MULTIPLY (long DFP) + op_MDTRA uint32 = 0xB3D0 // FORMAT_RRF1 MULTIPLY (long DFP) + op_ME uint32 = 0x7C00 // FORMAT_RX1 MULTIPLY (short to long HFP) + op_MEE uint32 = 0xED37 // FORMAT_RXE MULTIPLY (short HFP) + op_MEEB uint32 = 0xED17 // FORMAT_RXE MULTIPLY (short BFP) + op_MEEBR uint32 = 0xB317 // FORMAT_RRE MULTIPLY (short BFP) + op_MEER uint32 = 0xB337 // FORMAT_RRE MULTIPLY (short HFP) + op_MER uint32 = 0x3C00 // FORMAT_RR MULTIPLY (short to long HFP) + op_MFY uint32 = 0xE35C // FORMAT_RXY1 MULTIPLY (64<-32) + op_MGHI uint32 = 0xA70D // FORMAT_RI1 MULTIPLY HALFWORD IMMEDIATE (64) + op_MH uint32 = 0x4C00 // FORMAT_RX1 MULTIPLY HALFWORD (32) + op_MHI uint32 = 0xA70C // FORMAT_RI1 MULTIPLY HALFWORD IMMEDIATE (32) + op_MHY uint32 = 0xE37C // FORMAT_RXY1 MULTIPLY HALFWORD (32) + op_ML uint32 = 0xE396 // FORMAT_RXY1 MULTIPLY LOGICAL (64<-32) + op_MLG uint32 = 0xE386 // FORMAT_RXY1 MULTIPLY LOGICAL (128<-64) + op_MLGR uint32 = 0xB986 // 
FORMAT_RRE MULTIPLY LOGICAL (128<-64) + op_MLR uint32 = 0xB996 // FORMAT_RRE MULTIPLY LOGICAL (64<-32) + op_MP uint32 = 0xFC00 // FORMAT_SS2 MULTIPLY DECIMAL + op_MR uint32 = 0x1C00 // FORMAT_RR MULTIPLY (64<-32) + op_MS uint32 = 0x7100 // FORMAT_RX1 MULTIPLY SINGLE (32) + op_MSCH uint32 = 0xB232 // FORMAT_S MODIFY SUBCHANNEL + op_MSD uint32 = 0xED3F // FORMAT_RXF MULTIPLY AND SUBTRACT (long HFP) + op_MSDB uint32 = 0xED1F // FORMAT_RXF MULTIPLY AND SUBTRACT (long BFP) + op_MSDBR uint32 = 0xB31F // FORMAT_RRD MULTIPLY AND SUBTRACT (long BFP) + op_MSDR uint32 = 0xB33F // FORMAT_RRD MULTIPLY AND SUBTRACT (long HFP) + op_MSE uint32 = 0xED2F // FORMAT_RXF MULTIPLY AND SUBTRACT (short HFP) + op_MSEB uint32 = 0xED0F // FORMAT_RXF MULTIPLY AND SUBTRACT (short BFP) + op_MSEBR uint32 = 0xB30F // FORMAT_RRD MULTIPLY AND SUBTRACT (short BFP) + op_MSER uint32 = 0xB32F // FORMAT_RRD MULTIPLY AND SUBTRACT (short HFP) + op_MSFI uint32 = 0xC201 // FORMAT_RIL1 MULTIPLY SINGLE IMMEDIATE (32) + op_MSG uint32 = 0xE30C // FORMAT_RXY1 MULTIPLY SINGLE (64) + op_MSGF uint32 = 0xE31C // FORMAT_RXY1 MULTIPLY SINGLE (64<-32) + op_MSGFI uint32 = 0xC200 // FORMAT_RIL1 MULTIPLY SINGLE IMMEDIATE (64<-32) + op_MSGFR uint32 = 0xB91C // FORMAT_RRE MULTIPLY SINGLE (64<-32) + op_MSGR uint32 = 0xB90C // FORMAT_RRE MULTIPLY SINGLE (64) + op_MSR uint32 = 0xB252 // FORMAT_RRE MULTIPLY SINGLE (32) + op_MSTA uint32 = 0xB247 // FORMAT_RRE MODIFY STACKED STATE + op_MSY uint32 = 0xE351 // FORMAT_RXY1 MULTIPLY SINGLE (32) + op_MVC uint32 = 0xD200 // FORMAT_SS1 MOVE (character) + op_MVCDK uint32 = 0xE50F // FORMAT_SSE MOVE WITH DESTINATION KEY + op_MVCIN uint32 = 0xE800 // FORMAT_SS1 MOVE INVERSE + op_MVCK uint32 = 0xD900 // FORMAT_SS4 MOVE WITH KEY + op_MVCL uint32 = 0x0E00 // FORMAT_RR MOVE LONG + op_MVCLE uint32 = 0xA800 // FORMAT_RS1 MOVE LONG EXTENDED + op_MVCLU uint32 = 0xEB8E // FORMAT_RSY1 MOVE LONG UNICODE + op_MVCOS uint32 = 0xC800 // FORMAT_SSF MOVE WITH OPTIONAL SPECIFICATIONS + op_MVCP uint32 = 
0xDA00 // FORMAT_SS4 MOVE TO PRIMARY + op_MVCS uint32 = 0xDB00 // FORMAT_SS4 MOVE TO SECONDARY + op_MVCSK uint32 = 0xE50E // FORMAT_SSE MOVE WITH SOURCE KEY + op_MVGHI uint32 = 0xE548 // FORMAT_SIL MOVE (64<-16) + op_MVHHI uint32 = 0xE544 // FORMAT_SIL MOVE (16<-16) + op_MVHI uint32 = 0xE54C // FORMAT_SIL MOVE (32<-16) + op_MVI uint32 = 0x9200 // FORMAT_SI MOVE (immediate) + op_MVIY uint32 = 0xEB52 // FORMAT_SIY MOVE (immediate) + op_MVN uint32 = 0xD100 // FORMAT_SS1 MOVE NUMERICS + op_MVO uint32 = 0xF100 // FORMAT_SS2 MOVE WITH OFFSET + op_MVPG uint32 = 0xB254 // FORMAT_RRE MOVE PAGE + op_MVST uint32 = 0xB255 // FORMAT_RRE MOVE STRING + op_MVZ uint32 = 0xD300 // FORMAT_SS1 MOVE ZONES + op_MXBR uint32 = 0xB34C // FORMAT_RRE MULTIPLY (extended BFP) + op_MXD uint32 = 0x6700 // FORMAT_RX1 MULTIPLY (long to extended HFP) + op_MXDB uint32 = 0xED07 // FORMAT_RXE MULTIPLY (long to extended BFP) + op_MXDBR uint32 = 0xB307 // FORMAT_RRE MULTIPLY (long to extended BFP) + op_MXDR uint32 = 0x2700 // FORMAT_RR MULTIPLY (long to extended HFP) + op_MXR uint32 = 0x2600 // FORMAT_RR MULTIPLY (extended HFP) + op_MXTR uint32 = 0xB3D8 // FORMAT_RRF1 MULTIPLY (extended DFP) + op_MXTRA uint32 = 0xB3D8 // FORMAT_RRF1 MULTIPLY (extended DFP) + op_MY uint32 = 0xED3B // FORMAT_RXF MULTIPLY UNNORMALIZED (long to ext. HFP) + op_MYH uint32 = 0xED3D // FORMAT_RXF MULTIPLY UNNORM. (long to ext. high HFP) + op_MYHR uint32 = 0xB33D // FORMAT_RRD MULTIPLY UNNORM. (long to ext. high HFP) + op_MYL uint32 = 0xED39 // FORMAT_RXF MULTIPLY UNNORM. (long to ext. low HFP) + op_MYLR uint32 = 0xB339 // FORMAT_RRD MULTIPLY UNNORM. (long to ext. low HFP) + op_MYR uint32 = 0xB33B // FORMAT_RRD MULTIPLY UNNORMALIZED (long to ext. 
HFP) + op_N uint32 = 0x5400 // FORMAT_RX1 AND (32) + op_NC uint32 = 0xD400 // FORMAT_SS1 AND (character) + op_NG uint32 = 0xE380 // FORMAT_RXY1 AND (64) + op_NGR uint32 = 0xB980 // FORMAT_RRE AND (64) + op_NGRK uint32 = 0xB9E4 // FORMAT_RRF1 AND (64) + op_NI uint32 = 0x9400 // FORMAT_SI AND (immediate) + op_NIAI uint32 = 0xB2FA // FORMAT_IE NEXT INSTRUCTION ACCESS INTENT + op_NIHF uint32 = 0xC00A // FORMAT_RIL1 AND IMMEDIATE (high) + op_NIHH uint32 = 0xA504 // FORMAT_RI1 AND IMMEDIATE (high high) + op_NIHL uint32 = 0xA505 // FORMAT_RI1 AND IMMEDIATE (high low) + op_NILF uint32 = 0xC00B // FORMAT_RIL1 AND IMMEDIATE (low) + op_NILH uint32 = 0xA506 // FORMAT_RI1 AND IMMEDIATE (low high) + op_NILL uint32 = 0xA507 // FORMAT_RI1 AND IMMEDIATE (low low) + op_NIY uint32 = 0xEB54 // FORMAT_SIY AND (immediate) + op_NR uint32 = 0x1400 // FORMAT_RR AND (32) + op_NRK uint32 = 0xB9F4 // FORMAT_RRF1 AND (32) + op_NTSTG uint32 = 0xE325 // FORMAT_RXY1 NONTRANSACTIONAL STORE + op_NY uint32 = 0xE354 // FORMAT_RXY1 AND (32) + op_O uint32 = 0x5600 // FORMAT_RX1 OR (32) + op_OC uint32 = 0xD600 // FORMAT_SS1 OR (character) + op_OG uint32 = 0xE381 // FORMAT_RXY1 OR (64) + op_OGR uint32 = 0xB981 // FORMAT_RRE OR (64) + op_OGRK uint32 = 0xB9E6 // FORMAT_RRF1 OR (64) + op_OI uint32 = 0x9600 // FORMAT_SI OR (immediate) + op_OIHF uint32 = 0xC00C // FORMAT_RIL1 OR IMMEDIATE (high) + op_OIHH uint32 = 0xA508 // FORMAT_RI1 OR IMMEDIATE (high high) + op_OIHL uint32 = 0xA509 // FORMAT_RI1 OR IMMEDIATE (high low) + op_OILF uint32 = 0xC00D // FORMAT_RIL1 OR IMMEDIATE (low) + op_OILH uint32 = 0xA50A // FORMAT_RI1 OR IMMEDIATE (low high) + op_OILL uint32 = 0xA50B // FORMAT_RI1 OR IMMEDIATE (low low) + op_OIY uint32 = 0xEB56 // FORMAT_SIY OR (immediate) + op_OR uint32 = 0x1600 // FORMAT_RR OR (32) + op_ORK uint32 = 0xB9F6 // FORMAT_RRF1 OR (32) + op_OY uint32 = 0xE356 // FORMAT_RXY1 OR (32) + op_PACK uint32 = 0xF200 // FORMAT_SS2 PACK + op_PALB uint32 = 0xB248 // FORMAT_RRE PURGE ALB + op_PC uint32 = 
0xB218 // FORMAT_S PROGRAM CALL + op_PCC uint32 = 0xB92C // FORMAT_RRE PERFORM CRYPTOGRAPHIC COMPUTATION + op_PCKMO uint32 = 0xB928 // FORMAT_RRE PERFORM CRYPTOGRAPHIC KEY MGMT. OPERATIONS + op_PFD uint32 = 0xE336 // FORMAT_RXY2 PREFETCH DATA + op_PFDRL uint32 = 0xC602 // FORMAT_RIL3 PREFETCH DATA RELATIVE LONG + op_PFMF uint32 = 0xB9AF // FORMAT_RRE PERFORM FRAME MANAGEMENT FUNCTION + op_PFPO uint32 = 0x010A // FORMAT_E PERFORM FLOATING-POINT OPERATION + op_PGIN uint32 = 0xB22E // FORMAT_RRE PAGE IN + op_PGOUT uint32 = 0xB22F // FORMAT_RRE PAGE OUT + op_PKA uint32 = 0xE900 // FORMAT_SS6 PACK ASCII + op_PKU uint32 = 0xE100 // FORMAT_SS6 PACK UNICODE + op_PLO uint32 = 0xEE00 // FORMAT_SS5 PERFORM LOCKED OPERATION + op_POPCNT uint32 = 0xB9E1 // FORMAT_RRE POPULATION COUNT + op_PPA uint32 = 0xB2E8 // FORMAT_RRF3 PERFORM PROCESSOR ASSIST + op_PR uint32 = 0x0101 // FORMAT_E PROGRAM RETURN + op_PT uint32 = 0xB228 // FORMAT_RRE PROGRAM TRANSFER + op_PTF uint32 = 0xB9A2 // FORMAT_RRE PERFORM TOPOLOGY FUNCTION + op_PTFF uint32 = 0x0104 // FORMAT_E PERFORM TIMING FACILITY FUNCTION + op_PTI uint32 = 0xB99E // FORMAT_RRE PROGRAM TRANSFER WITH INSTANCE + op_PTLB uint32 = 0xB20D // FORMAT_S PURGE TLB + op_QADTR uint32 = 0xB3F5 // FORMAT_RRF2 QUANTIZE (long DFP) + op_QAXTR uint32 = 0xB3FD // FORMAT_RRF2 QUANTIZE (extended DFP) + op_RCHP uint32 = 0xB23B // FORMAT_S RESET CHANNEL PATH + op_RISBG uint32 = 0xEC55 // FORMAT_RIE6 ROTATE THEN INSERT SELECTED BITS + op_RISBGN uint32 = 0xEC59 // FORMAT_RIE6 ROTATE THEN INSERT SELECTED BITS + op_RISBHG uint32 = 0xEC5D // FORMAT_RIE6 ROTATE THEN INSERT SELECTED BITS HIGH + op_RISBLG uint32 = 0xEC51 // FORMAT_RIE6 ROTATE THEN INSERT SELECTED BITS LOW + op_RLL uint32 = 0xEB1D // FORMAT_RSY1 ROTATE LEFT SINGLE LOGICAL (32) + op_RLLG uint32 = 0xEB1C // FORMAT_RSY1 ROTATE LEFT SINGLE LOGICAL (64) + op_RNSBG uint32 = 0xEC54 // FORMAT_RIE6 ROTATE THEN AND SELECTED BITS + op_ROSBG uint32 = 0xEC56 // FORMAT_RIE6 ROTATE THEN OR SELECTED BITS + op_RP 
uint32 = 0xB277 // FORMAT_S RESUME PROGRAM + op_RRBE uint32 = 0xB22A // FORMAT_RRE RESET REFERENCE BIT EXTENDED + op_RRBM uint32 = 0xB9AE // FORMAT_RRE RESET REFERENCE BITS MULTIPLE + op_RRDTR uint32 = 0xB3F7 // FORMAT_RRF2 REROUND (long DFP) + op_RRXTR uint32 = 0xB3FF // FORMAT_RRF2 REROUND (extended DFP) + op_RSCH uint32 = 0xB238 // FORMAT_S RESUME SUBCHANNEL + op_RXSBG uint32 = 0xEC57 // FORMAT_RIE6 ROTATE THEN EXCLUSIVE OR SELECTED BITS + op_S uint32 = 0x5B00 // FORMAT_RX1 SUBTRACT (32) + op_SAC uint32 = 0xB219 // FORMAT_S SET ADDRESS SPACE CONTROL + op_SACF uint32 = 0xB279 // FORMAT_S SET ADDRESS SPACE CONTROL FAST + op_SAL uint32 = 0xB237 // FORMAT_S SET ADDRESS LIMIT + op_SAM24 uint32 = 0x010C // FORMAT_E SET ADDRESSING MODE (24) + op_SAM31 uint32 = 0x010D // FORMAT_E SET ADDRESSING MODE (31) + op_SAM64 uint32 = 0x010E // FORMAT_E SET ADDRESSING MODE (64) + op_SAR uint32 = 0xB24E // FORMAT_RRE SET ACCESS + op_SCHM uint32 = 0xB23C // FORMAT_S SET CHANNEL MONITOR + op_SCK uint32 = 0xB204 // FORMAT_S SET CLOCK + op_SCKC uint32 = 0xB206 // FORMAT_S SET CLOCK COMPARATOR + op_SCKPF uint32 = 0x0107 // FORMAT_E SET CLOCK PROGRAMMABLE FIELD + op_SD uint32 = 0x6B00 // FORMAT_RX1 SUBTRACT NORMALIZED (long HFP) + op_SDB uint32 = 0xED1B // FORMAT_RXE SUBTRACT (long BFP) + op_SDBR uint32 = 0xB31B // FORMAT_RRE SUBTRACT (long BFP) + op_SDR uint32 = 0x2B00 // FORMAT_RR SUBTRACT NORMALIZED (long HFP) + op_SDTR uint32 = 0xB3D3 // FORMAT_RRF1 SUBTRACT (long DFP) + op_SDTRA uint32 = 0xB3D3 // FORMAT_RRF1 SUBTRACT (long DFP) + op_SE uint32 = 0x7B00 // FORMAT_RX1 SUBTRACT NORMALIZED (short HFP) + op_SEB uint32 = 0xED0B // FORMAT_RXE SUBTRACT (short BFP) + op_SEBR uint32 = 0xB30B // FORMAT_RRE SUBTRACT (short BFP) + op_SER uint32 = 0x3B00 // FORMAT_RR SUBTRACT NORMALIZED (short HFP) + op_SFASR uint32 = 0xB385 // FORMAT_RRE SET FPC AND SIGNAL + op_SFPC uint32 = 0xB384 // FORMAT_RRE SET FPC + op_SG uint32 = 0xE309 // FORMAT_RXY1 SUBTRACT (64) + op_SGF uint32 = 0xE319 // FORMAT_RXY1 
SUBTRACT (64<-32) + op_SGFR uint32 = 0xB919 // FORMAT_RRE SUBTRACT (64<-32) + op_SGR uint32 = 0xB909 // FORMAT_RRE SUBTRACT (64) + op_SGRK uint32 = 0xB9E9 // FORMAT_RRF1 SUBTRACT (64) + op_SH uint32 = 0x4B00 // FORMAT_RX1 SUBTRACT HALFWORD + op_SHHHR uint32 = 0xB9C9 // FORMAT_RRF1 SUBTRACT HIGH (32) + op_SHHLR uint32 = 0xB9D9 // FORMAT_RRF1 SUBTRACT HIGH (32) + op_SHY uint32 = 0xE37B // FORMAT_RXY1 SUBTRACT HALFWORD + op_SIGP uint32 = 0xAE00 // FORMAT_RS1 SIGNAL PROCESSOR + op_SL uint32 = 0x5F00 // FORMAT_RX1 SUBTRACT LOGICAL (32) + op_SLA uint32 = 0x8B00 // FORMAT_RS1 SHIFT LEFT SINGLE (32) + op_SLAG uint32 = 0xEB0B // FORMAT_RSY1 SHIFT LEFT SINGLE (64) + op_SLAK uint32 = 0xEBDD // FORMAT_RSY1 SHIFT LEFT SINGLE (32) + op_SLB uint32 = 0xE399 // FORMAT_RXY1 SUBTRACT LOGICAL WITH BORROW (32) + op_SLBG uint32 = 0xE389 // FORMAT_RXY1 SUBTRACT LOGICAL WITH BORROW (64) + op_SLBGR uint32 = 0xB989 // FORMAT_RRE SUBTRACT LOGICAL WITH BORROW (64) + op_SLBR uint32 = 0xB999 // FORMAT_RRE SUBTRACT LOGICAL WITH BORROW (32) + op_SLDA uint32 = 0x8F00 // FORMAT_RS1 SHIFT LEFT DOUBLE + op_SLDL uint32 = 0x8D00 // FORMAT_RS1 SHIFT LEFT DOUBLE LOGICAL + op_SLDT uint32 = 0xED40 // FORMAT_RXF SHIFT SIGNIFICAND LEFT (long DFP) + op_SLFI uint32 = 0xC205 // FORMAT_RIL1 SUBTRACT LOGICAL IMMEDIATE (32) + op_SLG uint32 = 0xE30B // FORMAT_RXY1 SUBTRACT LOGICAL (64) + op_SLGF uint32 = 0xE31B // FORMAT_RXY1 SUBTRACT LOGICAL (64<-32) + op_SLGFI uint32 = 0xC204 // FORMAT_RIL1 SUBTRACT LOGICAL IMMEDIATE (64<-32) + op_SLGFR uint32 = 0xB91B // FORMAT_RRE SUBTRACT LOGICAL (64<-32) + op_SLGR uint32 = 0xB90B // FORMAT_RRE SUBTRACT LOGICAL (64) + op_SLGRK uint32 = 0xB9EB // FORMAT_RRF1 SUBTRACT LOGICAL (64) + op_SLHHHR uint32 = 0xB9CB // FORMAT_RRF1 SUBTRACT LOGICAL HIGH (32) + op_SLHHLR uint32 = 0xB9DB // FORMAT_RRF1 SUBTRACT LOGICAL HIGH (32) + op_SLL uint32 = 0x8900 // FORMAT_RS1 SHIFT LEFT SINGLE LOGICAL (32) + op_SLLG uint32 = 0xEB0D // FORMAT_RSY1 SHIFT LEFT SINGLE LOGICAL (64) + op_SLLK uint32 = 
0xEBDF // FORMAT_RSY1 SHIFT LEFT SINGLE LOGICAL (32) + op_SLR uint32 = 0x1F00 // FORMAT_RR SUBTRACT LOGICAL (32) + op_SLRK uint32 = 0xB9FB // FORMAT_RRF1 SUBTRACT LOGICAL (32) + op_SLXT uint32 = 0xED48 // FORMAT_RXF SHIFT SIGNIFICAND LEFT (extended DFP) + op_SLY uint32 = 0xE35F // FORMAT_RXY1 SUBTRACT LOGICAL (32) + op_SP uint32 = 0xFB00 // FORMAT_SS2 SUBTRACT DECIMAL + op_SPKA uint32 = 0xB20A // FORMAT_S SET PSW KEY FROM ADDRESS + op_SPM uint32 = 0x0400 // FORMAT_RR SET PROGRAM MASK + op_SPT uint32 = 0xB208 // FORMAT_S SET CPU TIMER + op_SPX uint32 = 0xB210 // FORMAT_S SET PREFIX + op_SQD uint32 = 0xED35 // FORMAT_RXE SQUARE ROOT (long HFP) + op_SQDB uint32 = 0xED15 // FORMAT_RXE SQUARE ROOT (long BFP) + op_SQDBR uint32 = 0xB315 // FORMAT_RRE SQUARE ROOT (long BFP) + op_SQDR uint32 = 0xB244 // FORMAT_RRE SQUARE ROOT (long HFP) + op_SQE uint32 = 0xED34 // FORMAT_RXE SQUARE ROOT (short HFP) + op_SQEB uint32 = 0xED14 // FORMAT_RXE SQUARE ROOT (short BFP) + op_SQEBR uint32 = 0xB314 // FORMAT_RRE SQUARE ROOT (short BFP) + op_SQER uint32 = 0xB245 // FORMAT_RRE SQUARE ROOT (short HFP) + op_SQXBR uint32 = 0xB316 // FORMAT_RRE SQUARE ROOT (extended BFP) + op_SQXR uint32 = 0xB336 // FORMAT_RRE SQUARE ROOT (extended HFP) + op_SR uint32 = 0x1B00 // FORMAT_RR SUBTRACT (32) + op_SRA uint32 = 0x8A00 // FORMAT_RS1 SHIFT RIGHT SINGLE (32) + op_SRAG uint32 = 0xEB0A // FORMAT_RSY1 SHIFT RIGHT SINGLE (64) + op_SRAK uint32 = 0xEBDC // FORMAT_RSY1 SHIFT RIGHT SINGLE (32) + op_SRDA uint32 = 0x8E00 // FORMAT_RS1 SHIFT RIGHT DOUBLE + op_SRDL uint32 = 0x8C00 // FORMAT_RS1 SHIFT RIGHT DOUBLE LOGICAL + op_SRDT uint32 = 0xED41 // FORMAT_RXF SHIFT SIGNIFICAND RIGHT (long DFP) + op_SRK uint32 = 0xB9F9 // FORMAT_RRF1 SUBTRACT (32) + op_SRL uint32 = 0x8800 // FORMAT_RS1 SHIFT RIGHT SINGLE LOGICAL (32) + op_SRLG uint32 = 0xEB0C // FORMAT_RSY1 SHIFT RIGHT SINGLE LOGICAL (64) + op_SRLK uint32 = 0xEBDE // FORMAT_RSY1 SHIFT RIGHT SINGLE LOGICAL (32) + op_SRNM uint32 = 0xB299 // FORMAT_S SET BFP 
ROUNDING MODE (2 bit) + op_SRNMB uint32 = 0xB2B8 // FORMAT_S SET BFP ROUNDING MODE (3 bit) + op_SRNMT uint32 = 0xB2B9 // FORMAT_S SET DFP ROUNDING MODE + op_SRP uint32 = 0xF000 // FORMAT_SS3 SHIFT AND ROUND DECIMAL + op_SRST uint32 = 0xB25E // FORMAT_RRE SEARCH STRING + op_SRSTU uint32 = 0xB9BE // FORMAT_RRE SEARCH STRING UNICODE + op_SRXT uint32 = 0xED49 // FORMAT_RXF SHIFT SIGNIFICAND RIGHT (extended DFP) + op_SSAIR uint32 = 0xB99F // FORMAT_RRE SET SECONDARY ASN WITH INSTANCE + op_SSAR uint32 = 0xB225 // FORMAT_RRE SET SECONDARY ASN + op_SSCH uint32 = 0xB233 // FORMAT_S START SUBCHANNEL + op_SSKE uint32 = 0xB22B // FORMAT_RRF3 SET STORAGE KEY EXTENDED + op_SSM uint32 = 0x8000 // FORMAT_S SET SYSTEM MASK + op_ST uint32 = 0x5000 // FORMAT_RX1 STORE (32) + op_STAM uint32 = 0x9B00 // FORMAT_RS1 STORE ACCESS MULTIPLE + op_STAMY uint32 = 0xEB9B // FORMAT_RSY1 STORE ACCESS MULTIPLE + op_STAP uint32 = 0xB212 // FORMAT_S STORE CPU ADDRESS + op_STC uint32 = 0x4200 // FORMAT_RX1 STORE CHARACTER + op_STCH uint32 = 0xE3C3 // FORMAT_RXY1 STORE CHARACTER HIGH (8) + op_STCK uint32 = 0xB205 // FORMAT_S STORE CLOCK + op_STCKC uint32 = 0xB207 // FORMAT_S STORE CLOCK COMPARATOR + op_STCKE uint32 = 0xB278 // FORMAT_S STORE CLOCK EXTENDED + op_STCKF uint32 = 0xB27C // FORMAT_S STORE CLOCK FAST + op_STCM uint32 = 0xBE00 // FORMAT_RS2 STORE CHARACTERS UNDER MASK (low) + op_STCMH uint32 = 0xEB2C // FORMAT_RSY2 STORE CHARACTERS UNDER MASK (high) + op_STCMY uint32 = 0xEB2D // FORMAT_RSY2 STORE CHARACTERS UNDER MASK (low) + op_STCPS uint32 = 0xB23A // FORMAT_S STORE CHANNEL PATH STATUS + op_STCRW uint32 = 0xB239 // FORMAT_S STORE CHANNEL REPORT WORD + op_STCTG uint32 = 0xEB25 // FORMAT_RSY1 STORE CONTROL (64) + op_STCTL uint32 = 0xB600 // FORMAT_RS1 STORE CONTROL (32) + op_STCY uint32 = 0xE372 // FORMAT_RXY1 STORE CHARACTER + op_STD uint32 = 0x6000 // FORMAT_RX1 STORE (long) + op_STDY uint32 = 0xED67 // FORMAT_RXY1 STORE (long) + op_STE uint32 = 0x7000 // FORMAT_RX1 STORE (short) + op_STEY 
uint32 = 0xED66 // FORMAT_RXY1 STORE (short) + op_STFH uint32 = 0xE3CB // FORMAT_RXY1 STORE HIGH (32) + op_STFL uint32 = 0xB2B1 // FORMAT_S STORE FACILITY LIST + op_STFLE uint32 = 0xB2B0 // FORMAT_S STORE FACILITY LIST EXTENDED + op_STFPC uint32 = 0xB29C // FORMAT_S STORE FPC + op_STG uint32 = 0xE324 // FORMAT_RXY1 STORE (64) + op_STGRL uint32 = 0xC40B // FORMAT_RIL2 STORE RELATIVE LONG (64) + op_STH uint32 = 0x4000 // FORMAT_RX1 STORE HALFWORD + op_STHH uint32 = 0xE3C7 // FORMAT_RXY1 STORE HALFWORD HIGH (16) + op_STHRL uint32 = 0xC407 // FORMAT_RIL2 STORE HALFWORD RELATIVE LONG + op_STHY uint32 = 0xE370 // FORMAT_RXY1 STORE HALFWORD + op_STIDP uint32 = 0xB202 // FORMAT_S STORE CPU ID + op_STM uint32 = 0x9000 // FORMAT_RS1 STORE MULTIPLE (32) + op_STMG uint32 = 0xEB24 // FORMAT_RSY1 STORE MULTIPLE (64) + op_STMH uint32 = 0xEB26 // FORMAT_RSY1 STORE MULTIPLE HIGH + op_STMY uint32 = 0xEB90 // FORMAT_RSY1 STORE MULTIPLE (32) + op_STNSM uint32 = 0xAC00 // FORMAT_SI STORE THEN AND SYSTEM MASK + op_STOC uint32 = 0xEBF3 // FORMAT_RSY2 STORE ON CONDITION (32) + op_STOCG uint32 = 0xEBE3 // FORMAT_RSY2 STORE ON CONDITION (64) + op_STOSM uint32 = 0xAD00 // FORMAT_SI STORE THEN OR SYSTEM MASK + op_STPQ uint32 = 0xE38E // FORMAT_RXY1 STORE PAIR TO QUADWORD + op_STPT uint32 = 0xB209 // FORMAT_S STORE CPU TIMER + op_STPX uint32 = 0xB211 // FORMAT_S STORE PREFIX + op_STRAG uint32 = 0xE502 // FORMAT_SSE STORE REAL ADDRESS + op_STRL uint32 = 0xC40F // FORMAT_RIL2 STORE RELATIVE LONG (32) + op_STRV uint32 = 0xE33E // FORMAT_RXY1 STORE REVERSED (32) + op_STRVG uint32 = 0xE32F // FORMAT_RXY1 STORE REVERSED (64) + op_STRVH uint32 = 0xE33F // FORMAT_RXY1 STORE REVERSED (16) + op_STSCH uint32 = 0xB234 // FORMAT_S STORE SUBCHANNEL + op_STSI uint32 = 0xB27D // FORMAT_S STORE SYSTEM INFORMATION + op_STURA uint32 = 0xB246 // FORMAT_RRE STORE USING REAL ADDRESS (32) + op_STURG uint32 = 0xB925 // FORMAT_RRE STORE USING REAL ADDRESS (64) + op_STY uint32 = 0xE350 // FORMAT_RXY1 STORE (32) + op_SU 
uint32 = 0x7F00 // FORMAT_RX1 SUBTRACT UNNORMALIZED (short HFP) + op_SUR uint32 = 0x3F00 // FORMAT_RR SUBTRACT UNNORMALIZED (short HFP) + op_SVC uint32 = 0x0A00 // FORMAT_I SUPERVISOR CALL + op_SW uint32 = 0x6F00 // FORMAT_RX1 SUBTRACT UNNORMALIZED (long HFP) + op_SWR uint32 = 0x2F00 // FORMAT_RR SUBTRACT UNNORMALIZED (long HFP) + op_SXBR uint32 = 0xB34B // FORMAT_RRE SUBTRACT (extended BFP) + op_SXR uint32 = 0x3700 // FORMAT_RR SUBTRACT NORMALIZED (extended HFP) + op_SXTR uint32 = 0xB3DB // FORMAT_RRF1 SUBTRACT (extended DFP) + op_SXTRA uint32 = 0xB3DB // FORMAT_RRF1 SUBTRACT (extended DFP) + op_SY uint32 = 0xE35B // FORMAT_RXY1 SUBTRACT (32) + op_TABORT uint32 = 0xB2FC // FORMAT_S TRANSACTION ABORT + op_TAM uint32 = 0x010B // FORMAT_E TEST ADDRESSING MODE + op_TAR uint32 = 0xB24C // FORMAT_RRE TEST ACCESS + op_TB uint32 = 0xB22C // FORMAT_RRE TEST BLOCK + op_TBDR uint32 = 0xB351 // FORMAT_RRF5 CONVERT HFP TO BFP (long) + op_TBEDR uint32 = 0xB350 // FORMAT_RRF5 CONVERT HFP TO BFP (long to short) + op_TBEGIN uint32 = 0xE560 // FORMAT_SIL TRANSACTION BEGIN + op_TBEGINC uint32 = 0xE561 // FORMAT_SIL TRANSACTION BEGIN + op_TCDB uint32 = 0xED11 // FORMAT_RXE TEST DATA CLASS (long BFP) + op_TCEB uint32 = 0xED10 // FORMAT_RXE TEST DATA CLASS (short BFP) + op_TCXB uint32 = 0xED12 // FORMAT_RXE TEST DATA CLASS (extended BFP) + op_TDCDT uint32 = 0xED54 // FORMAT_RXE TEST DATA CLASS (long DFP) + op_TDCET uint32 = 0xED50 // FORMAT_RXE TEST DATA CLASS (short DFP) + op_TDCXT uint32 = 0xED58 // FORMAT_RXE TEST DATA CLASS (extended DFP) + op_TDGDT uint32 = 0xED55 // FORMAT_RXE TEST DATA GROUP (long DFP) + op_TDGET uint32 = 0xED51 // FORMAT_RXE TEST DATA GROUP (short DFP) + op_TDGXT uint32 = 0xED59 // FORMAT_RXE TEST DATA GROUP (extended DFP) + op_TEND uint32 = 0xB2F8 // FORMAT_S TRANSACTION END + op_THDER uint32 = 0xB358 // FORMAT_RRE CONVERT BFP TO HFP (short to long) + op_THDR uint32 = 0xB359 // FORMAT_RRE CONVERT BFP TO HFP (long) + op_TM uint32 = 0x9100 // FORMAT_SI TEST 
UNDER MASK + op_TMH uint32 = 0xA700 // FORMAT_RI1 TEST UNDER MASK HIGH + op_TMHH uint32 = 0xA702 // FORMAT_RI1 TEST UNDER MASK (high high) + op_TMHL uint32 = 0xA703 // FORMAT_RI1 TEST UNDER MASK (high low) + op_TML uint32 = 0xA701 // FORMAT_RI1 TEST UNDER MASK LOW + op_TMLH uint32 = 0xA700 // FORMAT_RI1 TEST UNDER MASK (low high) + op_TMLL uint32 = 0xA701 // FORMAT_RI1 TEST UNDER MASK (low low) + op_TMY uint32 = 0xEB51 // FORMAT_SIY TEST UNDER MASK + op_TP uint32 = 0xEBC0 // FORMAT_RSL TEST DECIMAL + op_TPI uint32 = 0xB236 // FORMAT_S TEST PENDING INTERRUPTION + op_TPROT uint32 = 0xE501 // FORMAT_SSE TEST PROTECTION + op_TR uint32 = 0xDC00 // FORMAT_SS1 TRANSLATE + op_TRACE uint32 = 0x9900 // FORMAT_RS1 TRACE (32) + op_TRACG uint32 = 0xEB0F // FORMAT_RSY1 TRACE (64) + op_TRAP2 uint32 = 0x01FF // FORMAT_E TRAP + op_TRAP4 uint32 = 0xB2FF // FORMAT_S TRAP + op_TRE uint32 = 0xB2A5 // FORMAT_RRE TRANSLATE EXTENDED + op_TROO uint32 = 0xB993 // FORMAT_RRF3 TRANSLATE ONE TO ONE + op_TROT uint32 = 0xB992 // FORMAT_RRF3 TRANSLATE ONE TO TWO + op_TRT uint32 = 0xDD00 // FORMAT_SS1 TRANSLATE AND TEST + op_TRTE uint32 = 0xB9BF // FORMAT_RRF3 TRANSLATE AND TEST EXTENDED + op_TRTO uint32 = 0xB991 // FORMAT_RRF3 TRANSLATE TWO TO ONE + op_TRTR uint32 = 0xD000 // FORMAT_SS1 TRANSLATE AND TEST REVERSE + op_TRTRE uint32 = 0xB9BD // FORMAT_RRF3 TRANSLATE AND TEST REVERSE EXTENDED + op_TRTT uint32 = 0xB990 // FORMAT_RRF3 TRANSLATE TWO TO TWO + op_TS uint32 = 0x9300 // FORMAT_S TEST AND SET + op_TSCH uint32 = 0xB235 // FORMAT_S TEST SUBCHANNEL + op_UNPK uint32 = 0xF300 // FORMAT_SS2 UNPACK + op_UNPKA uint32 = 0xEA00 // FORMAT_SS1 UNPACK ASCII + op_UNPKU uint32 = 0xE200 // FORMAT_SS1 UNPACK UNICODE + op_UPT uint32 = 0x0102 // FORMAT_E UPDATE TREE + op_X uint32 = 0x5700 // FORMAT_RX1 EXCLUSIVE OR (32) + op_XC uint32 = 0xD700 // FORMAT_SS1 EXCLUSIVE OR (character) + op_XG uint32 = 0xE382 // FORMAT_RXY1 EXCLUSIVE OR (64) + op_XGR uint32 = 0xB982 // FORMAT_RRE EXCLUSIVE OR (64) + op_XGRK 
uint32 = 0xB9E7 // FORMAT_RRF1 EXCLUSIVE OR (64) + op_XI uint32 = 0x9700 // FORMAT_SI EXCLUSIVE OR (immediate) + op_XIHF uint32 = 0xC006 // FORMAT_RIL1 EXCLUSIVE OR IMMEDIATE (high) + op_XILF uint32 = 0xC007 // FORMAT_RIL1 EXCLUSIVE OR IMMEDIATE (low) + op_XIY uint32 = 0xEB57 // FORMAT_SIY EXCLUSIVE OR (immediate) + op_XR uint32 = 0x1700 // FORMAT_RR EXCLUSIVE OR (32) + op_XRK uint32 = 0xB9F7 // FORMAT_RRF1 EXCLUSIVE OR (32) + op_XSCH uint32 = 0xB276 // FORMAT_S CANCEL SUBCHANNEL + op_XY uint32 = 0xE357 // FORMAT_RXY1 EXCLUSIVE OR (32) + op_ZAP uint32 = 0xF800 // FORMAT_SS2 ZERO AND ADD + + // added in z13 + op_CXPT uint32 = 0xEDAF // RSL-b CONVERT FROM PACKED (to extended DFP) + op_CDPT uint32 = 0xEDAE // RSL-b CONVERT FROM PACKED (to long DFP) + op_CPXT uint32 = 0xEDAD // RSL-b CONVERT TO PACKED (from extended DFP) + op_CPDT uint32 = 0xEDAC // RSL-b CONVERT TO PACKED (from long DFP) + op_LZRF uint32 = 0xE33B // RXY-a LOAD AND ZERO RIGHTMOST BYTE (32) + op_LZRG uint32 = 0xE32A // RXY-a LOAD AND ZERO RIGHTMOST BYTE (64) + op_LCCB uint32 = 0xE727 // RXE LOAD COUNT TO BLOCK BOUNDARY + op_LOCHHI uint32 = 0xEC4E // RIE-g LOAD HALFWORD HIGH IMMEDIATE ON CONDITION (32←16) + op_LOCHI uint32 = 0xEC42 // RIE-g LOAD HALFWORD IMMEDIATE ON CONDITION (32←16) + op_LOCGHI uint32 = 0xEC46 // RIE-g LOAD HALFWORD IMMEDIATE ON CONDITION (64←16) + op_LOCFH uint32 = 0xEBE0 // RSY-b LOAD HIGH ON CONDITION (32) + op_LOCFHR uint32 = 0xB9E0 // RRF-c LOAD HIGH ON CONDITION (32) + op_LLZRGF uint32 = 0xE33A // RXY-a LOAD LOGICAL AND ZERO RIGHTMOST BYTE (64←32) + op_STOCFH uint32 = 0xEBE1 // RSY-b STORE HIGH ON CONDITION + op_VA uint32 = 0xE7F3 // VRR-c VECTOR ADD + op_VACC uint32 = 0xE7F1 // VRR-c VECTOR ADD COMPUTE CARRY + op_VAC uint32 = 0xE7BB // VRR-d VECTOR ADD WITH CARRY + op_VACCC uint32 = 0xE7B9 // VRR-d VECTOR ADD WITH CARRY COMPUTE CARRY + op_VN uint32 = 0xE768 // VRR-c VECTOR AND + op_VNC uint32 = 0xE769 // VRR-c VECTOR AND WITH COMPLEMENT + op_VAVG uint32 = 0xE7F2 // VRR-c VECTOR 
AVERAGE + op_VAVGL uint32 = 0xE7F0 // VRR-c VECTOR AVERAGE LOGICAL + op_VCKSM uint32 = 0xE766 // VRR-c VECTOR CHECKSUM + op_VCEQ uint32 = 0xE7F8 // VRR-b VECTOR COMPARE EQUAL + op_VCH uint32 = 0xE7FB // VRR-b VECTOR COMPARE HIGH + op_VCHL uint32 = 0xE7F9 // VRR-b VECTOR COMPARE HIGH LOGICAL + op_VCLZ uint32 = 0xE753 // VRR-a VECTOR COUNT LEADING ZEROS + op_VCTZ uint32 = 0xE752 // VRR-a VECTOR COUNT TRAILING ZEROS + op_VEC uint32 = 0xE7DB // VRR-a VECTOR ELEMENT COMPARE + op_VECL uint32 = 0xE7D9 // VRR-a VECTOR ELEMENT COMPARE LOGICAL + op_VERIM uint32 = 0xE772 // VRI-d VECTOR ELEMENT ROTATE AND INSERT UNDER MASK + op_VERLL uint32 = 0xE733 // VRS-a VECTOR ELEMENT ROTATE LEFT LOGICAL + op_VERLLV uint32 = 0xE773 // VRR-c VECTOR ELEMENT ROTATE LEFT LOGICAL + op_VESLV uint32 = 0xE770 // VRR-c VECTOR ELEMENT SHIFT LEFT + op_VESL uint32 = 0xE730 // VRS-a VECTOR ELEMENT SHIFT LEFT + op_VESRA uint32 = 0xE73A // VRS-a VECTOR ELEMENT SHIFT RIGHT ARITHMETIC + op_VESRAV uint32 = 0xE77A // VRR-c VECTOR ELEMENT SHIFT RIGHT ARITHMETIC + op_VESRL uint32 = 0xE738 // VRS-a VECTOR ELEMENT SHIFT RIGHT LOGICAL + op_VESRLV uint32 = 0xE778 // VRR-c VECTOR ELEMENT SHIFT RIGHT LOGICAL + op_VX uint32 = 0xE76D // VRR-c VECTOR EXCLUSIVE OR + op_VFAE uint32 = 0xE782 // VRR-b VECTOR FIND ANY ELEMENT EQUAL + op_VFEE uint32 = 0xE780 // VRR-b VECTOR FIND ELEMENT EQUAL + op_VFENE uint32 = 0xE781 // VRR-b VECTOR FIND ELEMENT NOT EQUAL + op_VFA uint32 = 0xE7E3 // VRR-c VECTOR FP ADD + op_WFK uint32 = 0xE7CA // VRR-a VECTOR FP COMPARE AND SIGNAL SCALAR + op_VFCE uint32 = 0xE7E8 // VRR-c VECTOR FP COMPARE EQUAL + op_VFCH uint32 = 0xE7EB // VRR-c VECTOR FP COMPARE HIGH + op_VFCHE uint32 = 0xE7EA // VRR-c VECTOR FP COMPARE HIGH OR EQUAL + op_WFC uint32 = 0xE7CB // VRR-a VECTOR FP COMPARE SCALAR + op_VCDG uint32 = 0xE7C3 // VRR-a VECTOR FP CONVERT FROM FIXED 64-BIT + op_VCDLG uint32 = 0xE7C1 // VRR-a VECTOR FP CONVERT FROM LOGICAL 64-BIT + op_VCGD uint32 = 0xE7C2 // VRR-a VECTOR FP CONVERT TO FIXED 64-BIT 
+ op_VCLGD uint32 = 0xE7C0 // VRR-a VECTOR FP CONVERT TO LOGICAL 64-BIT + op_VFD uint32 = 0xE7E5 // VRR-c VECTOR FP DIVIDE + op_VLDE uint32 = 0xE7C4 // VRR-a VECTOR FP LOAD LENGTHENED + op_VLED uint32 = 0xE7C5 // VRR-a VECTOR FP LOAD ROUNDED + op_VFM uint32 = 0xE7E7 // VRR-c VECTOR FP MULTIPLY + op_VFMA uint32 = 0xE78F // VRR-e VECTOR FP MULTIPLY AND ADD + op_VFMS uint32 = 0xE78E // VRR-e VECTOR FP MULTIPLY AND SUBTRACT + op_VFPSO uint32 = 0xE7CC // VRR-a VECTOR FP PERFORM SIGN OPERATION + op_VFSQ uint32 = 0xE7CE // VRR-a VECTOR FP SQUARE ROOT + op_VFS uint32 = 0xE7E2 // VRR-c VECTOR FP SUBTRACT + op_VFTCI uint32 = 0xE74A // VRI-e VECTOR FP TEST DATA CLASS IMMEDIATE + op_VGFM uint32 = 0xE7B4 // VRR-c VECTOR GALOIS FIELD MULTIPLY SUM + op_VGFMA uint32 = 0xE7BC // VRR-d VECTOR GALOIS FIELD MULTIPLY SUM AND ACCUMULATE + op_VGEF uint32 = 0xE713 // VRV VECTOR GATHER ELEMENT (32) + op_VGEG uint32 = 0xE712 // VRV VECTOR GATHER ELEMENT (64) + op_VGBM uint32 = 0xE744 // VRI-a VECTOR GENERATE BYTE MASK + op_VGM uint32 = 0xE746 // VRI-b VECTOR GENERATE MASK + op_VISTR uint32 = 0xE75C // VRR-a VECTOR ISOLATE STRING + op_VL uint32 = 0xE706 // VRX VECTOR LOAD + op_VLR uint32 = 0xE756 // VRR-a VECTOR LOAD + op_VLREP uint32 = 0xE705 // VRX VECTOR LOAD AND REPLICATE + op_VLC uint32 = 0xE7DE // VRR-a VECTOR LOAD COMPLEMENT + op_VLEH uint32 = 0xE701 // VRX VECTOR LOAD ELEMENT (16) + op_VLEF uint32 = 0xE703 // VRX VECTOR LOAD ELEMENT (32) + op_VLEG uint32 = 0xE702 // VRX VECTOR LOAD ELEMENT (64) + op_VLEB uint32 = 0xE700 // VRX VECTOR LOAD ELEMENT (8) + op_VLEIH uint32 = 0xE741 // VRI-a VECTOR LOAD ELEMENT IMMEDIATE (16) + op_VLEIF uint32 = 0xE743 // VRI-a VECTOR LOAD ELEMENT IMMEDIATE (32) + op_VLEIG uint32 = 0xE742 // VRI-a VECTOR LOAD ELEMENT IMMEDIATE (64) + op_VLEIB uint32 = 0xE740 // VRI-a VECTOR LOAD ELEMENT IMMEDIATE (8) + op_VFI uint32 = 0xE7C7 // VRR-a VECTOR LOAD FP INTEGER + op_VLGV uint32 = 0xE721 // VRS-c VECTOR LOAD GR FROM VR ELEMENT + op_VLLEZ uint32 = 0xE704 // VRX 
VECTOR LOAD LOGICAL ELEMENT AND ZERO + op_VLM uint32 = 0xE736 // VRS-a VECTOR LOAD MULTIPLE + op_VLP uint32 = 0xE7DF // VRR-a VECTOR LOAD POSITIVE + op_VLBB uint32 = 0xE707 // VRX VECTOR LOAD TO BLOCK BOUNDARY + op_VLVG uint32 = 0xE722 // VRS-b VECTOR LOAD VR ELEMENT FROM GR + op_VLVGP uint32 = 0xE762 // VRR-f VECTOR LOAD VR FROM GRS DISJOINT + op_VLL uint32 = 0xE737 // VRS-b VECTOR LOAD WITH LENGTH + op_VMX uint32 = 0xE7FF // VRR-c VECTOR MAXIMUM + op_VMXL uint32 = 0xE7FD // VRR-c VECTOR MAXIMUM LOGICAL + op_VMRH uint32 = 0xE761 // VRR-c VECTOR MERGE HIGH + op_VMRL uint32 = 0xE760 // VRR-c VECTOR MERGE LOW + op_VMN uint32 = 0xE7FE // VRR-c VECTOR MINIMUM + op_VMNL uint32 = 0xE7FC // VRR-c VECTOR MINIMUM LOGICAL + op_VMAE uint32 = 0xE7AE // VRR-d VECTOR MULTIPLY AND ADD EVEN + op_VMAH uint32 = 0xE7AB // VRR-d VECTOR MULTIPLY AND ADD HIGH + op_VMALE uint32 = 0xE7AC // VRR-d VECTOR MULTIPLY AND ADD LOGICAL EVEN + op_VMALH uint32 = 0xE7A9 // VRR-d VECTOR MULTIPLY AND ADD LOGICAL HIGH + op_VMALO uint32 = 0xE7AD // VRR-d VECTOR MULTIPLY AND ADD LOGICAL ODD + op_VMAL uint32 = 0xE7AA // VRR-d VECTOR MULTIPLY AND ADD LOW + op_VMAO uint32 = 0xE7AF // VRR-d VECTOR MULTIPLY AND ADD ODD + op_VME uint32 = 0xE7A6 // VRR-c VECTOR MULTIPLY EVEN + op_VMH uint32 = 0xE7A3 // VRR-c VECTOR MULTIPLY HIGH + op_VMLE uint32 = 0xE7A4 // VRR-c VECTOR MULTIPLY EVEN LOGICAL + op_VMLH uint32 = 0xE7A1 // VRR-c VECTOR MULTIPLY HIGH LOGICAL + op_VMLO uint32 = 0xE7A5 // VRR-c VECTOR MULTIPLY ODD LOGICAL + op_VML uint32 = 0xE7A2 // VRR-c VECTOR MULTIPLY LOW + op_VMO uint32 = 0xE7A7 // VRR-c VECTOR MULTIPLY ODD + op_VNO uint32 = 0xE76B // VRR-c VECTOR NOR + op_VO uint32 = 0xE76A // VRR-c VECTOR OR + op_VPK uint32 = 0xE794 // VRR-c VECTOR PACK + op_VPKLS uint32 = 0xE795 // VRR-b VECTOR PACK LOGICAL SATURATE + op_VPKS uint32 = 0xE797 // VRR-b VECTOR PACK SATURATE + op_VPERM uint32 = 0xE78C // VRR-e VECTOR PERMUTE + op_VPDI uint32 = 0xE784 // VRR-c VECTOR PERMUTE DOUBLEWORD IMMEDIATE + op_VPOPCT uint32 
= 0xE750 // VRR-a VECTOR POPULATION COUNT + op_VREP uint32 = 0xE74D // VRI-c VECTOR REPLICATE + op_VREPI uint32 = 0xE745 // VRI-a VECTOR REPLICATE IMMEDIATE + op_VSCEF uint32 = 0xE71B // VRV VECTOR SCATTER ELEMENT (32) + op_VSCEG uint32 = 0xE71A // VRV VECTOR SCATTER ELEMENT (64) + op_VSEL uint32 = 0xE78D // VRR-e VECTOR SELECT + op_VSL uint32 = 0xE774 // VRR-c VECTOR SHIFT LEFT + op_VSLB uint32 = 0xE775 // VRR-c VECTOR SHIFT LEFT BY BYTE + op_VSLDB uint32 = 0xE777 // VRI-d VECTOR SHIFT LEFT DOUBLE BY BYTE + op_VSRA uint32 = 0xE77E // VRR-c VECTOR SHIFT RIGHT ARITHMETIC + op_VSRAB uint32 = 0xE77F // VRR-c VECTOR SHIFT RIGHT ARITHMETIC BY BYTE + op_VSRL uint32 = 0xE77C // VRR-c VECTOR SHIFT RIGHT LOGICAL + op_VSRLB uint32 = 0xE77D // VRR-c VECTOR SHIFT RIGHT LOGICAL BY BYTE + op_VSEG uint32 = 0xE75F // VRR-a VECTOR SIGN EXTEND TO DOUBLEWORD + op_VST uint32 = 0xE70E // VRX VECTOR STORE + op_VSTEH uint32 = 0xE709 // VRX VECTOR STORE ELEMENT (16) + op_VSTEF uint32 = 0xE70B // VRX VECTOR STORE ELEMENT (32) + op_VSTEG uint32 = 0xE70A // VRX VECTOR STORE ELEMENT (64) + op_VSTEB uint32 = 0xE708 // VRX VECTOR STORE ELEMENT (8) + op_VSTM uint32 = 0xE73E // VRS-a VECTOR STORE MULTIPLE + op_VSTL uint32 = 0xE73F // VRS-b VECTOR STORE WITH LENGTH + op_VSTRC uint32 = 0xE78A // VRR-d VECTOR STRING RANGE COMPARE + op_VS uint32 = 0xE7F7 // VRR-c VECTOR SUBTRACT + op_VSCBI uint32 = 0xE7F5 // VRR-c VECTOR SUBTRACT COMPUTE BORROW INDICATION + op_VSBCBI uint32 = 0xE7BD // VRR-d VECTOR SUBTRACT WITH BORROW COMPUTE BORROW INDICATION + op_VSBI uint32 = 0xE7BF // VRR-d VECTOR SUBTRACT WITH BORROW INDICATION + op_VSUMG uint32 = 0xE765 // VRR-c VECTOR SUM ACROSS DOUBLEWORD + op_VSUMQ uint32 = 0xE767 // VRR-c VECTOR SUM ACROSS QUADWORD + op_VSUM uint32 = 0xE764 // VRR-c VECTOR SUM ACROSS WORD + op_VTM uint32 = 0xE7D8 // VRR-a VECTOR TEST UNDER MASK + op_VUPH uint32 = 0xE7D7 // VRR-a VECTOR UNPACK HIGH + op_VUPLH uint32 = 0xE7D5 // VRR-a VECTOR UNPACK LOGICAL HIGH + op_VUPLL uint32 = 0xE7D4 // 
VRR-a VECTOR UNPACK LOGICAL LOW + op_VUPL uint32 = 0xE7D6 // VRR-a VECTOR UNPACK LOW + op_VMSL uint32 = 0xE7B8 // VRR-d VECTOR MULTIPLY SUM LOGICAL +) + +func oclass(a *obj.Addr) int { + return int(a.Class) - 1 +} + +// Add a relocation for the immediate in a RIL style instruction. +// The addend will be adjusted as required. +func (c *ctxtz) addrilreloc(sym *obj.LSym, add int64) *obj.Reloc { + if sym == nil { + c.ctxt.Diag("require symbol to apply relocation") + } + offset := int64(2) // relocation offset from start of instruction + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc + offset) + rel.Siz = 4 + rel.Sym = sym + rel.Add = add + offset + int64(rel.Siz) + rel.Type = objabi.R_PCRELDBL + return rel +} + +func (c *ctxtz) addrilrelocoffset(sym *obj.LSym, add, offset int64) *obj.Reloc { + if sym == nil { + c.ctxt.Diag("require symbol to apply relocation") + } + offset += int64(2) // relocation offset from start of instruction + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc + offset) + rel.Siz = 4 + rel.Sym = sym + rel.Add = add + offset + int64(rel.Siz) + rel.Type = objabi.R_PCRELDBL + return rel +} + +// Add a CALL relocation for the immediate in a RIL style instruction. +// The addend will be adjusted as required. 
+func (c *ctxtz) addcallreloc(sym *obj.LSym, add int64) *obj.Reloc { + if sym == nil { + c.ctxt.Diag("require symbol to apply relocation") + } + offset := int64(2) // relocation offset from start of instruction + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc + offset) + rel.Siz = 4 + rel.Sym = sym + rel.Add = add + offset + int64(rel.Siz) + rel.Type = objabi.R_CALL + return rel +} + +func (c *ctxtz) branchMask(p *obj.Prog) CCMask { + switch p.As { + case ABRC, ALOCR, ALOCGR, + ACRJ, ACGRJ, ACIJ, ACGIJ, + ACLRJ, ACLGRJ, ACLIJ, ACLGIJ: + return CCMask(p.From.Offset) + case ABEQ, ACMPBEQ, ACMPUBEQ, AMOVDEQ: + return Equal + case ABGE, ACMPBGE, ACMPUBGE, AMOVDGE: + return GreaterOrEqual + case ABGT, ACMPBGT, ACMPUBGT, AMOVDGT: + return Greater + case ABLE, ACMPBLE, ACMPUBLE, AMOVDLE: + return LessOrEqual + case ABLT, ACMPBLT, ACMPUBLT, AMOVDLT: + return Less + case ABNE, ACMPBNE, ACMPUBNE, AMOVDNE: + return NotEqual + case ABLEU: // LE or unordered + return NotGreater + case ABLTU: // LT or unordered + return LessOrUnordered + case ABVC: + return Never // needs extra instruction + case ABVS: + return Unordered + } + c.ctxt.Diag("unknown conditional branch %v", p.As) + return Always +} + +func regtmp(p *obj.Prog) uint32 { + p.Mark |= USETMP + return REGTMP +} + +func (c *ctxtz) asmout(p *obj.Prog, asm *[]byte) { + o := c.oplook(p) + + if o == nil { + return + } + + // If REGTMP is used in generated code, we need to set USETMP on p.Mark. + // So we use regtmp(p) for REGTMP. 
+ + switch o.i { + default: + c.ctxt.Diag("unknown index %d", o.i) + + case 0: // PSEUDO OPS + break + + case 1: // mov reg reg + switch p.As { + default: + c.ctxt.Diag("unhandled operation: %v", p.As) + case AMOVD: + zRRE(op_LGR, uint32(p.To.Reg), uint32(p.From.Reg), asm) + // sign extend + case AMOVW: + zRRE(op_LGFR, uint32(p.To.Reg), uint32(p.From.Reg), asm) + case AMOVH: + zRRE(op_LGHR, uint32(p.To.Reg), uint32(p.From.Reg), asm) + case AMOVB: + zRRE(op_LGBR, uint32(p.To.Reg), uint32(p.From.Reg), asm) + // zero extend + case AMOVWZ: + zRRE(op_LLGFR, uint32(p.To.Reg), uint32(p.From.Reg), asm) + case AMOVHZ: + zRRE(op_LLGHR, uint32(p.To.Reg), uint32(p.From.Reg), asm) + case AMOVBZ: + zRRE(op_LLGCR, uint32(p.To.Reg), uint32(p.From.Reg), asm) + // reverse bytes + case AMOVDBR: + zRRE(op_LRVGR, uint32(p.To.Reg), uint32(p.From.Reg), asm) + case AMOVWBR: + zRRE(op_LRVR, uint32(p.To.Reg), uint32(p.From.Reg), asm) + // floating point + case AFMOVD, AFMOVS: + zRR(op_LDR, uint32(p.To.Reg), uint32(p.From.Reg), asm) + } + + case 2: // arithmetic op reg [reg] reg + r := p.Reg + if r == 0 { + r = p.To.Reg + } + + var opcode uint32 + + switch p.As { + default: + c.ctxt.Diag("invalid opcode") + case AADD: + opcode = op_AGRK + case AADDC: + opcode = op_ALGRK + case AADDE: + opcode = op_ALCGR + case AADDW: + opcode = op_ARK + case AMULLW: + opcode = op_MSGFR + case AMULLD: + opcode = op_MSGR + case ADIVW, AMODW: + opcode = op_DSGFR + case ADIVWU, AMODWU: + opcode = op_DLR + case ADIVD, AMODD: + opcode = op_DSGR + case ADIVDU, AMODDU: + opcode = op_DLGR + } + + switch p.As { + default: + + case AADD, AADDC, AADDW: + if p.As == AADDW && r == p.To.Reg { + zRR(op_AR, uint32(p.To.Reg), uint32(p.From.Reg), asm) + } else { + zRRF(opcode, uint32(p.From.Reg), 0, uint32(p.To.Reg), uint32(r), asm) + } + + case AADDE, AMULLW, AMULLD: + if r == p.To.Reg { + zRRE(opcode, uint32(p.To.Reg), uint32(p.From.Reg), asm) + } else if p.From.Reg == p.To.Reg { + zRRE(opcode, uint32(p.To.Reg), uint32(r), 
asm) + } else { + zRRE(op_LGR, uint32(p.To.Reg), uint32(r), asm) + zRRE(opcode, uint32(p.To.Reg), uint32(p.From.Reg), asm) + } + + case ADIVW, ADIVWU, ADIVD, ADIVDU: + if p.As == ADIVWU || p.As == ADIVDU { + zRI(op_LGHI, regtmp(p), 0, asm) + } + zRRE(op_LGR, REGTMP2, uint32(r), asm) + zRRE(opcode, regtmp(p), uint32(p.From.Reg), asm) + zRRE(op_LGR, uint32(p.To.Reg), REGTMP2, asm) + + case AMODW, AMODWU, AMODD, AMODDU: + if p.As == AMODWU || p.As == AMODDU { + zRI(op_LGHI, regtmp(p), 0, asm) + } + zRRE(op_LGR, REGTMP2, uint32(r), asm) + zRRE(opcode, regtmp(p), uint32(p.From.Reg), asm) + zRRE(op_LGR, uint32(p.To.Reg), regtmp(p), asm) + + } + + case 3: // mov $constant reg + v := c.vregoff(&p.From) + switch p.As { + case AMOVBZ: + v = int64(uint8(v)) + case AMOVHZ: + v = int64(uint16(v)) + case AMOVWZ: + v = int64(uint32(v)) + case AMOVB: + v = int64(int8(v)) + case AMOVH: + v = int64(int16(v)) + case AMOVW: + v = int64(int32(v)) + } + if int64(int16(v)) == v { + zRI(op_LGHI, uint32(p.To.Reg), uint32(v), asm) + } else if v&0xffff0000 == v { + zRI(op_LLILH, uint32(p.To.Reg), uint32(v>>16), asm) + } else if v&0xffff00000000 == v { + zRI(op_LLIHL, uint32(p.To.Reg), uint32(v>>32), asm) + } else if uint64(v)&0xffff000000000000 == uint64(v) { + zRI(op_LLIHH, uint32(p.To.Reg), uint32(v>>48), asm) + } else if int64(int32(v)) == v { + zRIL(_a, op_LGFI, uint32(p.To.Reg), uint32(v), asm) + } else if int64(uint32(v)) == v { + zRIL(_a, op_LLILF, uint32(p.To.Reg), uint32(v), asm) + } else if uint64(v)&0xffffffff00000000 == uint64(v) { + zRIL(_a, op_LLIHF, uint32(p.To.Reg), uint32(v>>32), asm) + } else { + zRIL(_a, op_LLILF, uint32(p.To.Reg), uint32(v), asm) + zRIL(_a, op_IIHF, uint32(p.To.Reg), uint32(v>>32), asm) + } + + case 4: // multiply high (a*b)>>64 + r := p.Reg + if r == 0 { + r = p.To.Reg + } + zRRE(op_LGR, REGTMP2, uint32(r), asm) + zRRE(op_MLGR, regtmp(p), uint32(p.From.Reg), asm) + switch p.As { + case AMULHDU: + // Unsigned: move result into correct register. 
+ zRRE(op_LGR, uint32(p.To.Reg), regtmp(p), asm) + case AMULHD: + // Signed: need to convert result. + // See Hacker's Delight 8-3. + zRSY(op_SRAG, REGTMP2, uint32(p.From.Reg), 0, 63, asm) + zRRE(op_NGR, REGTMP2, uint32(r), asm) + zRRE(op_SGR, regtmp(p), REGTMP2, asm) + zRSY(op_SRAG, REGTMP2, uint32(r), 0, 63, asm) + zRRE(op_NGR, REGTMP2, uint32(p.From.Reg), asm) + zRRF(op_SGRK, REGTMP2, 0, uint32(p.To.Reg), regtmp(p), asm) + } + + case 5: // syscall + zI(op_SVC, 0, asm) + + case 6: // logical op reg [reg] reg + var oprr, oprre, oprrf uint32 + switch p.As { + case AAND: + oprre = op_NGR + oprrf = op_NGRK + case AANDW: + oprr = op_NR + oprrf = op_NRK + case AOR: + oprre = op_OGR + oprrf = op_OGRK + case AORW: + oprr = op_OR + oprrf = op_ORK + case AXOR: + oprre = op_XGR + oprrf = op_XGRK + case AXORW: + oprr = op_XR + oprrf = op_XRK + } + if p.Reg == 0 { + if oprr != 0 { + zRR(oprr, uint32(p.To.Reg), uint32(p.From.Reg), asm) + } else { + zRRE(oprre, uint32(p.To.Reg), uint32(p.From.Reg), asm) + } + } else { + zRRF(oprrf, uint32(p.Reg), 0, uint32(p.To.Reg), uint32(p.From.Reg), asm) + } + + case 7: // shift/rotate reg [reg] reg + d2 := c.vregoff(&p.From) + b2 := p.From.Reg + r3 := p.Reg + if r3 == 0 { + r3 = p.To.Reg + } + r1 := p.To.Reg + var opcode uint32 + switch p.As { + default: + case ASLD: + opcode = op_SLLG + case ASRD: + opcode = op_SRLG + case ASLW: + opcode = op_SLLK + case ASRW: + opcode = op_SRLK + case ARLL: + opcode = op_RLL + case ARLLG: + opcode = op_RLLG + case ASRAW: + opcode = op_SRAK + case ASRAD: + opcode = op_SRAG + } + zRSY(opcode, uint32(r1), uint32(r3), uint32(b2), uint32(d2), asm) + + case 8: // find leftmost one + if p.To.Reg&1 != 0 { + c.ctxt.Diag("target must be an even-numbered register") + } + // FLOGR also writes a mask to p.To.Reg+1. 
+ zRRE(op_FLOGR, uint32(p.To.Reg), uint32(p.From.Reg), asm) + + case 9: // population count + zRRE(op_POPCNT, uint32(p.To.Reg), uint32(p.From.Reg), asm) + + case 10: // subtract reg [reg] reg + r := int(p.Reg) + + switch p.As { + default: + case ASUB: + if r == 0 { + zRRE(op_SGR, uint32(p.To.Reg), uint32(p.From.Reg), asm) + } else { + zRRF(op_SGRK, uint32(p.From.Reg), 0, uint32(p.To.Reg), uint32(r), asm) + } + case ASUBC: + if r == 0 { + zRRE(op_SLGR, uint32(p.To.Reg), uint32(p.From.Reg), asm) + } else { + zRRF(op_SLGRK, uint32(p.From.Reg), 0, uint32(p.To.Reg), uint32(r), asm) + } + case ASUBE: + if r == 0 { + r = int(p.To.Reg) + } + if r == int(p.To.Reg) { + zRRE(op_SLBGR, uint32(p.To.Reg), uint32(p.From.Reg), asm) + } else if p.From.Reg == p.To.Reg { + zRRE(op_LGR, regtmp(p), uint32(p.From.Reg), asm) + zRRE(op_LGR, uint32(p.To.Reg), uint32(r), asm) + zRRE(op_SLBGR, uint32(p.To.Reg), regtmp(p), asm) + } else { + zRRE(op_LGR, uint32(p.To.Reg), uint32(r), asm) + zRRE(op_SLBGR, uint32(p.To.Reg), uint32(p.From.Reg), asm) + } + case ASUBW: + if r == 0 { + zRR(op_SR, uint32(p.To.Reg), uint32(p.From.Reg), asm) + } else { + zRRF(op_SRK, uint32(p.From.Reg), 0, uint32(p.To.Reg), uint32(r), asm) + } + } + + case 11: // br/bl + v := int32(0) + + if p.To.Target() != nil { + v = int32((p.To.Target().Pc - p.Pc) >> 1) + } + + if p.As == ABR && p.To.Sym == nil && int32(int16(v)) == v { + zRI(op_BRC, 0xF, uint32(v), asm) + } else { + if p.As == ABL { + zRIL(_b, op_BRASL, uint32(REG_LR), uint32(v), asm) + } else { + zRIL(_c, op_BRCL, 0xF, uint32(v), asm) + } + if p.To.Sym != nil { + c.addcallreloc(p.To.Sym, p.To.Offset) + } + } + + case 12: + r1 := p.To.Reg + d2 := c.vregoff(&p.From) + b2 := p.From.Reg + if b2 == 0 { + b2 = REGSP + } + x2 := p.From.Index + if -DISP20/2 > d2 || d2 >= DISP20/2 { + zRIL(_a, op_LGFI, regtmp(p), uint32(d2), asm) + if x2 != 0 { + zRX(op_LA, regtmp(p), regtmp(p), uint32(x2), 0, asm) + } + x2 = int16(regtmp(p)) + d2 = 0 + } + var opx, opxy uint32 + switch 
p.As { + case AADD: + opxy = op_AG + case AADDC: + opxy = op_ALG + case AADDE: + opxy = op_ALCG + case AADDW: + opx = op_A + opxy = op_AY + case AMULLW: + opx = op_MS + opxy = op_MSY + case AMULLD: + opxy = op_MSG + case ASUB: + opxy = op_SG + case ASUBC: + opxy = op_SLG + case ASUBE: + opxy = op_SLBG + case ASUBW: + opx = op_S + opxy = op_SY + case AAND: + opxy = op_NG + case AANDW: + opx = op_N + opxy = op_NY + case AOR: + opxy = op_OG + case AORW: + opx = op_O + opxy = op_OY + case AXOR: + opxy = op_XG + case AXORW: + opx = op_X + opxy = op_XY + } + if opx != 0 && 0 <= d2 && d2 < DISP12 { + zRX(opx, uint32(r1), uint32(x2), uint32(b2), uint32(d2), asm) + } else { + zRXY(opxy, uint32(r1), uint32(x2), uint32(b2), uint32(d2), asm) + } + + case 13: // rotate, followed by operation + r1 := p.To.Reg + r2 := p.RestArgs[2].Reg + i3 := uint8(p.From.Offset) // start + i4 := uint8(p.RestArgs[0].Offset) // end + i5 := uint8(p.RestArgs[1].Offset) // rotate amount + switch p.As { + case ARNSBGT, ARXSBGT, AROSBGT: + i3 |= 0x80 // test-results + case ARISBGZ, ARISBGNZ, ARISBHGZ, ARISBLGZ: + i4 |= 0x80 // zero-remaining-bits + } + var opcode uint32 + switch p.As { + case ARNSBG, ARNSBGT: + opcode = op_RNSBG + case ARXSBG, ARXSBGT: + opcode = op_RXSBG + case AROSBG, AROSBGT: + opcode = op_ROSBG + case ARISBG, ARISBGZ: + opcode = op_RISBG + case ARISBGN, ARISBGNZ: + opcode = op_RISBGN + case ARISBHG, ARISBHGZ: + opcode = op_RISBHG + case ARISBLG, ARISBLGZ: + opcode = op_RISBLG + } + zRIE(_f, uint32(opcode), uint32(r1), uint32(r2), 0, uint32(i3), uint32(i4), 0, uint32(i5), asm) + + case 15: // br/bl (reg) + r := p.To.Reg + if p.As == ABCL || p.As == ABL { + zRR(op_BASR, uint32(REG_LR), uint32(r), asm) + } else { + zRR(op_BCR, uint32(Always), uint32(r), asm) + } + + case 16: // conditional branch + v := int32(0) + if p.To.Target() != nil { + v = int32((p.To.Target().Pc - p.Pc) >> 1) + } + mask := uint32(c.branchMask(p)) + if p.To.Sym == nil && int32(int16(v)) == v { + zRI(op_BRC, 
mask, uint32(v), asm) + } else { + zRIL(_c, op_BRCL, mask, uint32(v), asm) + } + if p.To.Sym != nil { + c.addrilreloc(p.To.Sym, p.To.Offset) + } + + case 17: // move on condition + m3 := uint32(c.branchMask(p)) + zRRF(op_LOCGR, m3, 0, uint32(p.To.Reg), uint32(p.From.Reg), asm) + + case 18: // br/bl reg + if p.As == ABL { + zRR(op_BASR, uint32(REG_LR), uint32(p.To.Reg), asm) + } else { + zRR(op_BCR, uint32(Always), uint32(p.To.Reg), asm) + } + + case 19: // mov $sym+n(SB) reg + d := c.vregoff(&p.From) + zRIL(_b, op_LARL, uint32(p.To.Reg), 0, asm) + if d&1 != 0 { + zRX(op_LA, uint32(p.To.Reg), uint32(p.To.Reg), 0, 1, asm) + d -= 1 + } + c.addrilreloc(p.From.Sym, d) + + case 21: // subtract $constant [reg] reg + v := c.vregoff(&p.From) + r := p.Reg + if r == 0 { + r = p.To.Reg + } + switch p.As { + case ASUB: + zRIL(_a, op_LGFI, uint32(regtmp(p)), uint32(v), asm) + zRRF(op_SLGRK, uint32(regtmp(p)), 0, uint32(p.To.Reg), uint32(r), asm) + case ASUBC: + if r != p.To.Reg { + zRRE(op_LGR, uint32(p.To.Reg), uint32(r), asm) + } + zRIL(_a, op_SLGFI, uint32(p.To.Reg), uint32(v), asm) + case ASUBW: + if r != p.To.Reg { + zRR(op_LR, uint32(p.To.Reg), uint32(r), asm) + } + zRIL(_a, op_SLFI, uint32(p.To.Reg), uint32(v), asm) + } + + case 22: // add/multiply $constant [reg] reg + v := c.vregoff(&p.From) + r := p.Reg + if r == 0 { + r = p.To.Reg + } + var opri, opril, oprie uint32 + switch p.As { + case AADD: + opri = op_AGHI + opril = op_AGFI + oprie = op_AGHIK + case AADDC: + opril = op_ALGFI + oprie = op_ALGHSIK + case AADDW: + opri = op_AHI + opril = op_AFI + oprie = op_AHIK + case AMULLW: + opri = op_MHI + opril = op_MSFI + case AMULLD: + opri = op_MGHI + opril = op_MSGFI + } + if r != p.To.Reg && (oprie == 0 || int64(int16(v)) != v) { + switch p.As { + case AADD, AADDC, AMULLD: + zRRE(op_LGR, uint32(p.To.Reg), uint32(r), asm) + case AADDW, AMULLW: + zRR(op_LR, uint32(p.To.Reg), uint32(r), asm) + } + r = p.To.Reg + } + if opri != 0 && r == p.To.Reg && int64(int16(v)) == v { + 
zRI(opri, uint32(p.To.Reg), uint32(v), asm) + } else if oprie != 0 && int64(int16(v)) == v { + zRIE(_d, oprie, uint32(p.To.Reg), uint32(r), uint32(v), 0, 0, 0, 0, asm) + } else { + zRIL(_a, opril, uint32(p.To.Reg), uint32(v), asm) + } + + case 23: // 64-bit logical op $constant reg + // TODO(mundaym): merge with case 24. + v := c.vregoff(&p.From) + switch p.As { + default: + c.ctxt.Diag("%v is not supported", p) + case AAND: + if v >= 0 { // needs zero extend + zRIL(_a, op_LGFI, regtmp(p), uint32(v), asm) + zRRE(op_NGR, uint32(p.To.Reg), regtmp(p), asm) + } else if int64(int16(v)) == v { + zRI(op_NILL, uint32(p.To.Reg), uint32(v), asm) + } else { // r.To.Reg & 0xffffffff00000000 & uint32(v) + zRIL(_a, op_NILF, uint32(p.To.Reg), uint32(v), asm) + } + case AOR: + if int64(uint32(v)) != v { // needs sign extend + zRIL(_a, op_LGFI, regtmp(p), uint32(v), asm) + zRRE(op_OGR, uint32(p.To.Reg), regtmp(p), asm) + } else if int64(uint16(v)) == v { + zRI(op_OILL, uint32(p.To.Reg), uint32(v), asm) + } else { + zRIL(_a, op_OILF, uint32(p.To.Reg), uint32(v), asm) + } + case AXOR: + if int64(uint32(v)) != v { // needs sign extend + zRIL(_a, op_LGFI, regtmp(p), uint32(v), asm) + zRRE(op_XGR, uint32(p.To.Reg), regtmp(p), asm) + } else { + zRIL(_a, op_XILF, uint32(p.To.Reg), uint32(v), asm) + } + } + + case 24: // 32-bit logical op $constant reg + v := c.vregoff(&p.From) + switch p.As { + case AANDW: + if uint32(v&0xffff0000) == 0xffff0000 { + zRI(op_NILL, uint32(p.To.Reg), uint32(v), asm) + } else if uint32(v&0x0000ffff) == 0x0000ffff { + zRI(op_NILH, uint32(p.To.Reg), uint32(v)>>16, asm) + } else { + zRIL(_a, op_NILF, uint32(p.To.Reg), uint32(v), asm) + } + case AORW: + if uint32(v&0xffff0000) == 0 { + zRI(op_OILL, uint32(p.To.Reg), uint32(v), asm) + } else if uint32(v&0x0000ffff) == 0 { + zRI(op_OILH, uint32(p.To.Reg), uint32(v)>>16, asm) + } else { + zRIL(_a, op_OILF, uint32(p.To.Reg), uint32(v), asm) + } + case AXORW: + zRIL(_a, op_XILF, uint32(p.To.Reg), uint32(v), asm) + } + 
+ case 25: // load on condition (register) + m3 := uint32(c.branchMask(p)) + var opcode uint32 + switch p.As { + case ALOCR: + opcode = op_LOCR + case ALOCGR: + opcode = op_LOCGR + } + zRRF(opcode, m3, 0, uint32(p.To.Reg), uint32(p.Reg), asm) + + case 26: // MOVD $offset(base)(index), reg + v := c.regoff(&p.From) + r := p.From.Reg + if r == 0 { + r = REGSP + } + i := p.From.Index + if v >= 0 && v < DISP12 { + zRX(op_LA, uint32(p.To.Reg), uint32(r), uint32(i), uint32(v), asm) + } else if v >= -DISP20/2 && v < DISP20/2 { + zRXY(op_LAY, uint32(p.To.Reg), uint32(r), uint32(i), uint32(v), asm) + } else { + zRIL(_a, op_LGFI, regtmp(p), uint32(v), asm) + zRX(op_LA, uint32(p.To.Reg), uint32(r), regtmp(p), uint32(i), asm) + } + + case 31: // dword + wd := uint64(c.vregoff(&p.From)) + *asm = append(*asm, + uint8(wd>>56), + uint8(wd>>48), + uint8(wd>>40), + uint8(wd>>32), + uint8(wd>>24), + uint8(wd>>16), + uint8(wd>>8), + uint8(wd)) + + case 32: // float op freg freg + var opcode uint32 + switch p.As { + default: + c.ctxt.Diag("invalid opcode") + case AFADD: + opcode = op_ADBR + case AFADDS: + opcode = op_AEBR + case AFDIV: + opcode = op_DDBR + case AFDIVS: + opcode = op_DEBR + case AFMUL: + opcode = op_MDBR + case AFMULS: + opcode = op_MEEBR + case AFSUB: + opcode = op_SDBR + case AFSUBS: + opcode = op_SEBR + } + zRRE(opcode, uint32(p.To.Reg), uint32(p.From.Reg), asm) + + case 33: // float op [freg] freg + r := p.From.Reg + if oclass(&p.From) == C_NONE { + r = p.To.Reg + } + var opcode uint32 + switch p.As { + default: + case AFABS: + opcode = op_LPDBR + case AFNABS: + opcode = op_LNDBR + case ALPDFR: + opcode = op_LPDFR + case ALNDFR: + opcode = op_LNDFR + case AFNEG: + opcode = op_LCDFR + case AFNEGS: + opcode = op_LCEBR + case ALEDBR: + opcode = op_LEDBR + case ALDEBR: + opcode = op_LDEBR + case AFSQRT: + opcode = op_SQDBR + case AFSQRTS: + opcode = op_SQEBR + } + zRRE(opcode, uint32(p.To.Reg), uint32(r), asm) + + case 34: // float multiply-add freg freg freg + var 
opcode uint32 + switch p.As { + default: + c.ctxt.Diag("invalid opcode") + case AFMADD: + opcode = op_MADBR + case AFMADDS: + opcode = op_MAEBR + case AFMSUB: + opcode = op_MSDBR + case AFMSUBS: + opcode = op_MSEBR + } + zRRD(opcode, uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg), asm) + + case 35: // mov reg mem (no relocation) + d2 := c.regoff(&p.To) + b2 := p.To.Reg + if b2 == 0 { + b2 = REGSP + } + x2 := p.To.Index + if d2 < -DISP20/2 || d2 >= DISP20/2 { + zRIL(_a, op_LGFI, regtmp(p), uint32(d2), asm) + if x2 != 0 { + zRX(op_LA, regtmp(p), regtmp(p), uint32(x2), 0, asm) + } + x2 = int16(regtmp(p)) + d2 = 0 + } + // Emits an RX instruction if an appropriate one exists and the displacement fits in 12 bits. Otherwise use an RXY instruction. + if op, ok := c.zopstore12(p.As); ok && isU12(d2) { + zRX(op, uint32(p.From.Reg), uint32(x2), uint32(b2), uint32(d2), asm) + } else { + zRXY(c.zopstore(p.As), uint32(p.From.Reg), uint32(x2), uint32(b2), uint32(d2), asm) + } + + case 36: // mov mem reg (no relocation) + d2 := c.regoff(&p.From) + b2 := p.From.Reg + if b2 == 0 { + b2 = REGSP + } + x2 := p.From.Index + if d2 < -DISP20/2 || d2 >= DISP20/2 { + zRIL(_a, op_LGFI, regtmp(p), uint32(d2), asm) + if x2 != 0 { + zRX(op_LA, regtmp(p), regtmp(p), uint32(x2), 0, asm) + } + x2 = int16(regtmp(p)) + d2 = 0 + } + // Emits an RX instruction if an appropriate one exists and the displacement fits in 12 bits. Otherwise use an RXY instruction. 
+ if op, ok := c.zopload12(p.As); ok && isU12(d2) { + zRX(op, uint32(p.To.Reg), uint32(x2), uint32(b2), uint32(d2), asm) + } else { + zRXY(c.zopload(p.As), uint32(p.To.Reg), uint32(x2), uint32(b2), uint32(d2), asm) + } + + case 40: // word/byte + wd := uint32(c.regoff(&p.From)) + if p.As == AWORD { //WORD + *asm = append(*asm, uint8(wd>>24), uint8(wd>>16), uint8(wd>>8), uint8(wd)) + } else { //BYTE + *asm = append(*asm, uint8(wd)) + } + + case 41: // branch on count + r1 := p.From.Reg + ri2 := (p.To.Target().Pc - p.Pc) >> 1 + if int64(int16(ri2)) != ri2 { + c.ctxt.Diag("branch target too far away") + } + var opcode uint32 + switch p.As { + case ABRCT: + opcode = op_BRCT + case ABRCTG: + opcode = op_BRCTG + } + zRI(opcode, uint32(r1), uint32(ri2), asm) + + case 47: // negate [reg] reg + r := p.From.Reg + if r == 0 { + r = p.To.Reg + } + switch p.As { + case ANEG: + zRRE(op_LCGR, uint32(p.To.Reg), uint32(r), asm) + case ANEGW: + zRRE(op_LCGFR, uint32(p.To.Reg), uint32(r), asm) + } + + case 48: // floating-point round to integer + m3 := c.vregoff(&p.From) + if 0 > m3 || m3 > 7 { + c.ctxt.Diag("mask (%v) must be in the range [0, 7]", m3) + } + var opcode uint32 + switch p.As { + case AFIEBR: + opcode = op_FIEBR + case AFIDBR: + opcode = op_FIDBR + } + zRRF(opcode, uint32(m3), 0, uint32(p.To.Reg), uint32(p.Reg), asm) + + case 49: // copysign + zRRF(op_CPSDR, uint32(p.From.Reg), 0, uint32(p.To.Reg), uint32(p.Reg), asm) + + case 50: // load and test + var opcode uint32 + switch p.As { + case ALTEBR: + opcode = op_LTEBR + case ALTDBR: + opcode = op_LTDBR + } + zRRE(opcode, uint32(p.To.Reg), uint32(p.From.Reg), asm) + + case 51: // test data class (immediate only) + var opcode uint32 + switch p.As { + case ATCEB: + opcode = op_TCEB + case ATCDB: + opcode = op_TCDB + } + d2 := c.regoff(&p.To) + zRXE(opcode, uint32(p.From.Reg), 0, 0, uint32(d2), 0, asm) + + case 62: // equivalent of Mul64 in math/bits + zRRE(op_MLGR, uint32(p.To.Reg), uint32(p.From.Reg), asm) + + case 66: + 
zRR(op_BCR, uint32(Never), 0, asm) + + case 67: // fmov $0 freg + var opcode uint32 + switch p.As { + case AFMOVS: + opcode = op_LZER + case AFMOVD: + opcode = op_LZDR + } + zRRE(opcode, uint32(p.To.Reg), 0, asm) + + case 68: // movw areg reg + zRRE(op_EAR, uint32(p.To.Reg), uint32(p.From.Reg-REG_AR0), asm) + + case 69: // movw reg areg + zRRE(op_SAR, uint32(p.To.Reg-REG_AR0), uint32(p.From.Reg), asm) + + case 70: // cmp reg reg + if p.As == ACMPW || p.As == ACMPWU { + zRR(c.zoprr(p.As), uint32(p.From.Reg), uint32(p.To.Reg), asm) + } else { + zRRE(c.zoprre(p.As), uint32(p.From.Reg), uint32(p.To.Reg), asm) + } + + case 71: // cmp reg $constant + v := c.vregoff(&p.To) + switch p.As { + case ACMP, ACMPW: + if int64(int32(v)) != v { + c.ctxt.Diag("%v overflows an int32", v) + } + case ACMPU, ACMPWU: + if int64(uint32(v)) != v { + c.ctxt.Diag("%v overflows a uint32", v) + } + } + if p.As == ACMP && int64(int16(v)) == v { + zRI(op_CGHI, uint32(p.From.Reg), uint32(v), asm) + } else if p.As == ACMPW && int64(int16(v)) == v { + zRI(op_CHI, uint32(p.From.Reg), uint32(v), asm) + } else { + zRIL(_a, c.zopril(p.As), uint32(p.From.Reg), uint32(v), asm) + } + + case 72: // mov $constant mem + v := c.regoff(&p.From) + d := c.regoff(&p.To) + r := p.To.Reg + if p.To.Index != 0 { + c.ctxt.Diag("cannot use index register") + } + if r == 0 { + r = REGSP + } + var opcode uint32 + switch p.As { + case AMOVD: + opcode = op_MVGHI + case AMOVW, AMOVWZ: + opcode = op_MVHI + case AMOVH, AMOVHZ: + opcode = op_MVHHI + case AMOVB, AMOVBZ: + opcode = op_MVI + } + if d < 0 || d >= DISP12 { + if r == int16(regtmp(p)) { + c.ctxt.Diag("displacement must be in range [0, 4096) to use %v", r) + } + if d >= -DISP20/2 && d < DISP20/2 { + if opcode == op_MVI { + opcode = op_MVIY + } else { + zRXY(op_LAY, uint32(regtmp(p)), 0, uint32(r), uint32(d), asm) + r = int16(regtmp(p)) + d = 0 + } + } else { + zRIL(_a, op_LGFI, regtmp(p), uint32(d), asm) + zRX(op_LA, regtmp(p), regtmp(p), uint32(r), 0, asm) + r = 
int16(regtmp(p)) + d = 0 + } + } + switch opcode { + case op_MVI: + zSI(opcode, uint32(v), uint32(r), uint32(d), asm) + case op_MVIY: + zSIY(opcode, uint32(v), uint32(r), uint32(d), asm) + default: + zSIL(opcode, uint32(r), uint32(d), uint32(v), asm) + } + + case 74: // mov reg addr (including relocation) + i2 := c.regoff(&p.To) + switch p.As { + case AMOVD: + zRIL(_b, op_STGRL, uint32(p.From.Reg), 0, asm) + case AMOVW, AMOVWZ: // The zero extension doesn't affect store instructions + zRIL(_b, op_STRL, uint32(p.From.Reg), 0, asm) + case AMOVH, AMOVHZ: // The zero extension doesn't affect store instructions + zRIL(_b, op_STHRL, uint32(p.From.Reg), 0, asm) + case AMOVB, AMOVBZ: // The zero extension doesn't affect store instructions + zRIL(_b, op_LARL, regtmp(p), 0, asm) + adj := uint32(0) // adjustment needed for odd addresses + if i2&1 != 0 { + i2 -= 1 + adj = 1 + } + zRX(op_STC, uint32(p.From.Reg), 0, regtmp(p), adj, asm) + case AFMOVD: + zRIL(_b, op_LARL, regtmp(p), 0, asm) + zRX(op_STD, uint32(p.From.Reg), 0, regtmp(p), 0, asm) + case AFMOVS: + zRIL(_b, op_LARL, regtmp(p), 0, asm) + zRX(op_STE, uint32(p.From.Reg), 0, regtmp(p), 0, asm) + } + c.addrilreloc(p.To.Sym, int64(i2)) + + case 75: // mov addr reg (including relocation) + i2 := c.regoff(&p.From) + switch p.As { + case AMOVD: + if i2&1 != 0 { + zRIL(_b, op_LARL, regtmp(p), 0, asm) + zRXY(op_LG, uint32(p.To.Reg), regtmp(p), 0, 1, asm) + i2 -= 1 + } else { + zRIL(_b, op_LGRL, uint32(p.To.Reg), 0, asm) + } + case AMOVW: + zRIL(_b, op_LGFRL, uint32(p.To.Reg), 0, asm) + case AMOVWZ: + zRIL(_b, op_LLGFRL, uint32(p.To.Reg), 0, asm) + case AMOVH: + zRIL(_b, op_LGHRL, uint32(p.To.Reg), 0, asm) + case AMOVHZ: + zRIL(_b, op_LLGHRL, uint32(p.To.Reg), 0, asm) + case AMOVB, AMOVBZ: + zRIL(_b, op_LARL, regtmp(p), 0, asm) + adj := uint32(0) // adjustment needed for odd addresses + if i2&1 != 0 { + i2 -= 1 + adj = 1 + } + switch p.As { + case AMOVB: + zRXY(op_LGB, uint32(p.To.Reg), 0, regtmp(p), adj, asm) + case AMOVBZ: + 
zRXY(op_LLGC, uint32(p.To.Reg), 0, regtmp(p), adj, asm) + } + case AFMOVD: + zRIL(_a, op_LARL, regtmp(p), 0, asm) + zRX(op_LD, uint32(p.To.Reg), 0, regtmp(p), 0, asm) + case AFMOVS: + zRIL(_a, op_LARL, regtmp(p), 0, asm) + zRX(op_LE, uint32(p.To.Reg), 0, regtmp(p), 0, asm) + } + c.addrilreloc(p.From.Sym, int64(i2)) + + case 76: // set program mask + zRR(op_SPM, uint32(p.From.Reg), 0, asm) + + case 77: // syscall $constant + if p.From.Offset > 255 || p.From.Offset < 1 { + c.ctxt.Diag("illegal system call; system call number out of range: %v", p) + zE(op_TRAP2, asm) // trap always + } else { + zI(op_SVC, uint32(p.From.Offset), asm) + } + + case 78: // undef + // "An instruction consisting entirely of binary 0s is guaranteed + // always to be an illegal instruction." + *asm = append(*asm, 0, 0, 0, 0) + + case 79: // compare and swap reg reg reg + v := c.regoff(&p.To) + if v < 0 { + v = 0 + } + if p.As == ACS { + zRS(op_CS, uint32(p.From.Reg), uint32(p.Reg), uint32(p.To.Reg), uint32(v), asm) + } else if p.As == ACSG { + zRSY(op_CSG, uint32(p.From.Reg), uint32(p.Reg), uint32(p.To.Reg), uint32(v), asm) + } + + case 80: // sync + zRR(op_BCR, 14, 0, asm) // fast-BCR-serialization + + case 81: // float to fixed and fixed to float moves (no conversion) + switch p.As { + case ALDGR: + zRRE(op_LDGR, uint32(p.To.Reg), uint32(p.From.Reg), asm) + case ALGDR: + zRRE(op_LGDR, uint32(p.To.Reg), uint32(p.From.Reg), asm) + } + + case 82: // fixed to float conversion + var opcode uint32 + switch p.As { + default: + log.Fatalf("unexpected opcode %v", p.As) + case ACEFBRA: + opcode = op_CEFBRA + case ACDFBRA: + opcode = op_CDFBRA + case ACEGBRA: + opcode = op_CEGBRA + case ACDGBRA: + opcode = op_CDGBRA + case ACELFBR: + opcode = op_CELFBR + case ACDLFBR: + opcode = op_CDLFBR + case ACELGBR: + opcode = op_CELGBR + case ACDLGBR: + opcode = op_CDLGBR + } + // set immediate operand M3 to 0 to use the default BFP rounding mode + // (usually round to nearest, ties to even) + // TODO(mundaym): 
should this be fixed at round to nearest, ties to even? + // M4 is reserved and must be 0 + zRRF(opcode, 0, 0, uint32(p.To.Reg), uint32(p.From.Reg), asm) + + case 83: // float to fixed conversion + var opcode uint32 + switch p.As { + default: + log.Fatalf("unexpected opcode %v", p.As) + case ACFEBRA: + opcode = op_CFEBRA + case ACFDBRA: + opcode = op_CFDBRA + case ACGEBRA: + opcode = op_CGEBRA + case ACGDBRA: + opcode = op_CGDBRA + case ACLFEBR: + opcode = op_CLFEBR + case ACLFDBR: + opcode = op_CLFDBR + case ACLGEBR: + opcode = op_CLGEBR + case ACLGDBR: + opcode = op_CLGDBR + } + // set immediate operand M3 to 5 for rounding toward zero (required by Go spec) + // M4 is reserved and must be 0 + zRRF(opcode, 5, 0, uint32(p.To.Reg), uint32(p.From.Reg), asm) + + case 84: // storage-and-storage operations $length mem mem + l := c.regoff(&p.From) + if l < 1 || l > 256 { + c.ctxt.Diag("number of bytes (%v) not in range [1,256]", l) + } + if p.GetFrom3().Index != 0 || p.To.Index != 0 { + c.ctxt.Diag("cannot use index reg") + } + b1 := p.To.Reg + b2 := p.GetFrom3().Reg + if b1 == 0 { + b1 = REGSP + } + if b2 == 0 { + b2 = REGSP + } + d1 := c.regoff(&p.To) + d2 := c.regoff(p.GetFrom3()) + if d1 < 0 || d1 >= DISP12 { + if b2 == int16(regtmp(p)) { + c.ctxt.Diag("regtmp(p) conflict") + } + if b1 != int16(regtmp(p)) { + zRRE(op_LGR, regtmp(p), uint32(b1), asm) + } + zRIL(_a, op_AGFI, regtmp(p), uint32(d1), asm) + if d1 == d2 && b1 == b2 { + d2 = 0 + b2 = int16(regtmp(p)) + } + d1 = 0 + b1 = int16(regtmp(p)) + } + if d2 < 0 || d2 >= DISP12 { + if b1 == REGTMP2 { + c.ctxt.Diag("REGTMP2 conflict") + } + if b2 != REGTMP2 { + zRRE(op_LGR, REGTMP2, uint32(b2), asm) + } + zRIL(_a, op_AGFI, REGTMP2, uint32(d2), asm) + d2 = 0 + b2 = REGTMP2 + } + var opcode uint32 + switch p.As { + default: + c.ctxt.Diag("unexpected opcode %v", p.As) + case AMVC: + opcode = op_MVC + case AMVCIN: + opcode = op_MVCIN + case ACLC: + opcode = op_CLC + // swap operand order for CLC so that it matches CMP + 
b1, b2 = b2, b1 + d1, d2 = d2, d1 + case AXC: + opcode = op_XC + case AOC: + opcode = op_OC + case ANC: + opcode = op_NC + } + zSS(_a, opcode, uint32(l-1), 0, uint32(b1), uint32(d1), uint32(b2), uint32(d2), asm) + + case 85: // load address relative long + v := c.regoff(&p.From) + if p.From.Sym == nil { + if (v & 1) != 0 { + c.ctxt.Diag("cannot use LARL with odd offset: %v", v) + } + } else { + c.addrilreloc(p.From.Sym, int64(v)) + v = 0 + } + zRIL(_b, op_LARL, uint32(p.To.Reg), uint32(v>>1), asm) + + case 86: // load address + d := c.vregoff(&p.From) + x := p.From.Index + b := p.From.Reg + if b == 0 { + b = REGSP + } + switch p.As { + case ALA: + zRX(op_LA, uint32(p.To.Reg), uint32(x), uint32(b), uint32(d), asm) + case ALAY: + zRXY(op_LAY, uint32(p.To.Reg), uint32(x), uint32(b), uint32(d), asm) + } + + case 87: // execute relative long + v := c.vregoff(&p.From) + if p.From.Sym == nil { + if v&1 != 0 { + c.ctxt.Diag("cannot use EXRL with odd offset: %v", v) + } + } else { + c.addrilreloc(p.From.Sym, v) + v = 0 + } + zRIL(_b, op_EXRL, uint32(p.To.Reg), uint32(v>>1), asm) + + case 88: // store clock + var opcode uint32 + switch p.As { + case ASTCK: + opcode = op_STCK + case ASTCKC: + opcode = op_STCKC + case ASTCKE: + opcode = op_STCKE + case ASTCKF: + opcode = op_STCKF + } + v := c.vregoff(&p.To) + r := p.To.Reg + if r == 0 { + r = REGSP + } + zS(opcode, uint32(r), uint32(v), asm) + + case 89: // compare and branch reg reg + var v int32 + if p.To.Target() != nil { + v = int32((p.To.Target().Pc - p.Pc) >> 1) + } + + // Some instructions take a mask as the first argument. 
+ r1, r2 := p.From.Reg, p.Reg + if p.From.Type == obj.TYPE_CONST { + r1, r2 = p.Reg, p.RestArgs[0].Reg + } + m3 := uint32(c.branchMask(p)) + + var opcode uint32 + switch p.As { + case ACRJ: + // COMPARE AND BRANCH RELATIVE (32) + opcode = op_CRJ + case ACGRJ, ACMPBEQ, ACMPBGE, ACMPBGT, ACMPBLE, ACMPBLT, ACMPBNE: + // COMPARE AND BRANCH RELATIVE (64) + opcode = op_CGRJ + case ACLRJ: + // COMPARE LOGICAL AND BRANCH RELATIVE (32) + opcode = op_CLRJ + case ACLGRJ, ACMPUBEQ, ACMPUBGE, ACMPUBGT, ACMPUBLE, ACMPUBLT, ACMPUBNE: + // COMPARE LOGICAL AND BRANCH RELATIVE (64) + opcode = op_CLGRJ + } + + if int32(int16(v)) != v { + // The branch is too far for one instruction so crack + // `CMPBEQ x, y, target` into: + // + // CMPBNE x, y, 2(PC) + // BR target + // + // Note that the instruction sequence MUST NOT clobber + // the condition code. + m3 ^= 0xe // invert 3-bit mask + zRIE(_b, opcode, uint32(r1), uint32(r2), uint32(sizeRIE+sizeRIL)/2, 0, 0, m3, 0, asm) + zRIL(_c, op_BRCL, uint32(Always), uint32(v-sizeRIE/2), asm) + } else { + zRIE(_b, opcode, uint32(r1), uint32(r2), uint32(v), 0, 0, m3, 0, asm) + } + + case 90: // compare and branch reg $constant + var v int32 + if p.To.Target() != nil { + v = int32((p.To.Target().Pc - p.Pc) >> 1) + } + + // Some instructions take a mask as the first argument. + r1, i2 := p.From.Reg, p.RestArgs[0].Offset + if p.From.Type == obj.TYPE_CONST { + r1 = p.Reg + } + m3 := uint32(c.branchMask(p)) + + var opcode uint32 + switch p.As { + case ACIJ: + opcode = op_CIJ + case ACGIJ, ACMPBEQ, ACMPBGE, ACMPBGT, ACMPBLE, ACMPBLT, ACMPBNE: + opcode = op_CGIJ + case ACLIJ: + opcode = op_CLIJ + case ACLGIJ, ACMPUBEQ, ACMPUBGE, ACMPUBGT, ACMPUBLE, ACMPUBLT, ACMPUBNE: + opcode = op_CLGIJ + } + if int32(int16(v)) != v { + // The branch is too far for one instruction so crack + // `CMPBEQ x, $0, target` into: + // + // CMPBNE x, $0, 2(PC) + // BR target + // + // Note that the instruction sequence MUST NOT clobber + // the condition code. 
+ m3 ^= 0xe // invert 3-bit mask + zRIE(_c, opcode, uint32(r1), m3, uint32(sizeRIE+sizeRIL)/2, 0, 0, 0, uint32(i2), asm) + zRIL(_c, op_BRCL, uint32(Always), uint32(v-sizeRIE/2), asm) + } else { + zRIE(_c, opcode, uint32(r1), m3, uint32(v), 0, 0, 0, uint32(i2), asm) + } + + case 91: // test under mask (immediate) + var opcode uint32 + switch p.As { + case ATMHH: + opcode = op_TMHH + case ATMHL: + opcode = op_TMHL + case ATMLH: + opcode = op_TMLH + case ATMLL: + opcode = op_TMLL + } + zRI(opcode, uint32(p.From.Reg), uint32(c.vregoff(&p.To)), asm) + + case 92: // insert program mask + zRRE(op_IPM, uint32(p.From.Reg), 0, asm) + + case 93: // GOT lookup + v := c.vregoff(&p.To) + if v != 0 { + c.ctxt.Diag("invalid offset against GOT slot %v", p) + } + zRIL(_b, op_LGRL, uint32(p.To.Reg), 0, asm) + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc + 2) + rel.Siz = 4 + rel.Sym = p.From.Sym + rel.Type = objabi.R_GOTPCREL + rel.Add = 2 + int64(rel.Siz) + + case 94: // TLS local exec model + zRIL(_b, op_LARL, regtmp(p), (sizeRIL+sizeRXY+sizeRI)>>1, asm) + zRXY(op_LG, uint32(p.To.Reg), regtmp(p), 0, 0, asm) + zRI(op_BRC, 0xF, (sizeRI+8)>>1, asm) + *asm = append(*asm, 0, 0, 0, 0, 0, 0, 0, 0) + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc + sizeRIL + sizeRXY + sizeRI) + rel.Siz = 8 + rel.Sym = p.From.Sym + rel.Type = objabi.R_TLS_LE + rel.Add = 0 + + case 95: // TLS initial exec model + // Assembly | Relocation symbol | Done Here? 
+ // -------------------------------------------------------------- + // ear %r11, %a0 | | + // sllg %r11, %r11, 32 | | + // ear %r11, %a1 | | + // larl %r10, <var>@indntpoff | R_390_TLS_IEENT | Y + // lg %r10, 0(%r10) | R_390_TLS_LOAD (tag) | Y + // la %r10, 0(%r10, %r11) | | + // -------------------------------------------------------------- + + // R_390_TLS_IEENT + zRIL(_b, op_LARL, regtmp(p), 0, asm) + ieent := obj.Addrel(c.cursym) + ieent.Off = int32(c.pc + 2) + ieent.Siz = 4 + ieent.Sym = p.From.Sym + ieent.Type = objabi.R_TLS_IE + ieent.Add = 2 + int64(ieent.Siz) + + // R_390_TLS_LOAD + zRXY(op_LGF, uint32(p.To.Reg), regtmp(p), 0, 0, asm) + // TODO(mundaym): add R_390_TLS_LOAD relocation here + // not strictly required but might allow the linker to optimize + + case 96: // clear macro + length := c.vregoff(&p.From) + offset := c.vregoff(&p.To) + reg := p.To.Reg + if reg == 0 { + reg = REGSP + } + if length <= 0 { + c.ctxt.Diag("cannot CLEAR %d bytes, must be greater than 0", length) + } + for length > 0 { + if offset < 0 || offset >= DISP12 { + if offset >= -DISP20/2 && offset < DISP20/2 { + zRXY(op_LAY, regtmp(p), uint32(reg), 0, uint32(offset), asm) + } else { + if reg != int16(regtmp(p)) { + zRRE(op_LGR, regtmp(p), uint32(reg), asm) + } + zRIL(_a, op_AGFI, regtmp(p), uint32(offset), asm) + } + reg = int16(regtmp(p)) + offset = 0 + } + size := length + if size > 256 { + size = 256 + } + + switch size { + case 1: + zSI(op_MVI, 0, uint32(reg), uint32(offset), asm) + case 2: + zSIL(op_MVHHI, uint32(reg), uint32(offset), 0, asm) + case 4: + zSIL(op_MVHI, uint32(reg), uint32(offset), 0, asm) + case 8: + zSIL(op_MVGHI, uint32(reg), uint32(offset), 0, asm) + default: + zSS(_a, op_XC, uint32(size-1), 0, uint32(reg), uint32(offset), uint32(reg), uint32(offset), asm) + } + + length -= size + offset += size + } + + case 97: // store multiple + rstart := p.From.Reg + rend := p.Reg + offset := c.regoff(&p.To) + reg := p.To.Reg + if reg == 0 { + reg = REGSP + } + if 
offset < -DISP20/2 || offset >= DISP20/2 { + if reg != int16(regtmp(p)) { + zRRE(op_LGR, regtmp(p), uint32(reg), asm) + } + zRIL(_a, op_AGFI, regtmp(p), uint32(offset), asm) + reg = int16(regtmp(p)) + offset = 0 + } + switch p.As { + case ASTMY: + if offset >= 0 && offset < DISP12 { + zRS(op_STM, uint32(rstart), uint32(rend), uint32(reg), uint32(offset), asm) + } else { + zRSY(op_STMY, uint32(rstart), uint32(rend), uint32(reg), uint32(offset), asm) + } + case ASTMG: + zRSY(op_STMG, uint32(rstart), uint32(rend), uint32(reg), uint32(offset), asm) + } + + case 98: // load multiple + rstart := p.Reg + rend := p.To.Reg + offset := c.regoff(&p.From) + reg := p.From.Reg + if reg == 0 { + reg = REGSP + } + if offset < -DISP20/2 || offset >= DISP20/2 { + if reg != int16(regtmp(p)) { + zRRE(op_LGR, regtmp(p), uint32(reg), asm) + } + zRIL(_a, op_AGFI, regtmp(p), uint32(offset), asm) + reg = int16(regtmp(p)) + offset = 0 + } + switch p.As { + case ALMY: + if offset >= 0 && offset < DISP12 { + zRS(op_LM, uint32(rstart), uint32(rend), uint32(reg), uint32(offset), asm) + } else { + zRSY(op_LMY, uint32(rstart), uint32(rend), uint32(reg), uint32(offset), asm) + } + case ALMG: + zRSY(op_LMG, uint32(rstart), uint32(rend), uint32(reg), uint32(offset), asm) + } + + case 99: // interlocked load and op + if p.To.Index != 0 { + c.ctxt.Diag("cannot use indexed address") + } + offset := c.regoff(&p.To) + if offset < -DISP20/2 || offset >= DISP20/2 { + c.ctxt.Diag("%v does not fit into 20-bit signed integer", offset) + } + var opcode uint32 + switch p.As { + case ALAA: + opcode = op_LAA + case ALAAG: + opcode = op_LAAG + case ALAAL: + opcode = op_LAAL + case ALAALG: + opcode = op_LAALG + case ALAN: + opcode = op_LAN + case ALANG: + opcode = op_LANG + case ALAX: + opcode = op_LAX + case ALAXG: + opcode = op_LAXG + case ALAO: + opcode = op_LAO + case ALAOG: + opcode = op_LAOG + } + zRSY(opcode, uint32(p.Reg), uint32(p.From.Reg), uint32(p.To.Reg), uint32(offset), asm) + + case 100: // VRX STORE 
+ op, m3, _ := vop(p.As) + v1 := p.From.Reg + if p.Reg != 0 { + m3 = uint32(c.vregoff(&p.From)) + v1 = p.Reg + } + b2 := p.To.Reg + if b2 == 0 { + b2 = REGSP + } + d2 := uint32(c.vregoff(&p.To)) + zVRX(op, uint32(v1), uint32(p.To.Index), uint32(b2), d2, m3, asm) + + case 101: // VRX LOAD + op, m3, _ := vop(p.As) + src := &p.From + if p.GetFrom3() != nil { + m3 = uint32(c.vregoff(&p.From)) + src = p.GetFrom3() + } + b2 := src.Reg + if b2 == 0 { + b2 = REGSP + } + d2 := uint32(c.vregoff(src)) + zVRX(op, uint32(p.To.Reg), uint32(src.Index), uint32(b2), d2, m3, asm) + + case 102: // VRV SCATTER + op, _, _ := vop(p.As) + m3 := uint32(c.vregoff(&p.From)) + b2 := p.To.Reg + if b2 == 0 { + b2 = REGSP + } + d2 := uint32(c.vregoff(&p.To)) + zVRV(op, uint32(p.Reg), uint32(p.To.Index), uint32(b2), d2, m3, asm) + + case 103: // VRV GATHER + op, _, _ := vop(p.As) + m3 := uint32(c.vregoff(&p.From)) + b2 := p.GetFrom3().Reg + if b2 == 0 { + b2 = REGSP + } + d2 := uint32(c.vregoff(p.GetFrom3())) + zVRV(op, uint32(p.To.Reg), uint32(p.GetFrom3().Index), uint32(b2), d2, m3, asm) + + case 104: // VRS SHIFT/ROTATE and LOAD GR FROM VR ELEMENT + op, m4, _ := vop(p.As) + fr := p.Reg + if fr == 0 { + fr = p.To.Reg + } + bits := uint32(c.vregoff(&p.From)) + zVRS(op, uint32(p.To.Reg), uint32(fr), uint32(p.From.Reg), bits, m4, asm) + + case 105: // VRS STORE MULTIPLE + op, _, _ := vop(p.As) + offset := uint32(c.vregoff(&p.To)) + reg := p.To.Reg + if reg == 0 { + reg = REGSP + } + zVRS(op, uint32(p.From.Reg), uint32(p.Reg), uint32(reg), offset, 0, asm) + + case 106: // VRS LOAD MULTIPLE + op, _, _ := vop(p.As) + offset := uint32(c.vregoff(&p.From)) + reg := p.From.Reg + if reg == 0 { + reg = REGSP + } + zVRS(op, uint32(p.Reg), uint32(p.To.Reg), uint32(reg), offset, 0, asm) + + case 107: // VRS STORE WITH LENGTH + op, _, _ := vop(p.As) + offset := uint32(c.vregoff(&p.To)) + reg := p.To.Reg + if reg == 0 { + reg = REGSP + } + zVRS(op, uint32(p.Reg), uint32(p.From.Reg), uint32(reg), offset, 0, 
asm) + + case 108: // VRS LOAD WITH LENGTH + op, _, _ := vop(p.As) + offset := uint32(c.vregoff(p.GetFrom3())) + reg := p.GetFrom3().Reg + if reg == 0 { + reg = REGSP + } + zVRS(op, uint32(p.To.Reg), uint32(p.From.Reg), uint32(reg), offset, 0, asm) + + case 109: // VRI-a + op, m3, _ := vop(p.As) + i2 := uint32(c.vregoff(&p.From)) + if p.GetFrom3() != nil { + m3 = uint32(c.vregoff(&p.From)) + i2 = uint32(c.vregoff(p.GetFrom3())) + } + switch p.As { + case AVZERO: + i2 = 0 + case AVONE: + i2 = 0xffff + } + zVRIa(op, uint32(p.To.Reg), i2, m3, asm) + + case 110: + op, m4, _ := vop(p.As) + i2 := uint32(c.vregoff(&p.From)) + i3 := uint32(c.vregoff(p.GetFrom3())) + zVRIb(op, uint32(p.To.Reg), i2, i3, m4, asm) + + case 111: + op, m4, _ := vop(p.As) + i2 := uint32(c.vregoff(&p.From)) + zVRIc(op, uint32(p.To.Reg), uint32(p.Reg), i2, m4, asm) + + case 112: + op, m5, _ := vop(p.As) + i4 := uint32(c.vregoff(&p.From)) + zVRId(op, uint32(p.To.Reg), uint32(p.Reg), uint32(p.GetFrom3().Reg), i4, m5, asm) + + case 113: + op, m4, _ := vop(p.As) + m5 := singleElementMask(p.As) + i3 := uint32(c.vregoff(&p.From)) + zVRIe(op, uint32(p.To.Reg), uint32(p.Reg), i3, m5, m4, asm) + + case 114: // VRR-a + op, m3, m5 := vop(p.As) + m4 := singleElementMask(p.As) + zVRRa(op, uint32(p.To.Reg), uint32(p.From.Reg), m5, m4, m3, asm) + + case 115: // VRR-a COMPARE + op, m3, m5 := vop(p.As) + m4 := singleElementMask(p.As) + zVRRa(op, uint32(p.From.Reg), uint32(p.To.Reg), m5, m4, m3, asm) + + case 117: // VRR-b + op, m4, m5 := vop(p.As) + zVRRb(op, uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg), m5, m4, asm) + + case 118: // VRR-c + op, m4, m6 := vop(p.As) + m5 := singleElementMask(p.As) + v3 := p.Reg + if v3 == 0 { + v3 = p.To.Reg + } + zVRRc(op, uint32(p.To.Reg), uint32(p.From.Reg), uint32(v3), m6, m5, m4, asm) + + case 119: // VRR-c SHIFT/ROTATE/DIVIDE/SUB (rhs value on the left, like SLD, DIV etc.) 
+ op, m4, m6 := vop(p.As) + m5 := singleElementMask(p.As) + v2 := p.Reg + if v2 == 0 { + v2 = p.To.Reg + } + zVRRc(op, uint32(p.To.Reg), uint32(v2), uint32(p.From.Reg), m6, m5, m4, asm) + + case 120: // VRR-d + op, m6, _ := vop(p.As) + m5 := singleElementMask(p.As) + v1 := uint32(p.To.Reg) + v2 := uint32(p.From.Reg) + v3 := uint32(p.Reg) + v4 := uint32(p.GetFrom3().Reg) + zVRRd(op, v1, v2, v3, m6, m5, v4, asm) + + case 121: // VRR-e + op, m6, _ := vop(p.As) + m5 := singleElementMask(p.As) + v1 := uint32(p.To.Reg) + v2 := uint32(p.From.Reg) + v3 := uint32(p.Reg) + v4 := uint32(p.GetFrom3().Reg) + zVRRe(op, v1, v2, v3, m6, m5, v4, asm) + + case 122: // VRR-f LOAD VRS FROM GRS DISJOINT + op, _, _ := vop(p.As) + zVRRf(op, uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg), asm) + + case 123: // VPDI $m4, V2, V3, V1 + op, _, _ := vop(p.As) + m4 := c.regoff(&p.From) + zVRRc(op, uint32(p.To.Reg), uint32(p.Reg), uint32(p.GetFrom3().Reg), 0, 0, uint32(m4), asm) + } +} + +func (c *ctxtz) vregoff(a *obj.Addr) int64 { + c.instoffset = 0 + if a != nil { + c.aclass(a) + } + return c.instoffset +} + +func (c *ctxtz) regoff(a *obj.Addr) int32 { + return int32(c.vregoff(a)) +} + +// find if the displacement is within 12 bit. +func isU12(displacement int32) bool { + return displacement >= 0 && displacement < DISP12 +} + +// zopload12 returns the RX op with 12 bit displacement for the given load. +func (c *ctxtz) zopload12(a obj.As) (uint32, bool) { + switch a { + case AFMOVD: + return op_LD, true + case AFMOVS: + return op_LE, true + } + return 0, false +} + +// zopload returns the RXY op for the given load. 
+func (c *ctxtz) zopload(a obj.As) uint32 { + switch a { + // fixed point load + case AMOVD: + return op_LG + case AMOVW: + return op_LGF + case AMOVWZ: + return op_LLGF + case AMOVH: + return op_LGH + case AMOVHZ: + return op_LLGH + case AMOVB: + return op_LGB + case AMOVBZ: + return op_LLGC + + // floating point load + case AFMOVD: + return op_LDY + case AFMOVS: + return op_LEY + + // byte reversed load + case AMOVDBR: + return op_LRVG + case AMOVWBR: + return op_LRV + case AMOVHBR: + return op_LRVH + } + + c.ctxt.Diag("unknown store opcode %v", a) + return 0 +} + +// zopstore12 returns the RX op with 12 bit displacement for the given store. +func (c *ctxtz) zopstore12(a obj.As) (uint32, bool) { + switch a { + case AFMOVD: + return op_STD, true + case AFMOVS: + return op_STE, true + case AMOVW, AMOVWZ: + return op_ST, true + case AMOVH, AMOVHZ: + return op_STH, true + case AMOVB, AMOVBZ: + return op_STC, true + } + return 0, false +} + +// zopstore returns the RXY op for the given store. +func (c *ctxtz) zopstore(a obj.As) uint32 { + switch a { + // fixed point store + case AMOVD: + return op_STG + case AMOVW, AMOVWZ: + return op_STY + case AMOVH, AMOVHZ: + return op_STHY + case AMOVB, AMOVBZ: + return op_STCY + + // floating point store + case AFMOVD: + return op_STDY + case AFMOVS: + return op_STEY + + // byte reversed store + case AMOVDBR: + return op_STRVG + case AMOVWBR: + return op_STRV + case AMOVHBR: + return op_STRVH + } + + c.ctxt.Diag("unknown store opcode %v", a) + return 0 +} + +// zoprre returns the RRE op for the given a. +func (c *ctxtz) zoprre(a obj.As) uint32 { + switch a { + case ACMP: + return op_CGR + case ACMPU: + return op_CLGR + case AFCMPO: //ordered + return op_KDBR + case AFCMPU: //unordered + return op_CDBR + case ACEBR: + return op_CEBR + } + c.ctxt.Diag("unknown rre opcode %v", a) + return 0 +} + +// zoprr returns the RR op for the given a. 
+func (c *ctxtz) zoprr(a obj.As) uint32 { + switch a { + case ACMPW: + return op_CR + case ACMPWU: + return op_CLR + } + c.ctxt.Diag("unknown rr opcode %v", a) + return 0 +} + +// zopril returns the RIL op for the given a. +func (c *ctxtz) zopril(a obj.As) uint32 { + switch a { + case ACMP: + return op_CGFI + case ACMPU: + return op_CLGFI + case ACMPW: + return op_CFI + case ACMPWU: + return op_CLFI + } + c.ctxt.Diag("unknown ril opcode %v", a) + return 0 +} + +// z instructions sizes +const ( + sizeE = 2 + sizeI = 2 + sizeIE = 4 + sizeMII = 6 + sizeRI = 4 + sizeRI1 = 4 + sizeRI2 = 4 + sizeRI3 = 4 + sizeRIE = 6 + sizeRIE1 = 6 + sizeRIE2 = 6 + sizeRIE3 = 6 + sizeRIE4 = 6 + sizeRIE5 = 6 + sizeRIE6 = 6 + sizeRIL = 6 + sizeRIL1 = 6 + sizeRIL2 = 6 + sizeRIL3 = 6 + sizeRIS = 6 + sizeRR = 2 + sizeRRD = 4 + sizeRRE = 4 + sizeRRF = 4 + sizeRRF1 = 4 + sizeRRF2 = 4 + sizeRRF3 = 4 + sizeRRF4 = 4 + sizeRRF5 = 4 + sizeRRR = 2 + sizeRRS = 6 + sizeRS = 4 + sizeRS1 = 4 + sizeRS2 = 4 + sizeRSI = 4 + sizeRSL = 6 + sizeRSY = 6 + sizeRSY1 = 6 + sizeRSY2 = 6 + sizeRX = 4 + sizeRX1 = 4 + sizeRX2 = 4 + sizeRXE = 6 + sizeRXF = 6 + sizeRXY = 6 + sizeRXY1 = 6 + sizeRXY2 = 6 + sizeS = 4 + sizeSI = 4 + sizeSIL = 6 + sizeSIY = 6 + sizeSMI = 6 + sizeSS = 6 + sizeSS1 = 6 + sizeSS2 = 6 + sizeSS3 = 6 + sizeSS4 = 6 + sizeSS5 = 6 + sizeSS6 = 6 + sizeSSE = 6 + sizeSSF = 6 +) + +// instruction format variations +type form int + +const ( + _a form = iota + _b + _c + _d + _e + _f +) + +func zE(op uint32, asm *[]byte) { + *asm = append(*asm, uint8(op>>8), uint8(op)) +} + +func zI(op, i1 uint32, asm *[]byte) { + *asm = append(*asm, uint8(op>>8), uint8(i1)) +} + +func zMII(op, m1, ri2, ri3 uint32, asm *[]byte) { + *asm = append(*asm, + uint8(op>>8), + (uint8(m1)<<4)|uint8((ri2>>8)&0x0F), + uint8(ri2), + uint8(ri3>>16), + uint8(ri3>>8), + uint8(ri3)) +} + +func zRI(op, r1_m1, i2_ri2 uint32, asm *[]byte) { + *asm = append(*asm, + uint8(op>>8), + (uint8(r1_m1)<<4)|(uint8(op)&0x0F), + uint8(i2_ri2>>8), + 
uint8(i2_ri2)) +} + +// Expected argument values for the instruction formats. +// +// Format a1 a2 a3 a4 a5 a6 a7 +// ------------------------------------ +// a r1, 0, i2, 0, 0, m3, 0 +// b r1, r2, ri4, 0, 0, m3, 0 +// c r1, m3, ri4, 0, 0, 0, i2 +// d r1, r3, i2, 0, 0, 0, 0 +// e r1, r3, ri2, 0, 0, 0, 0 +// f r1, r2, 0, i3, i4, 0, i5 +// g r1, m3, i2, 0, 0, 0, 0 +func zRIE(f form, op, r1, r2_m3_r3, i2_ri4_ri2, i3, i4, m3, i2_i5 uint32, asm *[]byte) { + *asm = append(*asm, uint8(op>>8), uint8(r1)<<4|uint8(r2_m3_r3&0x0F)) + + switch f { + default: + *asm = append(*asm, uint8(i2_ri4_ri2>>8), uint8(i2_ri4_ri2)) + case _f: + *asm = append(*asm, uint8(i3), uint8(i4)) + } + + switch f { + case _a, _b: + *asm = append(*asm, uint8(m3)<<4) + default: + *asm = append(*asm, uint8(i2_i5)) + } + + *asm = append(*asm, uint8(op)) +} + +func zRIL(f form, op, r1_m1, i2_ri2 uint32, asm *[]byte) { + if f == _a || f == _b { + r1_m1 = r1_m1 - obj.RBaseS390X // this is a register base + } + *asm = append(*asm, + uint8(op>>8), + (uint8(r1_m1)<<4)|(uint8(op)&0x0F), + uint8(i2_ri2>>24), + uint8(i2_ri2>>16), + uint8(i2_ri2>>8), + uint8(i2_ri2)) +} + +func zRIS(op, r1, m3, b4, d4, i2 uint32, asm *[]byte) { + *asm = append(*asm, + uint8(op>>8), + (uint8(r1)<<4)|uint8(m3&0x0F), + (uint8(b4)<<4)|(uint8(d4>>8)&0x0F), + uint8(d4), + uint8(i2), + uint8(op)) +} + +func zRR(op, r1, r2 uint32, asm *[]byte) { + *asm = append(*asm, uint8(op>>8), (uint8(r1)<<4)|uint8(r2&0x0F)) +} + +func zRRD(op, r1, r3, r2 uint32, asm *[]byte) { + *asm = append(*asm, + uint8(op>>8), + uint8(op), + uint8(r1)<<4, + (uint8(r3)<<4)|uint8(r2&0x0F)) +} + +func zRRE(op, r1, r2 uint32, asm *[]byte) { + *asm = append(*asm, + uint8(op>>8), + uint8(op), + 0, + (uint8(r1)<<4)|uint8(r2&0x0F)) +} + +func zRRF(op, r3_m3, m4, r1, r2 uint32, asm *[]byte) { + *asm = append(*asm, + uint8(op>>8), + uint8(op), + (uint8(r3_m3)<<4)|uint8(m4&0x0F), + (uint8(r1)<<4)|uint8(r2&0x0F)) +} + +func zRRS(op, r1, r2, b4, d4, m3 uint32, asm *[]byte) { + 
*asm = append(*asm, + uint8(op>>8), + (uint8(r1)<<4)|uint8(r2&0x0F), + (uint8(b4)<<4)|uint8((d4>>8)&0x0F), + uint8(d4), + uint8(m3)<<4, + uint8(op)) +} + +func zRS(op, r1, r3_m3, b2, d2 uint32, asm *[]byte) { + *asm = append(*asm, + uint8(op>>8), + (uint8(r1)<<4)|uint8(r3_m3&0x0F), + (uint8(b2)<<4)|uint8((d2>>8)&0x0F), + uint8(d2)) +} + +func zRSI(op, r1, r3, ri2 uint32, asm *[]byte) { + *asm = append(*asm, + uint8(op>>8), + (uint8(r1)<<4)|uint8(r3&0x0F), + uint8(ri2>>8), + uint8(ri2)) +} + +func zRSL(op, l1, b2, d2 uint32, asm *[]byte) { + *asm = append(*asm, + uint8(op>>8), + uint8(l1), + (uint8(b2)<<4)|uint8((d2>>8)&0x0F), + uint8(d2), + uint8(op)) +} + +func zRSY(op, r1, r3_m3, b2, d2 uint32, asm *[]byte) { + dl2 := uint16(d2) & 0x0FFF + *asm = append(*asm, + uint8(op>>8), + (uint8(r1)<<4)|uint8(r3_m3&0x0F), + (uint8(b2)<<4)|(uint8(dl2>>8)&0x0F), + uint8(dl2), + uint8(d2>>12), + uint8(op)) +} + +func zRX(op, r1_m1, x2, b2, d2 uint32, asm *[]byte) { + *asm = append(*asm, + uint8(op>>8), + (uint8(r1_m1)<<4)|uint8(x2&0x0F), + (uint8(b2)<<4)|uint8((d2>>8)&0x0F), + uint8(d2)) +} + +func zRXE(op, r1, x2, b2, d2, m3 uint32, asm *[]byte) { + *asm = append(*asm, + uint8(op>>8), + (uint8(r1)<<4)|uint8(x2&0x0F), + (uint8(b2)<<4)|uint8((d2>>8)&0x0F), + uint8(d2), + uint8(m3)<<4, + uint8(op)) +} + +func zRXF(op, r3, x2, b2, d2, m1 uint32, asm *[]byte) { + *asm = append(*asm, + uint8(op>>8), + (uint8(r3)<<4)|uint8(x2&0x0F), + (uint8(b2)<<4)|uint8((d2>>8)&0x0F), + uint8(d2), + uint8(m1)<<4, + uint8(op)) +} + +func zRXY(op, r1_m1, x2, b2, d2 uint32, asm *[]byte) { + dl2 := uint16(d2) & 0x0FFF + *asm = append(*asm, + uint8(op>>8), + (uint8(r1_m1)<<4)|uint8(x2&0x0F), + (uint8(b2)<<4)|(uint8(dl2>>8)&0x0F), + uint8(dl2), + uint8(d2>>12), + uint8(op)) +} + +func zS(op, b2, d2 uint32, asm *[]byte) { + *asm = append(*asm, + uint8(op>>8), + uint8(op), + (uint8(b2)<<4)|uint8((d2>>8)&0x0F), + uint8(d2)) +} + +func zSI(op, i2, b1, d1 uint32, asm *[]byte) { + *asm = append(*asm, + 
uint8(op>>8), + uint8(i2), + (uint8(b1)<<4)|uint8((d1>>8)&0x0F), + uint8(d1)) +} + +func zSIL(op, b1, d1, i2 uint32, asm *[]byte) { + *asm = append(*asm, + uint8(op>>8), + uint8(op), + (uint8(b1)<<4)|uint8((d1>>8)&0x0F), + uint8(d1), + uint8(i2>>8), + uint8(i2)) +} + +func zSIY(op, i2, b1, d1 uint32, asm *[]byte) { + dl1 := uint16(d1) & 0x0FFF + *asm = append(*asm, + uint8(op>>8), + uint8(i2), + (uint8(b1)<<4)|(uint8(dl1>>8)&0x0F), + uint8(dl1), + uint8(d1>>12), + uint8(op)) +} + +func zSMI(op, m1, b3, d3, ri2 uint32, asm *[]byte) { + *asm = append(*asm, + uint8(op>>8), + uint8(m1)<<4, + (uint8(b3)<<4)|uint8((d3>>8)&0x0F), + uint8(d3), + uint8(ri2>>8), + uint8(ri2)) +} + +// Expected argument values for the instruction formats. +// +// Format a1 a2 a3 a4 a5 a6 +// ------------------------------- +// a l1, 0, b1, d1, b2, d2 +// b l1, l2, b1, d1, b2, d2 +// c l1, i3, b1, d1, b2, d2 +// d r1, r3, b1, d1, b2, d2 +// e r1, r3, b2, d2, b4, d4 +// f 0, l2, b1, d1, b2, d2 +func zSS(f form, op, l1_r1, l2_i3_r3, b1_b2, d1_d2, b2_b4, d2_d4 uint32, asm *[]byte) { + *asm = append(*asm, uint8(op>>8)) + + switch f { + case _a: + *asm = append(*asm, uint8(l1_r1)) + case _b, _c, _d, _e: + *asm = append(*asm, (uint8(l1_r1)<<4)|uint8(l2_i3_r3&0x0F)) + case _f: + *asm = append(*asm, uint8(l2_i3_r3)) + } + + *asm = append(*asm, + (uint8(b1_b2)<<4)|uint8((d1_d2>>8)&0x0F), + uint8(d1_d2), + (uint8(b2_b4)<<4)|uint8((d2_d4>>8)&0x0F), + uint8(d2_d4)) +} + +func zSSE(op, b1, d1, b2, d2 uint32, asm *[]byte) { + *asm = append(*asm, + uint8(op>>8), + uint8(op), + (uint8(b1)<<4)|uint8((d1>>8)&0x0F), + uint8(d1), + (uint8(b2)<<4)|uint8((d2>>8)&0x0F), + uint8(d2)) +} + +func zSSF(op, r3, b1, d1, b2, d2 uint32, asm *[]byte) { + *asm = append(*asm, + uint8(op>>8), + (uint8(r3)<<4)|(uint8(op)&0x0F), + (uint8(b1)<<4)|uint8((d1>>8)&0x0F), + uint8(d1), + (uint8(b2)<<4)|uint8((d2>>8)&0x0F), + uint8(d2)) +} + +func rxb(va, vb, vc, vd uint32) uint8 { + mask := uint8(0) + if va >= REG_V16 && va <= REG_V31 { 
+ mask |= 0x8 + } + if vb >= REG_V16 && vb <= REG_V31 { + mask |= 0x4 + } + if vc >= REG_V16 && vc <= REG_V31 { + mask |= 0x2 + } + if vd >= REG_V16 && vd <= REG_V31 { + mask |= 0x1 + } + return mask +} + +func zVRX(op, v1, x2, b2, d2, m3 uint32, asm *[]byte) { + *asm = append(*asm, + uint8(op>>8), + (uint8(v1)<<4)|(uint8(x2)&0xf), + (uint8(b2)<<4)|(uint8(d2>>8)&0xf), + uint8(d2), + (uint8(m3)<<4)|rxb(v1, 0, 0, 0), + uint8(op)) +} + +func zVRV(op, v1, v2, b2, d2, m3 uint32, asm *[]byte) { + *asm = append(*asm, + uint8(op>>8), + (uint8(v1)<<4)|(uint8(v2)&0xf), + (uint8(b2)<<4)|(uint8(d2>>8)&0xf), + uint8(d2), + (uint8(m3)<<4)|rxb(v1, v2, 0, 0), + uint8(op)) +} + +func zVRS(op, v1, v3_r3, b2, d2, m4 uint32, asm *[]byte) { + *asm = append(*asm, + uint8(op>>8), + (uint8(v1)<<4)|(uint8(v3_r3)&0xf), + (uint8(b2)<<4)|(uint8(d2>>8)&0xf), + uint8(d2), + (uint8(m4)<<4)|rxb(v1, v3_r3, 0, 0), + uint8(op)) +} + +func zVRRa(op, v1, v2, m5, m4, m3 uint32, asm *[]byte) { + *asm = append(*asm, + uint8(op>>8), + (uint8(v1)<<4)|(uint8(v2)&0xf), + 0, + (uint8(m5)<<4)|(uint8(m4)&0xf), + (uint8(m3)<<4)|rxb(v1, v2, 0, 0), + uint8(op)) +} + +func zVRRb(op, v1, v2, v3, m5, m4 uint32, asm *[]byte) { + *asm = append(*asm, + uint8(op>>8), + (uint8(v1)<<4)|(uint8(v2)&0xf), + uint8(v3)<<4, + uint8(m5)<<4, + (uint8(m4)<<4)|rxb(v1, v2, v3, 0), + uint8(op)) +} + +func zVRRc(op, v1, v2, v3, m6, m5, m4 uint32, asm *[]byte) { + *asm = append(*asm, + uint8(op>>8), + (uint8(v1)<<4)|(uint8(v2)&0xf), + uint8(v3)<<4, + (uint8(m6)<<4)|(uint8(m5)&0xf), + (uint8(m4)<<4)|rxb(v1, v2, v3, 0), + uint8(op)) +} + +func zVRRd(op, v1, v2, v3, m5, m6, v4 uint32, asm *[]byte) { + *asm = append(*asm, + uint8(op>>8), + (uint8(v1)<<4)|(uint8(v2)&0xf), + (uint8(v3)<<4)|(uint8(m5)&0xf), + uint8(m6)<<4, + (uint8(v4)<<4)|rxb(v1, v2, v3, v4), + uint8(op)) +} + +func zVRRe(op, v1, v2, v3, m6, m5, v4 uint32, asm *[]byte) { + *asm = append(*asm, + uint8(op>>8), + (uint8(v1)<<4)|(uint8(v2)&0xf), + (uint8(v3)<<4)|(uint8(m6)&0xf), 
+ uint8(m5), + (uint8(v4)<<4)|rxb(v1, v2, v3, v4), + uint8(op)) +} + +func zVRRf(op, v1, r2, r3 uint32, asm *[]byte) { + *asm = append(*asm, + uint8(op>>8), + (uint8(v1)<<4)|(uint8(r2)&0xf), + uint8(r3)<<4, + 0, + rxb(v1, 0, 0, 0), + uint8(op)) +} + +func zVRIa(op, v1, i2, m3 uint32, asm *[]byte) { + *asm = append(*asm, + uint8(op>>8), + uint8(v1)<<4, + uint8(i2>>8), + uint8(i2), + (uint8(m3)<<4)|rxb(v1, 0, 0, 0), + uint8(op)) +} + +func zVRIb(op, v1, i2, i3, m4 uint32, asm *[]byte) { + *asm = append(*asm, + uint8(op>>8), + uint8(v1)<<4, + uint8(i2), + uint8(i3), + (uint8(m4)<<4)|rxb(v1, 0, 0, 0), + uint8(op)) +} + +func zVRIc(op, v1, v3, i2, m4 uint32, asm *[]byte) { + *asm = append(*asm, + uint8(op>>8), + (uint8(v1)<<4)|(uint8(v3)&0xf), + uint8(i2>>8), + uint8(i2), + (uint8(m4)<<4)|rxb(v1, v3, 0, 0), + uint8(op)) +} + +func zVRId(op, v1, v2, v3, i4, m5 uint32, asm *[]byte) { + *asm = append(*asm, + uint8(op>>8), + (uint8(v1)<<4)|(uint8(v2)&0xf), + uint8(v3)<<4, + uint8(i4), + (uint8(m5)<<4)|rxb(v1, v2, v3, 0), + uint8(op)) +} + +func zVRIe(op, v1, v2, i3, m5, m4 uint32, asm *[]byte) { + *asm = append(*asm, + uint8(op>>8), + (uint8(v1)<<4)|(uint8(v2)&0xf), + uint8(i3>>4), + (uint8(i3)<<4)|(uint8(m5)&0xf), + (uint8(m4)<<4)|rxb(v1, v2, 0, 0), + uint8(op)) +} diff --git a/src/cmd/internal/obj/s390x/condition_code.go b/src/cmd/internal/obj/s390x/condition_code.go new file mode 100644 index 0000000..f498fd6 --- /dev/null +++ b/src/cmd/internal/obj/s390x/condition_code.go @@ -0,0 +1,128 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package s390x + +import ( + "fmt" +) + +// CCMask represents a 4-bit condition code mask. Bits that +// are not part of the mask should be 0. +// +// Condition code masks represent the 4 possible values of +// the 2-bit condition code as individual bits. 
Since IBM Z +// is a big-endian platform bits are numbered from left to +// right. The lowest value, 0, is represented by 8 (0b1000) +// and the highest value, 3, is represented by 1 (0b0001). +// +// Note that condition code values have different semantics +// depending on the instruction that set the condition code. +// The names given here assume that the condition code was +// set by an integer or floating point comparison. Other +// instructions may use these same codes to indicate +// different results such as a carry or overflow. +type CCMask uint8 + +const ( + Never CCMask = 0 // no-op + + // 1-bit masks + Equal CCMask = 1 << 3 + Less CCMask = 1 << 2 + Greater CCMask = 1 << 1 + Unordered CCMask = 1 << 0 + + // 2-bit masks + EqualOrUnordered CCMask = Equal | Unordered // not less and not greater + LessOrEqual CCMask = Less | Equal // ordered and not greater + LessOrGreater CCMask = Less | Greater // ordered and not equal + LessOrUnordered CCMask = Less | Unordered // not greater and not equal + GreaterOrEqual CCMask = Greater | Equal // ordered and not less + GreaterOrUnordered CCMask = Greater | Unordered // not less and not equal + + // 3-bit masks + NotEqual CCMask = Always ^ Equal + NotLess CCMask = Always ^ Less + NotGreater CCMask = Always ^ Greater + NotUnordered CCMask = Always ^ Unordered + + // 4-bit mask + Always CCMask = Equal | Less | Greater | Unordered + + // useful aliases + Carry CCMask = GreaterOrUnordered + NoCarry CCMask = LessOrEqual + Borrow CCMask = NoCarry + NoBorrow CCMask = Carry +) + +// Inverse returns the complement of the condition code mask. +func (c CCMask) Inverse() CCMask { + return c ^ Always +} + +// ReverseComparison swaps the bits at 0b0100 and 0b0010 in the mask, +// reversing the behavior of greater than and less than conditions. 
+func (c CCMask) ReverseComparison() CCMask { + r := c & EqualOrUnordered + if c&Less != 0 { + r |= Greater + } + if c&Greater != 0 { + r |= Less + } + return r +} + +func (c CCMask) String() string { + switch c { + // 0-bit mask + case Never: + return "Never" + + // 1-bit masks + case Equal: + return "Equal" + case Less: + return "Less" + case Greater: + return "Greater" + case Unordered: + return "Unordered" + + // 2-bit masks + case EqualOrUnordered: + return "EqualOrUnordered" + case LessOrEqual: + return "LessOrEqual" + case LessOrGreater: + return "LessOrGreater" + case LessOrUnordered: + return "LessOrUnordered" + case GreaterOrEqual: + return "GreaterOrEqual" + case GreaterOrUnordered: + return "GreaterOrUnordered" + + // 3-bit masks + case NotEqual: + return "NotEqual" + case NotLess: + return "NotLess" + case NotGreater: + return "NotGreater" + case NotUnordered: + return "NotUnordered" + + // 4-bit mask + case Always: + return "Always" + } + + // invalid + return fmt.Sprintf("Invalid (%#x)", c) +} + +func (CCMask) CanBeAnSSAAux() {} diff --git a/src/cmd/internal/obj/s390x/listz.go b/src/cmd/internal/obj/s390x/listz.go new file mode 100644 index 0000000..ee58026 --- /dev/null +++ b/src/cmd/internal/obj/s390x/listz.go @@ -0,0 +1,73 @@ +// Based on cmd/internal/obj/ppc64/list9.go. +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2008 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2008 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package s390x + +import ( + "cmd/internal/obj" + "fmt" +) + +func init() { + obj.RegisterRegister(obj.RBaseS390X, REG_R0+1024, rconv) + obj.RegisterOpcode(obj.ABaseS390X, Anames) +} + +func rconv(r int) string { + if r == 0 { + return "NONE" + } + if r == REGG { + // Special case. + return "g" + } + if REG_R0 <= r && r <= REG_R15 { + return fmt.Sprintf("R%d", r-REG_R0) + } + if REG_F0 <= r && r <= REG_F15 { + return fmt.Sprintf("F%d", r-REG_F0) + } + if REG_AR0 <= r && r <= REG_AR15 { + return fmt.Sprintf("AR%d", r-REG_AR0) + } + if REG_V0 <= r && r <= REG_V31 { + return fmt.Sprintf("V%d", r-REG_V0) + } + return fmt.Sprintf("Rgok(%d)", r-obj.RBaseS390X) +} + +func DRconv(a int) string { + s := "C_??" 
+ if a >= C_NONE && a <= C_NCLASS { + s = cnamesz[a] + } + var fp string + fp += s + return fp +} diff --git a/src/cmd/internal/obj/s390x/objz.go b/src/cmd/internal/obj/s390x/objz.go new file mode 100644 index 0000000..4e84756 --- /dev/null +++ b/src/cmd/internal/obj/s390x/objz.go @@ -0,0 +1,802 @@ +// Based on cmd/internal/obj/ppc64/obj9.go. +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2008 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2008 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +package s390x + +import ( + "cmd/internal/obj" + "cmd/internal/objabi" + "cmd/internal/sys" + "log" + "math" +) + +func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { + p.From.Class = 0 + p.To.Class = 0 + + c := ctxtz{ctxt: ctxt, newprog: newprog} + + // Rewrite BR/BL to symbol as TYPE_BRANCH. + switch p.As { + case ABR, ABL, obj.ARET, obj.ADUFFZERO, obj.ADUFFCOPY: + if p.To.Sym != nil { + p.To.Type = obj.TYPE_BRANCH + } + } + + // Rewrite float constants to values stored in memory unless they are +0. + switch p.As { + case AFMOVS: + if p.From.Type == obj.TYPE_FCONST { + f32 := float32(p.From.Val.(float64)) + if math.Float32bits(f32) == 0 { // +0 + break + } + p.From.Type = obj.TYPE_MEM + p.From.Sym = ctxt.Float32Sym(f32) + p.From.Name = obj.NAME_EXTERN + p.From.Offset = 0 + } + + case AFMOVD: + if p.From.Type == obj.TYPE_FCONST { + f64 := p.From.Val.(float64) + if math.Float64bits(f64) == 0 { // +0 + break + } + p.From.Type = obj.TYPE_MEM + p.From.Sym = ctxt.Float64Sym(f64) + p.From.Name = obj.NAME_EXTERN + p.From.Offset = 0 + } + + // put constants not loadable by LOAD IMMEDIATE into memory + case AMOVD: + if p.From.Type == obj.TYPE_CONST { + val := p.From.Offset + if int64(int32(val)) != val && + int64(uint32(val)) != val && + int64(uint64(val)&(0xffffffff<<32)) != val { + p.From.Type = obj.TYPE_MEM + p.From.Sym = ctxt.Int64Sym(p.From.Offset) + p.From.Name = obj.NAME_EXTERN + p.From.Offset = 0 + } + } + } + + // Rewrite SUB constants into ADD. + switch p.As { + case ASUBC: + if p.From.Type == obj.TYPE_CONST && isint32(-p.From.Offset) { + p.From.Offset = -p.From.Offset + p.As = AADDC + } + + case ASUB: + if p.From.Type == obj.TYPE_CONST && isint32(-p.From.Offset) { + p.From.Offset = -p.From.Offset + p.As = AADD + } + } + + if c.ctxt.Flag_dynlink { + c.rewriteToUseGot(p) + } +} + +// Rewrite p, if necessary, to access global data via the global offset table. 
+func (c *ctxtz) rewriteToUseGot(p *obj.Prog) { + // At the moment EXRL instructions are not emitted by the compiler and only reference local symbols in + // assembly code. + if p.As == AEXRL { + return + } + + // We only care about global data: NAME_EXTERN means a global + // symbol in the Go sense, and p.Sym.Local is true for a few + // internally defined symbols. + // Rewrites must not clobber flags and therefore cannot use the + // ADD instruction. + if p.From.Type == obj.TYPE_ADDR && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() { + // MOVD $sym, Rx becomes MOVD sym@GOT, Rx + // MOVD $sym+<off>, Rx becomes MOVD sym@GOT, Rx or REGTMP2; MOVD $<off>(Rx or REGTMP2), Rx + if p.To.Type != obj.TYPE_REG || p.As != AMOVD { + c.ctxt.Diag("do not know how to handle LEA-type insn to non-register in %v with -dynlink", p) + } + p.From.Type = obj.TYPE_MEM + p.From.Name = obj.NAME_GOTREF + q := p + if p.From.Offset != 0 { + target := p.To.Reg + if target == REG_R0 { + // Cannot use R0 as input to address calculation. + // REGTMP might be used by the assembler. + p.To.Reg = REGTMP2 + } + q = obj.Appendp(q, c.newprog) + q.As = AMOVD + q.From.Type = obj.TYPE_ADDR + q.From.Offset = p.From.Offset + q.From.Reg = p.To.Reg + q.To.Type = obj.TYPE_REG + q.To.Reg = target + p.From.Offset = 0 + } + } + if p.GetFrom3() != nil && p.GetFrom3().Name == obj.NAME_EXTERN { + c.ctxt.Diag("don't know how to handle %v with -dynlink", p) + } + var source *obj.Addr + // MOVD sym, Ry becomes MOVD sym@GOT, REGTMP2; MOVD (REGTMP2), Ry + // MOVD Ry, sym becomes MOVD sym@GOT, REGTMP2; MOVD Ry, (REGTMP2) + // An addition may be inserted between the two MOVs if there is an offset. 
+ if p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() { + if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() { + c.ctxt.Diag("cannot handle NAME_EXTERN on both sides in %v with -dynlink", p) + } + source = &p.From + } else if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() { + source = &p.To + } else { + return + } + if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ACALL || p.As == obj.ARET || p.As == obj.AJMP { + return + } + if source.Sym.Type == objabi.STLSBSS { + return + } + if source.Type != obj.TYPE_MEM { + c.ctxt.Diag("don't know how to handle %v with -dynlink", p) + } + p1 := obj.Appendp(p, c.newprog) + p2 := obj.Appendp(p1, c.newprog) + + p1.As = AMOVD + p1.From.Type = obj.TYPE_MEM + p1.From.Sym = source.Sym + p1.From.Name = obj.NAME_GOTREF + p1.To.Type = obj.TYPE_REG + p1.To.Reg = REGTMP2 + + p2.As = p.As + p2.From = p.From + p2.To = p.To + if p.From.Name == obj.NAME_EXTERN { + p2.From.Reg = REGTMP2 + p2.From.Name = obj.NAME_NONE + p2.From.Sym = nil + } else if p.To.Name == obj.NAME_EXTERN { + p2.To.Reg = REGTMP2 + p2.To.Name = obj.NAME_NONE + p2.To.Sym = nil + } else { + return + } + obj.Nopout(p) +} + +func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { + // TODO(minux): add morestack short-cuts with small fixed frame-size. + if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { + return + } + + c := ctxtz{ctxt: ctxt, cursym: cursym, newprog: newprog} + + p := c.cursym.Func().Text + textstksiz := p.To.Offset + if textstksiz == -8 { + // Compatibility hack. 
+ p.From.Sym.Set(obj.AttrNoFrame, true) + textstksiz = 0 + } + if textstksiz%8 != 0 { + c.ctxt.Diag("frame size %d not a multiple of 8", textstksiz) + } + if p.From.Sym.NoFrame() { + if textstksiz != 0 { + c.ctxt.Diag("NOFRAME functions must have a frame size of 0, not %d", textstksiz) + } + } + + c.cursym.Func().Args = p.To.Val.(int32) + c.cursym.Func().Locals = int32(textstksiz) + + /* + * find leaf subroutines + * strip NOPs + * expand RET + */ + + var q *obj.Prog + for p := c.cursym.Func().Text; p != nil; p = p.Link { + switch p.As { + case obj.ATEXT: + q = p + p.Mark |= LEAF + + case ABL, ABCL: + q = p + c.cursym.Func().Text.Mark &^= LEAF + fallthrough + + case ABC, + ABRC, + ABEQ, + ABGE, + ABGT, + ABLE, + ABLT, + ABLEU, + ABLTU, + ABNE, + ABR, + ABVC, + ABVS, + ACRJ, + ACGRJ, + ACLRJ, + ACLGRJ, + ACIJ, + ACGIJ, + ACLIJ, + ACLGIJ, + ACMPBEQ, + ACMPBGE, + ACMPBGT, + ACMPBLE, + ACMPBLT, + ACMPBNE, + ACMPUBEQ, + ACMPUBGE, + ACMPUBGT, + ACMPUBLE, + ACMPUBLT, + ACMPUBNE: + q = p + p.Mark |= BRANCH + + default: + q = p + } + } + + autosize := int32(0) + var pLast *obj.Prog + var pPre *obj.Prog + var pPreempt *obj.Prog + var pCheck *obj.Prog + wasSplit := false + for p := c.cursym.Func().Text; p != nil; p = p.Link { + pLast = p + switch p.As { + case obj.ATEXT: + autosize = int32(textstksiz) + + if p.Mark&LEAF != 0 && autosize == 0 { + // A leaf function with no locals has no frame. + p.From.Sym.Set(obj.AttrNoFrame, true) + } + + if !p.From.Sym.NoFrame() { + // If there is a stack frame at all, it includes + // space to save the LR. + autosize += int32(c.ctxt.Arch.FixedFrameSize) + } + + if p.Mark&LEAF != 0 && autosize < objabi.StackSmall { + // A leaf function with a small stack can be marked + // NOSPLIT, avoiding a stack check. 
+ p.From.Sym.Set(obj.AttrNoSplit, true) + } + + p.To.Offset = int64(autosize) + + q := p + + if !p.From.Sym.NoSplit() { + p, pPreempt, pCheck = c.stacksplitPre(p, autosize) // emit pre part of split check + pPre = p + p = c.ctxt.EndUnsafePoint(p, c.newprog, -1) + wasSplit = true //need post part of split + } + + if autosize != 0 { + // Make sure to save link register for non-empty frame, even if + // it is a leaf function, so that traceback works. + // Store link register before decrementing SP, so if a signal comes + // during the execution of the function prologue, the traceback + // code will not see a half-updated stack frame. + // This sequence is not async preemptible, as if we open a frame + // at the current SP, it will clobber the saved LR. + q = c.ctxt.StartUnsafePoint(p, c.newprog) + + q = obj.Appendp(q, c.newprog) + q.As = AMOVD + q.From.Type = obj.TYPE_REG + q.From.Reg = REG_LR + q.To.Type = obj.TYPE_MEM + q.To.Reg = REGSP + q.To.Offset = int64(-autosize) + + q = obj.Appendp(q, c.newprog) + q.As = AMOVD + q.From.Type = obj.TYPE_ADDR + q.From.Offset = int64(-autosize) + q.From.Reg = REGSP // not actually needed - REGSP is assumed if no reg is provided + q.To.Type = obj.TYPE_REG + q.To.Reg = REGSP + q.Spadj = autosize + + q = c.ctxt.EndUnsafePoint(q, c.newprog, -1) + + // On Linux, in a cgo binary we may get a SIGSETXID signal early on + // before the signal stack is set, as glibc doesn't allow us to block + // SIGSETXID. So a signal may land on the current stack and clobber + // the content below the SP. We store the LR again after the SP is + // decremented. + q = obj.Appendp(q, c.newprog) + q.As = AMOVD + q.From.Type = obj.TYPE_REG + q.From.Reg = REG_LR + q.To.Type = obj.TYPE_MEM + q.To.Reg = REGSP + q.To.Offset = 0 + } else if c.cursym.Func().Text.Mark&LEAF == 0 { + // A very few functions that do not return to their caller + // (e.g. gogo) are not identified as leaves but still have + // no frame. 
+ c.cursym.Func().Text.Mark |= LEAF + } + + if c.cursym.Func().Text.Mark&LEAF != 0 { + c.cursym.Set(obj.AttrLeaf, true) + break + } + + if c.cursym.Func().Text.From.Sym.Wrapper() { + // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame + // + // MOVD g_panic(g), R3 + // CMP R3, $0 + // BEQ end + // MOVD panic_argp(R3), R4 + // ADD $(autosize+8), R1, R5 + // CMP R4, R5 + // BNE end + // ADD $8, R1, R6 + // MOVD R6, panic_argp(R3) + // end: + // NOP + // + // The NOP is needed to give the jumps somewhere to land. + // It is a liblink NOP, not a s390x NOP: it encodes to 0 instruction bytes. + + q = obj.Appendp(q, c.newprog) + + q.As = AMOVD + q.From.Type = obj.TYPE_MEM + q.From.Reg = REGG + q.From.Offset = 4 * int64(c.ctxt.Arch.PtrSize) // G.panic + q.To.Type = obj.TYPE_REG + q.To.Reg = REG_R3 + + q = obj.Appendp(q, c.newprog) + q.As = ACMP + q.From.Type = obj.TYPE_REG + q.From.Reg = REG_R3 + q.To.Type = obj.TYPE_CONST + q.To.Offset = 0 + + q = obj.Appendp(q, c.newprog) + q.As = ABEQ + q.To.Type = obj.TYPE_BRANCH + p1 := q + + q = obj.Appendp(q, c.newprog) + q.As = AMOVD + q.From.Type = obj.TYPE_MEM + q.From.Reg = REG_R3 + q.From.Offset = 0 // Panic.argp + q.To.Type = obj.TYPE_REG + q.To.Reg = REG_R4 + + q = obj.Appendp(q, c.newprog) + q.As = AADD + q.From.Type = obj.TYPE_CONST + q.From.Offset = int64(autosize) + c.ctxt.Arch.FixedFrameSize + q.Reg = REGSP + q.To.Type = obj.TYPE_REG + q.To.Reg = REG_R5 + + q = obj.Appendp(q, c.newprog) + q.As = ACMP + q.From.Type = obj.TYPE_REG + q.From.Reg = REG_R4 + q.To.Type = obj.TYPE_REG + q.To.Reg = REG_R5 + + q = obj.Appendp(q, c.newprog) + q.As = ABNE + q.To.Type = obj.TYPE_BRANCH + p2 := q + + q = obj.Appendp(q, c.newprog) + q.As = AADD + q.From.Type = obj.TYPE_CONST + q.From.Offset = c.ctxt.Arch.FixedFrameSize + q.Reg = REGSP + q.To.Type = obj.TYPE_REG + q.To.Reg = REG_R6 + + q = obj.Appendp(q, c.newprog) + q.As = AMOVD + q.From.Type = obj.TYPE_REG + q.From.Reg = REG_R6 + q.To.Type = obj.TYPE_MEM + 
q.To.Reg = REG_R3 + q.To.Offset = 0 // Panic.argp + + q = obj.Appendp(q, c.newprog) + + q.As = obj.ANOP + p1.To.SetTarget(q) + p2.To.SetTarget(q) + } + + case obj.ARET: + retTarget := p.To.Sym + + if c.cursym.Func().Text.Mark&LEAF != 0 { + if autosize == 0 { + p.As = ABR + p.From = obj.Addr{} + if retTarget == nil { + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_LR + } else { + p.To.Type = obj.TYPE_BRANCH + p.To.Sym = retTarget + } + p.Mark |= BRANCH + break + } + + p.As = AADD + p.From.Type = obj.TYPE_CONST + p.From.Offset = int64(autosize) + p.To.Type = obj.TYPE_REG + p.To.Reg = REGSP + p.Spadj = -autosize + + q = obj.Appendp(p, c.newprog) + q.As = ABR + q.From = obj.Addr{} + if retTarget == nil { + q.To.Type = obj.TYPE_REG + q.To.Reg = REG_LR + } else { + q.To.Type = obj.TYPE_BRANCH + q.To.Sym = retTarget + } + q.Mark |= BRANCH + q.Spadj = autosize + break + } + + p.As = AMOVD + p.From.Type = obj.TYPE_MEM + p.From.Reg = REGSP + p.From.Offset = 0 + p.To = obj.Addr{ + Type: obj.TYPE_REG, + Reg: REG_LR, + } + + q = p + + if autosize != 0 { + q = obj.Appendp(q, c.newprog) + q.As = AADD + q.From.Type = obj.TYPE_CONST + q.From.Offset = int64(autosize) + q.To.Type = obj.TYPE_REG + q.To.Reg = REGSP + q.Spadj = -autosize + } + + q = obj.Appendp(q, c.newprog) + q.As = ABR + q.From = obj.Addr{} + if retTarget == nil { + q.To.Type = obj.TYPE_REG + q.To.Reg = REG_LR + } else { + q.To.Type = obj.TYPE_BRANCH + q.To.Sym = retTarget + } + q.Mark |= BRANCH + q.Spadj = autosize + + case AADD: + if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.From.Type == obj.TYPE_CONST { + p.Spadj = int32(-p.From.Offset) + } + + case obj.AGETCALLERPC: + if cursym.Leaf() { + /* MOVD LR, Rd */ + p.As = AMOVD + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_LR + } else { + /* MOVD (RSP), Rd */ + p.As = AMOVD + p.From.Type = obj.TYPE_MEM + p.From.Reg = REGSP + } + } + + if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.Spadj == 0 { + f := c.cursym.Func() + if f.FuncFlag&objabi.FuncFlag_SPWRITE 
== 0 { + c.cursym.Func().FuncFlag |= objabi.FuncFlag_SPWRITE + if ctxt.Debugvlog || !ctxt.IsAsm { + ctxt.Logf("auto-SPWRITE: %s\n", c.cursym.Name) + if !ctxt.IsAsm { + ctxt.Diag("invalid auto-SPWRITE in non-assembly") + ctxt.DiagFlush() + log.Fatalf("bad SPWRITE") + } + } + } + } + } + if wasSplit { + c.stacksplitPost(pLast, pPre, pPreempt, pCheck, autosize) // emit post part of split check + } +} + +// stacksplitPre generates the function stack check prologue following +// Prog p (which should be the TEXT Prog). It returns one or two +// branch Progs that must be patched to jump to the morestack epilogue, +// and the Prog that starts the morestack check. +func (c *ctxtz) stacksplitPre(p *obj.Prog, framesize int32) (pPre, pPreempt, pCheck *obj.Prog) { + if c.ctxt.Flag_maymorestack != "" { + // Save LR and REGCTXT + const frameSize = 16 + p = c.ctxt.StartUnsafePoint(p, c.newprog) + // MOVD LR, -16(SP) + p = obj.Appendp(p, c.newprog) + p.As = AMOVD + p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR} + p.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REGSP, Offset: -frameSize} + // MOVD $-16(SP), SP + p = obj.Appendp(p, c.newprog) + p.As = AMOVD + p.From = obj.Addr{Type: obj.TYPE_ADDR, Offset: -frameSize, Reg: REGSP} + p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REGSP} + p.Spadj = frameSize + // MOVD REGCTXT, 8(SP) + p = obj.Appendp(p, c.newprog) + p.As = AMOVD + p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REGCTXT} + p.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REGSP, Offset: 8} + + // BL maymorestack + p = obj.Appendp(p, c.newprog) + p.As = ABL + // See ../x86/obj6.go + sym := c.ctxt.LookupABI(c.ctxt.Flag_maymorestack, c.cursym.ABI()) + p.To = obj.Addr{Type: obj.TYPE_BRANCH, Sym: sym} + + // Restore LR and REGCTXT + + // MOVD REGCTXT, 8(SP) + p = obj.Appendp(p, c.newprog) + p.As = AMOVD + p.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REGSP, Offset: 8} + p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REGCTXT} + // MOVD (SP), LR + p = obj.Appendp(p, c.newprog) + p.As = AMOVD + p.From = 
obj.Addr{Type: obj.TYPE_MEM, Reg: REGSP, Offset: 0} + p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR} + // MOVD $16(SP), SP + p = obj.Appendp(p, c.newprog) + p.As = AMOVD + p.From = obj.Addr{Type: obj.TYPE_CONST, Reg: REGSP, Offset: frameSize} + p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REGSP} + p.Spadj = -frameSize + + p = c.ctxt.EndUnsafePoint(p, c.newprog, -1) + } + + // MOVD g_stackguard(g), R3 + p = obj.Appendp(p, c.newprog) + // Jump back to here after morestack returns. + pCheck = p + + p.As = AMOVD + p.From.Type = obj.TYPE_MEM + p.From.Reg = REGG + p.From.Offset = 2 * int64(c.ctxt.Arch.PtrSize) // G.stackguard0 + if c.cursym.CFunc() { + p.From.Offset = 3 * int64(c.ctxt.Arch.PtrSize) // G.stackguard1 + } + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_R3 + + // Mark the stack bound check and morestack call async nonpreemptible. + // If we get preempted here, when resumed the preemption request is + // cleared, but we'll still call morestack, which will double the stack + // unnecessarily. See issue #35470. + p = c.ctxt.StartUnsafePoint(p, c.newprog) + + if framesize <= objabi.StackSmall { + // small stack: SP < stackguard + // CMPUBGE stackguard, SP, label-of-call-to-morestack + + p = obj.Appendp(p, c.newprog) + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_R3 + p.Reg = REGSP + p.As = ACMPUBGE + p.To.Type = obj.TYPE_BRANCH + + return p, nil, pCheck + } + + // large stack: SP-framesize < stackguard-StackSmall + + offset := int64(framesize) - objabi.StackSmall + if framesize > objabi.StackBig { + // Such a large stack we need to protect against underflow. + // The runtime guarantees SP > objabi.StackBig, but + // framesize is large enough that SP-framesize may + // underflow, causing a direct comparison with the + // stack guard to incorrectly succeed. We explicitly + // guard against underflow. 
+ // + // MOVD $(framesize-StackSmall), R4 + // CMPUBLT SP, R4, label-of-call-to-morestack + + p = obj.Appendp(p, c.newprog) + p.As = AMOVD + p.From.Type = obj.TYPE_CONST + p.From.Offset = offset + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_R4 + + p = obj.Appendp(p, c.newprog) + pPreempt = p + p.As = ACMPUBLT + p.From.Type = obj.TYPE_REG + p.From.Reg = REGSP + p.Reg = REG_R4 + p.To.Type = obj.TYPE_BRANCH + } + + // Check against the stack guard. We've ensured this won't underflow. + // ADD $-(framesize-StackSmall), SP, R4 + // CMPUBGE stackguard, R4, label-of-call-to-morestack + p = obj.Appendp(p, c.newprog) + p.As = AADD + p.From.Type = obj.TYPE_CONST + p.From.Offset = -offset + p.Reg = REGSP + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_R4 + + p = obj.Appendp(p, c.newprog) + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_R3 + p.Reg = REG_R4 + p.As = ACMPUBGE + p.To.Type = obj.TYPE_BRANCH + + return p, pPreempt, pCheck +} + +// stacksplitPost generates the function epilogue that calls morestack +// and returns the new last instruction in the function. +// +// p is the last Prog in the function. pPre and pPreempt, if non-nil, +// are the instructions that branch to the epilogue. This will fill in +// their branch targets. pCheck is the Prog that begins the stack check. +func (c *ctxtz) stacksplitPost(p *obj.Prog, pPre, pPreempt, pCheck *obj.Prog, framesize int32) *obj.Prog { + // Now we are at the end of the function, but logically + // we are still in function prologue. We need to fix the + // SP data and PCDATA. 
+ spfix := obj.Appendp(p, c.newprog) + spfix.As = obj.ANOP + spfix.Spadj = -framesize + + pcdata := c.ctxt.EmitEntryStackMap(c.cursym, spfix, c.newprog) + pcdata = c.ctxt.StartUnsafePoint(pcdata, c.newprog) + + // MOVD LR, R5 + p = obj.Appendp(pcdata, c.newprog) + pPre.To.SetTarget(p) + p.As = AMOVD + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_LR + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_R5 + if pPreempt != nil { + pPreempt.To.SetTarget(p) + } + + // BL runtime.morestack(SB) + p = obj.Appendp(p, c.newprog) + + p.As = ABL + p.To.Type = obj.TYPE_BRANCH + if c.cursym.CFunc() { + p.To.Sym = c.ctxt.Lookup("runtime.morestackc") + } else if !c.cursym.Func().Text.From.Sym.NeedCtxt() { + p.To.Sym = c.ctxt.Lookup("runtime.morestack_noctxt") + } else { + p.To.Sym = c.ctxt.Lookup("runtime.morestack") + } + + p = c.ctxt.EndUnsafePoint(p, c.newprog, -1) + + // BR pCheck + p = obj.Appendp(p, c.newprog) + + p.As = ABR + p.To.Type = obj.TYPE_BRANCH + p.To.SetTarget(pCheck) + return p +} + +var unaryDst = map[obj.As]bool{ + ASTCK: true, + ASTCKC: true, + ASTCKE: true, + ASTCKF: true, + ANEG: true, + ANEGW: true, + AVONE: true, + AVZERO: true, +} + +var Links390x = obj.LinkArch{ + Arch: sys.ArchS390X, + Init: buildop, + Preprocess: preprocess, + Assemble: spanz, + Progedit: progedit, + UnaryDst: unaryDst, + DWARFRegisters: S390XDWARFRegisters, +} diff --git a/src/cmd/internal/obj/s390x/rotate.go b/src/cmd/internal/obj/s390x/rotate.go new file mode 100644 index 0000000..5407c8d --- /dev/null +++ b/src/cmd/internal/obj/s390x/rotate.go @@ -0,0 +1,117 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package s390x + +import ( + "math/bits" +) + +// RotateParams represents the immediates required for a "rotate +// then ... selected bits instruction". +// +// The Start and End values are the indexes that represent +// the masked region. 
// They are inclusive and are in big-endian order (bit 0 is the MSB,
// bit 63 is the LSB). They may wrap around.
//
// Some examples:
//
//	Masked region             | Start | End
//	--------------------------+-------+----
//	0x00_00_00_00_00_00_00_0f |    60 |  63
//	0xf0_00_00_00_00_00_00_00 |     0 |   3
//	0xf0_00_00_00_00_00_00_0f |    60 |   3
//
// The Amount value represents the amount to rotate the
// input left by. Note that this rotation is performed
// before the masked region is used.
type RotateParams struct {
	Start  uint8 // big-endian start bit index [0..63]
	End    uint8 // big-endian end bit index [0..63]
	Amount uint8 // amount to rotate left
}

// NewRotateParams creates a set of parameters representing a
// rotation left by the amount provided and a selection of the bits
// between the provided start and end indexes (inclusive).
//
// The start and end indexes and the rotation amount must all
// be in the range 0-63 inclusive or this function will panic.
func NewRotateParams(start, end, amount uint8) RotateParams {
	// x&^63 is non-zero exactly when x has a bit set above bit 5,
	// i.e. when x > 63.
	if start&^63 != 0 {
		panic("start out of bounds")
	}
	if end&^63 != 0 {
		panic("end out of bounds")
	}
	if amount&^63 != 0 {
		panic("amount out of bounds")
	}
	return RotateParams{
		Start:  start,
		End:    end,
		Amount: amount,
	}
}

// RotateLeft generates a new set of parameters with the rotation amount
// increased by the given value. The selected bits are left unchanged.
//
// The resulting amount is reduced modulo 64. The receiver is a value, so
// the caller's RotateParams is not modified.
func (r RotateParams) RotateLeft(amount uint8) RotateParams {
	// The uint8 addition wraps modulo 256; because 256 is a multiple of
	// 64 the masked result is still the true sum modulo 64.
	r.Amount += amount
	r.Amount &= 63
	return r
}

// OutMask provides a mask representing the selected bits.
//
// The mask is built left-aligned and then rotated right so that its
// first set bit lands on big-endian index Start. A region that wraps
// all the way round (Start == End+1 modulo 64) selects all 64 bits.
func (r RotateParams) OutMask() uint64 {
	// Note: z must be unsigned for bootstrap compiler
	z := uint8(63-r.End+r.Start) & 63 // number of zero bits in mask
	return bits.RotateLeft64(^uint64(0)<<z, -int(r.Start))
}

// InMask provides a mask representing the selected bits relative
// to the source value (i.e. pre-rotation).
+func (r RotateParams) InMask() uint64 { + return bits.RotateLeft64(r.OutMask(), -int(r.Amount)) +} + +// OutMerge tries to generate a new set of parameters representing +// the intersection between the selected bits and the provided mask. +// If the intersection is unrepresentable (0 or not contiguous) nil +// will be returned. +func (r RotateParams) OutMerge(mask uint64) *RotateParams { + mask &= r.OutMask() + if mask == 0 { + return nil + } + + // normalize the mask so that the set bits are left aligned + o := bits.LeadingZeros64(^mask) + mask = bits.RotateLeft64(mask, o) + z := bits.LeadingZeros64(mask) + mask = bits.RotateLeft64(mask, z) + + // check that the normalized mask is contiguous + l := bits.LeadingZeros64(^mask) + if l+bits.TrailingZeros64(mask) != 64 { + return nil + } + + // update start and end positions (rotation amount remains the same) + r.Start = uint8(o+z) & 63 + r.End = (r.Start + uint8(l) - 1) & 63 + return &r +} + +// InMerge tries to generate a new set of parameters representing +// the intersection between the selected bits and the provided mask +// as applied to the source value (i.e. pre-rotation). +// If the intersection is unrepresentable (0 or not contiguous) nil +// will be returned. +func (r RotateParams) InMerge(mask uint64) *RotateParams { + return r.OutMerge(bits.RotateLeft64(mask, int(r.Amount))) +} + +func (RotateParams) CanBeAnSSAAux() {} diff --git a/src/cmd/internal/obj/s390x/rotate_test.go b/src/cmd/internal/obj/s390x/rotate_test.go new file mode 100644 index 0000000..88421b1 --- /dev/null +++ b/src/cmd/internal/obj/s390x/rotate_test.go @@ -0,0 +1,122 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package s390x + +import ( + "testing" +) + +func TestRotateParamsMask(t *testing.T) { + tests := []struct { + start, end, amount uint8 + inMask, outMask uint64 + }{ + // start before end, no rotation + {start: 0, end: 63, amount: 0, inMask: ^uint64(0), outMask: ^uint64(0)}, + {start: 1, end: 63, amount: 0, inMask: ^uint64(0) >> 1, outMask: ^uint64(0) >> 1}, + {start: 0, end: 62, amount: 0, inMask: ^uint64(1), outMask: ^uint64(1)}, + {start: 1, end: 62, amount: 0, inMask: ^uint64(3) >> 1, outMask: ^uint64(3) >> 1}, + + // end before start, no rotation + {start: 63, end: 0, amount: 0, inMask: 1<<63 | 1, outMask: 1<<63 | 1}, + {start: 62, end: 0, amount: 0, inMask: 1<<63 | 3, outMask: 1<<63 | 3}, + {start: 63, end: 1, amount: 0, inMask: 3<<62 | 1, outMask: 3<<62 | 1}, + {start: 62, end: 1, amount: 0, inMask: 3<<62 | 3, outMask: 3<<62 | 3}, + + // rotation + {start: 32, end: 63, amount: 32, inMask: 0xffffffff00000000, outMask: 0x00000000ffffffff}, + {start: 48, end: 15, amount: 16, inMask: 0xffffffff00000000, outMask: 0xffff00000000ffff}, + {start: 0, end: 7, amount: -8 & 63, inMask: 0xff, outMask: 0xff << 56}, + } + for i, test := range tests { + r := NewRotateParams(test.start, test.end, test.amount) + if m := r.OutMask(); m != test.outMask { + t.Errorf("out mask %v: want %#x, got %#x", i, test.outMask, m) + } + if m := r.InMask(); m != test.inMask { + t.Errorf("in mask %v: want %#x, got %#x", i, test.inMask, m) + } + } +} + +func TestRotateParamsMerge(t *testing.T) { + tests := []struct { + // inputs + src RotateParams + mask uint64 + + // results + in *RotateParams + out *RotateParams + }{ + { + src: RotateParams{Start: 48, End: 15, Amount: 16}, + mask: 0xffffffffffffffff, + in: &RotateParams{Start: 48, End: 15, Amount: 16}, + out: &RotateParams{Start: 48, End: 15, Amount: 16}, + }, + { + src: RotateParams{Start: 16, End: 47, Amount: 0}, + mask: 0x00000000ffffffff, + in: &RotateParams{Start: 32, End: 47, Amount: 0}, + out: &RotateParams{Start: 32, End: 47, 
Amount: 0}, + }, + { + src: RotateParams{Start: 16, End: 47, Amount: 0}, + mask: 0xffff00000000ffff, + in: nil, + out: nil, + }, + { + src: RotateParams{Start: 0, End: 63, Amount: 0}, + mask: 0xf7f0000000000000, + in: nil, + out: nil, + }, + { + src: RotateParams{Start: 0, End: 63, Amount: 1}, + mask: 0x000000000000ff00, + in: &RotateParams{Start: 47, End: 54, Amount: 1}, + out: &RotateParams{Start: 48, End: 55, Amount: 1}, + }, + { + src: RotateParams{Start: 32, End: 63, Amount: 32}, + mask: 0xffff00000000ffff, + in: &RotateParams{Start: 32, End: 47, Amount: 32}, + out: &RotateParams{Start: 48, End: 63, Amount: 32}, + }, + { + src: RotateParams{Start: 0, End: 31, Amount: 32}, + mask: 0x8000000000000000, + in: nil, + out: &RotateParams{Start: 0, End: 0, Amount: 32}, + }, + { + src: RotateParams{Start: 0, End: 31, Amount: 32}, + mask: 0x0000000080000000, + in: &RotateParams{Start: 0, End: 0, Amount: 32}, + out: nil, + }, + } + + eq := func(x, y *RotateParams) bool { + if x == nil && y == nil { + return true + } + if x == nil || y == nil { + return false + } + return *x == *y + } + + for _, test := range tests { + if r := test.src.InMerge(test.mask); !eq(r, test.in) { + t.Errorf("%v merged with %#x (input): want %v, got %v", test.src, test.mask, test.in, r) + } + if r := test.src.OutMerge(test.mask); !eq(r, test.out) { + t.Errorf("%v merged with %#x (output): want %v, got %v", test.src, test.mask, test.out, r) + } + } +} diff --git a/src/cmd/internal/obj/s390x/vector.go b/src/cmd/internal/obj/s390x/vector.go new file mode 100644 index 0000000..00f5783 --- /dev/null +++ b/src/cmd/internal/obj/s390x/vector.go @@ -0,0 +1,1069 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package s390x + +import ( + "cmd/internal/obj" +) + +// This file contains utility functions for use when +// assembling vector instructions. 
+ +// vop returns the opcode, element size and condition +// setting for the given (possibly extended) mnemonic. +func vop(as obj.As) (opcode, es, cs uint32) { + switch as { + default: + return 0, 0, 0 + case AVA: + return op_VA, 0, 0 + case AVAB: + return op_VA, 0, 0 + case AVAH: + return op_VA, 1, 0 + case AVAF: + return op_VA, 2, 0 + case AVAG: + return op_VA, 3, 0 + case AVAQ: + return op_VA, 4, 0 + case AVACC: + return op_VACC, 0, 0 + case AVACCB: + return op_VACC, 0, 0 + case AVACCH: + return op_VACC, 1, 0 + case AVACCF: + return op_VACC, 2, 0 + case AVACCG: + return op_VACC, 3, 0 + case AVACCQ: + return op_VACC, 4, 0 + case AVAC: + return op_VAC, 0, 0 + case AVACQ: + return op_VAC, 4, 0 + case AVMSLG, AVMSLEG, AVMSLOG, AVMSLEOG: + return op_VMSL, 3, 0 + case AVACCC: + return op_VACCC, 0, 0 + case AVACCCQ: + return op_VACCC, 4, 0 + case AVN: + return op_VN, 0, 0 + case AVNC: + return op_VNC, 0, 0 + case AVAVG: + return op_VAVG, 0, 0 + case AVAVGB: + return op_VAVG, 0, 0 + case AVAVGH: + return op_VAVG, 1, 0 + case AVAVGF: + return op_VAVG, 2, 0 + case AVAVGG: + return op_VAVG, 3, 0 + case AVAVGL: + return op_VAVGL, 0, 0 + case AVAVGLB: + return op_VAVGL, 0, 0 + case AVAVGLH: + return op_VAVGL, 1, 0 + case AVAVGLF: + return op_VAVGL, 2, 0 + case AVAVGLG: + return op_VAVGL, 3, 0 + case AVCKSM: + return op_VCKSM, 0, 0 + case AVCEQ: + return op_VCEQ, 0, 0 + case AVCEQB: + return op_VCEQ, 0, 0 + case AVCEQH: + return op_VCEQ, 1, 0 + case AVCEQF: + return op_VCEQ, 2, 0 + case AVCEQG: + return op_VCEQ, 3, 0 + case AVCEQBS: + return op_VCEQ, 0, 1 + case AVCEQHS: + return op_VCEQ, 1, 1 + case AVCEQFS: + return op_VCEQ, 2, 1 + case AVCEQGS: + return op_VCEQ, 3, 1 + case AVCH: + return op_VCH, 0, 0 + case AVCHB: + return op_VCH, 0, 0 + case AVCHH: + return op_VCH, 1, 0 + case AVCHF: + return op_VCH, 2, 0 + case AVCHG: + return op_VCH, 3, 0 + case AVCHBS: + return op_VCH, 0, 1 + case AVCHHS: + return op_VCH, 1, 1 + case AVCHFS: + return op_VCH, 2, 1 + case AVCHGS: + 
return op_VCH, 3, 1 + case AVCHL: + return op_VCHL, 0, 0 + case AVCHLB: + return op_VCHL, 0, 0 + case AVCHLH: + return op_VCHL, 1, 0 + case AVCHLF: + return op_VCHL, 2, 0 + case AVCHLG: + return op_VCHL, 3, 0 + case AVCHLBS: + return op_VCHL, 0, 1 + case AVCHLHS: + return op_VCHL, 1, 1 + case AVCHLFS: + return op_VCHL, 2, 1 + case AVCHLGS: + return op_VCHL, 3, 1 + case AVCLZ: + return op_VCLZ, 0, 0 + case AVCLZB: + return op_VCLZ, 0, 0 + case AVCLZH: + return op_VCLZ, 1, 0 + case AVCLZF: + return op_VCLZ, 2, 0 + case AVCLZG: + return op_VCLZ, 3, 0 + case AVCTZ: + return op_VCTZ, 0, 0 + case AVCTZB: + return op_VCTZ, 0, 0 + case AVCTZH: + return op_VCTZ, 1, 0 + case AVCTZF: + return op_VCTZ, 2, 0 + case AVCTZG: + return op_VCTZ, 3, 0 + case AVEC: + return op_VEC, 0, 0 + case AVECB: + return op_VEC, 0, 0 + case AVECH: + return op_VEC, 1, 0 + case AVECF: + return op_VEC, 2, 0 + case AVECG: + return op_VEC, 3, 0 + case AVECL: + return op_VECL, 0, 0 + case AVECLB: + return op_VECL, 0, 0 + case AVECLH: + return op_VECL, 1, 0 + case AVECLF: + return op_VECL, 2, 0 + case AVECLG: + return op_VECL, 3, 0 + case AVERIM: + return op_VERIM, 0, 0 + case AVERIMB: + return op_VERIM, 0, 0 + case AVERIMH: + return op_VERIM, 1, 0 + case AVERIMF: + return op_VERIM, 2, 0 + case AVERIMG: + return op_VERIM, 3, 0 + case AVERLL: + return op_VERLL, 0, 0 + case AVERLLB: + return op_VERLL, 0, 0 + case AVERLLH: + return op_VERLL, 1, 0 + case AVERLLF: + return op_VERLL, 2, 0 + case AVERLLG: + return op_VERLL, 3, 0 + case AVERLLV: + return op_VERLLV, 0, 0 + case AVERLLVB: + return op_VERLLV, 0, 0 + case AVERLLVH: + return op_VERLLV, 1, 0 + case AVERLLVF: + return op_VERLLV, 2, 0 + case AVERLLVG: + return op_VERLLV, 3, 0 + case AVESLV: + return op_VESLV, 0, 0 + case AVESLVB: + return op_VESLV, 0, 0 + case AVESLVH: + return op_VESLV, 1, 0 + case AVESLVF: + return op_VESLV, 2, 0 + case AVESLVG: + return op_VESLV, 3, 0 + case AVESL: + return op_VESL, 0, 0 + case AVESLB: + return op_VESL, 0, 0 + case 
AVESLH: + return op_VESL, 1, 0 + case AVESLF: + return op_VESL, 2, 0 + case AVESLG: + return op_VESL, 3, 0 + case AVESRA: + return op_VESRA, 0, 0 + case AVESRAB: + return op_VESRA, 0, 0 + case AVESRAH: + return op_VESRA, 1, 0 + case AVESRAF: + return op_VESRA, 2, 0 + case AVESRAG: + return op_VESRA, 3, 0 + case AVESRAV: + return op_VESRAV, 0, 0 + case AVESRAVB: + return op_VESRAV, 0, 0 + case AVESRAVH: + return op_VESRAV, 1, 0 + case AVESRAVF: + return op_VESRAV, 2, 0 + case AVESRAVG: + return op_VESRAV, 3, 0 + case AVESRL: + return op_VESRL, 0, 0 + case AVESRLB: + return op_VESRL, 0, 0 + case AVESRLH: + return op_VESRL, 1, 0 + case AVESRLF: + return op_VESRL, 2, 0 + case AVESRLG: + return op_VESRL, 3, 0 + case AVESRLV: + return op_VESRLV, 0, 0 + case AVESRLVB: + return op_VESRLV, 0, 0 + case AVESRLVH: + return op_VESRLV, 1, 0 + case AVESRLVF: + return op_VESRLV, 2, 0 + case AVESRLVG: + return op_VESRLV, 3, 0 + case AVX: + return op_VX, 0, 0 + case AVFAE: + return op_VFAE, 0, 0 + case AVFAEB: + return op_VFAE, 0, 0 + case AVFAEH: + return op_VFAE, 1, 0 + case AVFAEF: + return op_VFAE, 2, 0 + case AVFAEBS: + return op_VFAE, 0, 1 + case AVFAEHS: + return op_VFAE, 1, 1 + case AVFAEFS: + return op_VFAE, 2, 1 + case AVFAEZB: + return op_VFAE, 0, 2 + case AVFAEZH: + return op_VFAE, 1, 2 + case AVFAEZF: + return op_VFAE, 2, 2 + case AVFAEZBS: + return op_VFAE, 0, 3 + case AVFAEZHS: + return op_VFAE, 1, 3 + case AVFAEZFS: + return op_VFAE, 2, 3 + case AVFEE: + return op_VFEE, 0, 0 + case AVFEEB: + return op_VFEE, 0, 0 + case AVFEEH: + return op_VFEE, 1, 0 + case AVFEEF: + return op_VFEE, 2, 0 + case AVFEEBS: + return op_VFEE, 0, 1 + case AVFEEHS: + return op_VFEE, 1, 1 + case AVFEEFS: + return op_VFEE, 2, 1 + case AVFEEZB: + return op_VFEE, 0, 2 + case AVFEEZH: + return op_VFEE, 1, 2 + case AVFEEZF: + return op_VFEE, 2, 2 + case AVFEEZBS: + return op_VFEE, 0, 3 + case AVFEEZHS: + return op_VFEE, 1, 3 + case AVFEEZFS: + return op_VFEE, 2, 3 + case AVFENE: + return op_VFENE, 
0, 0 + case AVFENEB: + return op_VFENE, 0, 0 + case AVFENEH: + return op_VFENE, 1, 0 + case AVFENEF: + return op_VFENE, 2, 0 + case AVFENEBS: + return op_VFENE, 0, 1 + case AVFENEHS: + return op_VFENE, 1, 1 + case AVFENEFS: + return op_VFENE, 2, 1 + case AVFENEZB: + return op_VFENE, 0, 2 + case AVFENEZH: + return op_VFENE, 1, 2 + case AVFENEZF: + return op_VFENE, 2, 2 + case AVFENEZBS: + return op_VFENE, 0, 3 + case AVFENEZHS: + return op_VFENE, 1, 3 + case AVFENEZFS: + return op_VFENE, 2, 3 + case AVFA: + return op_VFA, 0, 0 + case AVFADB: + return op_VFA, 3, 0 + case AWFADB: + return op_VFA, 3, 0 + case AWFK: + return op_WFK, 0, 0 + case AWFKDB: + return op_WFK, 3, 0 + case AVFCE: + return op_VFCE, 0, 0 + case AVFCEDB: + return op_VFCE, 3, 0 + case AVFCEDBS: + return op_VFCE, 3, 1 + case AWFCEDB: + return op_VFCE, 3, 0 + case AWFCEDBS: + return op_VFCE, 3, 1 + case AVFCH: + return op_VFCH, 0, 0 + case AVFCHDB: + return op_VFCH, 3, 0 + case AVFCHDBS: + return op_VFCH, 3, 1 + case AWFCHDB: + return op_VFCH, 3, 0 + case AWFCHDBS: + return op_VFCH, 3, 1 + case AVFCHE: + return op_VFCHE, 0, 0 + case AVFCHEDB: + return op_VFCHE, 3, 0 + case AVFCHEDBS: + return op_VFCHE, 3, 1 + case AWFCHEDB: + return op_VFCHE, 3, 0 + case AWFCHEDBS: + return op_VFCHE, 3, 1 + case AWFC: + return op_WFC, 0, 0 + case AWFCDB: + return op_WFC, 3, 0 + case AVCDG: + return op_VCDG, 0, 0 + case AVCDGB: + return op_VCDG, 3, 0 + case AWCDGB: + return op_VCDG, 3, 0 + case AVCDLG: + return op_VCDLG, 0, 0 + case AVCDLGB: + return op_VCDLG, 3, 0 + case AWCDLGB: + return op_VCDLG, 3, 0 + case AVCGD: + return op_VCGD, 0, 0 + case AVCGDB: + return op_VCGD, 3, 0 + case AWCGDB: + return op_VCGD, 3, 0 + case AVCLGD: + return op_VCLGD, 0, 0 + case AVCLGDB: + return op_VCLGD, 3, 0 + case AWCLGDB: + return op_VCLGD, 3, 0 + case AVFD: + return op_VFD, 0, 0 + case AVFDDB: + return op_VFD, 3, 0 + case AWFDDB: + return op_VFD, 3, 0 + case AVLDE: + return op_VLDE, 0, 0 + case AVLDEB: + return op_VLDE, 2, 0 + case 
AWLDEB: + return op_VLDE, 2, 0 + case AVLED: + return op_VLED, 0, 0 + case AVLEDB: + return op_VLED, 3, 0 + case AWLEDB: + return op_VLED, 3, 0 + case AVFM: + return op_VFM, 0, 0 + case AVFMDB: + return op_VFM, 3, 0 + case AWFMDB: + return op_VFM, 3, 0 + case AVFMA: + return op_VFMA, 0, 0 + case AVFMADB: + return op_VFMA, 3, 0 + case AWFMADB: + return op_VFMA, 3, 0 + case AVFMS: + return op_VFMS, 0, 0 + case AVFMSDB: + return op_VFMS, 3, 0 + case AWFMSDB: + return op_VFMS, 3, 0 + case AVFPSO: + return op_VFPSO, 0, 0 + case AVFPSODB: + return op_VFPSO, 3, 0 + case AWFPSODB: + return op_VFPSO, 3, 0 + case AVFLCDB: + return op_VFPSO, 3, 0 + case AWFLCDB: + return op_VFPSO, 3, 0 + case AVFLNDB: + return op_VFPSO, 3, 1 + case AWFLNDB: + return op_VFPSO, 3, 1 + case AVFLPDB: + return op_VFPSO, 3, 2 + case AWFLPDB: + return op_VFPSO, 3, 2 + case AVFSQ: + return op_VFSQ, 0, 0 + case AVFSQDB: + return op_VFSQ, 3, 0 + case AWFSQDB: + return op_VFSQ, 3, 0 + case AVFS: + return op_VFS, 0, 0 + case AVFSDB: + return op_VFS, 3, 0 + case AWFSDB: + return op_VFS, 3, 0 + case AVFTCI: + return op_VFTCI, 0, 0 + case AVFTCIDB: + return op_VFTCI, 3, 0 + case AWFTCIDB: + return op_VFTCI, 3, 0 + case AVGFM: + return op_VGFM, 0, 0 + case AVGFMB: + return op_VGFM, 0, 0 + case AVGFMH: + return op_VGFM, 1, 0 + case AVGFMF: + return op_VGFM, 2, 0 + case AVGFMG: + return op_VGFM, 3, 0 + case AVGFMA: + return op_VGFMA, 0, 0 + case AVGFMAB: + return op_VGFMA, 0, 0 + case AVGFMAH: + return op_VGFMA, 1, 0 + case AVGFMAF: + return op_VGFMA, 2, 0 + case AVGFMAG: + return op_VGFMA, 3, 0 + case AVGEF: + return op_VGEF, 0, 0 + case AVGEG: + return op_VGEG, 0, 0 + case AVGBM: + return op_VGBM, 0, 0 + case AVZERO: + return op_VGBM, 0, 0 + case AVONE: + return op_VGBM, 0, 0 + case AVGM: + return op_VGM, 0, 0 + case AVGMB: + return op_VGM, 0, 0 + case AVGMH: + return op_VGM, 1, 0 + case AVGMF: + return op_VGM, 2, 0 + case AVGMG: + return op_VGM, 3, 0 + case AVISTR: + return op_VISTR, 0, 0 + case AVISTRB: + 
return op_VISTR, 0, 0 + case AVISTRH: + return op_VISTR, 1, 0 + case AVISTRF: + return op_VISTR, 2, 0 + case AVISTRBS: + return op_VISTR, 0, 1 + case AVISTRHS: + return op_VISTR, 1, 1 + case AVISTRFS: + return op_VISTR, 2, 1 + case AVL: + return op_VL, 0, 0 + case AVLR: + return op_VLR, 0, 0 + case AVLREP: + return op_VLREP, 0, 0 + case AVLREPB: + return op_VLREP, 0, 0 + case AVLREPH: + return op_VLREP, 1, 0 + case AVLREPF: + return op_VLREP, 2, 0 + case AVLREPG: + return op_VLREP, 3, 0 + case AVLC: + return op_VLC, 0, 0 + case AVLCB: + return op_VLC, 0, 0 + case AVLCH: + return op_VLC, 1, 0 + case AVLCF: + return op_VLC, 2, 0 + case AVLCG: + return op_VLC, 3, 0 + case AVLEH: + return op_VLEH, 0, 0 + case AVLEF: + return op_VLEF, 0, 0 + case AVLEG: + return op_VLEG, 0, 0 + case AVLEB: + return op_VLEB, 0, 0 + case AVLEIH: + return op_VLEIH, 0, 0 + case AVLEIF: + return op_VLEIF, 0, 0 + case AVLEIG: + return op_VLEIG, 0, 0 + case AVLEIB: + return op_VLEIB, 0, 0 + case AVFI: + return op_VFI, 0, 0 + case AVFIDB: + return op_VFI, 3, 0 + case AWFIDB: + return op_VFI, 3, 0 + case AVLGV: + return op_VLGV, 0, 0 + case AVLGVB: + return op_VLGV, 0, 0 + case AVLGVH: + return op_VLGV, 1, 0 + case AVLGVF: + return op_VLGV, 2, 0 + case AVLGVG: + return op_VLGV, 3, 0 + case AVLLEZ: + return op_VLLEZ, 0, 0 + case AVLLEZB: + return op_VLLEZ, 0, 0 + case AVLLEZH: + return op_VLLEZ, 1, 0 + case AVLLEZF: + return op_VLLEZ, 2, 0 + case AVLLEZG: + return op_VLLEZ, 3, 0 + case AVLM: + return op_VLM, 0, 0 + case AVLP: + return op_VLP, 0, 0 + case AVLPB: + return op_VLP, 0, 0 + case AVLPH: + return op_VLP, 1, 0 + case AVLPF: + return op_VLP, 2, 0 + case AVLPG: + return op_VLP, 3, 0 + case AVLBB: + return op_VLBB, 0, 0 + case AVLVG: + return op_VLVG, 0, 0 + case AVLVGB: + return op_VLVG, 0, 0 + case AVLVGH: + return op_VLVG, 1, 0 + case AVLVGF: + return op_VLVG, 2, 0 + case AVLVGG: + return op_VLVG, 3, 0 + case AVLVGP: + return op_VLVGP, 0, 0 + case AVLL: + return op_VLL, 0, 0 + case AVMX: 
+ return op_VMX, 0, 0 + case AVMXB: + return op_VMX, 0, 0 + case AVMXH: + return op_VMX, 1, 0 + case AVMXF: + return op_VMX, 2, 0 + case AVMXG: + return op_VMX, 3, 0 + case AVMXL: + return op_VMXL, 0, 0 + case AVMXLB: + return op_VMXL, 0, 0 + case AVMXLH: + return op_VMXL, 1, 0 + case AVMXLF: + return op_VMXL, 2, 0 + case AVMXLG: + return op_VMXL, 3, 0 + case AVMRH: + return op_VMRH, 0, 0 + case AVMRHB: + return op_VMRH, 0, 0 + case AVMRHH: + return op_VMRH, 1, 0 + case AVMRHF: + return op_VMRH, 2, 0 + case AVMRHG: + return op_VMRH, 3, 0 + case AVMRL: + return op_VMRL, 0, 0 + case AVMRLB: + return op_VMRL, 0, 0 + case AVMRLH: + return op_VMRL, 1, 0 + case AVMRLF: + return op_VMRL, 2, 0 + case AVMRLG: + return op_VMRL, 3, 0 + case AVMN: + return op_VMN, 0, 0 + case AVMNB: + return op_VMN, 0, 0 + case AVMNH: + return op_VMN, 1, 0 + case AVMNF: + return op_VMN, 2, 0 + case AVMNG: + return op_VMN, 3, 0 + case AVMNL: + return op_VMNL, 0, 0 + case AVMNLB: + return op_VMNL, 0, 0 + case AVMNLH: + return op_VMNL, 1, 0 + case AVMNLF: + return op_VMNL, 2, 0 + case AVMNLG: + return op_VMNL, 3, 0 + case AVMAE: + return op_VMAE, 0, 0 + case AVMAEB: + return op_VMAE, 0, 0 + case AVMAEH: + return op_VMAE, 1, 0 + case AVMAEF: + return op_VMAE, 2, 0 + case AVMAH: + return op_VMAH, 0, 0 + case AVMAHB: + return op_VMAH, 0, 0 + case AVMAHH: + return op_VMAH, 1, 0 + case AVMAHF: + return op_VMAH, 2, 0 + case AVMALE: + return op_VMALE, 0, 0 + case AVMALEB: + return op_VMALE, 0, 0 + case AVMALEH: + return op_VMALE, 1, 0 + case AVMALEF: + return op_VMALE, 2, 0 + case AVMALH: + return op_VMALH, 0, 0 + case AVMALHB: + return op_VMALH, 0, 0 + case AVMALHH: + return op_VMALH, 1, 0 + case AVMALHF: + return op_VMALH, 2, 0 + case AVMALO: + return op_VMALO, 0, 0 + case AVMALOB: + return op_VMALO, 0, 0 + case AVMALOH: + return op_VMALO, 1, 0 + case AVMALOF: + return op_VMALO, 2, 0 + case AVMAL: + return op_VMAL, 0, 0 + case AVMALB: + return op_VMAL, 0, 0 + case AVMALHW: + return op_VMAL, 1, 0 + 
case AVMALF: + return op_VMAL, 2, 0 + case AVMAO: + return op_VMAO, 0, 0 + case AVMAOB: + return op_VMAO, 0, 0 + case AVMAOH: + return op_VMAO, 1, 0 + case AVMAOF: + return op_VMAO, 2, 0 + case AVME: + return op_VME, 0, 0 + case AVMEB: + return op_VME, 0, 0 + case AVMEH: + return op_VME, 1, 0 + case AVMEF: + return op_VME, 2, 0 + case AVMH: + return op_VMH, 0, 0 + case AVMHB: + return op_VMH, 0, 0 + case AVMHH: + return op_VMH, 1, 0 + case AVMHF: + return op_VMH, 2, 0 + case AVMLE: + return op_VMLE, 0, 0 + case AVMLEB: + return op_VMLE, 0, 0 + case AVMLEH: + return op_VMLE, 1, 0 + case AVMLEF: + return op_VMLE, 2, 0 + case AVMLH: + return op_VMLH, 0, 0 + case AVMLHB: + return op_VMLH, 0, 0 + case AVMLHH: + return op_VMLH, 1, 0 + case AVMLHF: + return op_VMLH, 2, 0 + case AVMLO: + return op_VMLO, 0, 0 + case AVMLOB: + return op_VMLO, 0, 0 + case AVMLOH: + return op_VMLO, 1, 0 + case AVMLOF: + return op_VMLO, 2, 0 + case AVML: + return op_VML, 0, 0 + case AVMLB: + return op_VML, 0, 0 + case AVMLHW: + return op_VML, 1, 0 + case AVMLF: + return op_VML, 2, 0 + case AVMO: + return op_VMO, 0, 0 + case AVMOB: + return op_VMO, 0, 0 + case AVMOH: + return op_VMO, 1, 0 + case AVMOF: + return op_VMO, 2, 0 + case AVNO: + return op_VNO, 0, 0 + case AVNOT: + return op_VNO, 0, 0 + case AVO: + return op_VO, 0, 0 + case AVPK: + return op_VPK, 0, 0 + case AVPKH: + return op_VPK, 1, 0 + case AVPKF: + return op_VPK, 2, 0 + case AVPKG: + return op_VPK, 3, 0 + case AVPKLS: + return op_VPKLS, 0, 0 + case AVPKLSH: + return op_VPKLS, 1, 0 + case AVPKLSF: + return op_VPKLS, 2, 0 + case AVPKLSG: + return op_VPKLS, 3, 0 + case AVPKLSHS: + return op_VPKLS, 1, 1 + case AVPKLSFS: + return op_VPKLS, 2, 1 + case AVPKLSGS: + return op_VPKLS, 3, 1 + case AVPKS: + return op_VPKS, 0, 0 + case AVPKSH: + return op_VPKS, 1, 0 + case AVPKSF: + return op_VPKS, 2, 0 + case AVPKSG: + return op_VPKS, 3, 0 + case AVPKSHS: + return op_VPKS, 1, 1 + case AVPKSFS: + return op_VPKS, 2, 1 + case AVPKSGS: + return 
op_VPKS, 3, 1 + case AVPERM: + return op_VPERM, 0, 0 + case AVPDI: + return op_VPDI, 0, 0 + case AVPOPCT: + return op_VPOPCT, 0, 0 + case AVREP: + return op_VREP, 0, 0 + case AVREPB: + return op_VREP, 0, 0 + case AVREPH: + return op_VREP, 1, 0 + case AVREPF: + return op_VREP, 2, 0 + case AVREPG: + return op_VREP, 3, 0 + case AVREPI: + return op_VREPI, 0, 0 + case AVREPIB: + return op_VREPI, 0, 0 + case AVREPIH: + return op_VREPI, 1, 0 + case AVREPIF: + return op_VREPI, 2, 0 + case AVREPIG: + return op_VREPI, 3, 0 + case AVSCEF: + return op_VSCEF, 0, 0 + case AVSCEG: + return op_VSCEG, 0, 0 + case AVSEL: + return op_VSEL, 0, 0 + case AVSL: + return op_VSL, 0, 0 + case AVSLB: + return op_VSLB, 0, 0 + case AVSLDB: + return op_VSLDB, 0, 0 + case AVSRA: + return op_VSRA, 0, 0 + case AVSRAB: + return op_VSRAB, 0, 0 + case AVSRL: + return op_VSRL, 0, 0 + case AVSRLB: + return op_VSRLB, 0, 0 + case AVSEG: + return op_VSEG, 0, 0 + case AVSEGB: + return op_VSEG, 0, 0 + case AVSEGH: + return op_VSEG, 1, 0 + case AVSEGF: + return op_VSEG, 2, 0 + case AVST: + return op_VST, 0, 0 + case AVSTEH: + return op_VSTEH, 0, 0 + case AVSTEF: + return op_VSTEF, 0, 0 + case AVSTEG: + return op_VSTEG, 0, 0 + case AVSTEB: + return op_VSTEB, 0, 0 + case AVSTM: + return op_VSTM, 0, 0 + case AVSTL: + return op_VSTL, 0, 0 + case AVSTRC: + return op_VSTRC, 0, 0 + case AVSTRCB: + return op_VSTRC, 0, 0 + case AVSTRCH: + return op_VSTRC, 1, 0 + case AVSTRCF: + return op_VSTRC, 2, 0 + case AVSTRCBS: + return op_VSTRC, 0, 1 + case AVSTRCHS: + return op_VSTRC, 1, 1 + case AVSTRCFS: + return op_VSTRC, 2, 1 + case AVSTRCZB: + return op_VSTRC, 0, 2 + case AVSTRCZH: + return op_VSTRC, 1, 2 + case AVSTRCZF: + return op_VSTRC, 2, 2 + case AVSTRCZBS: + return op_VSTRC, 0, 3 + case AVSTRCZHS: + return op_VSTRC, 1, 3 + case AVSTRCZFS: + return op_VSTRC, 2, 3 + case AVS: + return op_VS, 0, 0 + case AVSB: + return op_VS, 0, 0 + case AVSH: + return op_VS, 1, 0 + case AVSF: + return op_VS, 2, 0 + case AVSG: + 
return op_VS, 3, 0 + case AVSQ: + return op_VS, 4, 0 + case AVSCBI: + return op_VSCBI, 0, 0 + case AVSCBIB: + return op_VSCBI, 0, 0 + case AVSCBIH: + return op_VSCBI, 1, 0 + case AVSCBIF: + return op_VSCBI, 2, 0 + case AVSCBIG: + return op_VSCBI, 3, 0 + case AVSCBIQ: + return op_VSCBI, 4, 0 + case AVSBCBI: + return op_VSBCBI, 0, 0 + case AVSBCBIQ: + return op_VSBCBI, 4, 0 + case AVSBI: + return op_VSBI, 0, 0 + case AVSBIQ: + return op_VSBI, 4, 0 + case AVSUMG: + return op_VSUMG, 0, 0 + case AVSUMGH: + return op_VSUMG, 1, 0 + case AVSUMGF: + return op_VSUMG, 2, 0 + case AVSUMQ: + return op_VSUMQ, 0, 0 + case AVSUMQF: + return op_VSUMQ, 2, 0 + case AVSUMQG: + return op_VSUMQ, 3, 0 + case AVSUM: + return op_VSUM, 0, 0 + case AVSUMB: + return op_VSUM, 0, 0 + case AVSUMH: + return op_VSUM, 1, 0 + case AVTM: + return op_VTM, 0, 0 + case AVUPH: + return op_VUPH, 0, 0 + case AVUPHB: + return op_VUPH, 0, 0 + case AVUPHH: + return op_VUPH, 1, 0 + case AVUPHF: + return op_VUPH, 2, 0 + case AVUPLH: + return op_VUPLH, 0, 0 + case AVUPLHB: + return op_VUPLH, 0, 0 + case AVUPLHH: + return op_VUPLH, 1, 0 + case AVUPLHF: + return op_VUPLH, 2, 0 + case AVUPLL: + return op_VUPLL, 0, 0 + case AVUPLLB: + return op_VUPLL, 0, 0 + case AVUPLLH: + return op_VUPLL, 1, 0 + case AVUPLLF: + return op_VUPLL, 2, 0 + case AVUPL: + return op_VUPL, 0, 0 + case AVUPLB: + return op_VUPL, 0, 0 + case AVUPLHW: + return op_VUPL, 1, 0 + case AVUPLF: + return op_VUPL, 2, 0 + } +} + +// singleElementMask returns the single element mask bits required for the +// given instruction. 
+func singleElementMask(as obj.As) uint32 { + switch as { + case AWFADB, + AWFK, + AWFKDB, + AWFCEDB, + AWFCEDBS, + AWFCHDB, + AWFCHDBS, + AWFCHEDB, + AWFCHEDBS, + AWFC, + AWFCDB, + AWCDGB, + AWCDLGB, + AWCGDB, + AWCLGDB, + AWFDDB, + AWLDEB, + AWLEDB, + AWFMDB, + AWFMADB, + AWFMSDB, + AWFPSODB, + AWFLCDB, + AWFLNDB, + AWFLPDB, + AWFSQDB, + AWFSDB, + AWFTCIDB, + AWFIDB: + return 8 + case AVMSLEG: + return 8 + case AVMSLOG: + return 4 + case AVMSLEOG: + return 12 + } + return 0 +} diff --git a/src/cmd/internal/obj/sizeof_test.go b/src/cmd/internal/obj/sizeof_test.go new file mode 100644 index 0000000..69e6047 --- /dev/null +++ b/src/cmd/internal/obj/sizeof_test.go @@ -0,0 +1,38 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package obj + +import ( + "reflect" + "testing" + "unsafe" +) + +// Assert that the size of important structures do not change unexpectedly. + +func TestSizeof(t *testing.T) { + const _64bit = unsafe.Sizeof(uintptr(0)) == 8 + + var tests = []struct { + val interface{} // type as a value + _32bit uintptr // size on 32bit platforms + _64bit uintptr // size on 64bit platforms + }{ + {Addr{}, 32, 48}, + {LSym{}, 72, 120}, + {Prog{}, 132, 200}, + } + + for _, tt := range tests { + want := tt._32bit + if _64bit { + want = tt._64bit + } + got := reflect.TypeOf(tt.val).Size() + if want != got { + t.Errorf("unsafe.Sizeof(%T) = %d, want %d", tt.val, got, want) + } + } +} diff --git a/src/cmd/internal/obj/stringer.go b/src/cmd/internal/obj/stringer.go new file mode 100644 index 0000000..a4d507d --- /dev/null +++ b/src/cmd/internal/obj/stringer.go @@ -0,0 +1,104 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
// This is a mini version of the stringer tool customized for the Anames table
// in the architecture support for obj.
// This version just generates the slice of strings, not the String method.

// Command-line flags; all three are required.
var (
	input  = flag.String("i", "", "input file name")
	output = flag.String("o", "", "output file name")
	pkg    = flag.String("p", "", "package name")
)

// Are matches a tab-indented opcode constant and captures its name without
// the leading "A".
var Are = regexp.MustCompile(`^\tA([A-Za-z0-9]+)`)

// main reads the file named by -i, generates the Anames table for package -p
// and writes it to the file named by -o. It exits with status 2 when a flag
// is missing and with status 1 on any I/O error.
func main() {
	flag.Parse()
	if *input == "" || *output == "" || *pkg == "" {
		flag.Usage()
		os.Exit(2)
	}
	in, err := os.Open(*input)
	if err != nil {
		log.Fatal(err)
	}
	defer in.Close()
	fd, err := os.Create(*output)
	if err != nil {
		log.Fatal(err)
	}
	out := bufio.NewWriter(fd)
	scanErr := writeAnames(bufio.NewScanner(in), out, *input, *output, *pkg)
	// Flush and close explicitly: log.Fatal exits without running deferred
	// calls, so a deferred Flush would be skipped on the error paths below.
	if err := out.Flush(); err != nil {
		log.Fatal(err)
	}
	if err := fd.Close(); err != nil {
		log.Fatal(err)
	}
	if scanErr != nil {
		log.Fatal(scanErr)
	}
}

// writeAnames scans Go source from s and writes the generated Anames table to
// out. Nothing but the closing brace is written unless a line containing
// "= obj.ABase" is found; from that line on, every tab-indented identifier
// starting with "A" becomes one table entry, until a line that starts with
// "}" or contains "=" ends the constant block. inName, outName and pkgName
// are only used to fill in the generated header. It returns the scanner's
// error, if any. Write errors are left for the caller to observe when it
// flushes out.
func writeAnames(s *bufio.Scanner, out *bufio.Writer, inName, outName, pkgName string) error {
	on := false
	first := true
	for s.Scan() {
		line := s.Text()
		if !on {
			// First relevant line contains "= obj.ABase".
			// If we find it, delete the = so we don't stop immediately.
			const prefix = "= obj.ABase"
			index := strings.Index(line, prefix)
			if index < 0 {
				continue
			}
			// It's on. Start with the header.
			fmt.Fprintf(out, header, inName, outName, pkgName, pkgName)
			on = true
			line = line[:index]
		}
		// Strip comments so their text won't defeat our heuristic.
		if index := strings.Index(line, "//"); index > 0 {
			line = line[:index]
		}
		if index := strings.Index(line, "/*"); index > 0 {
			line = line[:index]
		}
		// Termination condition: Any line with an = changes the sequence,
		// so stop there, and stop at a closing brace.
		if strings.HasPrefix(line, "}") || strings.ContainsRune(line, '=') {
			break
		}
		sub := Are.FindStringSubmatch(line)
		if len(sub) < 2 {
			continue
		}
		if first {
			// The first entry is keyed so the table starts at obj.A_ARCHSPECIFIC.
			fmt.Fprintf(out, "\tobj.A_ARCHSPECIFIC: %q,\n", sub[1])
			first = false
		} else {
			fmt.Fprintf(out, "\t%q,\n", sub[1])
		}
	}
	fmt.Fprintln(out, "}")
	return s.Err()
}

// header is the format string for the top of the generated file; its verbs
// are the input name, output name, package name (in the command line shown in
// the comment) and the package name again (in the package clause).
const header = `// Code generated by stringer -i %s -o %s -p %s; DO NOT EDIT.

package %s

import "cmd/internal/obj"

var Anames = []string{
`
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package obj + +import ( + "cmd/internal/goobj" + "cmd/internal/notsha256" + "cmd/internal/objabi" + "encoding/base64" + "fmt" + "internal/buildcfg" + "log" + "math" + "sort" +) + +func Linknew(arch *LinkArch) *Link { + ctxt := new(Link) + ctxt.hash = make(map[string]*LSym) + ctxt.funchash = make(map[string]*LSym) + ctxt.statichash = make(map[string]*LSym) + ctxt.Arch = arch + ctxt.Pathname = objabi.WorkingDir() + + if err := ctxt.Headtype.Set(buildcfg.GOOS); err != nil { + log.Fatalf("unknown goos %s", buildcfg.GOOS) + } + + ctxt.Flag_optimize = true + return ctxt +} + +// LookupDerived looks up or creates the symbol with name derived from symbol s. +// The resulting symbol will be static iff s is. 
+func (ctxt *Link) LookupDerived(s *LSym, name string) *LSym { + if s.Static() { + return ctxt.LookupStatic(name) + } + return ctxt.Lookup(name) +} + +// LookupStatic looks up the static symbol with name name. +// If it does not exist, it creates it. +func (ctxt *Link) LookupStatic(name string) *LSym { + s := ctxt.statichash[name] + if s == nil { + s = &LSym{Name: name, Attribute: AttrStatic} + ctxt.statichash[name] = s + } + return s +} + +// LookupABI looks up a symbol with the given ABI. +// If it does not exist, it creates it. +func (ctxt *Link) LookupABI(name string, abi ABI) *LSym { + return ctxt.LookupABIInit(name, abi, nil) +} + +// LookupABI looks up a symbol with the given ABI. +// If it does not exist, it creates it and +// passes it to init for one-time initialization. +func (ctxt *Link) LookupABIInit(name string, abi ABI, init func(s *LSym)) *LSym { + var hash map[string]*LSym + switch abi { + case ABI0: + hash = ctxt.hash + case ABIInternal: + hash = ctxt.funchash + default: + panic("unknown ABI") + } + + ctxt.hashmu.Lock() + s := hash[name] + if s == nil { + s = &LSym{Name: name} + s.SetABI(abi) + hash[name] = s + if init != nil { + init(s) + } + } + ctxt.hashmu.Unlock() + return s +} + +// Lookup looks up the symbol with name name. +// If it does not exist, it creates it. +func (ctxt *Link) Lookup(name string) *LSym { + return ctxt.LookupInit(name, nil) +} + +// LookupInit looks up the symbol with name name. +// If it does not exist, it creates it and +// passes it to init for one-time initialization. 
func (ctxt *Link) LookupInit(name string, init func(s *LSym)) *LSym {
	// The lock covers both the table update and the init callback, so init
	// runs only for a newly created symbol and while hashmu is held.
	ctxt.hashmu.Lock()
	s := ctxt.hash[name]
	if s == nil {
		s = &LSym{Name: name}
		ctxt.hash[name] = s
		if init != nil {
			init(s)
		}
	}
	ctxt.hashmu.Unlock()
	return s
}

// Float32Sym returns the symbol named "$f32." followed by the 8 hex digits of
// f's bit pattern. On first use the symbol is created as a 4-byte, local,
// content-addressable SRODATA symbol holding f, and is recorded in
// ctxt.constSyms.
func (ctxt *Link) Float32Sym(f float32) *LSym {
	i := math.Float32bits(f)
	name := fmt.Sprintf("$f32.%08x", i)
	return ctxt.LookupInit(name, func(s *LSym) {
		s.Size = 4
		s.WriteFloat32(ctxt, 0, f)
		s.Type = objabi.SRODATA
		s.Set(AttrLocal, true)
		s.Set(AttrContentAddressable, true)
		ctxt.constSyms = append(ctxt.constSyms, s)
	})
}

// Float64Sym returns the symbol named "$f64." followed by the 16 hex digits of
// f's bit pattern. On first use the symbol is created as an 8-byte, local,
// content-addressable SRODATA symbol holding f, and is recorded in
// ctxt.constSyms.
func (ctxt *Link) Float64Sym(f float64) *LSym {
	i := math.Float64bits(f)
	name := fmt.Sprintf("$f64.%016x", i)
	return ctxt.LookupInit(name, func(s *LSym) {
		s.Size = 8
		s.WriteFloat64(ctxt, 0, f)
		s.Type = objabi.SRODATA
		s.Set(AttrLocal, true)
		s.Set(AttrContentAddressable, true)
		ctxt.constSyms = append(ctxt.constSyms, s)
	})
}

// Int64Sym returns the symbol named "$i64." followed by the 16 hex digits of
// i viewed as a uint64. On first use the symbol is created as an 8-byte,
// local, content-addressable SRODATA symbol holding i, and is recorded in
// ctxt.constSyms.
func (ctxt *Link) Int64Sym(i int64) *LSym {
	name := fmt.Sprintf("$i64.%016x", uint64(i))
	return ctxt.LookupInit(name, func(s *LSym) {
		s.Size = 8
		s.WriteInt(ctxt, 0, 8, i)
		s.Type = objabi.SRODATA
		s.Set(AttrLocal, true)
		s.Set(AttrContentAddressable, true)
		ctxt.constSyms = append(ctxt.constSyms, s)
	})
}

// GCLocalsSym generates a content-addressable sym containing data.
// The symbol is named "gclocals·" followed by the base64 encoding of the
// first 16 bytes of the notsha256 sum of data. When the symbol is created,
// data itself (not a copy) becomes its contents.
func (ctxt *Link) GCLocalsSym(data []byte) *LSym {
	sum := notsha256.Sum256(data)
	str := base64.StdEncoding.EncodeToString(sum[:16])
	return ctxt.LookupInit(fmt.Sprintf("gclocals·%s", str), func(lsym *LSym) {
		lsym.P = data
		lsym.Set(AttrContentAddressable, true)
	})
}

// NumberSyms assigns a package index (PkgIdx) and symbol index (SymIdx) to
// the symbols reachable from ctxt.Text and ctxt.Data.
//
// It first moves the sorted constant symbols into ctxt.Data, then numbers the
// defined symbols, sorting each into one of four lists (short-hashed, hashed,
// non-package, or package-indexed), and finally numbers the referenced
// symbols that are still unnumbered as builtin, non-package, or belonging to
// a numbered package.
//
// When ctxt.IsAsm is set, every non-static definition is treated as a
// non-package symbol (see isNonPkgSym).
func (ctxt *Link) NumberSyms() {
	if ctxt.Headtype == objabi.Haix {
		// Data must be sorted to keep a constant order in TOC symbols.
		// As they are created during Progedit, two symbols can be switched between
		// two different compilations. Therefore, BuildID will be different.
		// TODO: find a better place and optimize to only sort TOC symbols
		sort.Slice(ctxt.Data, func(i, j int) bool {
			return ctxt.Data[i].Name < ctxt.Data[j].Name
		})
	}

	// Constant symbols are created late in the concurrent phase. Sort them
	// to ensure a deterministic order.
	sort.Slice(ctxt.constSyms, func(i, j int) bool {
		return ctxt.constSyms[i].Name < ctxt.constSyms[j].Name
	})
	ctxt.Data = append(ctxt.Data, ctxt.constSyms...)
	ctxt.constSyms = nil

	// Start every index table from scratch.
	ctxt.pkgIdx = make(map[string]int32)
	ctxt.defs = []*LSym{}
	ctxt.hashed64defs = []*LSym{}
	ctxt.hasheddefs = []*LSym{}
	ctxt.nonpkgdefs = []*LSym{}

	// One running counter per definition list. Each counter must always equal
	// the length of its list; the "bad index" panics below check that.
	var idx, hashedidx, hashed64idx, nonpkgidx int32
	ctxt.traverseSyms(traverseDefs|traversePcdata, func(s *LSym) {
		// if Pkgpath is unknown, cannot hash symbols with relocations, as it
		// may reference named symbols whose names are not fully expanded.
		if s.ContentAddressable() && (ctxt.Pkgpath != "" || len(s.R) == 0) {
			if s.Size <= 8 && len(s.R) == 0 && contentHashSection(s) == 0 {
				// We can use short hash only for symbols without relocations.
				// Don't use short hash for symbols that belong in a particular section
				// or require special handling (such as type symbols).
				s.PkgIdx = goobj.PkgIdxHashed64
				s.SymIdx = hashed64idx
				if hashed64idx != int32(len(ctxt.hashed64defs)) {
					panic("bad index")
				}
				ctxt.hashed64defs = append(ctxt.hashed64defs, s)
				hashed64idx++
			} else {
				s.PkgIdx = goobj.PkgIdxHashed
				s.SymIdx = hashedidx
				if hashedidx != int32(len(ctxt.hasheddefs)) {
					panic("bad index")
				}
				ctxt.hasheddefs = append(ctxt.hasheddefs, s)
				hashedidx++
			}
		} else if isNonPkgSym(ctxt, s) {
			s.PkgIdx = goobj.PkgIdxNone
			s.SymIdx = nonpkgidx
			if nonpkgidx != int32(len(ctxt.nonpkgdefs)) {
				panic("bad index")
			}
			ctxt.nonpkgdefs = append(ctxt.nonpkgdefs, s)
			nonpkgidx++
		} else {
			s.PkgIdx = goobj.PkgIdxSelf
			s.SymIdx = idx
			if idx != int32(len(ctxt.defs)) {
				panic("bad index")
			}
			ctxt.defs = append(ctxt.defs, s)
			idx++
		}
		s.Set(AttrIndexed, true)
	})

	ipkg := int32(1) // 0 is invalid index
	// Non-package references continue the numbering where the non-package
	// definitions stopped; nonpkgdef remembers that starting point.
	nonpkgdef := nonpkgidx
	ctxt.traverseSyms(traverseRefs|traverseAux, func(rs *LSym) {
		// Skip symbols that already received a package index.
		if rs.PkgIdx != goobj.PkgIdxInvalid {
			return
		}
		if !ctxt.Flag_linkshared {
			// Assign special index for builtin symbols.
			// Don't do it when linking against shared libraries, as the runtime
			// may be in a different library.
			if i := goobj.BuiltinIdx(rs.Name, int(rs.ABI())); i != -1 {
				rs.PkgIdx = goobj.PkgIdxBuiltin
				rs.SymIdx = int32(i)
				rs.Set(AttrIndexed, true)
				return
			}
		}
		pkg := rs.Pkg
		if rs.ContentAddressable() {
			// for now, only support content-addressable symbols that are always locally defined.
			panic("hashed refs unsupported for now")
		}
		// References with no usable package, or not yet marked as indexed,
		// are numbered as non-package references.
		if pkg == "" || pkg == "\"\"" || pkg == "_" || !rs.Indexed() {
			rs.PkgIdx = goobj.PkgIdxNone
			rs.SymIdx = nonpkgidx
			rs.Set(AttrIndexed, true)
			if nonpkgidx != nonpkgdef+int32(len(ctxt.nonpkgrefs)) {
				panic("bad index")
			}
			ctxt.nonpkgrefs = append(ctxt.nonpkgrefs, rs)
			nonpkgidx++
			return
		}
		// Reuse the package's index if it has one, otherwise allocate the
		// next one. Only PkgIdx is assigned on this path.
		if k, ok := ctxt.pkgIdx[pkg]; ok {
			rs.PkgIdx = k
			return
		}
		rs.PkgIdx = ipkg
		ctxt.pkgIdx[pkg] = ipkg
		ipkg++
	})
}

// Returns whether s is a non-package symbol, which needs to be referenced
// by name instead of by index.
func isNonPkgSym(ctxt *Link, s *LSym) bool {
	if ctxt.IsAsm && !s.Static() {
		// asm symbols are referenced by name only, except static symbols
		// which are file-local and can be referenced by index.
		return true
	}
	if ctxt.Flag_linkshared {
		// The referenced symbol may be in a different shared library so
		// the linker cannot see its index.
		return true
	}
	if s.Pkg == "_" {
		// The frontend uses package "_" to mark symbols that should not
		// be referenced by index, e.g. linkname'd symbols.
		return true
	}
	if s.DuplicateOK() {
		// Dupok symbol needs to be dedup'd by name.
		return true
	}
	return false
}

// StaticNamePref is the prefix the front end applies to static temporary
// variables. When turned into LSyms, these can be tagged as static so
// as to avoid inserting them into the linker's name lookup tables.
const StaticNamePref = ".stmp_"

// traverseFlag is a bit set selecting which categories of symbols a
// traversal visits.
type traverseFlag uint32

const (
	traverseDefs traverseFlag = 1 << iota
	traverseRefs
	traverseAux
	traversePcdata

	// traverseAll selects every category.
	traverseAll = traverseDefs | traverseRefs | traverseAux | traversePcdata
)

// Traverse symbols based on flag, call fn for each symbol.
func (ctxt *Link) traverseSyms(flag traverseFlag, fn func(*LSym)) {
	// fnNoNil forwards to fn only for non-nil symbols.
	fnNoNil := func(s *LSym) {
		if s != nil {
			fn(s)
		}
	}
	// Text symbols are visited before data symbols.
	lists := [][]*LSym{ctxt.Text, ctxt.Data}
	files := ctxt.PosTable.FileTable()
	for _, list := range lists {
		for _, s := range list {
			if flag&traverseDefs != 0 {
				fn(s)
			}
			if flag&traverseRefs != 0 {
				// Symbols targeted by s's relocations.
				for _, r := range s.R {
					fnNoNil(r.Sym)
				}
			}
			if flag&traverseAux != 0 {
				fnNoNil(s.Gotype)
				if s.Type == objabi.STEXT {
					// Adapt fn to the (parent, aux) callback shape; the
					// parent is not needed here.
					f := func(parent *LSym, aux *LSym) {
						fn(aux)
					}
					ctxt.traverseFuncAux(flag, s, f, files)
				}
			}
			if flag&traversePcdata != 0 && s.Type == objabi.STEXT {
				// Pc-value tables attached to the function.
				fi := s.Func().Pcln
				fnNoNil(fi.Pcsp)
				fnNoNil(fi.Pcfile)
				fnNoNil(fi.Pcline)
				fnNoNil(fi.Pcinline)
				for _, d := range fi.Pcdata {
					fnNoNil(d)
				}
			}
		}
	}
}

// traverseFuncAux calls fn(parent, aux) for the auxiliary symbols of the
// function symbol fsym: its non-nil funcdata symbols, the symbols looked up
// for its used files, the functions and file symbols of its inlining tree,
// and its non-empty DWARF symbols. If flag also includes traverseRefs, the
// relocation targets of each DWARF symbol are reported with that DWARF symbol
// as parent. files maps a file index to the name passed to ctxt.Lookup.
// It panics if flag does not include traverseAux.
func (ctxt *Link) traverseFuncAux(flag traverseFlag, fsym *LSym, fn func(parent *LSym, aux *LSym), files []string) {
	fninfo := fsym.Func()
	pc := &fninfo.Pcln
	if flag&traverseAux == 0 {
		// NB: should it become necessary to walk aux sym reloc references
		// without walking the aux syms themselves, this can be changed.
		panic("should not be here")
	}
	for _, d := range pc.Funcdata {
		if d != nil {
			fn(fsym, d)
		}
	}
	// Collect the used file indices and sort them so the callbacks below
	// happen in ascending index order.
	usedFiles := make([]goobj.CUFileIndex, 0, len(pc.UsedFiles))
	for f := range pc.UsedFiles {
		usedFiles = append(usedFiles, f)
	}
	sort.Slice(usedFiles, func(i, j int) bool { return usedFiles[i] < usedFiles[j] })
	for _, f := range usedFiles {
		if filesym := ctxt.Lookup(files[f]); filesym != nil {
			fn(fsym, filesym)
		}
	}
	for _, call := range pc.InlTree.nodes {
		if call.Func != nil {
			fn(fsym, call.Func)
		}
		// Also report the file symbol for the call's position.
		f, _ := ctxt.getFileSymbolAndLine(call.Pos)
		if filesym := ctxt.Lookup(f); filesym != nil {
			fn(fsym, filesym)
		}
	}

	dwsyms := []*LSym{fninfo.dwarfRangesSym, fninfo.dwarfLocSym, fninfo.dwarfDebugLinesSym, fninfo.dwarfInfoSym}
	for _, dws := range dwsyms {
		// Missing or empty DWARF symbols are skipped entirely.
		if dws == nil || dws.Size == 0 {
			continue
		}
		fn(fsym, dws)
		if flag&traverseRefs != 0 {
			for _, r := range dws.R {
				if r.Sym != nil {
					fn(dws, r.Sym)
				}
			}
		}
	}
}

// Traverse aux symbols, calling fn for each sym/aux pair.
// The Gotype of a symbol is reported only when flag includes traverseDefs;
// the remaining aux symbols are reported for STEXT symbols via
// traverseFuncAux, which panics unless flag includes traverseAux.
func (ctxt *Link) traverseAuxSyms(flag traverseFlag, fn func(parent *LSym, aux *LSym)) {
	lists := [][]*LSym{ctxt.Text, ctxt.Data}
	files := ctxt.PosTable.FileTable()
	for _, list := range lists {
		for _, s := range list {
			if s.Gotype != nil {
				if flag&traverseDefs != 0 {
					fn(s, s.Gotype)
				}
			}
			// Only function symbols carry further aux symbols.
			if s.Type != objabi.STEXT {
				continue
			}
			ctxt.traverseFuncAux(flag, s, fn, files)
		}
	}
}

// This file defines flags attached to various functions
// and data objects. The compilers, assemblers, and linker must
// all agree on these values.
// Flag bits attached to functions and data objects. Each flag occupies one
// bit; the numeric values are unchanged (1, 2, 4, ... 4096).
const (
	// Don't profile the marked routine.
	//
	// Deprecated: Not implemented, do not use.
	NOPROF = 1 << 0

	// It is ok for the linker to get multiple of these symbols. It will
	// pick one of the duplicates to use.
	DUPOK = 1 << 1

	// Don't insert stack check preamble.
	NOSPLIT = 1 << 2

	// Put this data in a read-only section.
	RODATA = 1 << 3

	// This data contains no pointers.
	NOPTR = 1 << 4

	// This is a wrapper function and should not count as
	// disabling 'recover' or appear in tracebacks by default.
	WRAPPER = 1 << 5

	// This function uses its incoming context register.
	NEEDCTXT = 1 << 6

	// When passed to objw.Global, causes Local to be set to true on the LSym it creates.
	LOCAL = 1 << 7

	// Allocate a word of thread local storage and store the offset from the
	// thread local base to the thread local storage in this variable.
	TLSBSS = 1 << 8

	// Do not insert instructions to allocate a stack frame for this function.
	// Only valid on functions that declare a frame size of 0.
	// TODO(mwhudson): only implemented for ppc64x at present.
	NOFRAME = 1 << 9

	// Function can call reflect.Type.Method or reflect.Type.MethodByName.
	REFLECTMETHOD = 1 << 10

	// Function is the outermost frame of the call stack. Call stack unwinders
	// should stop at this function.
	TOPFRAME = 1 << 11

	// Function is an ABI wrapper.
	ABIWRAPPER = 1 << 12
)
package obj

import (
	"bytes"
	"cmd/internal/objabi"
	"cmd/internal/src"
	"fmt"
	"internal/buildcfg"
	"io"
	"strings"
)

// REG_NONE is the zero register number. The code in this file compares
// register fields against it to detect "no register", and Rconv prints it
// as "NONE".
const REG_NONE = 0

// Line returns a string containing the filename and line number for p.
// The position used is the outermost one (p.Ctxt.OutermostPos).
func (p *Prog) Line() string {
	return p.Ctxt.OutermostPos(p.Pos).Format(false, true)
}

// InnermostLine writes the innermost position of p (p.Ctxt.InnermostPos) to w.
// NOTE(review): it passes the same (false, true) flags that Line passes to
// Format, so the output presumably has the same file:line shape — confirm
// against the src.Pos documentation.
func (p *Prog) InnermostLine(w io.Writer) {
	p.Ctxt.InnermostPos(p.Pos).WriteTo(w, false, true)
}

// InnermostLineNumber returns a string containing the line number for the
// innermost inlined function (if any inlining) at p's position
func (p *Prog) InnermostLineNumber() string {
	return p.Ctxt.InnermostPos(p.Pos).LineNumber()
}

// InnermostLineNumberHTML returns a string containing the line number for the
// innermost inlined function (if any inlining) at p's position
func (p *Prog) InnermostLineNumberHTML() string {
	return p.Ctxt.InnermostPos(p.Pos).LineNumberHTML()
}

// InnermostFilename returns a string containing the innermost
// (in inlining) filename at p's position.
// When the innermost position is not known, the placeholder
// "<unknown file name>" is returned instead.
func (p *Prog) InnermostFilename() string {
	// TODO For now, this is only used for debugging output, and if we need more/better information, it might change.
	// An example of what we might want to see is the full stack of positions for inlined code, so we get some visibility into what is recorded there.
	pos := p.Ctxt.InnermostPos(p.Pos)
	if !pos.IsKnown() {
		return "<unknown file name>"
	}
	return pos.Filename()
}

// AllPos forwards p's position and result to p.Ctxt.AllPos and returns
// whatever that call returns.
// NOTE(review): presumably this appends the positions for p.Pos to result —
// confirm against Link.AllPos.
func (p *Prog) AllPos(result []src.Pos) []src.Pos {
	return p.Ctxt.AllPos(p.Pos, result)
}

// armCondCode holds the 16 ARM condition-code suffixes. CConvARM indexes it
// with (scond & C_SCOND) ^ C_SCOND_XOR; entry 14 is the empty suffix.
var armCondCode = []string{
	".EQ",
	".NE",
	".CS",
	".CC",
	".MI",
	".PL",
	".VS",
	".VC",
	".HI",
	".LS",
	".GE",
	".LT",
	".GT",
	".LE",
	"",
	".NV",
}

/* ARM scond byte */
const (
	C_SCOND     = (1 << 4) - 1 // mask for the low four bits (condition code)
	C_SBIT      = 1 << 4       // printed as ".S" by CConvARM
	C_PBIT      = 1 << 5       // printed as ".P" by CConvARM
	C_WBIT      = 1 << 6       // printed as ".W" by CConvARM
	C_FBIT      = 1 << 7       // shares bit 7 with C_UBIT
	C_UBIT      = 1 << 7       // printed as ".U" by CConvARM; ambiguous with C_FBIT
	C_SCOND_XOR = 14           // XOR-ed into the condition bits before indexing armCondCode
)

// CConv formats opcode suffix bits (Prog.Scond).
+func CConv(s uint8) string { + if s == 0 { + return "" + } + for i := range opSuffixSpace { + sset := &opSuffixSpace[i] + if sset.arch == buildcfg.GOARCH { + return sset.cconv(s) + } + } + return fmt.Sprintf("SC???%d", s) +} + +// CConvARM formats ARM opcode suffix bits (mostly condition codes). +func CConvARM(s uint8) string { + // TODO: could be great to move suffix-related things into + // ARM asm backends some day. + // obj/x86 can be used as an example. + + sc := armCondCode[(s&C_SCOND)^C_SCOND_XOR] + if s&C_SBIT != 0 { + sc += ".S" + } + if s&C_PBIT != 0 { + sc += ".P" + } + if s&C_WBIT != 0 { + sc += ".W" + } + if s&C_UBIT != 0 { /* ambiguous with FBIT */ + sc += ".U" + } + return sc +} + +func (p *Prog) String() string { + if p == nil { + return "<nil Prog>" + } + if p.Ctxt == nil { + return "<Prog without ctxt>" + } + return fmt.Sprintf("%.5d (%v)\t%s", p.Pc, p.Line(), p.InstructionString()) +} + +func (p *Prog) InnermostString(w io.Writer) { + if p == nil { + io.WriteString(w, "<nil Prog>") + return + } + if p.Ctxt == nil { + io.WriteString(w, "<Prog without ctxt>") + return + } + fmt.Fprintf(w, "%.5d (", p.Pc) + p.InnermostLine(w) + io.WriteString(w, ")\t") + p.WriteInstructionString(w) +} + +// InstructionString returns a string representation of the instruction without preceding +// program counter or file and line number. +func (p *Prog) InstructionString() string { + buf := new(bytes.Buffer) + p.WriteInstructionString(buf) + return buf.String() +} + +// WriteInstructionString writes a string representation of the instruction without preceding +// program counter or file and line number. 
+func (p *Prog) WriteInstructionString(w io.Writer) { + if p == nil { + io.WriteString(w, "<nil Prog>") + return + } + + if p.Ctxt == nil { + io.WriteString(w, "<Prog without ctxt>") + return + } + + sc := CConv(p.Scond) + + io.WriteString(w, p.As.String()) + io.WriteString(w, sc) + sep := "\t" + + if p.From.Type != TYPE_NONE { + io.WriteString(w, sep) + WriteDconv(w, p, &p.From) + sep = ", " + } + if p.Reg != REG_NONE { + // Should not happen but might as well show it if it does. + fmt.Fprintf(w, "%s%v", sep, Rconv(int(p.Reg))) + sep = ", " + } + for i := range p.RestArgs { + if p.RestArgs[i].Pos == Source { + io.WriteString(w, sep) + WriteDconv(w, p, &p.RestArgs[i].Addr) + sep = ", " + } + } + + if p.As == ATEXT { + // If there are attributes, print them. Otherwise, skip the comma. + // In short, print one of these two: + // TEXT foo(SB), DUPOK|NOSPLIT, $0 + // TEXT foo(SB), $0 + s := p.From.Sym.TextAttrString() + if s != "" { + fmt.Fprintf(w, "%s%s", sep, s) + sep = ", " + } + } + if p.To.Type != TYPE_NONE { + io.WriteString(w, sep) + WriteDconv(w, p, &p.To) + } + if p.RegTo2 != REG_NONE { + fmt.Fprintf(w, "%s%v", sep, Rconv(int(p.RegTo2))) + } + for i := range p.RestArgs { + if p.RestArgs[i].Pos == Destination { + io.WriteString(w, sep) + WriteDconv(w, p, &p.RestArgs[i].Addr) + sep = ", " + } + } +} + +func (ctxt *Link) NewProg() *Prog { + p := new(Prog) + p.Ctxt = ctxt + return p +} + +func (ctxt *Link) CanReuseProgs() bool { + return ctxt.Debugasm == 0 +} + +// Dconv accepts an argument 'a' within a prog 'p' and returns a string +// with a formatted version of the argument. +func Dconv(p *Prog, a *Addr) string { + buf := new(bytes.Buffer) + writeDconv(buf, p, a, false) + return buf.String() +} + +// DconvWithABIDetail accepts an argument 'a' within a prog 'p' +// and returns a string with a formatted version of the argument, in +// which text symbols are rendered with explicit ABI selectors. 
+func DconvWithABIDetail(p *Prog, a *Addr) string { + buf := new(bytes.Buffer) + writeDconv(buf, p, a, true) + return buf.String() +} + +// WriteDconv accepts an argument 'a' within a prog 'p' +// and writes a formatted version of the arg to the writer. +func WriteDconv(w io.Writer, p *Prog, a *Addr) { + writeDconv(w, p, a, false) +} + +func writeDconv(w io.Writer, p *Prog, a *Addr, abiDetail bool) { + switch a.Type { + default: + fmt.Fprintf(w, "type=%d", a.Type) + + case TYPE_NONE: + if a.Name != NAME_NONE || a.Reg != 0 || a.Sym != nil { + a.WriteNameTo(w) + fmt.Fprintf(w, "(%v)(NONE)", Rconv(int(a.Reg))) + } + + case TYPE_REG: + // TODO(rsc): This special case is for x86 instructions like + // PINSRQ CX,$1,X6 + // where the $1 is included in the p->to Addr. + // Move into a new field. + if a.Offset != 0 && (a.Reg < RBaseARM64 || a.Reg >= RBaseMIPS) { + fmt.Fprintf(w, "$%d,%v", a.Offset, Rconv(int(a.Reg))) + return + } + + if a.Name != NAME_NONE || a.Sym != nil { + a.WriteNameTo(w) + fmt.Fprintf(w, "(%v)(REG)", Rconv(int(a.Reg))) + } else { + io.WriteString(w, Rconv(int(a.Reg))) + } + if (RBaseARM64+1<<10+1<<9) /* arm64.REG_ELEM */ <= a.Reg && + a.Reg < (RBaseARM64+1<<11) /* arm64.REG_ELEM_END */ { + fmt.Fprintf(w, "[%d]", a.Index) + } + + case TYPE_BRANCH: + if a.Sym != nil { + fmt.Fprintf(w, "%s%s(SB)", a.Sym.Name, abiDecorate(a, abiDetail)) + } else if a.Target() != nil { + fmt.Fprint(w, a.Target().Pc) + } else { + fmt.Fprintf(w, "%d(PC)", a.Offset) + } + + case TYPE_INDIR: + io.WriteString(w, "*") + a.writeNameTo(w, abiDetail) + + case TYPE_MEM: + a.WriteNameTo(w) + if a.Index != REG_NONE { + if a.Scale == 0 { + // arm64 shifted or extended register offset, scale = 0. 
+ fmt.Fprintf(w, "(%v)", Rconv(int(a.Index))) + } else { + fmt.Fprintf(w, "(%v*%d)", Rconv(int(a.Index)), int(a.Scale)) + } + } + + case TYPE_CONST: + io.WriteString(w, "$") + a.WriteNameTo(w) + if a.Reg != 0 { + fmt.Fprintf(w, "(%v)", Rconv(int(a.Reg))) + } + + case TYPE_TEXTSIZE: + if a.Val.(int32) == objabi.ArgsSizeUnknown { + fmt.Fprintf(w, "$%d", a.Offset) + } else { + fmt.Fprintf(w, "$%d-%d", a.Offset, a.Val.(int32)) + } + + case TYPE_FCONST: + str := fmt.Sprintf("%.17g", a.Val.(float64)) + // Make sure 1 prints as 1.0 + if !strings.ContainsAny(str, ".e") { + str += ".0" + } + fmt.Fprintf(w, "$(%s)", str) + + case TYPE_SCONST: + fmt.Fprintf(w, "$%q", a.Val.(string)) + + case TYPE_ADDR: + io.WriteString(w, "$") + a.writeNameTo(w, abiDetail) + + case TYPE_SHIFT: + v := int(a.Offset) + ops := "<<>>->@>" + switch buildcfg.GOARCH { + case "arm": + op := ops[((v>>5)&3)<<1:] + if v&(1<<4) != 0 { + fmt.Fprintf(w, "R%d%c%cR%d", v&15, op[0], op[1], (v>>8)&15) + } else { + fmt.Fprintf(w, "R%d%c%c%d", v&15, op[0], op[1], (v>>7)&31) + } + if a.Reg != 0 { + fmt.Fprintf(w, "(%v)", Rconv(int(a.Reg))) + } + case "arm64": + op := ops[((v>>22)&3)<<1:] + r := (v >> 16) & 31 + fmt.Fprintf(w, "%s%c%c%d", Rconv(r+RBaseARM64), op[0], op[1], (v>>10)&63) + default: + panic("TYPE_SHIFT is not supported on " + buildcfg.GOARCH) + } + + case TYPE_REGREG: + fmt.Fprintf(w, "(%v, %v)", Rconv(int(a.Reg)), Rconv(int(a.Offset))) + + case TYPE_REGREG2: + fmt.Fprintf(w, "%v, %v", Rconv(int(a.Offset)), Rconv(int(a.Reg))) + + case TYPE_REGLIST: + io.WriteString(w, RLconv(a.Offset)) + + case TYPE_SPECIAL: + io.WriteString(w, SPCconv(a.Offset)) + } +} + +func (a *Addr) WriteNameTo(w io.Writer) { + a.writeNameTo(w, false) +} + +func (a *Addr) writeNameTo(w io.Writer, abiDetail bool) { + + switch a.Name { + default: + fmt.Fprintf(w, "name=%d", a.Name) + + case NAME_NONE: + switch { + case a.Reg == REG_NONE: + fmt.Fprint(w, a.Offset) + case a.Offset == 0: + fmt.Fprintf(w, "(%v)", Rconv(int(a.Reg))) + 
case a.Offset != 0: + fmt.Fprintf(w, "%d(%v)", a.Offset, Rconv(int(a.Reg))) + } + + // Note: a.Reg == REG_NONE encodes the default base register for the NAME_ type. + case NAME_EXTERN: + reg := "SB" + if a.Reg != REG_NONE { + reg = Rconv(int(a.Reg)) + } + if a.Sym != nil { + fmt.Fprintf(w, "%s%s%s(%s)", a.Sym.Name, abiDecorate(a, abiDetail), offConv(a.Offset), reg) + } else { + fmt.Fprintf(w, "%s(%s)", offConv(a.Offset), reg) + } + + case NAME_GOTREF: + reg := "SB" + if a.Reg != REG_NONE { + reg = Rconv(int(a.Reg)) + } + if a.Sym != nil { + fmt.Fprintf(w, "%s%s@GOT(%s)", a.Sym.Name, offConv(a.Offset), reg) + } else { + fmt.Fprintf(w, "%s@GOT(%s)", offConv(a.Offset), reg) + } + + case NAME_STATIC: + reg := "SB" + if a.Reg != REG_NONE { + reg = Rconv(int(a.Reg)) + } + if a.Sym != nil { + fmt.Fprintf(w, "%s<>%s(%s)", a.Sym.Name, offConv(a.Offset), reg) + } else { + fmt.Fprintf(w, "<>%s(%s)", offConv(a.Offset), reg) + } + + case NAME_AUTO: + reg := "SP" + if a.Reg != REG_NONE { + reg = Rconv(int(a.Reg)) + } + if a.Sym != nil { + fmt.Fprintf(w, "%s%s(%s)", a.Sym.Name, offConv(a.Offset), reg) + } else { + fmt.Fprintf(w, "%s(%s)", offConv(a.Offset), reg) + } + + case NAME_PARAM: + reg := "FP" + if a.Reg != REG_NONE { + reg = Rconv(int(a.Reg)) + } + if a.Sym != nil { + fmt.Fprintf(w, "%s%s(%s)", a.Sym.Name, offConv(a.Offset), reg) + } else { + fmt.Fprintf(w, "%s(%s)", offConv(a.Offset), reg) + } + case NAME_TOCREF: + reg := "SB" + if a.Reg != REG_NONE { + reg = Rconv(int(a.Reg)) + } + if a.Sym != nil { + fmt.Fprintf(w, "%s%s(%s)", a.Sym.Name, offConv(a.Offset), reg) + } else { + fmt.Fprintf(w, "%s(%s)", offConv(a.Offset), reg) + } + } +} + +func offConv(off int64) string { + if off == 0 { + return "" + } + return fmt.Sprintf("%+d", off) +} + +// opSuffixSet is like regListSet, but for opcode suffixes. +// +// Unlike some other similar structures, uint8 space is not +// divided by its own values set (because there are only 256 of them). 
// Instead, each arch is free to interpret and format all 8 bits as it likes,
// as long as it registers a proper cconv function for them.
type opSuffixSet struct {
	arch  string
	cconv func(suffix uint8) string
}

// opSuffixSpace lists every suffix formatter registered so far.
var opSuffixSpace []opSuffixSet

// RegisterOpSuffix assigns cconv function for formatting opcode suffixes
// when compiling for GOARCH=arch.
//
// cconv is never called with 0 argument.
func RegisterOpSuffix(arch string, cconv func(uint8) string) {
	entry := opSuffixSet{arch: arch, cconv: cconv}
	opSuffixSpace = append(opSuffixSpace, entry)
}

// regSet ties the register number range [lo, hi) to its pretty-printer.
type regSet struct {
	lo, hi int
	Rconv  func(int) string
}

// Few enough architectures that a linear scan is fastest.
// Not even worth sorting.
var regSpace []regSet

/*
	Each architecture defines a register space as a unique
	integer range.
	Here is the list of architectures and the base of their register spaces.
*/

const (
	// Because of masking operations in the encodings, each register
	// space should start at 0 modulo some power of 2.
	RBase386     = 1 << 10
	RBaseAMD64   = 2 << 10
	RBaseARM     = 3 << 10
	RBasePPC64   = 4 << 10  // range [4k, 8k)
	RBaseARM64   = 8 << 10  // range [8k, 13k)
	RBaseMIPS    = 13 << 10 // range [13k, 14k)
	RBaseS390X   = 14 << 10 // range [14k, 15k)
	RBaseRISCV   = 15 << 10 // range [15k, 16k)
	RBaseWasm    = 16 << 10
	RBaseLOONG64 = 17 << 10
)

// RegisterRegister binds a pretty-printer (Rconv) for register
// numbers to a given register number range. Lo is inclusive,
// hi exclusive (valid registers are lo through hi-1).
+func RegisterRegister(lo, hi int, Rconv func(int) string) { + regSpace = append(regSpace, regSet{lo, hi, Rconv}) +} + +func Rconv(reg int) string { + if reg == REG_NONE { + return "NONE" + } + for i := range regSpace { + rs := ®Space[i] + if rs.lo <= reg && reg < rs.hi { + return rs.Rconv(reg) + } + } + return fmt.Sprintf("R???%d", reg) +} + +type regListSet struct { + lo int64 + hi int64 + RLconv func(int64) string +} + +var regListSpace []regListSet + +// Each architecture is allotted a distinct subspace: [Lo, Hi) for declaring its +// arch-specific register list numbers. +const ( + RegListARMLo = 0 + RegListARMHi = 1 << 16 + + // arm64 uses the 60th bit to differentiate from other archs + RegListARM64Lo = 1 << 60 + RegListARM64Hi = 1<<61 - 1 + + // x86 uses the 61th bit to differentiate from other archs + RegListX86Lo = 1 << 61 + RegListX86Hi = 1<<62 - 1 +) + +// RegisterRegisterList binds a pretty-printer (RLconv) for register list +// numbers to a given register list number range. Lo is inclusive, +// hi exclusive (valid register list are lo through hi-1). +func RegisterRegisterList(lo, hi int64, rlconv func(int64) string) { + regListSpace = append(regListSpace, regListSet{lo, hi, rlconv}) +} + +func RLconv(list int64) string { + for i := range regListSpace { + rls := ®ListSpace[i] + if rls.lo <= list && list < rls.hi { + return rls.RLconv(list) + } + } + return fmt.Sprintf("RL???%d", list) +} + +// Special operands +type spcSet struct { + lo int64 + hi int64 + SPCconv func(int64) string +} + +var spcSpace []spcSet + +// RegisterSpecialOperands binds a pretty-printer (SPCconv) for special +// operand numbers to a given special operand number range. Lo is inclusive, +// hi is exclusive (valid special operands are lo through hi-1). +func RegisterSpecialOperands(lo, hi int64, rlconv func(int64) string) { + spcSpace = append(spcSpace, spcSet{lo, hi, rlconv}) +} + +// SPCconv returns the string representation of the special operand spc. 
+func SPCconv(spc int64) string { + for i := range spcSpace { + spcs := &spcSpace[i] + if spcs.lo <= spc && spc < spcs.hi { + return spcs.SPCconv(spc) + } + } + return fmt.Sprintf("SPC???%d", spc) +} + +type opSet struct { + lo As + names []string +} + +// Not even worth sorting +var aSpace []opSet + +// RegisterOpcode binds a list of instruction names +// to a given instruction number range. +func RegisterOpcode(lo As, Anames []string) { + if len(Anames) > AllowedOpCodes { + panic(fmt.Sprintf("too many instructions, have %d max %d", len(Anames), AllowedOpCodes)) + } + aSpace = append(aSpace, opSet{lo, Anames}) +} + +func (a As) String() string { + if 0 <= a && int(a) < len(Anames) { + return Anames[a] + } + for i := range aSpace { + as := &aSpace[i] + if as.lo <= a && int(a-as.lo) < len(as.names) { + return as.names[a-as.lo] + } + } + return fmt.Sprintf("A???%d", a) +} + +var Anames = []string{ + "XXX", + "CALL", + "DUFFCOPY", + "DUFFZERO", + "END", + "FUNCDATA", + "JMP", + "NOP", + "PCALIGN", + "PCDATA", + "RET", + "GETCALLERPC", + "TEXT", + "UNDEF", +} + +func Bool2int(b bool) int { + // The compiler currently only optimizes this form. + // See issue 6011. + var i int + if b { + i = 1 + } else { + i = 0 + } + return i +} + +func abiDecorate(a *Addr, abiDetail bool) string { + if !abiDetail || a.Sym == nil { + return "" + } + return fmt.Sprintf("<%s>", a.Sym.ABI()) +} diff --git a/src/cmd/internal/obj/wasm/a.out.go b/src/cmd/internal/obj/wasm/a.out.go new file mode 100644 index 0000000..83ce0a6 --- /dev/null +++ b/src/cmd/internal/obj/wasm/a.out.go @@ -0,0 +1,342 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package wasm + +import "cmd/internal/obj" + +//go:generate go run ../stringer.go -i $GOFILE -o anames.go -p wasm + +const ( + /* mark flags */ + DONE = 1 << iota + PRESERVEFLAGS // not allowed to clobber flags +) + +/* + * wasm + */ +const ( + ACallImport = obj.ABaseWasm + obj.A_ARCHSPECIFIC + iota + AGet + ASet + ATee + ANot // alias for I32Eqz + + // The following are low-level WebAssembly instructions. + // Their order matters, since it matches the opcode encoding. + // Gaps in the encoding are indicated by comments. + + AUnreachable // opcode 0x00 + ANop + ABlock + ALoop + AIf + AElse + + AEnd // opcode 0x0B + ABr + ABrIf + ABrTable + // ACall and AReturn are WebAssembly instructions. obj.ACALL and obj.ARET are higher level instructions + // with Go semantics, e.g. they manipulate the Go stack on the linear memory. + AReturn + ACall + ACallIndirect + + ADrop // opcode 0x1A + ASelect + + ALocalGet // opcode 0x20 + ALocalSet + ALocalTee + AGlobalGet + AGlobalSet + + AI32Load // opcode 0x28 + AI64Load + AF32Load + AF64Load + AI32Load8S + AI32Load8U + AI32Load16S + AI32Load16U + AI64Load8S + AI64Load8U + AI64Load16S + AI64Load16U + AI64Load32S + AI64Load32U + AI32Store + AI64Store + AF32Store + AF64Store + AI32Store8 + AI32Store16 + AI64Store8 + AI64Store16 + AI64Store32 + ACurrentMemory + AGrowMemory + + AI32Const + AI64Const + AF32Const + AF64Const + + AI32Eqz + AI32Eq + AI32Ne + AI32LtS + AI32LtU + AI32GtS + AI32GtU + AI32LeS + AI32LeU + AI32GeS + AI32GeU + + AI64Eqz + AI64Eq + AI64Ne + AI64LtS + AI64LtU + AI64GtS + AI64GtU + AI64LeS + AI64LeU + AI64GeS + AI64GeU + + AF32Eq + AF32Ne + AF32Lt + AF32Gt + AF32Le + AF32Ge + + AF64Eq + AF64Ne + AF64Lt + AF64Gt + AF64Le + AF64Ge + + AI32Clz + AI32Ctz + AI32Popcnt + AI32Add + AI32Sub + AI32Mul + AI32DivS + AI32DivU + AI32RemS + AI32RemU + AI32And + AI32Or + AI32Xor + AI32Shl + AI32ShrS + AI32ShrU + AI32Rotl + AI32Rotr + + AI64Clz + AI64Ctz + AI64Popcnt + AI64Add + AI64Sub + AI64Mul + AI64DivS + AI64DivU + AI64RemS + 
AI64RemU + AI64And + AI64Or + AI64Xor + AI64Shl + AI64ShrS + AI64ShrU + AI64Rotl + AI64Rotr + + AF32Abs + AF32Neg + AF32Ceil + AF32Floor + AF32Trunc + AF32Nearest + AF32Sqrt + AF32Add + AF32Sub + AF32Mul + AF32Div + AF32Min + AF32Max + AF32Copysign + + AF64Abs + AF64Neg + AF64Ceil + AF64Floor + AF64Trunc + AF64Nearest + AF64Sqrt + AF64Add + AF64Sub + AF64Mul + AF64Div + AF64Min + AF64Max + AF64Copysign + + AI32WrapI64 + AI32TruncF32S + AI32TruncF32U + AI32TruncF64S + AI32TruncF64U + AI64ExtendI32S + AI64ExtendI32U + AI64TruncF32S + AI64TruncF32U + AI64TruncF64S + AI64TruncF64U + AF32ConvertI32S + AF32ConvertI32U + AF32ConvertI64S + AF32ConvertI64U + AF32DemoteF64 + AF64ConvertI32S + AF64ConvertI32U + AF64ConvertI64S + AF64ConvertI64U + AF64PromoteF32 + AI32ReinterpretF32 + AI64ReinterpretF64 + AF32ReinterpretI32 + AF64ReinterpretI64 + AI32Extend8S + AI32Extend16S + AI64Extend8S + AI64Extend16S + AI64Extend32S + + AI32TruncSatF32S // opcode 0xFC 0x00 + AI32TruncSatF32U + AI32TruncSatF64S + AI32TruncSatF64U + AI64TruncSatF32S + AI64TruncSatF32U + AI64TruncSatF64S + AI64TruncSatF64U + + AMemoryInit + ADataDrop + AMemoryCopy + AMemoryFill + ATableInit + AElemDrop + ATableCopy + ATableGrow + ATableSize + ATableFill + + ALast // Sentinel: End of low-level WebAssembly instructions. + + ARESUMEPOINT + // ACALLNORESUME is a call which is not followed by a resume point. + // It is allowed inside of WebAssembly blocks, whereas obj.ACALL is not. + // However, it is not allowed to switch goroutines while inside of an ACALLNORESUME call. + ACALLNORESUME + + ARETUNWIND + + AMOVB + AMOVH + AMOVW + AMOVD + + AWORD + ALAST +) + +const ( + REG_NONE = 0 +) + +const ( + // globals + REG_SP = obj.RBaseWasm + iota // SP is currently 32-bit, until 64-bit memory operations are available + REG_CTXT + REG_g + // RET* are used by runtime.return0 and runtime.reflectcall. These functions pass return values in registers. 
+ REG_RET0 + REG_RET1 + REG_RET2 + REG_RET3 + REG_PAUSE + + // i32 locals + REG_R0 + REG_R1 + REG_R2 + REG_R3 + REG_R4 + REG_R5 + REG_R6 + REG_R7 + REG_R8 + REG_R9 + REG_R10 + REG_R11 + REG_R12 + REG_R13 + REG_R14 + REG_R15 + + // f32 locals + REG_F0 + REG_F1 + REG_F2 + REG_F3 + REG_F4 + REG_F5 + REG_F6 + REG_F7 + REG_F8 + REG_F9 + REG_F10 + REG_F11 + REG_F12 + REG_F13 + REG_F14 + REG_F15 + + // f64 locals + REG_F16 + REG_F17 + REG_F18 + REG_F19 + REG_F20 + REG_F21 + REG_F22 + REG_F23 + REG_F24 + REG_F25 + REG_F26 + REG_F27 + REG_F28 + REG_F29 + REG_F30 + REG_F31 + + REG_PC_B // also first parameter, i32 + + MAXREG + + MINREG = REG_SP + REGSP = REG_SP + REGCTXT = REG_CTXT + REGG = REG_g +) diff --git a/src/cmd/internal/obj/wasm/anames.go b/src/cmd/internal/obj/wasm/anames.go new file mode 100644 index 0000000..c9bc15d --- /dev/null +++ b/src/cmd/internal/obj/wasm/anames.go @@ -0,0 +1,218 @@ +// Code generated by stringer -i a.out.go -o anames.go -p wasm; DO NOT EDIT. + +package wasm + +import "cmd/internal/obj" + +var Anames = []string{ + obj.A_ARCHSPECIFIC: "CallImport", + "Get", + "Set", + "Tee", + "Not", + "Unreachable", + "Nop", + "Block", + "Loop", + "If", + "Else", + "End", + "Br", + "BrIf", + "BrTable", + "Return", + "Call", + "CallIndirect", + "Drop", + "Select", + "LocalGet", + "LocalSet", + "LocalTee", + "GlobalGet", + "GlobalSet", + "I32Load", + "I64Load", + "F32Load", + "F64Load", + "I32Load8S", + "I32Load8U", + "I32Load16S", + "I32Load16U", + "I64Load8S", + "I64Load8U", + "I64Load16S", + "I64Load16U", + "I64Load32S", + "I64Load32U", + "I32Store", + "I64Store", + "F32Store", + "F64Store", + "I32Store8", + "I32Store16", + "I64Store8", + "I64Store16", + "I64Store32", + "CurrentMemory", + "GrowMemory", + "I32Const", + "I64Const", + "F32Const", + "F64Const", + "I32Eqz", + "I32Eq", + "I32Ne", + "I32LtS", + "I32LtU", + "I32GtS", + "I32GtU", + "I32LeS", + "I32LeU", + "I32GeS", + "I32GeU", + "I64Eqz", + "I64Eq", + "I64Ne", + "I64LtS", + "I64LtU", + "I64GtS", + 
"I64GtU", + "I64LeS", + "I64LeU", + "I64GeS", + "I64GeU", + "F32Eq", + "F32Ne", + "F32Lt", + "F32Gt", + "F32Le", + "F32Ge", + "F64Eq", + "F64Ne", + "F64Lt", + "F64Gt", + "F64Le", + "F64Ge", + "I32Clz", + "I32Ctz", + "I32Popcnt", + "I32Add", + "I32Sub", + "I32Mul", + "I32DivS", + "I32DivU", + "I32RemS", + "I32RemU", + "I32And", + "I32Or", + "I32Xor", + "I32Shl", + "I32ShrS", + "I32ShrU", + "I32Rotl", + "I32Rotr", + "I64Clz", + "I64Ctz", + "I64Popcnt", + "I64Add", + "I64Sub", + "I64Mul", + "I64DivS", + "I64DivU", + "I64RemS", + "I64RemU", + "I64And", + "I64Or", + "I64Xor", + "I64Shl", + "I64ShrS", + "I64ShrU", + "I64Rotl", + "I64Rotr", + "F32Abs", + "F32Neg", + "F32Ceil", + "F32Floor", + "F32Trunc", + "F32Nearest", + "F32Sqrt", + "F32Add", + "F32Sub", + "F32Mul", + "F32Div", + "F32Min", + "F32Max", + "F32Copysign", + "F64Abs", + "F64Neg", + "F64Ceil", + "F64Floor", + "F64Trunc", + "F64Nearest", + "F64Sqrt", + "F64Add", + "F64Sub", + "F64Mul", + "F64Div", + "F64Min", + "F64Max", + "F64Copysign", + "I32WrapI64", + "I32TruncF32S", + "I32TruncF32U", + "I32TruncF64S", + "I32TruncF64U", + "I64ExtendI32S", + "I64ExtendI32U", + "I64TruncF32S", + "I64TruncF32U", + "I64TruncF64S", + "I64TruncF64U", + "F32ConvertI32S", + "F32ConvertI32U", + "F32ConvertI64S", + "F32ConvertI64U", + "F32DemoteF64", + "F64ConvertI32S", + "F64ConvertI32U", + "F64ConvertI64S", + "F64ConvertI64U", + "F64PromoteF32", + "I32ReinterpretF32", + "I64ReinterpretF64", + "F32ReinterpretI32", + "F64ReinterpretI64", + "I32Extend8S", + "I32Extend16S", + "I64Extend8S", + "I64Extend16S", + "I64Extend32S", + "I32TruncSatF32S", + "I32TruncSatF32U", + "I32TruncSatF64S", + "I32TruncSatF64U", + "I64TruncSatF32S", + "I64TruncSatF32U", + "I64TruncSatF64S", + "I64TruncSatF64U", + "MemoryInit", + "DataDrop", + "MemoryCopy", + "MemoryFill", + "TableInit", + "ElemDrop", + "TableCopy", + "TableGrow", + "TableSize", + "TableFill", + "Last", + "RESUMEPOINT", + "CALLNORESUME", + "RETUNWIND", + "MOVB", + "MOVH", + "MOVW", + 
"MOVD", + "WORD", + "LAST", +} diff --git a/src/cmd/internal/obj/wasm/wasmobj.go b/src/cmd/internal/obj/wasm/wasmobj.go new file mode 100644 index 0000000..9b0aabe --- /dev/null +++ b/src/cmd/internal/obj/wasm/wasmobj.go @@ -0,0 +1,1201 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package wasm + +import ( + "bytes" + "cmd/internal/obj" + "cmd/internal/objabi" + "cmd/internal/sys" + "encoding/binary" + "fmt" + "io" + "math" +) + +var Register = map[string]int16{ + "SP": REG_SP, + "CTXT": REG_CTXT, + "g": REG_g, + "RET0": REG_RET0, + "RET1": REG_RET1, + "RET2": REG_RET2, + "RET3": REG_RET3, + "PAUSE": REG_PAUSE, + + "R0": REG_R0, + "R1": REG_R1, + "R2": REG_R2, + "R3": REG_R3, + "R4": REG_R4, + "R5": REG_R5, + "R6": REG_R6, + "R7": REG_R7, + "R8": REG_R8, + "R9": REG_R9, + "R10": REG_R10, + "R11": REG_R11, + "R12": REG_R12, + "R13": REG_R13, + "R14": REG_R14, + "R15": REG_R15, + + "F0": REG_F0, + "F1": REG_F1, + "F2": REG_F2, + "F3": REG_F3, + "F4": REG_F4, + "F5": REG_F5, + "F6": REG_F6, + "F7": REG_F7, + "F8": REG_F8, + "F9": REG_F9, + "F10": REG_F10, + "F11": REG_F11, + "F12": REG_F12, + "F13": REG_F13, + "F14": REG_F14, + "F15": REG_F15, + + "F16": REG_F16, + "F17": REG_F17, + "F18": REG_F18, + "F19": REG_F19, + "F20": REG_F20, + "F21": REG_F21, + "F22": REG_F22, + "F23": REG_F23, + "F24": REG_F24, + "F25": REG_F25, + "F26": REG_F26, + "F27": REG_F27, + "F28": REG_F28, + "F29": REG_F29, + "F30": REG_F30, + "F31": REG_F31, + + "PC_B": REG_PC_B, +} + +var registerNames []string + +func init() { + obj.RegisterRegister(MINREG, MAXREG, rconv) + obj.RegisterOpcode(obj.ABaseWasm, Anames) + + registerNames = make([]string, MAXREG-MINREG) + for name, reg := range Register { + registerNames[reg-MINREG] = name + } +} + +func rconv(r int) string { + return registerNames[r-MINREG] +} + +var unaryDst = map[obj.As]bool{ + ASet: true, + ATee: true, + ACall: 
true, + ACallIndirect: true, + ACallImport: true, + ABr: true, + ABrIf: true, + ABrTable: true, + AI32Store: true, + AI64Store: true, + AF32Store: true, + AF64Store: true, + AI32Store8: true, + AI32Store16: true, + AI64Store8: true, + AI64Store16: true, + AI64Store32: true, + ACALLNORESUME: true, +} + +var Linkwasm = obj.LinkArch{ + Arch: sys.ArchWasm, + Init: instinit, + Preprocess: preprocess, + Assemble: assemble, + UnaryDst: unaryDst, +} + +var ( + morestack *obj.LSym + morestackNoCtxt *obj.LSym + gcWriteBarrier *obj.LSym + sigpanic *obj.LSym +) + +const ( + /* mark flags */ + WasmImport = 1 << 0 +) + +func instinit(ctxt *obj.Link) { + morestack = ctxt.Lookup("runtime.morestack") + morestackNoCtxt = ctxt.Lookup("runtime.morestack_noctxt") + gcWriteBarrier = ctxt.LookupABI("runtime.gcWriteBarrier", obj.ABIInternal) + sigpanic = ctxt.LookupABI("runtime.sigpanic", obj.ABIInternal) +} + +func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { + appendp := func(p *obj.Prog, as obj.As, args ...obj.Addr) *obj.Prog { + if p.As != obj.ANOP { + p2 := obj.Appendp(p, newprog) + p2.Pc = p.Pc + p = p2 + } + p.As = as + switch len(args) { + case 0: + p.From = obj.Addr{} + p.To = obj.Addr{} + case 1: + if unaryDst[as] { + p.From = obj.Addr{} + p.To = args[0] + } else { + p.From = args[0] + p.To = obj.Addr{} + } + case 2: + p.From = args[0] + p.To = args[1] + default: + panic("bad args") + } + return p + } + + framesize := s.Func().Text.To.Offset + if framesize < 0 { + panic("bad framesize") + } + s.Func().Args = s.Func().Text.To.Val.(int32) + s.Func().Locals = int32(framesize) + + if s.Func().Text.From.Sym.Wrapper() { + // if g._panic != nil && g._panic.argp == FP { + // g._panic.argp = bottom-of-frame + // } + // + // MOVD g_panic(g), R0 + // Get R0 + // I64Eqz + // Not + // If + // Get SP + // I64ExtendI32U + // I64Const $framesize+8 + // I64Add + // I64Load panic_argp(R0) + // I64Eq + // If + // MOVD SP, panic_argp(R0) + // End + // End + + gpanic := obj.Addr{ 
+ Type: obj.TYPE_MEM, + Reg: REGG, + Offset: 4 * 8, // g_panic + } + + panicargp := obj.Addr{ + Type: obj.TYPE_MEM, + Reg: REG_R0, + Offset: 0, // panic.argp + } + + p := s.Func().Text + p = appendp(p, AMOVD, gpanic, regAddr(REG_R0)) + + p = appendp(p, AGet, regAddr(REG_R0)) + p = appendp(p, AI64Eqz) + p = appendp(p, ANot) + p = appendp(p, AIf) + + p = appendp(p, AGet, regAddr(REG_SP)) + p = appendp(p, AI64ExtendI32U) + p = appendp(p, AI64Const, constAddr(framesize+8)) + p = appendp(p, AI64Add) + p = appendp(p, AI64Load, panicargp) + + p = appendp(p, AI64Eq) + p = appendp(p, AIf) + p = appendp(p, AMOVD, regAddr(REG_SP), panicargp) + p = appendp(p, AEnd) + + p = appendp(p, AEnd) + } + + if framesize > 0 { + p := s.Func().Text + p = appendp(p, AGet, regAddr(REG_SP)) + p = appendp(p, AI32Const, constAddr(framesize)) + p = appendp(p, AI32Sub) + p = appendp(p, ASet, regAddr(REG_SP)) + p.Spadj = int32(framesize) + } + + needMoreStack := !s.Func().Text.From.Sym.NoSplit() + + // If the maymorestack debug option is enabled, insert the + // call to maymorestack *before* processing resume points so + // we can construct a resume point after maymorestack for + // morestack to resume at. + var pMorestack = s.Func().Text + if needMoreStack && ctxt.Flag_maymorestack != "" { + p := pMorestack + + // Save REGCTXT on the stack. + const tempFrame = 8 + p = appendp(p, AGet, regAddr(REG_SP)) + p = appendp(p, AI32Const, constAddr(tempFrame)) + p = appendp(p, AI32Sub) + p = appendp(p, ASet, regAddr(REG_SP)) + p.Spadj = tempFrame + ctxtp := obj.Addr{ + Type: obj.TYPE_MEM, + Reg: REG_SP, + Offset: 0, + } + p = appendp(p, AMOVD, regAddr(REGCTXT), ctxtp) + + // maymorestack must not itself preempt because we + // don't have full stack information, so this can be + // ACALLNORESUME. 
+ p = appendp(p, ACALLNORESUME, constAddr(0)) + // See ../x86/obj6.go + sym := ctxt.LookupABI(ctxt.Flag_maymorestack, s.ABI()) + p.To = obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: sym} + + // Restore REGCTXT. + p = appendp(p, AMOVD, ctxtp, regAddr(REGCTXT)) + p = appendp(p, AGet, regAddr(REG_SP)) + p = appendp(p, AI32Const, constAddr(tempFrame)) + p = appendp(p, AI32Add) + p = appendp(p, ASet, regAddr(REG_SP)) + p.Spadj = -tempFrame + + // Add an explicit ARESUMEPOINT after maymorestack for + // morestack to resume at. + pMorestack = appendp(p, ARESUMEPOINT) + } + + // Introduce resume points for CALL instructions + // and collect other explicit resume points. + numResumePoints := 0 + explicitBlockDepth := 0 + pc := int64(0) // pc is only incremented when necessary, this avoids bloat of the BrTable instruction + var tableIdxs []uint64 + tablePC := int64(0) + base := ctxt.PosTable.Pos(s.Func().Text.Pos).Base() + for p := s.Func().Text; p != nil; p = p.Link { + prevBase := base + base = ctxt.PosTable.Pos(p.Pos).Base() + switch p.As { + case ABlock, ALoop, AIf: + explicitBlockDepth++ + + case AEnd: + if explicitBlockDepth == 0 { + panic("End without block") + } + explicitBlockDepth-- + + case ARESUMEPOINT: + if explicitBlockDepth != 0 { + panic("RESUME can only be used on toplevel") + } + p.As = AEnd + for tablePC <= pc { + tableIdxs = append(tableIdxs, uint64(numResumePoints)) + tablePC++ + } + numResumePoints++ + pc++ + + case obj.ACALL: + if explicitBlockDepth != 0 { + panic("CALL can only be used on toplevel, try CALLNORESUME instead") + } + appendp(p, ARESUMEPOINT) + } + + p.Pc = pc + + // Increase pc whenever some pc-value table needs a new entry. Don't increase it + // more often to avoid bloat of the BrTable instruction. + // The "base != prevBase" condition detects inlined instructions. They are an + // implicit call, so entering and leaving this section affects the stack trace. 
+ if p.As == ACALLNORESUME || p.As == obj.ANOP || p.As == ANop || p.Spadj != 0 || base != prevBase { + pc++ + if p.To.Sym == sigpanic { + // The panic stack trace expects the PC at the call of sigpanic, + // not the next one. However, runtime.Caller subtracts 1 from the + // PC. To make both PC and PC-1 work (have the same line number), + // we advance the PC by 2 at sigpanic. + pc++ + } + } + } + tableIdxs = append(tableIdxs, uint64(numResumePoints)) + s.Size = pc + 1 + + if needMoreStack { + p := pMorestack + + if framesize <= objabi.StackSmall { + // small stack: SP <= stackguard + // Get SP + // Get g + // I32WrapI64 + // I32Load $stackguard0 + // I32GtU + + p = appendp(p, AGet, regAddr(REG_SP)) + p = appendp(p, AGet, regAddr(REGG)) + p = appendp(p, AI32WrapI64) + p = appendp(p, AI32Load, constAddr(2*int64(ctxt.Arch.PtrSize))) // G.stackguard0 + p = appendp(p, AI32LeU) + } else { + // large stack: SP-framesize <= stackguard-StackSmall + // SP <= stackguard+(framesize-StackSmall) + // Get SP + // Get g + // I32WrapI64 + // I32Load $stackguard0 + // I32Const $(framesize-StackSmall) + // I32Add + // I32GtU + + p = appendp(p, AGet, regAddr(REG_SP)) + p = appendp(p, AGet, regAddr(REGG)) + p = appendp(p, AI32WrapI64) + p = appendp(p, AI32Load, constAddr(2*int64(ctxt.Arch.PtrSize))) // G.stackguard0 + p = appendp(p, AI32Const, constAddr(framesize-objabi.StackSmall)) + p = appendp(p, AI32Add) + p = appendp(p, AI32LeU) + } + // TODO(neelance): handle wraparound case + + p = appendp(p, AIf) + // This CALL does *not* have a resume point after it + // (we already inserted all of the resume points). As + // a result, morestack will resume at the *previous* + // resume point (typically, the beginning of the + // function) and perform the morestack check again. + // This is why we don't need an explicit loop like + // other architectures. 
+ p = appendp(p, obj.ACALL, constAddr(0)) + if s.Func().Text.From.Sym.NeedCtxt() { + p.To = obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: morestack} + } else { + p.To = obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: morestackNoCtxt} + } + p = appendp(p, AEnd) + } + + // record the branches targeting the entry loop and the unwind exit, + // their targets with be filled in later + var entryPointLoopBranches []*obj.Prog + var unwindExitBranches []*obj.Prog + currentDepth := 0 + for p := s.Func().Text; p != nil; p = p.Link { + switch p.As { + case ABlock, ALoop, AIf: + currentDepth++ + case AEnd: + currentDepth-- + } + + switch p.As { + case obj.AJMP: + jmp := *p + p.As = obj.ANOP + + if jmp.To.Type == obj.TYPE_BRANCH { + // jump to basic block + p = appendp(p, AI32Const, constAddr(jmp.To.Val.(*obj.Prog).Pc)) + p = appendp(p, ASet, regAddr(REG_PC_B)) // write next basic block to PC_B + p = appendp(p, ABr) // jump to beginning of entryPointLoop + entryPointLoopBranches = append(entryPointLoopBranches, p) + break + } + + // low-level WebAssembly call to function + switch jmp.To.Type { + case obj.TYPE_MEM: + if !notUsePC_B[jmp.To.Sym.Name] { + // Set PC_B parameter to function entry. + p = appendp(p, AI32Const, constAddr(0)) + } + p = appendp(p, ACall, jmp.To) + + case obj.TYPE_NONE: + // (target PC is on stack) + p = appendp(p, AI32WrapI64) + p = appendp(p, AI32Const, constAddr(16)) // only needs PC_F bits (16-31), PC_B bits (0-15) are zero + p = appendp(p, AI32ShrU) + + // Set PC_B parameter to function entry. + // We need to push this before pushing the target PC_F, + // so temporarily pop PC_F, using our REG_PC_B as a + // scratch register, and push it back after pushing 0. 
+ p = appendp(p, ASet, regAddr(REG_PC_B)) + p = appendp(p, AI32Const, constAddr(0)) + p = appendp(p, AGet, regAddr(REG_PC_B)) + + p = appendp(p, ACallIndirect) + + default: + panic("bad target for JMP") + } + + p = appendp(p, AReturn) + + case obj.ACALL, ACALLNORESUME: + call := *p + p.As = obj.ANOP + + pcAfterCall := call.Link.Pc + if call.To.Sym == sigpanic { + pcAfterCall-- // sigpanic expects to be called without advancing the pc + } + + // SP -= 8 + p = appendp(p, AGet, regAddr(REG_SP)) + p = appendp(p, AI32Const, constAddr(8)) + p = appendp(p, AI32Sub) + p = appendp(p, ASet, regAddr(REG_SP)) + + // write return address to Go stack + p = appendp(p, AGet, regAddr(REG_SP)) + p = appendp(p, AI64Const, obj.Addr{ + Type: obj.TYPE_ADDR, + Name: obj.NAME_EXTERN, + Sym: s, // PC_F + Offset: pcAfterCall, // PC_B + }) + p = appendp(p, AI64Store, constAddr(0)) + + // low-level WebAssembly call to function + switch call.To.Type { + case obj.TYPE_MEM: + if !notUsePC_B[call.To.Sym.Name] { + // Set PC_B parameter to function entry. + p = appendp(p, AI32Const, constAddr(0)) + } + p = appendp(p, ACall, call.To) + + case obj.TYPE_NONE: + // (target PC is on stack) + p = appendp(p, AI32WrapI64) + p = appendp(p, AI32Const, constAddr(16)) // only needs PC_F bits (16-31), PC_B bits (0-15) are zero + p = appendp(p, AI32ShrU) + + // Set PC_B parameter to function entry. + // We need to push this before pushing the target PC_F, + // so temporarily pop PC_F, using our PC_B as a + // scratch register, and push it back after pushing 0. 
+ p = appendp(p, ASet, regAddr(REG_PC_B)) + p = appendp(p, AI32Const, constAddr(0)) + p = appendp(p, AGet, regAddr(REG_PC_B)) + + p = appendp(p, ACallIndirect) + + default: + panic("bad target for CALL") + } + + // gcWriteBarrier has no return value, it never unwinds the stack + if call.To.Sym == gcWriteBarrier { + break + } + + // return value of call is on the top of the stack, indicating whether to unwind the WebAssembly stack + if call.As == ACALLNORESUME && call.To.Sym != sigpanic { // sigpanic unwinds the stack, but it never resumes + // trying to unwind WebAssembly stack but call has no resume point, terminate with error + p = appendp(p, AIf) + p = appendp(p, obj.AUNDEF) + p = appendp(p, AEnd) + } else { + // unwinding WebAssembly stack to switch goroutine, return 1 + p = appendp(p, ABrIf) + unwindExitBranches = append(unwindExitBranches, p) + } + + case obj.ARET, ARETUNWIND: + ret := *p + p.As = obj.ANOP + + if framesize > 0 { + // SP += framesize + p = appendp(p, AGet, regAddr(REG_SP)) + p = appendp(p, AI32Const, constAddr(framesize)) + p = appendp(p, AI32Add) + p = appendp(p, ASet, regAddr(REG_SP)) + // TODO(neelance): This should theoretically set Spadj, but it only works without. + // p.Spadj = int32(-framesize) + } + + if ret.To.Type == obj.TYPE_MEM { + // Set PC_B parameter to function entry. 
+ p = appendp(p, AI32Const, constAddr(0)) + + // low-level WebAssembly call to function + p = appendp(p, ACall, ret.To) + p = appendp(p, AReturn) + break + } + + // SP += 8 + p = appendp(p, AGet, regAddr(REG_SP)) + p = appendp(p, AI32Const, constAddr(8)) + p = appendp(p, AI32Add) + p = appendp(p, ASet, regAddr(REG_SP)) + + if ret.As == ARETUNWIND { + // function needs to unwind the WebAssembly stack, return 1 + p = appendp(p, AI32Const, constAddr(1)) + p = appendp(p, AReturn) + break + } + + // not unwinding the WebAssembly stack, return 0 + p = appendp(p, AI32Const, constAddr(0)) + p = appendp(p, AReturn) + } + } + + for p := s.Func().Text; p != nil; p = p.Link { + switch p.From.Name { + case obj.NAME_AUTO: + p.From.Offset += framesize + case obj.NAME_PARAM: + p.From.Reg = REG_SP + p.From.Offset += framesize + 8 // parameters are after the frame and the 8-byte return address + } + + switch p.To.Name { + case obj.NAME_AUTO: + p.To.Offset += framesize + case obj.NAME_PARAM: + p.To.Reg = REG_SP + p.To.Offset += framesize + 8 // parameters are after the frame and the 8-byte return address + } + + switch p.As { + case AGet: + if p.From.Type == obj.TYPE_ADDR { + get := *p + p.As = obj.ANOP + + switch get.From.Name { + case obj.NAME_EXTERN: + p = appendp(p, AI64Const, get.From) + case obj.NAME_AUTO, obj.NAME_PARAM: + p = appendp(p, AGet, regAddr(get.From.Reg)) + if get.From.Reg == REG_SP { + p = appendp(p, AI64ExtendI32U) + } + if get.From.Offset != 0 { + p = appendp(p, AI64Const, constAddr(get.From.Offset)) + p = appendp(p, AI64Add) + } + default: + panic("bad Get: invalid name") + } + } + + case AI32Load, AI64Load, AF32Load, AF64Load, AI32Load8S, AI32Load8U, AI32Load16S, AI32Load16U, AI64Load8S, AI64Load8U, AI64Load16S, AI64Load16U, AI64Load32S, AI64Load32U: + if p.From.Type == obj.TYPE_MEM { + as := p.As + from := p.From + + p.As = AGet + p.From = regAddr(from.Reg) + + if from.Reg != REG_SP { + p = appendp(p, AI32WrapI64) + } + + p = appendp(p, as, 
constAddr(from.Offset)) + } + + case AMOVB, AMOVH, AMOVW, AMOVD: + mov := *p + p.As = obj.ANOP + + var loadAs obj.As + var storeAs obj.As + switch mov.As { + case AMOVB: + loadAs = AI64Load8U + storeAs = AI64Store8 + case AMOVH: + loadAs = AI64Load16U + storeAs = AI64Store16 + case AMOVW: + loadAs = AI64Load32U + storeAs = AI64Store32 + case AMOVD: + loadAs = AI64Load + storeAs = AI64Store + } + + appendValue := func() { + switch mov.From.Type { + case obj.TYPE_CONST: + p = appendp(p, AI64Const, constAddr(mov.From.Offset)) + + case obj.TYPE_ADDR: + switch mov.From.Name { + case obj.NAME_NONE, obj.NAME_PARAM, obj.NAME_AUTO: + p = appendp(p, AGet, regAddr(mov.From.Reg)) + if mov.From.Reg == REG_SP { + p = appendp(p, AI64ExtendI32U) + } + p = appendp(p, AI64Const, constAddr(mov.From.Offset)) + p = appendp(p, AI64Add) + case obj.NAME_EXTERN: + p = appendp(p, AI64Const, mov.From) + default: + panic("bad name for MOV") + } + + case obj.TYPE_REG: + p = appendp(p, AGet, mov.From) + if mov.From.Reg == REG_SP { + p = appendp(p, AI64ExtendI32U) + } + + case obj.TYPE_MEM: + p = appendp(p, AGet, regAddr(mov.From.Reg)) + if mov.From.Reg != REG_SP { + p = appendp(p, AI32WrapI64) + } + p = appendp(p, loadAs, constAddr(mov.From.Offset)) + + default: + panic("bad MOV type") + } + } + + switch mov.To.Type { + case obj.TYPE_REG: + appendValue() + if mov.To.Reg == REG_SP { + p = appendp(p, AI32WrapI64) + } + p = appendp(p, ASet, mov.To) + + case obj.TYPE_MEM: + switch mov.To.Name { + case obj.NAME_NONE, obj.NAME_PARAM: + p = appendp(p, AGet, regAddr(mov.To.Reg)) + if mov.To.Reg != REG_SP { + p = appendp(p, AI32WrapI64) + } + case obj.NAME_EXTERN: + p = appendp(p, AI32Const, obj.Addr{Type: obj.TYPE_ADDR, Name: obj.NAME_EXTERN, Sym: mov.To.Sym}) + default: + panic("bad MOV name") + } + appendValue() + p = appendp(p, storeAs, constAddr(mov.To.Offset)) + + default: + panic("bad MOV type") + } + + case ACallImport: + p.As = obj.ANOP + p = appendp(p, AGet, regAddr(REG_SP)) + p = appendp(p, 
ACall, obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: s}) + p.Mark = WasmImport + } + } + + { + p := s.Func().Text + if len(unwindExitBranches) > 0 { + p = appendp(p, ABlock) // unwindExit, used to return 1 when unwinding the stack + for _, b := range unwindExitBranches { + b.To = obj.Addr{Type: obj.TYPE_BRANCH, Val: p} + } + } + if len(entryPointLoopBranches) > 0 { + p = appendp(p, ALoop) // entryPointLoop, used to jump between basic blocks + for _, b := range entryPointLoopBranches { + b.To = obj.Addr{Type: obj.TYPE_BRANCH, Val: p} + } + } + if numResumePoints > 0 { + // Add Block instructions for resume points and BrTable to jump to selected resume point. + for i := 0; i < numResumePoints+1; i++ { + p = appendp(p, ABlock) + } + p = appendp(p, AGet, regAddr(REG_PC_B)) // read next basic block from PC_B + p = appendp(p, ABrTable, obj.Addr{Val: tableIdxs}) + p = appendp(p, AEnd) // end of Block + } + for p.Link != nil { + p = p.Link // function instructions + } + if len(entryPointLoopBranches) > 0 { + p = appendp(p, AEnd) // end of entryPointLoop + } + p = appendp(p, obj.AUNDEF) + if len(unwindExitBranches) > 0 { + p = appendp(p, AEnd) // end of unwindExit + p = appendp(p, AI32Const, constAddr(1)) + } + } + + currentDepth = 0 + blockDepths := make(map[*obj.Prog]int) + for p := s.Func().Text; p != nil; p = p.Link { + switch p.As { + case ABlock, ALoop, AIf: + currentDepth++ + blockDepths[p] = currentDepth + case AEnd: + currentDepth-- + } + + switch p.As { + case ABr, ABrIf: + if p.To.Type == obj.TYPE_BRANCH { + blockDepth, ok := blockDepths[p.To.Val.(*obj.Prog)] + if !ok { + panic("label not at block") + } + p.To = constAddr(int64(currentDepth - blockDepth)) + } + } + } +} + +func constAddr(value int64) obj.Addr { + return obj.Addr{Type: obj.TYPE_CONST, Offset: value} +} + +func regAddr(reg int16) obj.Addr { + return obj.Addr{Type: obj.TYPE_REG, Reg: reg} +} + +// Most of the Go functions has a single parameter (PC_B) in +// Wasm ABI. 
This is a list of exceptions. +var notUsePC_B = map[string]bool{ + "_rt0_wasm_js": true, + "wasm_export_run": true, + "wasm_export_resume": true, + "wasm_export_getsp": true, + "wasm_pc_f_loop": true, + "runtime.wasmDiv": true, + "runtime.wasmTruncS": true, + "runtime.wasmTruncU": true, + "runtime.gcWriteBarrier": true, + "cmpbody": true, + "memeqbody": true, + "memcmp": true, + "memchr": true, +} + +func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { + type regVar struct { + global bool + index uint64 + } + + type varDecl struct { + count uint64 + typ valueType + } + + hasLocalSP := false + regVars := [MAXREG - MINREG]*regVar{ + REG_SP - MINREG: {true, 0}, + REG_CTXT - MINREG: {true, 1}, + REG_g - MINREG: {true, 2}, + REG_RET0 - MINREG: {true, 3}, + REG_RET1 - MINREG: {true, 4}, + REG_RET2 - MINREG: {true, 5}, + REG_RET3 - MINREG: {true, 6}, + REG_PAUSE - MINREG: {true, 7}, + } + var varDecls []*varDecl + useAssemblyRegMap := func() { + for i := int16(0); i < 16; i++ { + regVars[REG_R0+i-MINREG] = ®Var{false, uint64(i)} + } + } + + // Function starts with declaration of locals: numbers and types. + // Some functions use a special calling convention. + switch s.Name { + case "_rt0_wasm_js", "wasm_export_run", "wasm_export_resume", "wasm_export_getsp", "wasm_pc_f_loop", + "runtime.wasmDiv", "runtime.wasmTruncS", "runtime.wasmTruncU", "memeqbody": + varDecls = []*varDecl{} + useAssemblyRegMap() + case "memchr", "memcmp": + varDecls = []*varDecl{{count: 2, typ: i32}} + useAssemblyRegMap() + case "cmpbody": + varDecls = []*varDecl{{count: 2, typ: i64}} + useAssemblyRegMap() + case "runtime.gcWriteBarrier": + varDecls = []*varDecl{{count: 4, typ: i64}} + useAssemblyRegMap() + default: + // Normal calling convention: PC_B as WebAssembly parameter. First local variable is local SP cache. 
+ regVars[REG_PC_B-MINREG] = ®Var{false, 0} + hasLocalSP = true + + var regUsed [MAXREG - MINREG]bool + for p := s.Func().Text; p != nil; p = p.Link { + if p.From.Reg != 0 { + regUsed[p.From.Reg-MINREG] = true + } + if p.To.Reg != 0 { + regUsed[p.To.Reg-MINREG] = true + } + } + + regs := []int16{REG_SP} + for reg := int16(REG_R0); reg <= REG_F31; reg++ { + if regUsed[reg-MINREG] { + regs = append(regs, reg) + } + } + + var lastDecl *varDecl + for i, reg := range regs { + t := regType(reg) + if lastDecl == nil || lastDecl.typ != t { + lastDecl = &varDecl{ + count: 0, + typ: t, + } + varDecls = append(varDecls, lastDecl) + } + lastDecl.count++ + if reg != REG_SP { + regVars[reg-MINREG] = ®Var{false, 1 + uint64(i)} + } + } + } + + w := new(bytes.Buffer) + + writeUleb128(w, uint64(len(varDecls))) + for _, decl := range varDecls { + writeUleb128(w, decl.count) + w.WriteByte(byte(decl.typ)) + } + + if hasLocalSP { + // Copy SP from its global variable into a local variable. Accessing a local variable is more efficient. 
+ updateLocalSP(w) + } + + for p := s.Func().Text; p != nil; p = p.Link { + switch p.As { + case AGet: + if p.From.Type != obj.TYPE_REG { + panic("bad Get: argument is not a register") + } + reg := p.From.Reg + v := regVars[reg-MINREG] + if v == nil { + panic("bad Get: invalid register") + } + if reg == REG_SP && hasLocalSP { + writeOpcode(w, ALocalGet) + writeUleb128(w, 1) // local SP + continue + } + if v.global { + writeOpcode(w, AGlobalGet) + } else { + writeOpcode(w, ALocalGet) + } + writeUleb128(w, v.index) + continue + + case ASet: + if p.To.Type != obj.TYPE_REG { + panic("bad Set: argument is not a register") + } + reg := p.To.Reg + v := regVars[reg-MINREG] + if v == nil { + panic("bad Set: invalid register") + } + if reg == REG_SP && hasLocalSP { + writeOpcode(w, ALocalTee) + writeUleb128(w, 1) // local SP + } + if v.global { + writeOpcode(w, AGlobalSet) + } else { + if p.Link.As == AGet && p.Link.From.Reg == reg { + writeOpcode(w, ALocalTee) + p = p.Link + } else { + writeOpcode(w, ALocalSet) + } + } + writeUleb128(w, v.index) + continue + + case ATee: + if p.To.Type != obj.TYPE_REG { + panic("bad Tee: argument is not a register") + } + reg := p.To.Reg + v := regVars[reg-MINREG] + if v == nil { + panic("bad Tee: invalid register") + } + writeOpcode(w, ALocalTee) + writeUleb128(w, v.index) + continue + + case ANot: + writeOpcode(w, AI32Eqz) + continue + + case obj.AUNDEF: + writeOpcode(w, AUnreachable) + continue + + case obj.ANOP, obj.ATEXT, obj.AFUNCDATA, obj.APCDATA: + // ignore + continue + } + + writeOpcode(w, p.As) + + switch p.As { + case ABlock, ALoop, AIf: + if p.From.Offset != 0 { + // block type, rarely used, e.g. 
for code compiled with emscripten + w.WriteByte(0x80 - byte(p.From.Offset)) + continue + } + w.WriteByte(0x40) + + case ABr, ABrIf: + if p.To.Type != obj.TYPE_CONST { + panic("bad Br/BrIf") + } + writeUleb128(w, uint64(p.To.Offset)) + + case ABrTable: + idxs := p.To.Val.([]uint64) + writeUleb128(w, uint64(len(idxs)-1)) + for _, idx := range idxs { + writeUleb128(w, idx) + } + + case ACall: + switch p.To.Type { + case obj.TYPE_CONST: + writeUleb128(w, uint64(p.To.Offset)) + + case obj.TYPE_MEM: + if p.To.Name != obj.NAME_EXTERN && p.To.Name != obj.NAME_STATIC { + fmt.Println(p.To) + panic("bad name for Call") + } + r := obj.Addrel(s) + r.Siz = 1 // actually variable sized + r.Off = int32(w.Len()) + r.Type = objabi.R_CALL + if p.Mark&WasmImport != 0 { + r.Type = objabi.R_WASMIMPORT + } + r.Sym = p.To.Sym + if hasLocalSP { + // The stack may have moved, which changes SP. Update the local SP variable. + updateLocalSP(w) + } + + default: + panic("bad type for Call") + } + + case ACallIndirect: + writeUleb128(w, uint64(p.To.Offset)) + w.WriteByte(0x00) // reserved value + if hasLocalSP { + // The stack may have moved, which changes SP. Update the local SP variable. 
+ updateLocalSP(w) + } + + case AI32Const, AI64Const: + if p.From.Name == obj.NAME_EXTERN { + r := obj.Addrel(s) + r.Siz = 1 // actually variable sized + r.Off = int32(w.Len()) + r.Type = objabi.R_ADDR + r.Sym = p.From.Sym + r.Add = p.From.Offset + break + } + writeSleb128(w, p.From.Offset) + + case AF32Const: + b := make([]byte, 4) + binary.LittleEndian.PutUint32(b, math.Float32bits(float32(p.From.Val.(float64)))) + w.Write(b) + + case AF64Const: + b := make([]byte, 8) + binary.LittleEndian.PutUint64(b, math.Float64bits(p.From.Val.(float64))) + w.Write(b) + + case AI32Load, AI64Load, AF32Load, AF64Load, AI32Load8S, AI32Load8U, AI32Load16S, AI32Load16U, AI64Load8S, AI64Load8U, AI64Load16S, AI64Load16U, AI64Load32S, AI64Load32U: + if p.From.Offset < 0 { + panic("negative offset for *Load") + } + if p.From.Type != obj.TYPE_CONST { + panic("bad type for *Load") + } + if p.From.Offset > math.MaxUint32 { + ctxt.Diag("bad offset in %v", p) + } + writeUleb128(w, align(p.As)) + writeUleb128(w, uint64(p.From.Offset)) + + case AI32Store, AI64Store, AF32Store, AF64Store, AI32Store8, AI32Store16, AI64Store8, AI64Store16, AI64Store32: + if p.To.Offset < 0 { + panic("negative offset") + } + if p.From.Offset > math.MaxUint32 { + ctxt.Diag("bad offset in %v", p) + } + writeUleb128(w, align(p.As)) + writeUleb128(w, uint64(p.To.Offset)) + + case ACurrentMemory, AGrowMemory, AMemoryFill: + w.WriteByte(0x00) + + case AMemoryCopy: + w.WriteByte(0x00) + w.WriteByte(0x00) + + } + } + + w.WriteByte(0x0b) // end + + s.P = w.Bytes() +} + +func updateLocalSP(w *bytes.Buffer) { + writeOpcode(w, AGlobalGet) + writeUleb128(w, 0) // global SP + writeOpcode(w, ALocalSet) + writeUleb128(w, 1) // local SP +} + +func writeOpcode(w *bytes.Buffer, as obj.As) { + switch { + case as < AUnreachable: + panic(fmt.Sprintf("unexpected assembler op: %s", as)) + case as < AEnd: + w.WriteByte(byte(as - AUnreachable + 0x00)) + case as < ADrop: + w.WriteByte(byte(as - AEnd + 0x0B)) + case as < ALocalGet: + 
// writeUleb128 writes v to w in unsigned LEB128 form: seven payload bits per
// byte, least significant group first, with the high bit set on every byte
// except the last.
func writeUleb128(w io.ByteWriter, v uint64) {
	for v >= 0x80 {
		w.WriteByte(byte(v&0x7f) | 0x80)
		v >>= 7
	}
	w.WriteByte(byte(v))
}

// writeSleb128 writes v to w in signed LEB128 form. Encoding stops once the
// remaining value is pure sign extension of the group just produced, i.e.
// the rest is 0 with bit 6 clear, or -1 with bit 6 set.
func writeSleb128(w io.ByteWriter, v int64) {
	for {
		group := byte(v & 0x7f)
		negative := group&0x40 != 0
		v >>= 7
		if (v == 0 && !negative) || (v == -1 && negative) {
			w.WriteByte(group)
			return
		}
		w.WriteByte(group | 0x80)
	}
}
Lucent Technologies Inc. All rights reserved. +// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +package x86 + +import "cmd/internal/obj" + +const ( + REG_NONE = 0 +) + +const ( + REG_AL = obj.RBaseAMD64 + iota + REG_CL + REG_DL + REG_BL + REG_SPB + REG_BPB + REG_SIB + REG_DIB + REG_R8B + REG_R9B + REG_R10B + REG_R11B + REG_R12B + REG_R13B + REG_R14B + REG_R15B + + REG_AX + REG_CX + REG_DX + REG_BX + REG_SP + REG_BP + REG_SI + REG_DI + REG_R8 + REG_R9 + REG_R10 + REG_R11 + REG_R12 + REG_R13 + REG_R14 + REG_R15 + + REG_AH + REG_CH + REG_DH + REG_BH + + REG_F0 + REG_F1 + REG_F2 + REG_F3 + REG_F4 + REG_F5 + REG_F6 + REG_F7 + + REG_M0 + REG_M1 + REG_M2 + REG_M3 + REG_M4 + REG_M5 + REG_M6 + REG_M7 + + REG_K0 + REG_K1 + REG_K2 + REG_K3 + REG_K4 + REG_K5 + REG_K6 + REG_K7 + + REG_X0 + REG_X1 + REG_X2 + REG_X3 + REG_X4 + REG_X5 + REG_X6 + REG_X7 + REG_X8 + REG_X9 + REG_X10 + REG_X11 + REG_X12 + REG_X13 + REG_X14 + REG_X15 + REG_X16 + REG_X17 + REG_X18 + REG_X19 + REG_X20 + REG_X21 + REG_X22 + REG_X23 + REG_X24 + REG_X25 + REG_X26 + REG_X27 + REG_X28 + REG_X29 + REG_X30 + REG_X31 + + REG_Y0 + REG_Y1 + REG_Y2 + REG_Y3 + REG_Y4 + REG_Y5 + REG_Y6 + REG_Y7 + REG_Y8 + REG_Y9 + REG_Y10 + REG_Y11 + REG_Y12 + REG_Y13 + REG_Y14 + REG_Y15 + REG_Y16 + REG_Y17 + REG_Y18 + REG_Y19 + REG_Y20 + REG_Y21 + REG_Y22 + REG_Y23 + REG_Y24 + REG_Y25 + REG_Y26 + REG_Y27 + REG_Y28 + REG_Y29 + REG_Y30 + REG_Y31 + + REG_Z0 + REG_Z1 + REG_Z2 + REG_Z3 + REG_Z4 + REG_Z5 + REG_Z6 + REG_Z7 + REG_Z8 + REG_Z9 + REG_Z10 + REG_Z11 + REG_Z12 + REG_Z13 + REG_Z14 + REG_Z15 + REG_Z16 + REG_Z17 + REG_Z18 + REG_Z19 + REG_Z20 + REG_Z21 + REG_Z22 + REG_Z23 + REG_Z24 + REG_Z25 + REG_Z26 + REG_Z27 + REG_Z28 + REG_Z29 + REG_Z30 + REG_Z31 + + REG_CS + REG_SS + REG_DS + REG_ES + REG_FS + REG_GS + + REG_GDTR // global descriptor table register + REG_IDTR // interrupt descriptor table register + REG_LDTR // local descriptor table register + REG_MSW // machine status word + REG_TASK // task register + + REG_CR0 + REG_CR1 + REG_CR2 + REG_CR3 + REG_CR4 + REG_CR5 + REG_CR6 + REG_CR7 + REG_CR8 + REG_CR9 + REG_CR10 + 
// AMD64DWARFRegisters maps the assembler's amd64 register constants to their
// DWARF register numbers. Registers without an entry have no number assigned
// in this table.
//
// https://www.uclibc.org/docs/psABI-x86_64.pdf, figure 3.36
var AMD64DWARFRegisters = map[int16]int16{
	REG_AX:  0,
	REG_DX:  1,
	REG_CX:  2,
	REG_BX:  3,
	REG_SI:  4,
	REG_DI:  5,
	REG_BP:  6,
	REG_SP:  7,
	REG_R8:  8,
	REG_R9:  9,
	REG_R10: 10,
	REG_R11: 11,
	REG_R12: 12,
	REG_R13: 13,
	REG_R14: 14,
	REG_R15: 15,
	// 16 is "Return Address RA", whatever that is.
	// 17-24 vector registers (X/Y/Z).
	REG_X0: 17,
	REG_X1: 18,
	REG_X2: 19,
	REG_X3: 20,
	REG_X4: 21,
	REG_X5: 22,
	REG_X6: 23,
	REG_X7: 24,
	// 25-32 extended vector registers (X/Y/Z).
	REG_X8:  25,
	REG_X9:  26,
	REG_X10: 27,
	REG_X11: 28,
	REG_X12: 29,
	REG_X13: 30,
	REG_X14: 31,
	REG_X15: 32,
	// ST registers. %stN => FN.
	REG_F0: 33,
	REG_F1: 34,
	REG_F2: 35,
	REG_F3: 36,
	REG_F4: 37,
	REG_F5: 38,
	REG_F6: 39,
	REG_F7: 40,
	// MMX registers. %mmN => MN.
	REG_M0: 41,
	REG_M1: 42,
	REG_M2: 43,
	REG_M3: 44,
	REG_M4: 45,
	REG_M5: 46,
	REG_M6: 47,
	REG_M7: 48,
	// 49 is flags, which doesn't have a name.
	// (48 is already taken by M7 above and the table resumes at 50.)
	REG_ES: 50,
	REG_CS: 51,
	REG_SS: 52,
	REG_DS: 53,
	REG_FS: 54,
	REG_GS: 55,
	// 58 and 59 are {fs,gs}base, which don't have names.
	REG_TR:   62,
	REG_LDTR: 63,
	// 64-66 are mxcsr, fcw, fsw, which don't have names.

	// 67-82 upper vector registers (X/Y/Z).
	REG_X16: 67,
	REG_X17: 68,
	REG_X18: 69,
	REG_X19: 70,
	REG_X20: 71,
	REG_X21: 72,
	REG_X22: 73,
	REG_X23: 74,
	REG_X24: 75,
	REG_X25: 76,
	REG_X26: 77,
	REG_X27: 78,
	REG_X28: 79,
	REG_X29: 80,
	REG_X30: 81,
	REG_X31: 82,

	// 118-125 vector mask registers. %kN => KN.
	REG_K0: 118,
	REG_K1: 119,
	REG_K2: 120,
	REG_K3: 121,
	REG_K4: 122,
	REG_K5: 123,
	REG_K6: 124,
	REG_K7: 125,
}
+ +package x86 + +import "cmd/internal/obj" + +//go:generate go run ../stringer.go -i $GOFILE -o anames.go -p x86 + +const ( + AAAA = obj.ABaseAMD64 + obj.A_ARCHSPECIFIC + iota + AAAD + AAAM + AAAS + AADCB + AADCL + AADCQ + AADCW + AADCXL + AADCXQ + AADDB + AADDL + AADDPD + AADDPS + AADDQ + AADDSD + AADDSS + AADDSUBPD + AADDSUBPS + AADDW + AADJSP + AADOXL + AADOXQ + AAESDEC + AAESDECLAST + AAESENC + AAESENCLAST + AAESIMC + AAESKEYGENASSIST + AANDB + AANDL + AANDNL + AANDNPD + AANDNPS + AANDNQ + AANDPD + AANDPS + AANDQ + AANDW + AARPL + ABEXTRL + ABEXTRQ + ABLENDPD + ABLENDPS + ABLENDVPD + ABLENDVPS + ABLSIL + ABLSIQ + ABLSMSKL + ABLSMSKQ + ABLSRL + ABLSRQ + ABOUNDL + ABOUNDW + ABSFL + ABSFQ + ABSFW + ABSRL + ABSRQ + ABSRW + ABSWAPL + ABSWAPQ + ABTCL + ABTCQ + ABTCW + ABTL + ABTQ + ABTRL + ABTRQ + ABTRW + ABTSL + ABTSQ + ABTSW + ABTW + ABYTE + ABZHIL + ABZHIQ + ACBW + ACDQ + ACDQE + ACLAC + ACLC + ACLD + ACLDEMOTE + ACLFLUSH + ACLFLUSHOPT + ACLI + ACLTS + ACLWB + ACMC + ACMOVLCC + ACMOVLCS + ACMOVLEQ + ACMOVLGE + ACMOVLGT + ACMOVLHI + ACMOVLLE + ACMOVLLS + ACMOVLLT + ACMOVLMI + ACMOVLNE + ACMOVLOC + ACMOVLOS + ACMOVLPC + ACMOVLPL + ACMOVLPS + ACMOVQCC + ACMOVQCS + ACMOVQEQ + ACMOVQGE + ACMOVQGT + ACMOVQHI + ACMOVQLE + ACMOVQLS + ACMOVQLT + ACMOVQMI + ACMOVQNE + ACMOVQOC + ACMOVQOS + ACMOVQPC + ACMOVQPL + ACMOVQPS + ACMOVWCC + ACMOVWCS + ACMOVWEQ + ACMOVWGE + ACMOVWGT + ACMOVWHI + ACMOVWLE + ACMOVWLS + ACMOVWLT + ACMOVWMI + ACMOVWNE + ACMOVWOC + ACMOVWOS + ACMOVWPC + ACMOVWPL + ACMOVWPS + ACMPB + ACMPL + ACMPPD + ACMPPS + ACMPQ + ACMPSB + ACMPSD + ACMPSL + ACMPSQ + ACMPSS + ACMPSW + ACMPW + ACMPXCHG16B + ACMPXCHG8B + ACMPXCHGB + ACMPXCHGL + ACMPXCHGQ + ACMPXCHGW + ACOMISD + ACOMISS + ACPUID + ACQO + ACRC32B + ACRC32L + ACRC32Q + ACRC32W + ACVTPD2PL + ACVTPD2PS + ACVTPL2PD + ACVTPL2PS + ACVTPS2PD + ACVTPS2PL + ACVTSD2SL + ACVTSD2SQ + ACVTSD2SS + ACVTSL2SD + ACVTSL2SS + ACVTSQ2SD + ACVTSQ2SS + ACVTSS2SD + ACVTSS2SL + ACVTSS2SQ + ACVTTPD2PL + ACVTTPS2PL + ACVTTSD2SL + 
ACVTTSD2SQ + ACVTTSS2SL + ACVTTSS2SQ + ACWD + ACWDE + ADAA + ADAS + ADECB + ADECL + ADECQ + ADECW + ADIVB + ADIVL + ADIVPD + ADIVPS + ADIVQ + ADIVSD + ADIVSS + ADIVW + ADPPD + ADPPS + AEMMS + AENTER + AEXTRACTPS + AF2XM1 + AFABS + AFADDD + AFADDDP + AFADDF + AFADDL + AFADDW + AFBLD + AFBSTP + AFCHS + AFCLEX + AFCMOVB + AFCMOVBE + AFCMOVCC + AFCMOVCS + AFCMOVE + AFCMOVEQ + AFCMOVHI + AFCMOVLS + AFCMOVNB + AFCMOVNBE + AFCMOVNE + AFCMOVNU + AFCMOVU + AFCMOVUN + AFCOMD + AFCOMDP + AFCOMDPP + AFCOMF + AFCOMFP + AFCOMI + AFCOMIP + AFCOML + AFCOMLP + AFCOMW + AFCOMWP + AFCOS + AFDECSTP + AFDIVD + AFDIVDP + AFDIVF + AFDIVL + AFDIVRD + AFDIVRDP + AFDIVRF + AFDIVRL + AFDIVRW + AFDIVW + AFFREE + AFINCSTP + AFINIT + AFLD1 + AFLDCW + AFLDENV + AFLDL2E + AFLDL2T + AFLDLG2 + AFLDLN2 + AFLDPI + AFLDZ + AFMOVB + AFMOVBP + AFMOVD + AFMOVDP + AFMOVF + AFMOVFP + AFMOVL + AFMOVLP + AFMOVV + AFMOVVP + AFMOVW + AFMOVWP + AFMOVX + AFMOVXP + AFMULD + AFMULDP + AFMULF + AFMULL + AFMULW + AFNOP + AFPATAN + AFPREM + AFPREM1 + AFPTAN + AFRNDINT + AFRSTOR + AFSAVE + AFSCALE + AFSIN + AFSINCOS + AFSQRT + AFSTCW + AFSTENV + AFSTSW + AFSUBD + AFSUBDP + AFSUBF + AFSUBL + AFSUBRD + AFSUBRDP + AFSUBRF + AFSUBRL + AFSUBRW + AFSUBW + AFTST + AFUCOM + AFUCOMI + AFUCOMIP + AFUCOMP + AFUCOMPP + AFXAM + AFXCHD + AFXRSTOR + AFXRSTOR64 + AFXSAVE + AFXSAVE64 + AFXTRACT + AFYL2X + AFYL2XP1 + AHADDPD + AHADDPS + AHLT + AHSUBPD + AHSUBPS + AICEBP + AIDIVB + AIDIVL + AIDIVQ + AIDIVW + AIMUL3L + AIMUL3Q + AIMUL3W + AIMULB + AIMULL + AIMULQ + AIMULW + AINB + AINCB + AINCL + AINCQ + AINCW + AINL + AINSB + AINSERTPS + AINSL + AINSW + AINT + AINTO + AINVD + AINVLPG + AINVPCID + AINW + AIRETL + AIRETQ + AIRETW + AJCC // >= unsigned + AJCS // < unsigned + AJCXZL + AJCXZQ + AJCXZW + AJEQ // == (zero) + AJGE // >= signed + AJGT // > signed + AJHI // > unsigned + AJLE // <= signed + AJLS // <= unsigned + AJLT // < signed + AJMI // sign bit set (negative) + AJNE // != (nonzero) + AJOC // overflow clear + AJOS // overflow 
set + AJPC // parity clear + AJPL // sign bit clear (positive) + AJPS // parity set + AKADDB + AKADDD + AKADDQ + AKADDW + AKANDB + AKANDD + AKANDNB + AKANDND + AKANDNQ + AKANDNW + AKANDQ + AKANDW + AKMOVB + AKMOVD + AKMOVQ + AKMOVW + AKNOTB + AKNOTD + AKNOTQ + AKNOTW + AKORB + AKORD + AKORQ + AKORTESTB + AKORTESTD + AKORTESTQ + AKORTESTW + AKORW + AKSHIFTLB + AKSHIFTLD + AKSHIFTLQ + AKSHIFTLW + AKSHIFTRB + AKSHIFTRD + AKSHIFTRQ + AKSHIFTRW + AKTESTB + AKTESTD + AKTESTQ + AKTESTW + AKUNPCKBW + AKUNPCKDQ + AKUNPCKWD + AKXNORB + AKXNORD + AKXNORQ + AKXNORW + AKXORB + AKXORD + AKXORQ + AKXORW + ALAHF + ALARL + ALARQ + ALARW + ALDDQU + ALDMXCSR + ALEAL + ALEAQ + ALEAVEL + ALEAVEQ + ALEAVEW + ALEAW + ALFENCE + ALFSL + ALFSQ + ALFSW + ALGDT + ALGSL + ALGSQ + ALGSW + ALIDT + ALLDT + ALMSW + ALOCK + ALODSB + ALODSL + ALODSQ + ALODSW + ALONG + ALOOP + ALOOPEQ + ALOOPNE + ALSLL + ALSLQ + ALSLW + ALSSL + ALSSQ + ALSSW + ALTR + ALZCNTL + ALZCNTQ + ALZCNTW + AMASKMOVOU + AMASKMOVQ + AMAXPD + AMAXPS + AMAXSD + AMAXSS + AMFENCE + AMINPD + AMINPS + AMINSD + AMINSS + AMONITOR + AMOVAPD + AMOVAPS + AMOVB + AMOVBEL + AMOVBEQ + AMOVBEW + AMOVBLSX + AMOVBLZX + AMOVBQSX + AMOVBQZX + AMOVBWSX + AMOVBWZX + AMOVDDUP + AMOVHLPS + AMOVHPD + AMOVHPS + AMOVL + AMOVLHPS + AMOVLPD + AMOVLPS + AMOVLQSX + AMOVLQZX + AMOVMSKPD + AMOVMSKPS + AMOVNTDQA + AMOVNTIL + AMOVNTIQ + AMOVNTO + AMOVNTPD + AMOVNTPS + AMOVNTQ + AMOVO + AMOVOU + AMOVQ + AMOVQL + AMOVQOZX + AMOVSB + AMOVSD + AMOVSHDUP + AMOVSL + AMOVSLDUP + AMOVSQ + AMOVSS + AMOVSW + AMOVSWW + AMOVUPD + AMOVUPS + AMOVW + AMOVWLSX + AMOVWLZX + AMOVWQSX + AMOVWQZX + AMOVZWW + AMPSADBW + AMULB + AMULL + AMULPD + AMULPS + AMULQ + AMULSD + AMULSS + AMULW + AMULXL + AMULXQ + AMWAIT + ANEGB + ANEGL + ANEGQ + ANEGW + ANOPL + ANOPW + ANOTB + ANOTL + ANOTQ + ANOTW + AORB + AORL + AORPD + AORPS + AORQ + AORW + AOUTB + AOUTL + AOUTSB + AOUTSL + AOUTSW + AOUTW + APABSB + APABSD + APABSW + APACKSSLW + APACKSSWB + APACKUSDW + APACKUSWB + APADDB + APADDL + APADDQ 
+ APADDSB + APADDSW + APADDUSB + APADDUSW + APADDW + APALIGNR + APAND + APANDN + APAUSE + APAVGB + APAVGW + APBLENDVB + APBLENDW + APCLMULQDQ + APCMPEQB + APCMPEQL + APCMPEQQ + APCMPEQW + APCMPESTRI + APCMPESTRM + APCMPGTB + APCMPGTL + APCMPGTQ + APCMPGTW + APCMPISTRI + APCMPISTRM + APDEPL + APDEPQ + APEXTL + APEXTQ + APEXTRB + APEXTRD + APEXTRQ + APEXTRW + APHADDD + APHADDSW + APHADDW + APHMINPOSUW + APHSUBD + APHSUBSW + APHSUBW + APINSRB + APINSRD + APINSRQ + APINSRW + APMADDUBSW + APMADDWL + APMAXSB + APMAXSD + APMAXSW + APMAXUB + APMAXUD + APMAXUW + APMINSB + APMINSD + APMINSW + APMINUB + APMINUD + APMINUW + APMOVMSKB + APMOVSXBD + APMOVSXBQ + APMOVSXBW + APMOVSXDQ + APMOVSXWD + APMOVSXWQ + APMOVZXBD + APMOVZXBQ + APMOVZXBW + APMOVZXDQ + APMOVZXWD + APMOVZXWQ + APMULDQ + APMULHRSW + APMULHUW + APMULHW + APMULLD + APMULLW + APMULULQ + APOPAL + APOPAW + APOPCNTL + APOPCNTQ + APOPCNTW + APOPFL + APOPFQ + APOPFW + APOPL + APOPQ + APOPW + APOR + APREFETCHNTA + APREFETCHT0 + APREFETCHT1 + APREFETCHT2 + APSADBW + APSHUFB + APSHUFD + APSHUFHW + APSHUFL + APSHUFLW + APSHUFW + APSIGNB + APSIGND + APSIGNW + APSLLL + APSLLO + APSLLQ + APSLLW + APSRAL + APSRAW + APSRLL + APSRLO + APSRLQ + APSRLW + APSUBB + APSUBL + APSUBQ + APSUBSB + APSUBSW + APSUBUSB + APSUBUSW + APSUBW + APTEST + APUNPCKHBW + APUNPCKHLQ + APUNPCKHQDQ + APUNPCKHWL + APUNPCKLBW + APUNPCKLLQ + APUNPCKLQDQ + APUNPCKLWL + APUSHAL + APUSHAW + APUSHFL + APUSHFQ + APUSHFW + APUSHL + APUSHQ + APUSHW + APXOR + AQUAD + ARCLB + ARCLL + ARCLQ + ARCLW + ARCPPS + ARCPSS + ARCRB + ARCRL + ARCRQ + ARCRW + ARDFSBASEL + ARDFSBASEQ + ARDGSBASEL + ARDGSBASEQ + ARDMSR + ARDPKRU + ARDPMC + ARDRANDL + ARDRANDQ + ARDRANDW + ARDSEEDL + ARDSEEDQ + ARDSEEDW + ARDTSC + ARDTSCP + AREP + AREPN + ARETFL + ARETFQ + ARETFW + AROLB + AROLL + AROLQ + AROLW + ARORB + ARORL + ARORQ + ARORW + ARORXL + ARORXQ + AROUNDPD + AROUNDPS + AROUNDSD + AROUNDSS + ARSM + ARSQRTPS + ARSQRTSS + ASAHF + ASALB + ASALL + ASALQ + ASALW + ASARB + ASARL + ASARQ 
+ ASARW + ASARXL + ASARXQ + ASBBB + ASBBL + ASBBQ + ASBBW + ASCASB + ASCASL + ASCASQ + ASCASW + ASETCC + ASETCS + ASETEQ + ASETGE + ASETGT + ASETHI + ASETLE + ASETLS + ASETLT + ASETMI + ASETNE + ASETOC + ASETOS + ASETPC + ASETPL + ASETPS + ASFENCE + ASGDT + ASHA1MSG1 + ASHA1MSG2 + ASHA1NEXTE + ASHA1RNDS4 + ASHA256MSG1 + ASHA256MSG2 + ASHA256RNDS2 + ASHLB + ASHLL + ASHLQ + ASHLW + ASHLXL + ASHLXQ + ASHRB + ASHRL + ASHRQ + ASHRW + ASHRXL + ASHRXQ + ASHUFPD + ASHUFPS + ASIDT + ASLDTL + ASLDTQ + ASLDTW + ASMSWL + ASMSWQ + ASMSWW + ASQRTPD + ASQRTPS + ASQRTSD + ASQRTSS + ASTAC + ASTC + ASTD + ASTI + ASTMXCSR + ASTOSB + ASTOSL + ASTOSQ + ASTOSW + ASTRL + ASTRQ + ASTRW + ASUBB + ASUBL + ASUBPD + ASUBPS + ASUBQ + ASUBSD + ASUBSS + ASUBW + ASWAPGS + ASYSCALL + ASYSENTER + ASYSENTER64 + ASYSEXIT + ASYSEXIT64 + ASYSRET + ATESTB + ATESTL + ATESTQ + ATESTW + ATPAUSE + ATZCNTL + ATZCNTQ + ATZCNTW + AUCOMISD + AUCOMISS + AUD1 + AUD2 + AUMWAIT + AUNPCKHPD + AUNPCKHPS + AUNPCKLPD + AUNPCKLPS + AUMONITOR + AV4FMADDPS + AV4FMADDSS + AV4FNMADDPS + AV4FNMADDSS + AVADDPD + AVADDPS + AVADDSD + AVADDSS + AVADDSUBPD + AVADDSUBPS + AVAESDEC + AVAESDECLAST + AVAESENC + AVAESENCLAST + AVAESIMC + AVAESKEYGENASSIST + AVALIGND + AVALIGNQ + AVANDNPD + AVANDNPS + AVANDPD + AVANDPS + AVBLENDMPD + AVBLENDMPS + AVBLENDPD + AVBLENDPS + AVBLENDVPD + AVBLENDVPS + AVBROADCASTF128 + AVBROADCASTF32X2 + AVBROADCASTF32X4 + AVBROADCASTF32X8 + AVBROADCASTF64X2 + AVBROADCASTF64X4 + AVBROADCASTI128 + AVBROADCASTI32X2 + AVBROADCASTI32X4 + AVBROADCASTI32X8 + AVBROADCASTI64X2 + AVBROADCASTI64X4 + AVBROADCASTSD + AVBROADCASTSS + AVCMPPD + AVCMPPS + AVCMPSD + AVCMPSS + AVCOMISD + AVCOMISS + AVCOMPRESSPD + AVCOMPRESSPS + AVCVTDQ2PD + AVCVTDQ2PS + AVCVTPD2DQ + AVCVTPD2DQX + AVCVTPD2DQY + AVCVTPD2PS + AVCVTPD2PSX + AVCVTPD2PSY + AVCVTPD2QQ + AVCVTPD2UDQ + AVCVTPD2UDQX + AVCVTPD2UDQY + AVCVTPD2UQQ + AVCVTPH2PS + AVCVTPS2DQ + AVCVTPS2PD + AVCVTPS2PH + AVCVTPS2QQ + AVCVTPS2UDQ + AVCVTPS2UQQ + AVCVTQQ2PD + AVCVTQQ2PS + 
AVCVTQQ2PSX + AVCVTQQ2PSY + AVCVTSD2SI + AVCVTSD2SIQ + AVCVTSD2SS + AVCVTSD2USI + AVCVTSD2USIL + AVCVTSD2USIQ + AVCVTSI2SDL + AVCVTSI2SDQ + AVCVTSI2SSL + AVCVTSI2SSQ + AVCVTSS2SD + AVCVTSS2SI + AVCVTSS2SIQ + AVCVTSS2USI + AVCVTSS2USIL + AVCVTSS2USIQ + AVCVTTPD2DQ + AVCVTTPD2DQX + AVCVTTPD2DQY + AVCVTTPD2QQ + AVCVTTPD2UDQ + AVCVTTPD2UDQX + AVCVTTPD2UDQY + AVCVTTPD2UQQ + AVCVTTPS2DQ + AVCVTTPS2QQ + AVCVTTPS2UDQ + AVCVTTPS2UQQ + AVCVTTSD2SI + AVCVTTSD2SIQ + AVCVTTSD2USI + AVCVTTSD2USIL + AVCVTTSD2USIQ + AVCVTTSS2SI + AVCVTTSS2SIQ + AVCVTTSS2USI + AVCVTTSS2USIL + AVCVTTSS2USIQ + AVCVTUDQ2PD + AVCVTUDQ2PS + AVCVTUQQ2PD + AVCVTUQQ2PS + AVCVTUQQ2PSX + AVCVTUQQ2PSY + AVCVTUSI2SD + AVCVTUSI2SDL + AVCVTUSI2SDQ + AVCVTUSI2SS + AVCVTUSI2SSL + AVCVTUSI2SSQ + AVDBPSADBW + AVDIVPD + AVDIVPS + AVDIVSD + AVDIVSS + AVDPPD + AVDPPS + AVERR + AVERW + AVEXP2PD + AVEXP2PS + AVEXPANDPD + AVEXPANDPS + AVEXTRACTF128 + AVEXTRACTF32X4 + AVEXTRACTF32X8 + AVEXTRACTF64X2 + AVEXTRACTF64X4 + AVEXTRACTI128 + AVEXTRACTI32X4 + AVEXTRACTI32X8 + AVEXTRACTI64X2 + AVEXTRACTI64X4 + AVEXTRACTPS + AVFIXUPIMMPD + AVFIXUPIMMPS + AVFIXUPIMMSD + AVFIXUPIMMSS + AVFMADD132PD + AVFMADD132PS + AVFMADD132SD + AVFMADD132SS + AVFMADD213PD + AVFMADD213PS + AVFMADD213SD + AVFMADD213SS + AVFMADD231PD + AVFMADD231PS + AVFMADD231SD + AVFMADD231SS + AVFMADDSUB132PD + AVFMADDSUB132PS + AVFMADDSUB213PD + AVFMADDSUB213PS + AVFMADDSUB231PD + AVFMADDSUB231PS + AVFMSUB132PD + AVFMSUB132PS + AVFMSUB132SD + AVFMSUB132SS + AVFMSUB213PD + AVFMSUB213PS + AVFMSUB213SD + AVFMSUB213SS + AVFMSUB231PD + AVFMSUB231PS + AVFMSUB231SD + AVFMSUB231SS + AVFMSUBADD132PD + AVFMSUBADD132PS + AVFMSUBADD213PD + AVFMSUBADD213PS + AVFMSUBADD231PD + AVFMSUBADD231PS + AVFNMADD132PD + AVFNMADD132PS + AVFNMADD132SD + AVFNMADD132SS + AVFNMADD213PD + AVFNMADD213PS + AVFNMADD213SD + AVFNMADD213SS + AVFNMADD231PD + AVFNMADD231PS + AVFNMADD231SD + AVFNMADD231SS + AVFNMSUB132PD + AVFNMSUB132PS + AVFNMSUB132SD + AVFNMSUB132SS + AVFNMSUB213PD + AVFNMSUB213PS + 
AVFNMSUB213SD + AVFNMSUB213SS + AVFNMSUB231PD + AVFNMSUB231PS + AVFNMSUB231SD + AVFNMSUB231SS + AVFPCLASSPD + AVFPCLASSPDX + AVFPCLASSPDY + AVFPCLASSPDZ + AVFPCLASSPS + AVFPCLASSPSX + AVFPCLASSPSY + AVFPCLASSPSZ + AVFPCLASSSD + AVFPCLASSSS + AVGATHERDPD + AVGATHERDPS + AVGATHERPF0DPD + AVGATHERPF0DPS + AVGATHERPF0QPD + AVGATHERPF0QPS + AVGATHERPF1DPD + AVGATHERPF1DPS + AVGATHERPF1QPD + AVGATHERPF1QPS + AVGATHERQPD + AVGATHERQPS + AVGETEXPPD + AVGETEXPPS + AVGETEXPSD + AVGETEXPSS + AVGETMANTPD + AVGETMANTPS + AVGETMANTSD + AVGETMANTSS + AVGF2P8AFFINEINVQB + AVGF2P8AFFINEQB + AVGF2P8MULB + AVHADDPD + AVHADDPS + AVHSUBPD + AVHSUBPS + AVINSERTF128 + AVINSERTF32X4 + AVINSERTF32X8 + AVINSERTF64X2 + AVINSERTF64X4 + AVINSERTI128 + AVINSERTI32X4 + AVINSERTI32X8 + AVINSERTI64X2 + AVINSERTI64X4 + AVINSERTPS + AVLDDQU + AVLDMXCSR + AVMASKMOVDQU + AVMASKMOVPD + AVMASKMOVPS + AVMAXPD + AVMAXPS + AVMAXSD + AVMAXSS + AVMINPD + AVMINPS + AVMINSD + AVMINSS + AVMOVAPD + AVMOVAPS + AVMOVD + AVMOVDDUP + AVMOVDQA + AVMOVDQA32 + AVMOVDQA64 + AVMOVDQU + AVMOVDQU16 + AVMOVDQU32 + AVMOVDQU64 + AVMOVDQU8 + AVMOVHLPS + AVMOVHPD + AVMOVHPS + AVMOVLHPS + AVMOVLPD + AVMOVLPS + AVMOVMSKPD + AVMOVMSKPS + AVMOVNTDQ + AVMOVNTDQA + AVMOVNTPD + AVMOVNTPS + AVMOVQ + AVMOVSD + AVMOVSHDUP + AVMOVSLDUP + AVMOVSS + AVMOVUPD + AVMOVUPS + AVMPSADBW + AVMULPD + AVMULPS + AVMULSD + AVMULSS + AVORPD + AVORPS + AVP4DPWSSD + AVP4DPWSSDS + AVPABSB + AVPABSD + AVPABSQ + AVPABSW + AVPACKSSDW + AVPACKSSWB + AVPACKUSDW + AVPACKUSWB + AVPADDB + AVPADDD + AVPADDQ + AVPADDSB + AVPADDSW + AVPADDUSB + AVPADDUSW + AVPADDW + AVPALIGNR + AVPAND + AVPANDD + AVPANDN + AVPANDND + AVPANDNQ + AVPANDQ + AVPAVGB + AVPAVGW + AVPBLENDD + AVPBLENDMB + AVPBLENDMD + AVPBLENDMQ + AVPBLENDMW + AVPBLENDVB + AVPBLENDW + AVPBROADCASTB + AVPBROADCASTD + AVPBROADCASTMB2Q + AVPBROADCASTMW2D + AVPBROADCASTQ + AVPBROADCASTW + AVPCLMULQDQ + AVPCMPB + AVPCMPD + AVPCMPEQB + AVPCMPEQD + AVPCMPEQQ + AVPCMPEQW + AVPCMPESTRI + AVPCMPESTRM + AVPCMPGTB + 
AVPCMPGTD + AVPCMPGTQ + AVPCMPGTW + AVPCMPISTRI + AVPCMPISTRM + AVPCMPQ + AVPCMPUB + AVPCMPUD + AVPCMPUQ + AVPCMPUW + AVPCMPW + AVPCOMPRESSB + AVPCOMPRESSD + AVPCOMPRESSQ + AVPCOMPRESSW + AVPCONFLICTD + AVPCONFLICTQ + AVPDPBUSD + AVPDPBUSDS + AVPDPWSSD + AVPDPWSSDS + AVPERM2F128 + AVPERM2I128 + AVPERMB + AVPERMD + AVPERMI2B + AVPERMI2D + AVPERMI2PD + AVPERMI2PS + AVPERMI2Q + AVPERMI2W + AVPERMILPD + AVPERMILPS + AVPERMPD + AVPERMPS + AVPERMQ + AVPERMT2B + AVPERMT2D + AVPERMT2PD + AVPERMT2PS + AVPERMT2Q + AVPERMT2W + AVPERMW + AVPEXPANDB + AVPEXPANDD + AVPEXPANDQ + AVPEXPANDW + AVPEXTRB + AVPEXTRD + AVPEXTRQ + AVPEXTRW + AVPGATHERDD + AVPGATHERDQ + AVPGATHERQD + AVPGATHERQQ + AVPHADDD + AVPHADDSW + AVPHADDW + AVPHMINPOSUW + AVPHSUBD + AVPHSUBSW + AVPHSUBW + AVPINSRB + AVPINSRD + AVPINSRQ + AVPINSRW + AVPLZCNTD + AVPLZCNTQ + AVPMADD52HUQ + AVPMADD52LUQ + AVPMADDUBSW + AVPMADDWD + AVPMASKMOVD + AVPMASKMOVQ + AVPMAXSB + AVPMAXSD + AVPMAXSQ + AVPMAXSW + AVPMAXUB + AVPMAXUD + AVPMAXUQ + AVPMAXUW + AVPMINSB + AVPMINSD + AVPMINSQ + AVPMINSW + AVPMINUB + AVPMINUD + AVPMINUQ + AVPMINUW + AVPMOVB2M + AVPMOVD2M + AVPMOVDB + AVPMOVDW + AVPMOVM2B + AVPMOVM2D + AVPMOVM2Q + AVPMOVM2W + AVPMOVMSKB + AVPMOVQ2M + AVPMOVQB + AVPMOVQD + AVPMOVQW + AVPMOVSDB + AVPMOVSDW + AVPMOVSQB + AVPMOVSQD + AVPMOVSQW + AVPMOVSWB + AVPMOVSXBD + AVPMOVSXBQ + AVPMOVSXBW + AVPMOVSXDQ + AVPMOVSXWD + AVPMOVSXWQ + AVPMOVUSDB + AVPMOVUSDW + AVPMOVUSQB + AVPMOVUSQD + AVPMOVUSQW + AVPMOVUSWB + AVPMOVW2M + AVPMOVWB + AVPMOVZXBD + AVPMOVZXBQ + AVPMOVZXBW + AVPMOVZXDQ + AVPMOVZXWD + AVPMOVZXWQ + AVPMULDQ + AVPMULHRSW + AVPMULHUW + AVPMULHW + AVPMULLD + AVPMULLQ + AVPMULLW + AVPMULTISHIFTQB + AVPMULUDQ + AVPOPCNTB + AVPOPCNTD + AVPOPCNTQ + AVPOPCNTW + AVPOR + AVPORD + AVPORQ + AVPROLD + AVPROLQ + AVPROLVD + AVPROLVQ + AVPRORD + AVPRORQ + AVPRORVD + AVPRORVQ + AVPSADBW + AVPSCATTERDD + AVPSCATTERDQ + AVPSCATTERQD + AVPSCATTERQQ + AVPSHLDD + AVPSHLDQ + AVPSHLDVD + AVPSHLDVQ + AVPSHLDVW + AVPSHLDW + AVPSHRDD + 
AVPSHRDQ + AVPSHRDVD + AVPSHRDVQ + AVPSHRDVW + AVPSHRDW + AVPSHUFB + AVPSHUFBITQMB + AVPSHUFD + AVPSHUFHW + AVPSHUFLW + AVPSIGNB + AVPSIGND + AVPSIGNW + AVPSLLD + AVPSLLDQ + AVPSLLQ + AVPSLLVD + AVPSLLVQ + AVPSLLVW + AVPSLLW + AVPSRAD + AVPSRAQ + AVPSRAVD + AVPSRAVQ + AVPSRAVW + AVPSRAW + AVPSRLD + AVPSRLDQ + AVPSRLQ + AVPSRLVD + AVPSRLVQ + AVPSRLVW + AVPSRLW + AVPSUBB + AVPSUBD + AVPSUBQ + AVPSUBSB + AVPSUBSW + AVPSUBUSB + AVPSUBUSW + AVPSUBW + AVPTERNLOGD + AVPTERNLOGQ + AVPTEST + AVPTESTMB + AVPTESTMD + AVPTESTMQ + AVPTESTMW + AVPTESTNMB + AVPTESTNMD + AVPTESTNMQ + AVPTESTNMW + AVPUNPCKHBW + AVPUNPCKHDQ + AVPUNPCKHQDQ + AVPUNPCKHWD + AVPUNPCKLBW + AVPUNPCKLDQ + AVPUNPCKLQDQ + AVPUNPCKLWD + AVPXOR + AVPXORD + AVPXORQ + AVRANGEPD + AVRANGEPS + AVRANGESD + AVRANGESS + AVRCP14PD + AVRCP14PS + AVRCP14SD + AVRCP14SS + AVRCP28PD + AVRCP28PS + AVRCP28SD + AVRCP28SS + AVRCPPS + AVRCPSS + AVREDUCEPD + AVREDUCEPS + AVREDUCESD + AVREDUCESS + AVRNDSCALEPD + AVRNDSCALEPS + AVRNDSCALESD + AVRNDSCALESS + AVROUNDPD + AVROUNDPS + AVROUNDSD + AVROUNDSS + AVRSQRT14PD + AVRSQRT14PS + AVRSQRT14SD + AVRSQRT14SS + AVRSQRT28PD + AVRSQRT28PS + AVRSQRT28SD + AVRSQRT28SS + AVRSQRTPS + AVRSQRTSS + AVSCALEFPD + AVSCALEFPS + AVSCALEFSD + AVSCALEFSS + AVSCATTERDPD + AVSCATTERDPS + AVSCATTERPF0DPD + AVSCATTERPF0DPS + AVSCATTERPF0QPD + AVSCATTERPF0QPS + AVSCATTERPF1DPD + AVSCATTERPF1DPS + AVSCATTERPF1QPD + AVSCATTERPF1QPS + AVSCATTERQPD + AVSCATTERQPS + AVSHUFF32X4 + AVSHUFF64X2 + AVSHUFI32X4 + AVSHUFI64X2 + AVSHUFPD + AVSHUFPS + AVSQRTPD + AVSQRTPS + AVSQRTSD + AVSQRTSS + AVSTMXCSR + AVSUBPD + AVSUBPS + AVSUBSD + AVSUBSS + AVTESTPD + AVTESTPS + AVUCOMISD + AVUCOMISS + AVUNPCKHPD + AVUNPCKHPS + AVUNPCKLPD + AVUNPCKLPS + AVXORPD + AVXORPS + AVZEROALL + AVZEROUPPER + AWAIT + AWBINVD + AWORD + AWRFSBASEL + AWRFSBASEQ + AWRGSBASEL + AWRGSBASEQ + AWRMSR + AWRPKRU + AXABORT + AXACQUIRE + AXADDB + AXADDL + AXADDQ + AXADDW + AXBEGIN + AXCHGB + AXCHGL + AXCHGQ + AXCHGW + AXEND + AXGETBV + AXLAT + AXORB + 
AXORL + AXORPD + AXORPS + AXORQ + AXORW + AXRELEASE + AXRSTOR + AXRSTOR64 + AXRSTORS + AXRSTORS64 + AXSAVE + AXSAVE64 + AXSAVEC + AXSAVEC64 + AXSAVEOPT + AXSAVEOPT64 + AXSAVES + AXSAVES64 + AXSETBV + AXTEST + ALAST +) diff --git a/src/cmd/internal/obj/x86/anames.go b/src/cmd/internal/obj/x86/anames.go new file mode 100644 index 0000000..7869e36 --- /dev/null +++ b/src/cmd/internal/obj/x86/anames.go @@ -0,0 +1,1607 @@ +// Code generated by stringer -i aenum.go -o anames.go -p x86; DO NOT EDIT. + +package x86 + +import "cmd/internal/obj" + +var Anames = []string{ + obj.A_ARCHSPECIFIC: "AAA", + "AAD", + "AAM", + "AAS", + "ADCB", + "ADCL", + "ADCQ", + "ADCW", + "ADCXL", + "ADCXQ", + "ADDB", + "ADDL", + "ADDPD", + "ADDPS", + "ADDQ", + "ADDSD", + "ADDSS", + "ADDSUBPD", + "ADDSUBPS", + "ADDW", + "ADJSP", + "ADOXL", + "ADOXQ", + "AESDEC", + "AESDECLAST", + "AESENC", + "AESENCLAST", + "AESIMC", + "AESKEYGENASSIST", + "ANDB", + "ANDL", + "ANDNL", + "ANDNPD", + "ANDNPS", + "ANDNQ", + "ANDPD", + "ANDPS", + "ANDQ", + "ANDW", + "ARPL", + "BEXTRL", + "BEXTRQ", + "BLENDPD", + "BLENDPS", + "BLENDVPD", + "BLENDVPS", + "BLSIL", + "BLSIQ", + "BLSMSKL", + "BLSMSKQ", + "BLSRL", + "BLSRQ", + "BOUNDL", + "BOUNDW", + "BSFL", + "BSFQ", + "BSFW", + "BSRL", + "BSRQ", + "BSRW", + "BSWAPL", + "BSWAPQ", + "BTCL", + "BTCQ", + "BTCW", + "BTL", + "BTQ", + "BTRL", + "BTRQ", + "BTRW", + "BTSL", + "BTSQ", + "BTSW", + "BTW", + "BYTE", + "BZHIL", + "BZHIQ", + "CBW", + "CDQ", + "CDQE", + "CLAC", + "CLC", + "CLD", + "CLDEMOTE", + "CLFLUSH", + "CLFLUSHOPT", + "CLI", + "CLTS", + "CLWB", + "CMC", + "CMOVLCC", + "CMOVLCS", + "CMOVLEQ", + "CMOVLGE", + "CMOVLGT", + "CMOVLHI", + "CMOVLLE", + "CMOVLLS", + "CMOVLLT", + "CMOVLMI", + "CMOVLNE", + "CMOVLOC", + "CMOVLOS", + "CMOVLPC", + "CMOVLPL", + "CMOVLPS", + "CMOVQCC", + "CMOVQCS", + "CMOVQEQ", + "CMOVQGE", + "CMOVQGT", + "CMOVQHI", + "CMOVQLE", + "CMOVQLS", + "CMOVQLT", + "CMOVQMI", + "CMOVQNE", + "CMOVQOC", + "CMOVQOS", + "CMOVQPC", + "CMOVQPL", + "CMOVQPS", + 
"CMOVWCC", + "CMOVWCS", + "CMOVWEQ", + "CMOVWGE", + "CMOVWGT", + "CMOVWHI", + "CMOVWLE", + "CMOVWLS", + "CMOVWLT", + "CMOVWMI", + "CMOVWNE", + "CMOVWOC", + "CMOVWOS", + "CMOVWPC", + "CMOVWPL", + "CMOVWPS", + "CMPB", + "CMPL", + "CMPPD", + "CMPPS", + "CMPQ", + "CMPSB", + "CMPSD", + "CMPSL", + "CMPSQ", + "CMPSS", + "CMPSW", + "CMPW", + "CMPXCHG16B", + "CMPXCHG8B", + "CMPXCHGB", + "CMPXCHGL", + "CMPXCHGQ", + "CMPXCHGW", + "COMISD", + "COMISS", + "CPUID", + "CQO", + "CRC32B", + "CRC32L", + "CRC32Q", + "CRC32W", + "CVTPD2PL", + "CVTPD2PS", + "CVTPL2PD", + "CVTPL2PS", + "CVTPS2PD", + "CVTPS2PL", + "CVTSD2SL", + "CVTSD2SQ", + "CVTSD2SS", + "CVTSL2SD", + "CVTSL2SS", + "CVTSQ2SD", + "CVTSQ2SS", + "CVTSS2SD", + "CVTSS2SL", + "CVTSS2SQ", + "CVTTPD2PL", + "CVTTPS2PL", + "CVTTSD2SL", + "CVTTSD2SQ", + "CVTTSS2SL", + "CVTTSS2SQ", + "CWD", + "CWDE", + "DAA", + "DAS", + "DECB", + "DECL", + "DECQ", + "DECW", + "DIVB", + "DIVL", + "DIVPD", + "DIVPS", + "DIVQ", + "DIVSD", + "DIVSS", + "DIVW", + "DPPD", + "DPPS", + "EMMS", + "ENTER", + "EXTRACTPS", + "F2XM1", + "FABS", + "FADDD", + "FADDDP", + "FADDF", + "FADDL", + "FADDW", + "FBLD", + "FBSTP", + "FCHS", + "FCLEX", + "FCMOVB", + "FCMOVBE", + "FCMOVCC", + "FCMOVCS", + "FCMOVE", + "FCMOVEQ", + "FCMOVHI", + "FCMOVLS", + "FCMOVNB", + "FCMOVNBE", + "FCMOVNE", + "FCMOVNU", + "FCMOVU", + "FCMOVUN", + "FCOMD", + "FCOMDP", + "FCOMDPP", + "FCOMF", + "FCOMFP", + "FCOMI", + "FCOMIP", + "FCOML", + "FCOMLP", + "FCOMW", + "FCOMWP", + "FCOS", + "FDECSTP", + "FDIVD", + "FDIVDP", + "FDIVF", + "FDIVL", + "FDIVRD", + "FDIVRDP", + "FDIVRF", + "FDIVRL", + "FDIVRW", + "FDIVW", + "FFREE", + "FINCSTP", + "FINIT", + "FLD1", + "FLDCW", + "FLDENV", + "FLDL2E", + "FLDL2T", + "FLDLG2", + "FLDLN2", + "FLDPI", + "FLDZ", + "FMOVB", + "FMOVBP", + "FMOVD", + "FMOVDP", + "FMOVF", + "FMOVFP", + "FMOVL", + "FMOVLP", + "FMOVV", + "FMOVVP", + "FMOVW", + "FMOVWP", + "FMOVX", + "FMOVXP", + "FMULD", + "FMULDP", + "FMULF", + "FMULL", + "FMULW", + "FNOP", + "FPATAN", + "FPREM", + 
"FPREM1", + "FPTAN", + "FRNDINT", + "FRSTOR", + "FSAVE", + "FSCALE", + "FSIN", + "FSINCOS", + "FSQRT", + "FSTCW", + "FSTENV", + "FSTSW", + "FSUBD", + "FSUBDP", + "FSUBF", + "FSUBL", + "FSUBRD", + "FSUBRDP", + "FSUBRF", + "FSUBRL", + "FSUBRW", + "FSUBW", + "FTST", + "FUCOM", + "FUCOMI", + "FUCOMIP", + "FUCOMP", + "FUCOMPP", + "FXAM", + "FXCHD", + "FXRSTOR", + "FXRSTOR64", + "FXSAVE", + "FXSAVE64", + "FXTRACT", + "FYL2X", + "FYL2XP1", + "HADDPD", + "HADDPS", + "HLT", + "HSUBPD", + "HSUBPS", + "ICEBP", + "IDIVB", + "IDIVL", + "IDIVQ", + "IDIVW", + "IMUL3L", + "IMUL3Q", + "IMUL3W", + "IMULB", + "IMULL", + "IMULQ", + "IMULW", + "INB", + "INCB", + "INCL", + "INCQ", + "INCW", + "INL", + "INSB", + "INSERTPS", + "INSL", + "INSW", + "INT", + "INTO", + "INVD", + "INVLPG", + "INVPCID", + "INW", + "IRETL", + "IRETQ", + "IRETW", + "JCC", + "JCS", + "JCXZL", + "JCXZQ", + "JCXZW", + "JEQ", + "JGE", + "JGT", + "JHI", + "JLE", + "JLS", + "JLT", + "JMI", + "JNE", + "JOC", + "JOS", + "JPC", + "JPL", + "JPS", + "KADDB", + "KADDD", + "KADDQ", + "KADDW", + "KANDB", + "KANDD", + "KANDNB", + "KANDND", + "KANDNQ", + "KANDNW", + "KANDQ", + "KANDW", + "KMOVB", + "KMOVD", + "KMOVQ", + "KMOVW", + "KNOTB", + "KNOTD", + "KNOTQ", + "KNOTW", + "KORB", + "KORD", + "KORQ", + "KORTESTB", + "KORTESTD", + "KORTESTQ", + "KORTESTW", + "KORW", + "KSHIFTLB", + "KSHIFTLD", + "KSHIFTLQ", + "KSHIFTLW", + "KSHIFTRB", + "KSHIFTRD", + "KSHIFTRQ", + "KSHIFTRW", + "KTESTB", + "KTESTD", + "KTESTQ", + "KTESTW", + "KUNPCKBW", + "KUNPCKDQ", + "KUNPCKWD", + "KXNORB", + "KXNORD", + "KXNORQ", + "KXNORW", + "KXORB", + "KXORD", + "KXORQ", + "KXORW", + "LAHF", + "LARL", + "LARQ", + "LARW", + "LDDQU", + "LDMXCSR", + "LEAL", + "LEAQ", + "LEAVEL", + "LEAVEQ", + "LEAVEW", + "LEAW", + "LFENCE", + "LFSL", + "LFSQ", + "LFSW", + "LGDT", + "LGSL", + "LGSQ", + "LGSW", + "LIDT", + "LLDT", + "LMSW", + "LOCK", + "LODSB", + "LODSL", + "LODSQ", + "LODSW", + "LONG", + "LOOP", + "LOOPEQ", + "LOOPNE", + "LSLL", + "LSLQ", + "LSLW", + "LSSL", + 
"LSSQ", + "LSSW", + "LTR", + "LZCNTL", + "LZCNTQ", + "LZCNTW", + "MASKMOVOU", + "MASKMOVQ", + "MAXPD", + "MAXPS", + "MAXSD", + "MAXSS", + "MFENCE", + "MINPD", + "MINPS", + "MINSD", + "MINSS", + "MONITOR", + "MOVAPD", + "MOVAPS", + "MOVB", + "MOVBEL", + "MOVBEQ", + "MOVBEW", + "MOVBLSX", + "MOVBLZX", + "MOVBQSX", + "MOVBQZX", + "MOVBWSX", + "MOVBWZX", + "MOVDDUP", + "MOVHLPS", + "MOVHPD", + "MOVHPS", + "MOVL", + "MOVLHPS", + "MOVLPD", + "MOVLPS", + "MOVLQSX", + "MOVLQZX", + "MOVMSKPD", + "MOVMSKPS", + "MOVNTDQA", + "MOVNTIL", + "MOVNTIQ", + "MOVNTO", + "MOVNTPD", + "MOVNTPS", + "MOVNTQ", + "MOVO", + "MOVOU", + "MOVQ", + "MOVQL", + "MOVQOZX", + "MOVSB", + "MOVSD", + "MOVSHDUP", + "MOVSL", + "MOVSLDUP", + "MOVSQ", + "MOVSS", + "MOVSW", + "MOVSWW", + "MOVUPD", + "MOVUPS", + "MOVW", + "MOVWLSX", + "MOVWLZX", + "MOVWQSX", + "MOVWQZX", + "MOVZWW", + "MPSADBW", + "MULB", + "MULL", + "MULPD", + "MULPS", + "MULQ", + "MULSD", + "MULSS", + "MULW", + "MULXL", + "MULXQ", + "MWAIT", + "NEGB", + "NEGL", + "NEGQ", + "NEGW", + "NOPL", + "NOPW", + "NOTB", + "NOTL", + "NOTQ", + "NOTW", + "ORB", + "ORL", + "ORPD", + "ORPS", + "ORQ", + "ORW", + "OUTB", + "OUTL", + "OUTSB", + "OUTSL", + "OUTSW", + "OUTW", + "PABSB", + "PABSD", + "PABSW", + "PACKSSLW", + "PACKSSWB", + "PACKUSDW", + "PACKUSWB", + "PADDB", + "PADDL", + "PADDQ", + "PADDSB", + "PADDSW", + "PADDUSB", + "PADDUSW", + "PADDW", + "PALIGNR", + "PAND", + "PANDN", + "PAUSE", + "PAVGB", + "PAVGW", + "PBLENDVB", + "PBLENDW", + "PCLMULQDQ", + "PCMPEQB", + "PCMPEQL", + "PCMPEQQ", + "PCMPEQW", + "PCMPESTRI", + "PCMPESTRM", + "PCMPGTB", + "PCMPGTL", + "PCMPGTQ", + "PCMPGTW", + "PCMPISTRI", + "PCMPISTRM", + "PDEPL", + "PDEPQ", + "PEXTL", + "PEXTQ", + "PEXTRB", + "PEXTRD", + "PEXTRQ", + "PEXTRW", + "PHADDD", + "PHADDSW", + "PHADDW", + "PHMINPOSUW", + "PHSUBD", + "PHSUBSW", + "PHSUBW", + "PINSRB", + "PINSRD", + "PINSRQ", + "PINSRW", + "PMADDUBSW", + "PMADDWL", + "PMAXSB", + "PMAXSD", + "PMAXSW", + "PMAXUB", + "PMAXUD", + "PMAXUW", + "PMINSB", 
+ "PMINSD", + "PMINSW", + "PMINUB", + "PMINUD", + "PMINUW", + "PMOVMSKB", + "PMOVSXBD", + "PMOVSXBQ", + "PMOVSXBW", + "PMOVSXDQ", + "PMOVSXWD", + "PMOVSXWQ", + "PMOVZXBD", + "PMOVZXBQ", + "PMOVZXBW", + "PMOVZXDQ", + "PMOVZXWD", + "PMOVZXWQ", + "PMULDQ", + "PMULHRSW", + "PMULHUW", + "PMULHW", + "PMULLD", + "PMULLW", + "PMULULQ", + "POPAL", + "POPAW", + "POPCNTL", + "POPCNTQ", + "POPCNTW", + "POPFL", + "POPFQ", + "POPFW", + "POPL", + "POPQ", + "POPW", + "POR", + "PREFETCHNTA", + "PREFETCHT0", + "PREFETCHT1", + "PREFETCHT2", + "PSADBW", + "PSHUFB", + "PSHUFD", + "PSHUFHW", + "PSHUFL", + "PSHUFLW", + "PSHUFW", + "PSIGNB", + "PSIGND", + "PSIGNW", + "PSLLL", + "PSLLO", + "PSLLQ", + "PSLLW", + "PSRAL", + "PSRAW", + "PSRLL", + "PSRLO", + "PSRLQ", + "PSRLW", + "PSUBB", + "PSUBL", + "PSUBQ", + "PSUBSB", + "PSUBSW", + "PSUBUSB", + "PSUBUSW", + "PSUBW", + "PTEST", + "PUNPCKHBW", + "PUNPCKHLQ", + "PUNPCKHQDQ", + "PUNPCKHWL", + "PUNPCKLBW", + "PUNPCKLLQ", + "PUNPCKLQDQ", + "PUNPCKLWL", + "PUSHAL", + "PUSHAW", + "PUSHFL", + "PUSHFQ", + "PUSHFW", + "PUSHL", + "PUSHQ", + "PUSHW", + "PXOR", + "QUAD", + "RCLB", + "RCLL", + "RCLQ", + "RCLW", + "RCPPS", + "RCPSS", + "RCRB", + "RCRL", + "RCRQ", + "RCRW", + "RDFSBASEL", + "RDFSBASEQ", + "RDGSBASEL", + "RDGSBASEQ", + "RDMSR", + "RDPKRU", + "RDPMC", + "RDRANDL", + "RDRANDQ", + "RDRANDW", + "RDSEEDL", + "RDSEEDQ", + "RDSEEDW", + "RDTSC", + "RDTSCP", + "REP", + "REPN", + "RETFL", + "RETFQ", + "RETFW", + "ROLB", + "ROLL", + "ROLQ", + "ROLW", + "RORB", + "RORL", + "RORQ", + "RORW", + "RORXL", + "RORXQ", + "ROUNDPD", + "ROUNDPS", + "ROUNDSD", + "ROUNDSS", + "RSM", + "RSQRTPS", + "RSQRTSS", + "SAHF", + "SALB", + "SALL", + "SALQ", + "SALW", + "SARB", + "SARL", + "SARQ", + "SARW", + "SARXL", + "SARXQ", + "SBBB", + "SBBL", + "SBBQ", + "SBBW", + "SCASB", + "SCASL", + "SCASQ", + "SCASW", + "SETCC", + "SETCS", + "SETEQ", + "SETGE", + "SETGT", + "SETHI", + "SETLE", + "SETLS", + "SETLT", + "SETMI", + "SETNE", + "SETOC", + "SETOS", + "SETPC", + "SETPL", 
+ "SETPS", + "SFENCE", + "SGDT", + "SHA1MSG1", + "SHA1MSG2", + "SHA1NEXTE", + "SHA1RNDS4", + "SHA256MSG1", + "SHA256MSG2", + "SHA256RNDS2", + "SHLB", + "SHLL", + "SHLQ", + "SHLW", + "SHLXL", + "SHLXQ", + "SHRB", + "SHRL", + "SHRQ", + "SHRW", + "SHRXL", + "SHRXQ", + "SHUFPD", + "SHUFPS", + "SIDT", + "SLDTL", + "SLDTQ", + "SLDTW", + "SMSWL", + "SMSWQ", + "SMSWW", + "SQRTPD", + "SQRTPS", + "SQRTSD", + "SQRTSS", + "STAC", + "STC", + "STD", + "STI", + "STMXCSR", + "STOSB", + "STOSL", + "STOSQ", + "STOSW", + "STRL", + "STRQ", + "STRW", + "SUBB", + "SUBL", + "SUBPD", + "SUBPS", + "SUBQ", + "SUBSD", + "SUBSS", + "SUBW", + "SWAPGS", + "SYSCALL", + "SYSENTER", + "SYSENTER64", + "SYSEXIT", + "SYSEXIT64", + "SYSRET", + "TESTB", + "TESTL", + "TESTQ", + "TESTW", + "TPAUSE", + "TZCNTL", + "TZCNTQ", + "TZCNTW", + "UCOMISD", + "UCOMISS", + "UD1", + "UD2", + "UMWAIT", + "UNPCKHPD", + "UNPCKHPS", + "UNPCKLPD", + "UNPCKLPS", + "UMONITOR", + "V4FMADDPS", + "V4FMADDSS", + "V4FNMADDPS", + "V4FNMADDSS", + "VADDPD", + "VADDPS", + "VADDSD", + "VADDSS", + "VADDSUBPD", + "VADDSUBPS", + "VAESDEC", + "VAESDECLAST", + "VAESENC", + "VAESENCLAST", + "VAESIMC", + "VAESKEYGENASSIST", + "VALIGND", + "VALIGNQ", + "VANDNPD", + "VANDNPS", + "VANDPD", + "VANDPS", + "VBLENDMPD", + "VBLENDMPS", + "VBLENDPD", + "VBLENDPS", + "VBLENDVPD", + "VBLENDVPS", + "VBROADCASTF128", + "VBROADCASTF32X2", + "VBROADCASTF32X4", + "VBROADCASTF32X8", + "VBROADCASTF64X2", + "VBROADCASTF64X4", + "VBROADCASTI128", + "VBROADCASTI32X2", + "VBROADCASTI32X4", + "VBROADCASTI32X8", + "VBROADCASTI64X2", + "VBROADCASTI64X4", + "VBROADCASTSD", + "VBROADCASTSS", + "VCMPPD", + "VCMPPS", + "VCMPSD", + "VCMPSS", + "VCOMISD", + "VCOMISS", + "VCOMPRESSPD", + "VCOMPRESSPS", + "VCVTDQ2PD", + "VCVTDQ2PS", + "VCVTPD2DQ", + "VCVTPD2DQX", + "VCVTPD2DQY", + "VCVTPD2PS", + "VCVTPD2PSX", + "VCVTPD2PSY", + "VCVTPD2QQ", + "VCVTPD2UDQ", + "VCVTPD2UDQX", + "VCVTPD2UDQY", + "VCVTPD2UQQ", + "VCVTPH2PS", + "VCVTPS2DQ", + "VCVTPS2PD", + "VCVTPS2PH", + 
"VCVTPS2QQ", + "VCVTPS2UDQ", + "VCVTPS2UQQ", + "VCVTQQ2PD", + "VCVTQQ2PS", + "VCVTQQ2PSX", + "VCVTQQ2PSY", + "VCVTSD2SI", + "VCVTSD2SIQ", + "VCVTSD2SS", + "VCVTSD2USI", + "VCVTSD2USIL", + "VCVTSD2USIQ", + "VCVTSI2SDL", + "VCVTSI2SDQ", + "VCVTSI2SSL", + "VCVTSI2SSQ", + "VCVTSS2SD", + "VCVTSS2SI", + "VCVTSS2SIQ", + "VCVTSS2USI", + "VCVTSS2USIL", + "VCVTSS2USIQ", + "VCVTTPD2DQ", + "VCVTTPD2DQX", + "VCVTTPD2DQY", + "VCVTTPD2QQ", + "VCVTTPD2UDQ", + "VCVTTPD2UDQX", + "VCVTTPD2UDQY", + "VCVTTPD2UQQ", + "VCVTTPS2DQ", + "VCVTTPS2QQ", + "VCVTTPS2UDQ", + "VCVTTPS2UQQ", + "VCVTTSD2SI", + "VCVTTSD2SIQ", + "VCVTTSD2USI", + "VCVTTSD2USIL", + "VCVTTSD2USIQ", + "VCVTTSS2SI", + "VCVTTSS2SIQ", + "VCVTTSS2USI", + "VCVTTSS2USIL", + "VCVTTSS2USIQ", + "VCVTUDQ2PD", + "VCVTUDQ2PS", + "VCVTUQQ2PD", + "VCVTUQQ2PS", + "VCVTUQQ2PSX", + "VCVTUQQ2PSY", + "VCVTUSI2SD", + "VCVTUSI2SDL", + "VCVTUSI2SDQ", + "VCVTUSI2SS", + "VCVTUSI2SSL", + "VCVTUSI2SSQ", + "VDBPSADBW", + "VDIVPD", + "VDIVPS", + "VDIVSD", + "VDIVSS", + "VDPPD", + "VDPPS", + "VERR", + "VERW", + "VEXP2PD", + "VEXP2PS", + "VEXPANDPD", + "VEXPANDPS", + "VEXTRACTF128", + "VEXTRACTF32X4", + "VEXTRACTF32X8", + "VEXTRACTF64X2", + "VEXTRACTF64X4", + "VEXTRACTI128", + "VEXTRACTI32X4", + "VEXTRACTI32X8", + "VEXTRACTI64X2", + "VEXTRACTI64X4", + "VEXTRACTPS", + "VFIXUPIMMPD", + "VFIXUPIMMPS", + "VFIXUPIMMSD", + "VFIXUPIMMSS", + "VFMADD132PD", + "VFMADD132PS", + "VFMADD132SD", + "VFMADD132SS", + "VFMADD213PD", + "VFMADD213PS", + "VFMADD213SD", + "VFMADD213SS", + "VFMADD231PD", + "VFMADD231PS", + "VFMADD231SD", + "VFMADD231SS", + "VFMADDSUB132PD", + "VFMADDSUB132PS", + "VFMADDSUB213PD", + "VFMADDSUB213PS", + "VFMADDSUB231PD", + "VFMADDSUB231PS", + "VFMSUB132PD", + "VFMSUB132PS", + "VFMSUB132SD", + "VFMSUB132SS", + "VFMSUB213PD", + "VFMSUB213PS", + "VFMSUB213SD", + "VFMSUB213SS", + "VFMSUB231PD", + "VFMSUB231PS", + "VFMSUB231SD", + "VFMSUB231SS", + "VFMSUBADD132PD", + "VFMSUBADD132PS", + "VFMSUBADD213PD", + "VFMSUBADD213PS", + "VFMSUBADD231PD", + 
"VFMSUBADD231PS", + "VFNMADD132PD", + "VFNMADD132PS", + "VFNMADD132SD", + "VFNMADD132SS", + "VFNMADD213PD", + "VFNMADD213PS", + "VFNMADD213SD", + "VFNMADD213SS", + "VFNMADD231PD", + "VFNMADD231PS", + "VFNMADD231SD", + "VFNMADD231SS", + "VFNMSUB132PD", + "VFNMSUB132PS", + "VFNMSUB132SD", + "VFNMSUB132SS", + "VFNMSUB213PD", + "VFNMSUB213PS", + "VFNMSUB213SD", + "VFNMSUB213SS", + "VFNMSUB231PD", + "VFNMSUB231PS", + "VFNMSUB231SD", + "VFNMSUB231SS", + "VFPCLASSPD", + "VFPCLASSPDX", + "VFPCLASSPDY", + "VFPCLASSPDZ", + "VFPCLASSPS", + "VFPCLASSPSX", + "VFPCLASSPSY", + "VFPCLASSPSZ", + "VFPCLASSSD", + "VFPCLASSSS", + "VGATHERDPD", + "VGATHERDPS", + "VGATHERPF0DPD", + "VGATHERPF0DPS", + "VGATHERPF0QPD", + "VGATHERPF0QPS", + "VGATHERPF1DPD", + "VGATHERPF1DPS", + "VGATHERPF1QPD", + "VGATHERPF1QPS", + "VGATHERQPD", + "VGATHERQPS", + "VGETEXPPD", + "VGETEXPPS", + "VGETEXPSD", + "VGETEXPSS", + "VGETMANTPD", + "VGETMANTPS", + "VGETMANTSD", + "VGETMANTSS", + "VGF2P8AFFINEINVQB", + "VGF2P8AFFINEQB", + "VGF2P8MULB", + "VHADDPD", + "VHADDPS", + "VHSUBPD", + "VHSUBPS", + "VINSERTF128", + "VINSERTF32X4", + "VINSERTF32X8", + "VINSERTF64X2", + "VINSERTF64X4", + "VINSERTI128", + "VINSERTI32X4", + "VINSERTI32X8", + "VINSERTI64X2", + "VINSERTI64X4", + "VINSERTPS", + "VLDDQU", + "VLDMXCSR", + "VMASKMOVDQU", + "VMASKMOVPD", + "VMASKMOVPS", + "VMAXPD", + "VMAXPS", + "VMAXSD", + "VMAXSS", + "VMINPD", + "VMINPS", + "VMINSD", + "VMINSS", + "VMOVAPD", + "VMOVAPS", + "VMOVD", + "VMOVDDUP", + "VMOVDQA", + "VMOVDQA32", + "VMOVDQA64", + "VMOVDQU", + "VMOVDQU16", + "VMOVDQU32", + "VMOVDQU64", + "VMOVDQU8", + "VMOVHLPS", + "VMOVHPD", + "VMOVHPS", + "VMOVLHPS", + "VMOVLPD", + "VMOVLPS", + "VMOVMSKPD", + "VMOVMSKPS", + "VMOVNTDQ", + "VMOVNTDQA", + "VMOVNTPD", + "VMOVNTPS", + "VMOVQ", + "VMOVSD", + "VMOVSHDUP", + "VMOVSLDUP", + "VMOVSS", + "VMOVUPD", + "VMOVUPS", + "VMPSADBW", + "VMULPD", + "VMULPS", + "VMULSD", + "VMULSS", + "VORPD", + "VORPS", + "VP4DPWSSD", + "VP4DPWSSDS", + "VPABSB", + "VPABSD", + 
"VPABSQ", + "VPABSW", + "VPACKSSDW", + "VPACKSSWB", + "VPACKUSDW", + "VPACKUSWB", + "VPADDB", + "VPADDD", + "VPADDQ", + "VPADDSB", + "VPADDSW", + "VPADDUSB", + "VPADDUSW", + "VPADDW", + "VPALIGNR", + "VPAND", + "VPANDD", + "VPANDN", + "VPANDND", + "VPANDNQ", + "VPANDQ", + "VPAVGB", + "VPAVGW", + "VPBLENDD", + "VPBLENDMB", + "VPBLENDMD", + "VPBLENDMQ", + "VPBLENDMW", + "VPBLENDVB", + "VPBLENDW", + "VPBROADCASTB", + "VPBROADCASTD", + "VPBROADCASTMB2Q", + "VPBROADCASTMW2D", + "VPBROADCASTQ", + "VPBROADCASTW", + "VPCLMULQDQ", + "VPCMPB", + "VPCMPD", + "VPCMPEQB", + "VPCMPEQD", + "VPCMPEQQ", + "VPCMPEQW", + "VPCMPESTRI", + "VPCMPESTRM", + "VPCMPGTB", + "VPCMPGTD", + "VPCMPGTQ", + "VPCMPGTW", + "VPCMPISTRI", + "VPCMPISTRM", + "VPCMPQ", + "VPCMPUB", + "VPCMPUD", + "VPCMPUQ", + "VPCMPUW", + "VPCMPW", + "VPCOMPRESSB", + "VPCOMPRESSD", + "VPCOMPRESSQ", + "VPCOMPRESSW", + "VPCONFLICTD", + "VPCONFLICTQ", + "VPDPBUSD", + "VPDPBUSDS", + "VPDPWSSD", + "VPDPWSSDS", + "VPERM2F128", + "VPERM2I128", + "VPERMB", + "VPERMD", + "VPERMI2B", + "VPERMI2D", + "VPERMI2PD", + "VPERMI2PS", + "VPERMI2Q", + "VPERMI2W", + "VPERMILPD", + "VPERMILPS", + "VPERMPD", + "VPERMPS", + "VPERMQ", + "VPERMT2B", + "VPERMT2D", + "VPERMT2PD", + "VPERMT2PS", + "VPERMT2Q", + "VPERMT2W", + "VPERMW", + "VPEXPANDB", + "VPEXPANDD", + "VPEXPANDQ", + "VPEXPANDW", + "VPEXTRB", + "VPEXTRD", + "VPEXTRQ", + "VPEXTRW", + "VPGATHERDD", + "VPGATHERDQ", + "VPGATHERQD", + "VPGATHERQQ", + "VPHADDD", + "VPHADDSW", + "VPHADDW", + "VPHMINPOSUW", + "VPHSUBD", + "VPHSUBSW", + "VPHSUBW", + "VPINSRB", + "VPINSRD", + "VPINSRQ", + "VPINSRW", + "VPLZCNTD", + "VPLZCNTQ", + "VPMADD52HUQ", + "VPMADD52LUQ", + "VPMADDUBSW", + "VPMADDWD", + "VPMASKMOVD", + "VPMASKMOVQ", + "VPMAXSB", + "VPMAXSD", + "VPMAXSQ", + "VPMAXSW", + "VPMAXUB", + "VPMAXUD", + "VPMAXUQ", + "VPMAXUW", + "VPMINSB", + "VPMINSD", + "VPMINSQ", + "VPMINSW", + "VPMINUB", + "VPMINUD", + "VPMINUQ", + "VPMINUW", + "VPMOVB2M", + "VPMOVD2M", + "VPMOVDB", + "VPMOVDW", + "VPMOVM2B", + 
"VPMOVM2D", + "VPMOVM2Q", + "VPMOVM2W", + "VPMOVMSKB", + "VPMOVQ2M", + "VPMOVQB", + "VPMOVQD", + "VPMOVQW", + "VPMOVSDB", + "VPMOVSDW", + "VPMOVSQB", + "VPMOVSQD", + "VPMOVSQW", + "VPMOVSWB", + "VPMOVSXBD", + "VPMOVSXBQ", + "VPMOVSXBW", + "VPMOVSXDQ", + "VPMOVSXWD", + "VPMOVSXWQ", + "VPMOVUSDB", + "VPMOVUSDW", + "VPMOVUSQB", + "VPMOVUSQD", + "VPMOVUSQW", + "VPMOVUSWB", + "VPMOVW2M", + "VPMOVWB", + "VPMOVZXBD", + "VPMOVZXBQ", + "VPMOVZXBW", + "VPMOVZXDQ", + "VPMOVZXWD", + "VPMOVZXWQ", + "VPMULDQ", + "VPMULHRSW", + "VPMULHUW", + "VPMULHW", + "VPMULLD", + "VPMULLQ", + "VPMULLW", + "VPMULTISHIFTQB", + "VPMULUDQ", + "VPOPCNTB", + "VPOPCNTD", + "VPOPCNTQ", + "VPOPCNTW", + "VPOR", + "VPORD", + "VPORQ", + "VPROLD", + "VPROLQ", + "VPROLVD", + "VPROLVQ", + "VPRORD", + "VPRORQ", + "VPRORVD", + "VPRORVQ", + "VPSADBW", + "VPSCATTERDD", + "VPSCATTERDQ", + "VPSCATTERQD", + "VPSCATTERQQ", + "VPSHLDD", + "VPSHLDQ", + "VPSHLDVD", + "VPSHLDVQ", + "VPSHLDVW", + "VPSHLDW", + "VPSHRDD", + "VPSHRDQ", + "VPSHRDVD", + "VPSHRDVQ", + "VPSHRDVW", + "VPSHRDW", + "VPSHUFB", + "VPSHUFBITQMB", + "VPSHUFD", + "VPSHUFHW", + "VPSHUFLW", + "VPSIGNB", + "VPSIGND", + "VPSIGNW", + "VPSLLD", + "VPSLLDQ", + "VPSLLQ", + "VPSLLVD", + "VPSLLVQ", + "VPSLLVW", + "VPSLLW", + "VPSRAD", + "VPSRAQ", + "VPSRAVD", + "VPSRAVQ", + "VPSRAVW", + "VPSRAW", + "VPSRLD", + "VPSRLDQ", + "VPSRLQ", + "VPSRLVD", + "VPSRLVQ", + "VPSRLVW", + "VPSRLW", + "VPSUBB", + "VPSUBD", + "VPSUBQ", + "VPSUBSB", + "VPSUBSW", + "VPSUBUSB", + "VPSUBUSW", + "VPSUBW", + "VPTERNLOGD", + "VPTERNLOGQ", + "VPTEST", + "VPTESTMB", + "VPTESTMD", + "VPTESTMQ", + "VPTESTMW", + "VPTESTNMB", + "VPTESTNMD", + "VPTESTNMQ", + "VPTESTNMW", + "VPUNPCKHBW", + "VPUNPCKHDQ", + "VPUNPCKHQDQ", + "VPUNPCKHWD", + "VPUNPCKLBW", + "VPUNPCKLDQ", + "VPUNPCKLQDQ", + "VPUNPCKLWD", + "VPXOR", + "VPXORD", + "VPXORQ", + "VRANGEPD", + "VRANGEPS", + "VRANGESD", + "VRANGESS", + "VRCP14PD", + "VRCP14PS", + "VRCP14SD", + "VRCP14SS", + "VRCP28PD", + "VRCP28PS", + "VRCP28SD", + 
"VRCP28SS", + "VRCPPS", + "VRCPSS", + "VREDUCEPD", + "VREDUCEPS", + "VREDUCESD", + "VREDUCESS", + "VRNDSCALEPD", + "VRNDSCALEPS", + "VRNDSCALESD", + "VRNDSCALESS", + "VROUNDPD", + "VROUNDPS", + "VROUNDSD", + "VROUNDSS", + "VRSQRT14PD", + "VRSQRT14PS", + "VRSQRT14SD", + "VRSQRT14SS", + "VRSQRT28PD", + "VRSQRT28PS", + "VRSQRT28SD", + "VRSQRT28SS", + "VRSQRTPS", + "VRSQRTSS", + "VSCALEFPD", + "VSCALEFPS", + "VSCALEFSD", + "VSCALEFSS", + "VSCATTERDPD", + "VSCATTERDPS", + "VSCATTERPF0DPD", + "VSCATTERPF0DPS", + "VSCATTERPF0QPD", + "VSCATTERPF0QPS", + "VSCATTERPF1DPD", + "VSCATTERPF1DPS", + "VSCATTERPF1QPD", + "VSCATTERPF1QPS", + "VSCATTERQPD", + "VSCATTERQPS", + "VSHUFF32X4", + "VSHUFF64X2", + "VSHUFI32X4", + "VSHUFI64X2", + "VSHUFPD", + "VSHUFPS", + "VSQRTPD", + "VSQRTPS", + "VSQRTSD", + "VSQRTSS", + "VSTMXCSR", + "VSUBPD", + "VSUBPS", + "VSUBSD", + "VSUBSS", + "VTESTPD", + "VTESTPS", + "VUCOMISD", + "VUCOMISS", + "VUNPCKHPD", + "VUNPCKHPS", + "VUNPCKLPD", + "VUNPCKLPS", + "VXORPD", + "VXORPS", + "VZEROALL", + "VZEROUPPER", + "WAIT", + "WBINVD", + "WORD", + "WRFSBASEL", + "WRFSBASEQ", + "WRGSBASEL", + "WRGSBASEQ", + "WRMSR", + "WRPKRU", + "XABORT", + "XACQUIRE", + "XADDB", + "XADDL", + "XADDQ", + "XADDW", + "XBEGIN", + "XCHGB", + "XCHGL", + "XCHGQ", + "XCHGW", + "XEND", + "XGETBV", + "XLAT", + "XORB", + "XORL", + "XORPD", + "XORPS", + "XORQ", + "XORW", + "XRELEASE", + "XRSTOR", + "XRSTOR64", + "XRSTORS", + "XRSTORS64", + "XSAVE", + "XSAVE64", + "XSAVEC", + "XSAVEC64", + "XSAVEOPT", + "XSAVEOPT64", + "XSAVES", + "XSAVES64", + "XSETBV", + "XTEST", + "LAST", +} diff --git a/src/cmd/internal/obj/x86/asm6.go b/src/cmd/internal/obj/x86/asm6.go new file mode 100644 index 0000000..953eedc --- /dev/null +++ b/src/cmd/internal/obj/x86/asm6.go @@ -0,0 +1,5441 @@ +// Inferno utils/6l/span.c +// https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/span.c +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. 
+// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package x86 + +import ( + "cmd/internal/obj" + "cmd/internal/objabi" + "cmd/internal/sys" + "encoding/binary" + "fmt" + "internal/buildcfg" + "log" + "strings" +) + +var ( + plan9privates *obj.LSym +) + +// Instruction layout. + +// Loop alignment constants: +// want to align loop entry to loopAlign-byte boundary, +// and willing to insert at most maxLoopPad bytes of NOP to do so. 
+// We define a loop entry as the target of a backward jump. +// +// gcc uses maxLoopPad = 10 for its 'generic x86-64' config, +// and it aligns all jump targets, not just backward jump targets. +// +// As of 6/1/2012, the effect of setting maxLoopPad = 10 here +// is very slight but negative, so the alignment is disabled by +// setting maxLoopPad = 0. The code is here for reference and +// for future experiments. +const ( + loopAlign = 16 + maxLoopPad = 0 +) + +// Bit flags that are used to express jump target properties. +const ( + // branchBackwards marks targets that are located behind. + // Used to express jumps to loop headers. + branchBackwards = (1 << iota) + // branchShort marks branches whose target is close, + // with an offset in the -128..127 range. + branchShort + // branchLoopHead marks loop entry. + // Used to insert padding for misaligned loops. + branchLoopHead +) + +// opBytes holds optab encoding bytes. +// Each ytab reserves a fixed number of bytes in this array. +// +// The size should be the minimal number of bytes that +// are enough to hold the biggest optab op lines. +type opBytes [31]uint8 + +// Optab is one row of the optab instruction table: as is the instruction, +// ytab lists the operand forms it accepts, prefix is a P* constant that +// controls the prefix bytes to emit, and op holds the encoding bytes that +// the ytab rows consume. +type Optab struct { + as obj.As + ytab []ytab + prefix uint8 + op opBytes +} + +type movtab struct { + as obj.As + ft uint8 + f3t uint8 + tt uint8 + code uint8 + op [4]uint8 +} + +const ( + Yxxx = iota + Ynone + Yi0 // $0 + Yi1 // $1 + Yu2 // $x, x fits in uint2 + Yi8 // $x, x fits in int8 + Yu8 // $x, x fits in uint8 + Yu7 // $x, x in 0..127 (fits in both int8 and uint8) + Ys32 + Yi32 + Yi64 + Yiauto + Yal + Ycl + Yax + Ycx + Yrb + Yrl + Yrl32 // Yrl on 32-bit system + Yrf + Yf0 + Yrx + Ymb + Yml + Ym + Ybr + Ycs + Yss + Yds + Yes + Yfs + Ygs + Ygdtr + Yidtr + Yldtr + Ymsw + Ytask + Ycr0 + Ycr1 + Ycr2 + Ycr3 + Ycr4 + Ycr5 + Ycr6 + Ycr7 + Ycr8 + Ydr0 + Ydr1 + Ydr2 + Ydr3 + Ydr4 + Ydr5 + Ydr6 + Ydr7 + Ytr0 + Ytr1 + Ytr2 + Ytr3 + Ytr4 + Ytr5 + Ytr6 + Ytr7 + Ymr + Ymm + Yxr0 // X0 only. "<XMM0>" notation in Intel manual.
+ YxrEvexMulti4 // [ X<n> - X<n+3> ]; multisource YxrEvex + Yxr // X0..X15 + YxrEvex // X0..X31 + Yxm + YxmEvex // YxrEvex+Ym + Yxvm // VSIB vector array; vm32x/vm64x + YxvmEvex // Yxvm which permits High-16 X register as index. + YyrEvexMulti4 // [ Y<n> - Y<n+3> ]; multisource YyrEvex + Yyr // Y0..Y15 + YyrEvex // Y0..Y31 + Yym + YymEvex // YyrEvex+Ym + Yyvm // VSIB vector array; vm32y/vm64y + YyvmEvex // Yyvm which permits High-16 Y register as index. + YzrMulti4 // [ Z<n> - Z<n+3> ]; multisource YzrEvex + Yzr // Z0..Z31 + Yzm // Yzr+Ym + Yzvm // VSIB vector array; vm32z/vm64z + Yk0 // K0 + Yknot0 // K1..K7; write mask + Yk // K0..K7; used for KOP + Ykm // Yk+Ym; used for KOP + Ytls + Ytextsize + Yindir + Ymax +) + +const ( + Zxxx = iota + Zlit + Zlitm_r + Zlitr_m + Zlit_m_r + Z_rp + Zbr + Zcall + Zcallcon + Zcallduff + Zcallind + Zcallindreg + Zib_ + Zib_rp + Zibo_m + Zibo_m_xm + Zil_ + Zil_rp + Ziq_rp + Zilo_m + Zjmp + Zjmpcon + Zloop + Zo_iw + Zm_o + Zm_r + Z_m_r + Zm2_r + Zm_r_xm + Zm_r_i_xm + Zm_r_xm_nr + Zr_m_xm_nr + Zibm_r // mmx1,mmx2/mem64,imm8 + Zibr_m + Zmb_r + Zaut_r + Zo_m + Zo_m64 + Zpseudo + Zr_m + Zr_m_xm + Zrp_ + Z_ib + Z_il + Zm_ibo + Zm_ilo + Zib_rr + Zil_rr + Zbyte + + Zvex_rm_v_r + Zvex_rm_v_ro + Zvex_r_v_rm + Zvex_i_rm_vo + Zvex_v_rm_r + Zvex_i_rm_r + Zvex_i_r_v + Zvex_i_rm_v_r + Zvex + Zvex_rm_r_vo + Zvex_i_r_rm + Zvex_hr_rm_v_r + + Zevex_first + Zevex_i_r_k_rm + Zevex_i_r_rm + Zevex_i_rm_k_r + Zevex_i_rm_k_vo + Zevex_i_rm_r + Zevex_i_rm_v_k_r + Zevex_i_rm_v_r + Zevex_i_rm_vo + Zevex_k_rmo + Zevex_r_k_rm + Zevex_r_v_k_rm + Zevex_r_v_rm + Zevex_rm_k_r + Zevex_rm_v_k_r + Zevex_rm_v_r + Zevex_last + + Zmax +) + +const ( + Px = 0 + Px1 = 1 // symbolic; exact value doesn't matter + P32 = 0x32 // 32-bit only + Pe = 0x66 // operand escape + Pm = 0x0f // 2byte opcode escape + Pq = 0xff // both escapes: 66 0f + Pb = 0xfe // byte operands + Pf2 = 0xf2 // xmm escape 1: f2 0f + Pf3 = 0xf3 // xmm escape 2: f3 0f + Pef3 = 0xf5 // xmm escape 2 with 16-bit 
prefix: 66 f3 0f + Pq3 = 0x67 // xmm escape 3: 66 48 0f + Pq4 = 0x68 // xmm escape 4: 66 0F 38 + Pq4w = 0x69 // Pq4 with Rex.w 66 0F 38 + Pq5 = 0x6a // xmm escape 5: F3 0F 38 + Pq5w = 0x6b // Pq5 with Rex.w F3 0F 38 + Pfw = 0xf4 // Pf3 with Rex.w: f3 48 0f + Pw = 0x48 // Rex.w + Pw8 = 0x90 // symbolic; exact value doesn't matter + Py = 0x80 // defaults to 64-bit mode + Py1 = 0x81 // symbolic; exact value doesn't matter + Py3 = 0x83 // symbolic; exact value doesn't matter + Pavx = 0x84 // symbolic; exact value doesn't matter + + RxrEvex = 1 << 4 // AVX512 extension to REX.R/VEX.R + Rxw = 1 << 3 // =1, 64-bit operand size + Rxr = 1 << 2 // extend modrm reg + Rxx = 1 << 1 // extend sib index + Rxb = 1 << 0 // extend modrm r/m, sib base, or opcode reg +) + +const ( + // Encoding for VEX prefix in tables. + // The P, L, and W fields are chosen to match + // their eventual locations in the VEX prefix bytes. + + // Using a spare bit to make the leading [E]VEX encoding byte different from + // 0x0f even if all other VEX fields are 0.
+ avxEscape = 1 << 6 + + // P field - 2 bits + vex66 = 1 << 0 + vexF3 = 2 << 0 + vexF2 = 3 << 0 + // L field - 1 bit + vexLZ = 0 << 2 + vexLIG = 0 << 2 + vex128 = 0 << 2 + vex256 = 1 << 2 + // W field - 1 bit + vexWIG = 0 << 7 + vexW0 = 0 << 7 + vexW1 = 1 << 7 + // M field - 5 bits, but mostly reserved; we can store up to 3 + vex0F = 1 << 3 + vex0F38 = 2 << 3 + vex0F3A = 3 << 3 +) + +// ycover[from*Ymax+to] is nonzero when an operand of class from also +// counts as class to; the table is set up in instinit. +var ycover [Ymax * Ymax]uint8 + +var reg [MAXREG]int + +var regrex [MAXREG + 1]int + +// The operand tables below are slices of ytab rows of the form +// {Ztype, zoffset, argList{Ytypes...}}; zoffset is how far the opBytes +// cursor advances when stepping past the row. + +var ynone = []ytab{ + {Zlit, 1, argList{}}, +} + +var ytext = []ytab{ + {Zpseudo, 0, argList{Ymb, Ytextsize}}, + {Zpseudo, 1, argList{Ymb, Yi32, Ytextsize}}, +} + +var ynop = []ytab{ + {Zpseudo, 0, argList{}}, + {Zpseudo, 0, argList{Yiauto}}, + {Zpseudo, 0, argList{Yml}}, + {Zpseudo, 0, argList{Yrf}}, + {Zpseudo, 0, argList{Yxr}}, + {Zpseudo, 0, argList{Yiauto}}, + {Zpseudo, 0, argList{Yml}}, + {Zpseudo, 0, argList{Yrf}}, + {Zpseudo, 1, argList{Yxr}}, +} + +var yfuncdata = []ytab{ + {Zpseudo, 0, argList{Yi32, Ym}}, +} + +var ypcdata = []ytab{ + {Zpseudo, 0, argList{Yi32, Yi32}}, +} + +var yxorb = []ytab{ + {Zib_, 1, argList{Yi32, Yal}}, + {Zibo_m, 2, argList{Yi32, Ymb}}, + {Zr_m, 1, argList{Yrb, Ymb}}, + {Zm_r, 1, argList{Ymb, Yrb}}, +} + +var yaddl = []ytab{ + {Zibo_m, 2, argList{Yi8, Yml}}, + {Zil_, 1, argList{Yi32, Yax}}, + {Zilo_m, 2, argList{Yi32, Yml}}, + {Zr_m, 1, argList{Yrl, Yml}}, + {Zm_r, 1, argList{Yml, Yrl}}, +} + +var yincl = []ytab{ + {Z_rp, 1, argList{Yrl}}, + {Zo_m, 2, argList{Yml}}, +} + +var yincq = []ytab{ + {Zo_m, 2, argList{Yml}}, +} + +var ycmpb = []ytab{ + {Z_ib, 1, argList{Yal, Yi32}}, + {Zm_ibo, 2, argList{Ymb, Yi32}}, + {Zm_r, 1, argList{Ymb, Yrb}}, + {Zr_m, 1, argList{Yrb, Ymb}}, +} + +var ycmpl = []ytab{ + {Zm_ibo, 2, argList{Yml, Yi8}}, + {Z_il, 1, argList{Yax, Yi32}}, + {Zm_ilo, 2, argList{Yml, Yi32}}, + {Zm_r, 1, argList{Yml, Yrl}}, + {Zr_m, 1, argList{Yrl, Yml}}, +} + +var yshb = []ytab{ + {Zo_m, 2, argList{Yi1, Ymb}}, + {Zibo_m, 2, argList{Yu8, Ymb}}, + {Zo_m, 2,
argList{Ycx, Ymb}}, +} + +var yshl = []ytab{ + {Zo_m, 2, argList{Yi1, Yml}}, + {Zibo_m, 2, argList{Yu8, Yml}}, + {Zo_m, 2, argList{Ycl, Yml}}, + {Zo_m, 2, argList{Ycx, Yml}}, +} + +var ytestl = []ytab{ + {Zil_, 1, argList{Yi32, Yax}}, + {Zilo_m, 2, argList{Yi32, Yml}}, + {Zr_m, 1, argList{Yrl, Yml}}, + {Zm_r, 1, argList{Yml, Yrl}}, +} + +var ymovb = []ytab{ + {Zr_m, 1, argList{Yrb, Ymb}}, + {Zm_r, 1, argList{Ymb, Yrb}}, + {Zib_rp, 1, argList{Yi32, Yrb}}, + {Zibo_m, 2, argList{Yi32, Ymb}}, +} + +var ybtl = []ytab{ + {Zibo_m, 2, argList{Yi8, Yml}}, + {Zr_m, 1, argList{Yrl, Yml}}, +} + +var ymovw = []ytab{ + {Zr_m, 1, argList{Yrl, Yml}}, + {Zm_r, 1, argList{Yml, Yrl}}, + {Zil_rp, 1, argList{Yi32, Yrl}}, + {Zilo_m, 2, argList{Yi32, Yml}}, + {Zaut_r, 2, argList{Yiauto, Yrl}}, +} + +var ymovl = []ytab{ + {Zr_m, 1, argList{Yrl, Yml}}, + {Zm_r, 1, argList{Yml, Yrl}}, + {Zil_rp, 1, argList{Yi32, Yrl}}, + {Zilo_m, 2, argList{Yi32, Yml}}, + {Zm_r_xm, 1, argList{Yml, Ymr}}, // MMX MOVD + {Zr_m_xm, 1, argList{Ymr, Yml}}, // MMX MOVD + {Zm_r_xm, 2, argList{Yml, Yxr}}, // XMM MOVD (32 bit) + {Zr_m_xm, 2, argList{Yxr, Yml}}, // XMM MOVD (32 bit) + {Zaut_r, 2, argList{Yiauto, Yrl}}, +} + +var yret = []ytab{ + {Zo_iw, 1, argList{}}, + {Zo_iw, 1, argList{Yi32}}, +} + +var ymovq = []ytab{ + // valid in 32-bit mode + {Zm_r_xm_nr, 1, argList{Ym, Ymr}}, // 0x6f MMX MOVQ (shorter encoding) + {Zr_m_xm_nr, 1, argList{Ymr, Ym}}, // 0x7f MMX MOVQ + {Zm_r_xm_nr, 2, argList{Yxr, Ymr}}, // Pf2, 0xd6 MOVDQ2Q + {Zm_r_xm_nr, 2, argList{Yxm, Yxr}}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2 + {Zr_m_xm_nr, 2, argList{Yxr, Yxm}}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64 + + // valid only in 64-bit mode, usually with 64-bit prefix + {Zr_m, 1, argList{Yrl, Yml}}, // 0x89 + {Zm_r, 1, argList{Yml, Yrl}}, // 0x8b + {Zilo_m, 2, argList{Ys32, Yrl}}, // 32 bit signed 0xc7,(0) + {Ziq_rp, 1, argList{Yi64, Yrl}}, // 0xb8 -- 32/64 bit immediate + {Zilo_m, 2, argList{Yi32, Yml}}, // 0xc7,(0) + {Zm_r_xm, 1, argList{Ymm, Ymr}}, // 
0x6e MMX MOVD + {Zr_m_xm, 1, argList{Ymr, Ymm}}, // 0x7e MMX MOVD + {Zm_r_xm, 2, argList{Yml, Yxr}}, // Pe, 0x6e MOVD xmm load + {Zr_m_xm, 2, argList{Yxr, Yml}}, // Pe, 0x7e MOVD xmm store + {Zaut_r, 1, argList{Yiauto, Yrl}}, // 0 built-in LEAQ +} + +var ymovbe = []ytab{ + {Zlitm_r, 3, argList{Ym, Yrl}}, + {Zlitr_m, 3, argList{Yrl, Ym}}, +} + +var ym_rl = []ytab{ + {Zm_r, 1, argList{Ym, Yrl}}, +} + +var yrl_m = []ytab{ + {Zr_m, 1, argList{Yrl, Ym}}, +} + +var ymb_rl = []ytab{ + {Zmb_r, 1, argList{Ymb, Yrl}}, +} + +var yml_rl = []ytab{ + {Zm_r, 1, argList{Yml, Yrl}}, +} + +var yrl_ml = []ytab{ + {Zr_m, 1, argList{Yrl, Yml}}, +} + +var yml_mb = []ytab{ + {Zr_m, 1, argList{Yrb, Ymb}}, + {Zm_r, 1, argList{Ymb, Yrb}}, +} + +var yrb_mb = []ytab{ + {Zr_m, 1, argList{Yrb, Ymb}}, +} + +var yxchg = []ytab{ + {Z_rp, 1, argList{Yax, Yrl}}, + {Zrp_, 1, argList{Yrl, Yax}}, + {Zr_m, 1, argList{Yrl, Yml}}, + {Zm_r, 1, argList{Yml, Yrl}}, +} + +var ydivl = []ytab{ + {Zm_o, 2, argList{Yml}}, +} + +var ydivb = []ytab{ + {Zm_o, 2, argList{Ymb}}, +} + +var yimul = []ytab{ + {Zm_o, 2, argList{Yml}}, + {Zib_rr, 1, argList{Yi8, Yrl}}, + {Zil_rr, 1, argList{Yi32, Yrl}}, + {Zm_r, 2, argList{Yml, Yrl}}, +} + +var yimul3 = []ytab{ + {Zibm_r, 2, argList{Yi8, Yml, Yrl}}, + {Zibm_r, 2, argList{Yi32, Yml, Yrl}}, +} + +var ybyte = []ytab{ + {Zbyte, 1, argList{Yi64}}, +} + +var yin = []ytab{ + {Zib_, 1, argList{Yi32}}, + {Zlit, 1, argList{}}, +} + +var yint = []ytab{ + {Zib_, 1, argList{Yi32}}, +} + +var ypushl = []ytab{ + {Zrp_, 1, argList{Yrl}}, + {Zm_o, 2, argList{Ym}}, + {Zib_, 1, argList{Yi8}}, + {Zil_, 1, argList{Yi32}}, +} + +var ypopl = []ytab{ + {Z_rp, 1, argList{Yrl}}, + {Zo_m, 2, argList{Ym}}, +} + +var ywrfsbase = []ytab{ + {Zm_o, 2, argList{Yrl}}, +} + +var yrdrand = []ytab{ + {Zo_m, 2, argList{Yrl}}, +} + +var yclflush = []ytab{ + {Zo_m, 2, argList{Ym}}, +} + +var ybswap = []ytab{ + {Z_rp, 2, argList{Yrl}}, +} + +var yscond = []ytab{ + {Zo_m, 2, argList{Ymb}}, +} + +var yjcond = 
[]ytab{ + {Zbr, 0, argList{Ybr}}, + {Zbr, 0, argList{Yi0, Ybr}}, + {Zbr, 1, argList{Yi1, Ybr}}, +} + +var yloop = []ytab{ + {Zloop, 1, argList{Ybr}}, +} + +var ycall = []ytab{ + {Zcallindreg, 0, argList{Yml}}, + {Zcallindreg, 2, argList{Yrx, Yrx}}, + {Zcallind, 2, argList{Yindir}}, + {Zcall, 0, argList{Ybr}}, + {Zcallcon, 1, argList{Yi32}}, +} + +var yduff = []ytab{ + {Zcallduff, 1, argList{Yi32}}, +} + +var yjmp = []ytab{ + {Zo_m64, 2, argList{Yml}}, + {Zjmp, 0, argList{Ybr}}, + {Zjmpcon, 1, argList{Yi32}}, +} + +var yfmvd = []ytab{ + {Zm_o, 2, argList{Ym, Yf0}}, + {Zo_m, 2, argList{Yf0, Ym}}, + {Zm_o, 2, argList{Yrf, Yf0}}, + {Zo_m, 2, argList{Yf0, Yrf}}, +} + +var yfmvdp = []ytab{ + {Zo_m, 2, argList{Yf0, Ym}}, + {Zo_m, 2, argList{Yf0, Yrf}}, +} + +var yfmvf = []ytab{ + {Zm_o, 2, argList{Ym, Yf0}}, + {Zo_m, 2, argList{Yf0, Ym}}, +} + +var yfmvx = []ytab{ + {Zm_o, 2, argList{Ym, Yf0}}, +} + +var yfmvp = []ytab{ + {Zo_m, 2, argList{Yf0, Ym}}, +} + +var yfcmv = []ytab{ + {Zm_o, 2, argList{Yrf, Yf0}}, +} + +var yfadd = []ytab{ + {Zm_o, 2, argList{Ym, Yf0}}, + {Zm_o, 2, argList{Yrf, Yf0}}, + {Zo_m, 2, argList{Yf0, Yrf}}, +} + +var yfxch = []ytab{ + {Zo_m, 2, argList{Yf0, Yrf}}, + {Zm_o, 2, argList{Yrf, Yf0}}, +} + +var ycompp = []ytab{ + {Zo_m, 2, argList{Yf0, Yrf}}, // botch is really f0,f1 +} + +var ystsw = []ytab{ + {Zo_m, 2, argList{Ym}}, + {Zlit, 1, argList{Yax}}, +} + +var ysvrs_mo = []ytab{ + {Zm_o, 2, argList{Ym}}, +} + +// unaryDst version of "ysvrs_mo". 
+var ysvrs_om = []ytab{ + {Zo_m, 2, argList{Ym}}, +} + +var ymm = []ytab{ + {Zm_r_xm, 1, argList{Ymm, Ymr}}, + {Zm_r_xm, 2, argList{Yxm, Yxr}}, +} + +var yxm = []ytab{ + {Zm_r_xm, 1, argList{Yxm, Yxr}}, +} + +var yxm_q4 = []ytab{ + {Zm_r, 1, argList{Yxm, Yxr}}, +} + +var yxcvm1 = []ytab{ + {Zm_r_xm, 2, argList{Yxm, Yxr}}, + {Zm_r_xm, 2, argList{Yxm, Ymr}}, +} + +var yxcvm2 = []ytab{ + {Zm_r_xm, 2, argList{Yxm, Yxr}}, + {Zm_r_xm, 2, argList{Ymm, Yxr}}, +} + +var yxr = []ytab{ + {Zm_r_xm, 1, argList{Yxr, Yxr}}, +} + +var yxr_ml = []ytab{ + {Zr_m_xm, 1, argList{Yxr, Yml}}, +} + +var ymr = []ytab{ + {Zm_r, 1, argList{Ymr, Ymr}}, +} + +var ymr_ml = []ytab{ + {Zr_m_xm, 1, argList{Ymr, Yml}}, +} + +var yxcmpi = []ytab{ + {Zm_r_i_xm, 2, argList{Yxm, Yxr, Yi8}}, +} + +var yxmov = []ytab{ + {Zm_r_xm, 1, argList{Yxm, Yxr}}, + {Zr_m_xm, 1, argList{Yxr, Yxm}}, +} + +var yxcvfl = []ytab{ + {Zm_r_xm, 1, argList{Yxm, Yrl}}, +} + +var yxcvlf = []ytab{ + {Zm_r_xm, 1, argList{Yml, Yxr}}, +} + +var yxcvfq = []ytab{ + {Zm_r_xm, 2, argList{Yxm, Yrl}}, +} + +var yxcvqf = []ytab{ + {Zm_r_xm, 2, argList{Yml, Yxr}}, +} + +var yps = []ytab{ + {Zm_r_xm, 1, argList{Ymm, Ymr}}, + {Zibo_m_xm, 2, argList{Yi8, Ymr}}, + {Zm_r_xm, 2, argList{Yxm, Yxr}}, + {Zibo_m_xm, 3, argList{Yi8, Yxr}}, +} + +var yxrrl = []ytab{ + {Zm_r, 1, argList{Yxr, Yrl}}, +} + +var ymrxr = []ytab{ + {Zm_r, 1, argList{Ymr, Yxr}}, + {Zm_r_xm, 1, argList{Yxm, Yxr}}, +} + +var ymshuf = []ytab{ + {Zibm_r, 2, argList{Yi8, Ymm, Ymr}}, +} + +var ymshufb = []ytab{ + {Zm2_r, 2, argList{Yxm, Yxr}}, +} + +// yxshuf should never have more than 1 entry, +// because some optab entries use opcode sequences that +// are longer than 2 bytes (zoffset=2 here): +// ROUNDPD, ROUNDPS and the more recently added BLENDPD, +// to name a few.
+var yxshuf = []ytab{ + {Zibm_r, 2, argList{Yu8, Yxm, Yxr}}, +} + +var yextrw = []ytab{ + {Zibm_r, 2, argList{Yu8, Yxr, Yrl}}, + {Zibr_m, 2, argList{Yu8, Yxr, Yml}}, +} + +var yextr = []ytab{ + {Zibr_m, 3, argList{Yu8, Yxr, Ymm}}, +} + +var yinsrw = []ytab{ + {Zibm_r, 2, argList{Yu8, Yml, Yxr}}, +} + +var yinsr = []ytab{ + {Zibm_r, 3, argList{Yu8, Ymm, Yxr}}, +} + +var ypsdq = []ytab{ + {Zibo_m, 2, argList{Yi8, Yxr}}, +} + +var ymskb = []ytab{ + {Zm_r_xm, 2, argList{Yxr, Yrl}}, + {Zm_r_xm, 1, argList{Ymr, Yrl}}, +} + +var ycrc32l = []ytab{ + {Zlitm_r, 0, argList{Yml, Yrl}}, +} + +var ycrc32b = []ytab{ + {Zlitm_r, 0, argList{Ymb, Yrl}}, +} + +var yprefetch = []ytab{ + {Zm_o, 2, argList{Ym}}, +} + +var yaes = []ytab{ + {Zlitm_r, 2, argList{Yxm, Yxr}}, +} + +var yxbegin = []ytab{ + {Zjmp, 1, argList{Ybr}}, +} + +var yxabort = []ytab{ + {Zib_, 1, argList{Yu8}}, +} + +var ylddqu = []ytab{ + {Zm_r, 1, argList{Ym, Yxr}}, +} + +var ypalignr = []ytab{ + {Zibm_r, 2, argList{Yu8, Yxm, Yxr}}, +} + +var ysha256rnds2 = []ytab{ + {Zlit_m_r, 0, argList{Yxr0, Yxm, Yxr}}, +} + +var yblendvpd = []ytab{ + {Z_m_r, 1, argList{Yxr0, Yxm, Yxr}}, +} + +var ymmxmm0f38 = []ytab{ + {Zlitm_r, 3, argList{Ymm, Ymr}}, + {Zlitm_r, 5, argList{Yxm, Yxr}}, +} + +var yextractps = []ytab{ + {Zibr_m, 2, argList{Yu2, Yxr, Yml}}, +} + +var ysha1rnds4 = []ytab{ + {Zibm_r, 2, argList{Yu2, Yxm, Yxr}}, +} + +// You are doasm, holding in your hand a *obj.Prog with p.As set to, say, +// ACRC32, and p.From and p.To as operands (obj.Addr). The linker scans optab +// to find the entry with the given p.As and then looks through the ytable for +// that instruction (the second field in the optab struct) for a line whose +// argList matches the Ytypes of the instruction's operands. The +// function oclass computes the specific Ytype of an operand and then the set +// of more general Ytypes that it satisfies is implied by the ycover table, set +// up in instinit.
For example, oclass distinguishes the constants 0 and 1 +// from the more general 8-bit constants, but instinit says +// +// ycover[Yi0*Ymax+Ys32] = 1 +// ycover[Yi1*Ymax+Ys32] = 1 +// ycover[Yi8*Ymax+Ys32] = 1 +// +// which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32) +// if that's what an instruction can handle. +// +// In parallel with the scan through the ytable for the appropriate line, there +// is a z pointer that starts out pointing at the strange magic byte list in +// the Optab struct. With each step past a non-matching ytable line, z +// advances by the 2nd entry in the line (zoffset). When a matching line is found, that +// z pointer has the extra data to use in laying down the instruction bytes. +// The actual bytes laid down are a function of the 1st entry in the line (that +// is, the Ztype) and the z bytes. +// +// For example, let's look at AADDL. The optab line says: +// +// {AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}}, +// +// and yaddl says +// +// var yaddl = []ytab{ +// {Zibo_m, 2, argList{Yi8, Yml}}, +// {Zil_, 1, argList{Yi32, Yax}}, +// {Zilo_m, 2, argList{Yi32, Yml}}, +// {Zr_m, 1, argList{Yrl, Yml}}, +// {Zm_r, 1, argList{Yml, Yrl}}, +// } +// +// so there are 5 possible types of ADDL instruction that can be laid down, and +// possible states used to lay them down (Ztype and z pointer, assuming z +// points at opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}) are: +// +// Yi8, Yml -> Zibo_m, z (0x83, 00) +// Yi32, Yax -> Zil_, z+2 (0x05) +// Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00) +// Yrl, Yml -> Zr_m, z+2+1+2 (0x01) +// Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03) +// +// The Pconstant in the optab line controls the prefix bytes to emit. That's +// relatively straightforward as this program goes. +// +// The switch on yt.zcase in doasm implements the various Z cases. Zibo_m, for +// example, is an opcode byte (z[0]) then an asmando (which is some kind of +// encoded addressing mode for the Yml arg), and then a single immediate byte.
+// Zilo_m is the same but a long (32-bit) immediate. +var optab = +// as, ytab, andproto, opcode +[...]Optab{ + {obj.AXXX, nil, 0, opBytes{}}, + {AAAA, ynone, P32, opBytes{0x37}}, + {AAAD, ynone, P32, opBytes{0xd5, 0x0a}}, + {AAAM, ynone, P32, opBytes{0xd4, 0x0a}}, + {AAAS, ynone, P32, opBytes{0x3f}}, + {AADCB, yxorb, Pb, opBytes{0x14, 0x80, 02, 0x10, 0x12}}, + {AADCL, yaddl, Px, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}}, + {AADCQ, yaddl, Pw, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}}, + {AADCW, yaddl, Pe, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}}, + {AADCXL, yml_rl, Pq4, opBytes{0xf6}}, + {AADCXQ, yml_rl, Pq4w, opBytes{0xf6}}, + {AADDB, yxorb, Pb, opBytes{0x04, 0x80, 00, 0x00, 0x02}}, + {AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}}, + {AADDPD, yxm, Pq, opBytes{0x58}}, + {AADDPS, yxm, Pm, opBytes{0x58}}, + {AADDQ, yaddl, Pw, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}}, + {AADDSD, yxm, Pf2, opBytes{0x58}}, + {AADDSS, yxm, Pf3, opBytes{0x58}}, + {AADDSUBPD, yxm, Pq, opBytes{0xd0}}, + {AADDSUBPS, yxm, Pf2, opBytes{0xd0}}, + {AADDW, yaddl, Pe, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}}, + {AADOXL, yml_rl, Pq5, opBytes{0xf6}}, + {AADOXQ, yml_rl, Pq5w, opBytes{0xf6}}, + {AADJSP, nil, 0, opBytes{}}, + {AANDB, yxorb, Pb, opBytes{0x24, 0x80, 04, 0x20, 0x22}}, + {AANDL, yaddl, Px, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}}, + {AANDNPD, yxm, Pq, opBytes{0x55}}, + {AANDNPS, yxm, Pm, opBytes{0x55}}, + {AANDPD, yxm, Pq, opBytes{0x54}}, + {AANDPS, yxm, Pm, opBytes{0x54}}, + {AANDQ, yaddl, Pw, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}}, + {AANDW, yaddl, Pe, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}}, + {AARPL, yrl_ml, P32, opBytes{0x63}}, + {ABOUNDL, yrl_m, P32, opBytes{0x62}}, + {ABOUNDW, yrl_m, Pe, opBytes{0x62}}, + {ABSFL, yml_rl, Pm, opBytes{0xbc}}, + {ABSFQ, yml_rl, Pw, opBytes{0x0f, 0xbc}}, + {ABSFW, yml_rl, Pq, opBytes{0xbc}}, + {ABSRL, yml_rl, Pm, opBytes{0xbd}}, + {ABSRQ, yml_rl, Pw, opBytes{0x0f, 0xbd}}, + {ABSRW, 
yml_rl, Pq, opBytes{0xbd}}, + {ABSWAPL, ybswap, Px, opBytes{0x0f, 0xc8}}, + {ABSWAPQ, ybswap, Pw, opBytes{0x0f, 0xc8}}, + {ABTCL, ybtl, Pm, opBytes{0xba, 07, 0xbb}}, + {ABTCQ, ybtl, Pw, opBytes{0x0f, 0xba, 07, 0x0f, 0xbb}}, + {ABTCW, ybtl, Pq, opBytes{0xba, 07, 0xbb}}, + {ABTL, ybtl, Pm, opBytes{0xba, 04, 0xa3}}, + {ABTQ, ybtl, Pw, opBytes{0x0f, 0xba, 04, 0x0f, 0xa3}}, + {ABTRL, ybtl, Pm, opBytes{0xba, 06, 0xb3}}, + {ABTRQ, ybtl, Pw, opBytes{0x0f, 0xba, 06, 0x0f, 0xb3}}, + {ABTRW, ybtl, Pq, opBytes{0xba, 06, 0xb3}}, + {ABTSL, ybtl, Pm, opBytes{0xba, 05, 0xab}}, + {ABTSQ, ybtl, Pw, opBytes{0x0f, 0xba, 05, 0x0f, 0xab}}, + {ABTSW, ybtl, Pq, opBytes{0xba, 05, 0xab}}, + {ABTW, ybtl, Pq, opBytes{0xba, 04, 0xa3}}, + {ABYTE, ybyte, Px, opBytes{1}}, + {obj.ACALL, ycall, Px, opBytes{0xff, 02, 0xff, 0x15, 0xe8}}, + {ACBW, ynone, Pe, opBytes{0x98}}, + {ACDQ, ynone, Px, opBytes{0x99}}, + {ACDQE, ynone, Pw, opBytes{0x98}}, + {ACLAC, ynone, Pm, opBytes{01, 0xca}}, + {ACLC, ynone, Px, opBytes{0xf8}}, + {ACLD, ynone, Px, opBytes{0xfc}}, + {ACLDEMOTE, yclflush, Pm, opBytes{0x1c, 00}}, + {ACLFLUSH, yclflush, Pm, opBytes{0xae, 07}}, + {ACLFLUSHOPT, yclflush, Pq, opBytes{0xae, 07}}, + {ACLI, ynone, Px, opBytes{0xfa}}, + {ACLTS, ynone, Pm, opBytes{0x06}}, + {ACLWB, yclflush, Pq, opBytes{0xae, 06}}, + {ACMC, ynone, Px, opBytes{0xf5}}, + {ACMOVLCC, yml_rl, Pm, opBytes{0x43}}, + {ACMOVLCS, yml_rl, Pm, opBytes{0x42}}, + {ACMOVLEQ, yml_rl, Pm, opBytes{0x44}}, + {ACMOVLGE, yml_rl, Pm, opBytes{0x4d}}, + {ACMOVLGT, yml_rl, Pm, opBytes{0x4f}}, + {ACMOVLHI, yml_rl, Pm, opBytes{0x47}}, + {ACMOVLLE, yml_rl, Pm, opBytes{0x4e}}, + {ACMOVLLS, yml_rl, Pm, opBytes{0x46}}, + {ACMOVLLT, yml_rl, Pm, opBytes{0x4c}}, + {ACMOVLMI, yml_rl, Pm, opBytes{0x48}}, + {ACMOVLNE, yml_rl, Pm, opBytes{0x45}}, + {ACMOVLOC, yml_rl, Pm, opBytes{0x41}}, + {ACMOVLOS, yml_rl, Pm, opBytes{0x40}}, + {ACMOVLPC, yml_rl, Pm, opBytes{0x4b}}, + {ACMOVLPL, yml_rl, Pm, opBytes{0x49}}, + {ACMOVLPS, yml_rl, Pm, opBytes{0x4a}}, + 
{ACMOVQCC, yml_rl, Pw, opBytes{0x0f, 0x43}}, + {ACMOVQCS, yml_rl, Pw, opBytes{0x0f, 0x42}}, + {ACMOVQEQ, yml_rl, Pw, opBytes{0x0f, 0x44}}, + {ACMOVQGE, yml_rl, Pw, opBytes{0x0f, 0x4d}}, + {ACMOVQGT, yml_rl, Pw, opBytes{0x0f, 0x4f}}, + {ACMOVQHI, yml_rl, Pw, opBytes{0x0f, 0x47}}, + {ACMOVQLE, yml_rl, Pw, opBytes{0x0f, 0x4e}}, + {ACMOVQLS, yml_rl, Pw, opBytes{0x0f, 0x46}}, + {ACMOVQLT, yml_rl, Pw, opBytes{0x0f, 0x4c}}, + {ACMOVQMI, yml_rl, Pw, opBytes{0x0f, 0x48}}, + {ACMOVQNE, yml_rl, Pw, opBytes{0x0f, 0x45}}, + {ACMOVQOC, yml_rl, Pw, opBytes{0x0f, 0x41}}, + {ACMOVQOS, yml_rl, Pw, opBytes{0x0f, 0x40}}, + {ACMOVQPC, yml_rl, Pw, opBytes{0x0f, 0x4b}}, + {ACMOVQPL, yml_rl, Pw, opBytes{0x0f, 0x49}}, + {ACMOVQPS, yml_rl, Pw, opBytes{0x0f, 0x4a}}, + {ACMOVWCC, yml_rl, Pq, opBytes{0x43}}, + {ACMOVWCS, yml_rl, Pq, opBytes{0x42}}, + {ACMOVWEQ, yml_rl, Pq, opBytes{0x44}}, + {ACMOVWGE, yml_rl, Pq, opBytes{0x4d}}, + {ACMOVWGT, yml_rl, Pq, opBytes{0x4f}}, + {ACMOVWHI, yml_rl, Pq, opBytes{0x47}}, + {ACMOVWLE, yml_rl, Pq, opBytes{0x4e}}, + {ACMOVWLS, yml_rl, Pq, opBytes{0x46}}, + {ACMOVWLT, yml_rl, Pq, opBytes{0x4c}}, + {ACMOVWMI, yml_rl, Pq, opBytes{0x48}}, + {ACMOVWNE, yml_rl, Pq, opBytes{0x45}}, + {ACMOVWOC, yml_rl, Pq, opBytes{0x41}}, + {ACMOVWOS, yml_rl, Pq, opBytes{0x40}}, + {ACMOVWPC, yml_rl, Pq, opBytes{0x4b}}, + {ACMOVWPL, yml_rl, Pq, opBytes{0x49}}, + {ACMOVWPS, yml_rl, Pq, opBytes{0x4a}}, + {ACMPB, ycmpb, Pb, opBytes{0x3c, 0x80, 07, 0x38, 0x3a}}, + {ACMPL, ycmpl, Px, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}}, + {ACMPPD, yxcmpi, Px, opBytes{Pe, 0xc2}}, + {ACMPPS, yxcmpi, Pm, opBytes{0xc2, 0}}, + {ACMPQ, ycmpl, Pw, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}}, + {ACMPSB, ynone, Pb, opBytes{0xa6}}, + {ACMPSD, yxcmpi, Px, opBytes{Pf2, 0xc2}}, + {ACMPSL, ynone, Px, opBytes{0xa7}}, + {ACMPSQ, ynone, Pw, opBytes{0xa7}}, + {ACMPSS, yxcmpi, Px, opBytes{Pf3, 0xc2}}, + {ACMPSW, ynone, Pe, opBytes{0xa7}}, + {ACMPW, ycmpl, Pe, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}}, + 
{ACOMISD, yxm, Pe, opBytes{0x2f}}, + {ACOMISS, yxm, Pm, opBytes{0x2f}}, + {ACPUID, ynone, Pm, opBytes{0xa2}}, + {ACVTPL2PD, yxcvm2, Px, opBytes{Pf3, 0xe6, Pe, 0x2a}}, + {ACVTPL2PS, yxcvm2, Pm, opBytes{0x5b, 0, 0x2a, 0}}, + {ACVTPD2PL, yxcvm1, Px, opBytes{Pf2, 0xe6, Pe, 0x2d}}, + {ACVTPD2PS, yxm, Pe, opBytes{0x5a}}, + {ACVTPS2PL, yxcvm1, Px, opBytes{Pe, 0x5b, Pm, 0x2d}}, + {ACVTPS2PD, yxm, Pm, opBytes{0x5a}}, + {ACVTSD2SL, yxcvfl, Pf2, opBytes{0x2d}}, + {ACVTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2d}}, + {ACVTSD2SS, yxm, Pf2, opBytes{0x5a}}, + {ACVTSL2SD, yxcvlf, Pf2, opBytes{0x2a}}, + {ACVTSQ2SD, yxcvqf, Pw, opBytes{Pf2, 0x2a}}, + {ACVTSL2SS, yxcvlf, Pf3, opBytes{0x2a}}, + {ACVTSQ2SS, yxcvqf, Pw, opBytes{Pf3, 0x2a}}, + {ACVTSS2SD, yxm, Pf3, opBytes{0x5a}}, + {ACVTSS2SL, yxcvfl, Pf3, opBytes{0x2d}}, + {ACVTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2d}}, + {ACVTTPD2PL, yxcvm1, Px, opBytes{Pe, 0xe6, Pe, 0x2c}}, + {ACVTTPS2PL, yxcvm1, Px, opBytes{Pf3, 0x5b, Pm, 0x2c}}, + {ACVTTSD2SL, yxcvfl, Pf2, opBytes{0x2c}}, + {ACVTTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2c}}, + {ACVTTSS2SL, yxcvfl, Pf3, opBytes{0x2c}}, + {ACVTTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2c}}, + {ACWD, ynone, Pe, opBytes{0x99}}, + {ACWDE, ynone, Px, opBytes{0x98}}, + {ACQO, ynone, Pw, opBytes{0x99}}, + {ADAA, ynone, P32, opBytes{0x27}}, + {ADAS, ynone, P32, opBytes{0x2f}}, + {ADECB, yscond, Pb, opBytes{0xfe, 01}}, + {ADECL, yincl, Px1, opBytes{0x48, 0xff, 01}}, + {ADECQ, yincq, Pw, opBytes{0xff, 01}}, + {ADECW, yincq, Pe, opBytes{0xff, 01}}, + {ADIVB, ydivb, Pb, opBytes{0xf6, 06}}, + {ADIVL, ydivl, Px, opBytes{0xf7, 06}}, + {ADIVPD, yxm, Pe, opBytes{0x5e}}, + {ADIVPS, yxm, Pm, opBytes{0x5e}}, + {ADIVQ, ydivl, Pw, opBytes{0xf7, 06}}, + {ADIVSD, yxm, Pf2, opBytes{0x5e}}, + {ADIVSS, yxm, Pf3, opBytes{0x5e}}, + {ADIVW, ydivl, Pe, opBytes{0xf7, 06}}, + {ADPPD, yxshuf, Pq, opBytes{0x3a, 0x41, 0}}, + {ADPPS, yxshuf, Pq, opBytes{0x3a, 0x40, 0}}, + {AEMMS, ynone, Pm, opBytes{0x77}}, + {AEXTRACTPS, yextractps, Pq, opBytes{0x3a, 0x17, 
0}}, + {AENTER, nil, 0, opBytes{}}, // botch + {AFXRSTOR, ysvrs_mo, Pm, opBytes{0xae, 01, 0xae, 01}}, + {AFXSAVE, ysvrs_om, Pm, opBytes{0xae, 00, 0xae, 00}}, + {AFXRSTOR64, ysvrs_mo, Pw, opBytes{0x0f, 0xae, 01, 0x0f, 0xae, 01}}, + {AFXSAVE64, ysvrs_om, Pw, opBytes{0x0f, 0xae, 00, 0x0f, 0xae, 00}}, + {AHLT, ynone, Px, opBytes{0xf4}}, + {AIDIVB, ydivb, Pb, opBytes{0xf6, 07}}, + {AIDIVL, ydivl, Px, opBytes{0xf7, 07}}, + {AIDIVQ, ydivl, Pw, opBytes{0xf7, 07}}, + {AIDIVW, ydivl, Pe, opBytes{0xf7, 07}}, + {AIMULB, ydivb, Pb, opBytes{0xf6, 05}}, + {AIMULL, yimul, Px, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}}, + {AIMULQ, yimul, Pw, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}}, + {AIMULW, yimul, Pe, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}}, + {AIMUL3W, yimul3, Pe, opBytes{0x6b, 00, 0x69, 00}}, + {AIMUL3L, yimul3, Px, opBytes{0x6b, 00, 0x69, 00}}, + {AIMUL3Q, yimul3, Pw, opBytes{0x6b, 00, 0x69, 00}}, + {AINB, yin, Pb, opBytes{0xe4, 0xec}}, + {AINW, yin, Pe, opBytes{0xe5, 0xed}}, + {AINL, yin, Px, opBytes{0xe5, 0xed}}, + {AINCB, yscond, Pb, opBytes{0xfe, 00}}, + {AINCL, yincl, Px1, opBytes{0x40, 0xff, 00}}, + {AINCQ, yincq, Pw, opBytes{0xff, 00}}, + {AINCW, yincq, Pe, opBytes{0xff, 00}}, + {AINSB, ynone, Pb, opBytes{0x6c}}, + {AINSL, ynone, Px, opBytes{0x6d}}, + {AINSERTPS, yxshuf, Pq, opBytes{0x3a, 0x21, 0}}, + {AINSW, ynone, Pe, opBytes{0x6d}}, + {AICEBP, ynone, Px, opBytes{0xf1}}, + {AINT, yint, Px, opBytes{0xcd}}, + {AINTO, ynone, P32, opBytes{0xce}}, + {AIRETL, ynone, Px, opBytes{0xcf}}, + {AIRETQ, ynone, Pw, opBytes{0xcf}}, + {AIRETW, ynone, Pe, opBytes{0xcf}}, + {AJCC, yjcond, Px, opBytes{0x73, 0x83, 00}}, + {AJCS, yjcond, Px, opBytes{0x72, 0x82}}, + {AJCXZL, yloop, Px, opBytes{0xe3}}, + {AJCXZW, yloop, Px, opBytes{0xe3}}, + {AJCXZQ, yloop, Px, opBytes{0xe3}}, + {AJEQ, yjcond, Px, opBytes{0x74, 0x84}}, + {AJGE, yjcond, Px, opBytes{0x7d, 0x8d}}, + {AJGT, yjcond, Px, opBytes{0x7f, 0x8f}}, + {AJHI, yjcond, Px, opBytes{0x77, 0x87}}, + {AJLE, yjcond, Px, opBytes{0x7e, 0x8e}}, + 
{AJLS, yjcond, Px, opBytes{0x76, 0x86}}, + {AJLT, yjcond, Px, opBytes{0x7c, 0x8c}}, + {AJMI, yjcond, Px, opBytes{0x78, 0x88}}, + {obj.AJMP, yjmp, Px, opBytes{0xff, 04, 0xeb, 0xe9}}, + {AJNE, yjcond, Px, opBytes{0x75, 0x85}}, + {AJOC, yjcond, Px, opBytes{0x71, 0x81, 00}}, + {AJOS, yjcond, Px, opBytes{0x70, 0x80, 00}}, + {AJPC, yjcond, Px, opBytes{0x7b, 0x8b}}, + {AJPL, yjcond, Px, opBytes{0x79, 0x89}}, + {AJPS, yjcond, Px, opBytes{0x7a, 0x8a}}, + {AHADDPD, yxm, Pq, opBytes{0x7c}}, + {AHADDPS, yxm, Pf2, opBytes{0x7c}}, + {AHSUBPD, yxm, Pq, opBytes{0x7d}}, + {AHSUBPS, yxm, Pf2, opBytes{0x7d}}, + {ALAHF, ynone, Px, opBytes{0x9f}}, + {ALARL, yml_rl, Pm, opBytes{0x02}}, + {ALARQ, yml_rl, Pw, opBytes{0x0f, 0x02}}, + {ALARW, yml_rl, Pq, opBytes{0x02}}, + {ALDDQU, ylddqu, Pf2, opBytes{0xf0}}, + {ALDMXCSR, ysvrs_mo, Pm, opBytes{0xae, 02, 0xae, 02}}, + {ALEAL, ym_rl, Px, opBytes{0x8d}}, + {ALEAQ, ym_rl, Pw, opBytes{0x8d}}, + {ALEAVEL, ynone, P32, opBytes{0xc9}}, + {ALEAVEQ, ynone, Py, opBytes{0xc9}}, + {ALEAVEW, ynone, Pe, opBytes{0xc9}}, + {ALEAW, ym_rl, Pe, opBytes{0x8d}}, + {ALOCK, ynone, Px, opBytes{0xf0}}, + {ALODSB, ynone, Pb, opBytes{0xac}}, + {ALODSL, ynone, Px, opBytes{0xad}}, + {ALODSQ, ynone, Pw, opBytes{0xad}}, + {ALODSW, ynone, Pe, opBytes{0xad}}, + {ALONG, ybyte, Px, opBytes{4}}, + {ALOOP, yloop, Px, opBytes{0xe2}}, + {ALOOPEQ, yloop, Px, opBytes{0xe1}}, + {ALOOPNE, yloop, Px, opBytes{0xe0}}, + {ALTR, ydivl, Pm, opBytes{0x00, 03}}, + {ALZCNTL, yml_rl, Pf3, opBytes{0xbd}}, + {ALZCNTQ, yml_rl, Pfw, opBytes{0xbd}}, + {ALZCNTW, yml_rl, Pef3, opBytes{0xbd}}, + {ALSLL, yml_rl, Pm, opBytes{0x03}}, + {ALSLW, yml_rl, Pq, opBytes{0x03}}, + {ALSLQ, yml_rl, Pw, opBytes{0x0f, 0x03}}, + {AMASKMOVOU, yxr, Pe, opBytes{0xf7}}, + {AMASKMOVQ, ymr, Pm, opBytes{0xf7}}, + {AMAXPD, yxm, Pe, opBytes{0x5f}}, + {AMAXPS, yxm, Pm, opBytes{0x5f}}, + {AMAXSD, yxm, Pf2, opBytes{0x5f}}, + {AMAXSS, yxm, Pf3, opBytes{0x5f}}, + {AMINPD, yxm, Pe, opBytes{0x5d}}, + {AMINPS, yxm, Pm, opBytes{0x5d}}, 
+ {AMINSD, yxm, Pf2, opBytes{0x5d}}, + {AMINSS, yxm, Pf3, opBytes{0x5d}}, + {AMONITOR, ynone, Px, opBytes{0x0f, 0x01, 0xc8, 0}}, + {AMWAIT, ynone, Px, opBytes{0x0f, 0x01, 0xc9, 0}}, + {AMOVAPD, yxmov, Pe, opBytes{0x28, 0x29}}, + {AMOVAPS, yxmov, Pm, opBytes{0x28, 0x29}}, + {AMOVB, ymovb, Pb, opBytes{0x88, 0x8a, 0xb0, 0xc6, 00}}, + {AMOVBLSX, ymb_rl, Pm, opBytes{0xbe}}, + {AMOVBLZX, ymb_rl, Pm, opBytes{0xb6}}, + {AMOVBQSX, ymb_rl, Pw, opBytes{0x0f, 0xbe}}, + {AMOVBQZX, ymb_rl, Pw, opBytes{0x0f, 0xb6}}, + {AMOVBWSX, ymb_rl, Pq, opBytes{0xbe}}, + {AMOVSWW, ymb_rl, Pe, opBytes{0x0f, 0xbf}}, + {AMOVBWZX, ymb_rl, Pq, opBytes{0xb6}}, + {AMOVZWW, ymb_rl, Pe, opBytes{0x0f, 0xb7}}, + {AMOVO, yxmov, Pe, opBytes{0x6f, 0x7f}}, + {AMOVOU, yxmov, Pf3, opBytes{0x6f, 0x7f}}, + {AMOVHLPS, yxr, Pm, opBytes{0x12}}, + {AMOVHPD, yxmov, Pe, opBytes{0x16, 0x17}}, + {AMOVHPS, yxmov, Pm, opBytes{0x16, 0x17}}, + {AMOVL, ymovl, Px, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}}, + {AMOVLHPS, yxr, Pm, opBytes{0x16}}, + {AMOVLPD, yxmov, Pe, opBytes{0x12, 0x13}}, + {AMOVLPS, yxmov, Pm, opBytes{0x12, 0x13}}, + {AMOVLQSX, yml_rl, Pw, opBytes{0x63}}, + {AMOVLQZX, yml_rl, Px, opBytes{0x8b}}, + {AMOVMSKPD, yxrrl, Pq, opBytes{0x50}}, + {AMOVMSKPS, yxrrl, Pm, opBytes{0x50}}, + {AMOVNTO, yxr_ml, Pe, opBytes{0xe7}}, + {AMOVNTDQA, ylddqu, Pq4, opBytes{0x2a}}, + {AMOVNTPD, yxr_ml, Pe, opBytes{0x2b}}, + {AMOVNTPS, yxr_ml, Pm, opBytes{0x2b}}, + {AMOVNTQ, ymr_ml, Pm, opBytes{0xe7}}, + {AMOVQ, ymovq, Pw8, opBytes{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}}, + {AMOVQOZX, ymrxr, Pf3, opBytes{0xd6, 0x7e}}, + {AMOVSB, ynone, Pb, opBytes{0xa4}}, + {AMOVSD, yxmov, Pf2, opBytes{0x10, 0x11}}, + {AMOVSL, ynone, Px, opBytes{0xa5}}, + {AMOVSQ, ynone, Pw, opBytes{0xa5}}, + {AMOVSS, yxmov, Pf3, opBytes{0x10, 0x11}}, + {AMOVSW, ynone, Pe, opBytes{0xa5}}, + {AMOVUPD, yxmov, Pe, opBytes{0x10, 0x11}}, + {AMOVUPS, yxmov, Pm, 
opBytes{0x10, 0x11}}, + {AMOVW, ymovw, Pe, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0}}, + {AMOVWLSX, yml_rl, Pm, opBytes{0xbf}}, + {AMOVWLZX, yml_rl, Pm, opBytes{0xb7}}, + {AMOVWQSX, yml_rl, Pw, opBytes{0x0f, 0xbf}}, + {AMOVWQZX, yml_rl, Pw, opBytes{0x0f, 0xb7}}, + {AMPSADBW, yxshuf, Pq, opBytes{0x3a, 0x42, 0}}, + {AMULB, ydivb, Pb, opBytes{0xf6, 04}}, + {AMULL, ydivl, Px, opBytes{0xf7, 04}}, + {AMULPD, yxm, Pe, opBytes{0x59}}, + {AMULPS, yxm, Ym, opBytes{0x59}}, + {AMULQ, ydivl, Pw, opBytes{0xf7, 04}}, + {AMULSD, yxm, Pf2, opBytes{0x59}}, + {AMULSS, yxm, Pf3, opBytes{0x59}}, + {AMULW, ydivl, Pe, opBytes{0xf7, 04}}, + {ANEGB, yscond, Pb, opBytes{0xf6, 03}}, + {ANEGL, yscond, Px, opBytes{0xf7, 03}}, + {ANEGQ, yscond, Pw, opBytes{0xf7, 03}}, + {ANEGW, yscond, Pe, opBytes{0xf7, 03}}, + {obj.ANOP, ynop, Px, opBytes{0, 0}}, + {ANOTB, yscond, Pb, opBytes{0xf6, 02}}, + {ANOTL, yscond, Px, opBytes{0xf7, 02}}, // TODO(rsc): yscond is wrong here. + {ANOTQ, yscond, Pw, opBytes{0xf7, 02}}, + {ANOTW, yscond, Pe, opBytes{0xf7, 02}}, + {AORB, yxorb, Pb, opBytes{0x0c, 0x80, 01, 0x08, 0x0a}}, + {AORL, yaddl, Px, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}}, + {AORPD, yxm, Pq, opBytes{0x56}}, + {AORPS, yxm, Pm, opBytes{0x56}}, + {AORQ, yaddl, Pw, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}}, + {AORW, yaddl, Pe, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}}, + {AOUTB, yin, Pb, opBytes{0xe6, 0xee}}, + {AOUTL, yin, Px, opBytes{0xe7, 0xef}}, + {AOUTW, yin, Pe, opBytes{0xe7, 0xef}}, + {AOUTSB, ynone, Pb, opBytes{0x6e}}, + {AOUTSL, ynone, Px, opBytes{0x6f}}, + {AOUTSW, ynone, Pe, opBytes{0x6f}}, + {APABSB, yxm_q4, Pq4, opBytes{0x1c}}, + {APABSD, yxm_q4, Pq4, opBytes{0x1e}}, + {APABSW, yxm_q4, Pq4, opBytes{0x1d}}, + {APACKSSLW, ymm, Py1, opBytes{0x6b, Pe, 0x6b}}, + {APACKSSWB, ymm, Py1, opBytes{0x63, Pe, 0x63}}, + {APACKUSDW, yxm_q4, Pq4, opBytes{0x2b}}, + {APACKUSWB, ymm, Py1, opBytes{0x67, Pe, 0x67}}, + {APADDB, ymm, Py1, opBytes{0xfc, Pe, 0xfc}}, + {APADDL, ymm, Py1, opBytes{0xfe, Pe, 
0xfe}}, + {APADDQ, yxm, Pe, opBytes{0xd4}}, + {APADDSB, ymm, Py1, opBytes{0xec, Pe, 0xec}}, + {APADDSW, ymm, Py1, opBytes{0xed, Pe, 0xed}}, + {APADDUSB, ymm, Py1, opBytes{0xdc, Pe, 0xdc}}, + {APADDUSW, ymm, Py1, opBytes{0xdd, Pe, 0xdd}}, + {APADDW, ymm, Py1, opBytes{0xfd, Pe, 0xfd}}, + {APALIGNR, ypalignr, Pq, opBytes{0x3a, 0x0f}}, + {APAND, ymm, Py1, opBytes{0xdb, Pe, 0xdb}}, + {APANDN, ymm, Py1, opBytes{0xdf, Pe, 0xdf}}, + {APAUSE, ynone, Px, opBytes{0xf3, 0x90}}, + {APAVGB, ymm, Py1, opBytes{0xe0, Pe, 0xe0}}, + {APAVGW, ymm, Py1, opBytes{0xe3, Pe, 0xe3}}, + {APBLENDW, yxshuf, Pq, opBytes{0x3a, 0x0e, 0}}, + {APCMPEQB, ymm, Py1, opBytes{0x74, Pe, 0x74}}, + {APCMPEQL, ymm, Py1, opBytes{0x76, Pe, 0x76}}, + {APCMPEQQ, yxm_q4, Pq4, opBytes{0x29}}, + {APCMPEQW, ymm, Py1, opBytes{0x75, Pe, 0x75}}, + {APCMPGTB, ymm, Py1, opBytes{0x64, Pe, 0x64}}, + {APCMPGTL, ymm, Py1, opBytes{0x66, Pe, 0x66}}, + {APCMPGTQ, yxm_q4, Pq4, opBytes{0x37}}, + {APCMPGTW, ymm, Py1, opBytes{0x65, Pe, 0x65}}, + {APCMPISTRI, yxshuf, Pq, opBytes{0x3a, 0x63, 0}}, + {APCMPISTRM, yxshuf, Pq, opBytes{0x3a, 0x62, 0}}, + {APEXTRW, yextrw, Pq, opBytes{0xc5, 0, 0x3a, 0x15, 0}}, + {APEXTRB, yextr, Pq, opBytes{0x3a, 0x14, 00}}, + {APEXTRD, yextr, Pq, opBytes{0x3a, 0x16, 00}}, + {APEXTRQ, yextr, Pq3, opBytes{0x3a, 0x16, 00}}, + {APHADDD, ymmxmm0f38, Px, opBytes{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}}, + {APHADDSW, yxm_q4, Pq4, opBytes{0x03}}, + {APHADDW, yxm_q4, Pq4, opBytes{0x01}}, + {APHMINPOSUW, yxm_q4, Pq4, opBytes{0x41}}, + {APHSUBD, yxm_q4, Pq4, opBytes{0x06}}, + {APHSUBSW, yxm_q4, Pq4, opBytes{0x07}}, + {APHSUBW, yxm_q4, Pq4, opBytes{0x05}}, + {APINSRW, yinsrw, Pq, opBytes{0xc4, 00}}, + {APINSRB, yinsr, Pq, opBytes{0x3a, 0x20, 00}}, + {APINSRD, yinsr, Pq, opBytes{0x3a, 0x22, 00}}, + {APINSRQ, yinsr, Pq3, opBytes{0x3a, 0x22, 00}}, + {APMADDUBSW, yxm_q4, Pq4, opBytes{0x04}}, + {APMADDWL, ymm, Py1, opBytes{0xf5, Pe, 0xf5}}, + {APMAXSB, yxm_q4, Pq4, opBytes{0x3c}}, + {APMAXSD, yxm_q4, Pq4, 
opBytes{0x3d}}, + {APMAXSW, yxm, Pe, opBytes{0xee}}, + {APMAXUB, yxm, Pe, opBytes{0xde}}, + {APMAXUD, yxm_q4, Pq4, opBytes{0x3f}}, + {APMAXUW, yxm_q4, Pq4, opBytes{0x3e}}, + {APMINSB, yxm_q4, Pq4, opBytes{0x38}}, + {APMINSD, yxm_q4, Pq4, opBytes{0x39}}, + {APMINSW, yxm, Pe, opBytes{0xea}}, + {APMINUB, yxm, Pe, opBytes{0xda}}, + {APMINUD, yxm_q4, Pq4, opBytes{0x3b}}, + {APMINUW, yxm_q4, Pq4, opBytes{0x3a}}, + {APMOVMSKB, ymskb, Px, opBytes{Pe, 0xd7, 0xd7}}, + {APMOVSXBD, yxm_q4, Pq4, opBytes{0x21}}, + {APMOVSXBQ, yxm_q4, Pq4, opBytes{0x22}}, + {APMOVSXBW, yxm_q4, Pq4, opBytes{0x20}}, + {APMOVSXDQ, yxm_q4, Pq4, opBytes{0x25}}, + {APMOVSXWD, yxm_q4, Pq4, opBytes{0x23}}, + {APMOVSXWQ, yxm_q4, Pq4, opBytes{0x24}}, + {APMOVZXBD, yxm_q4, Pq4, opBytes{0x31}}, + {APMOVZXBQ, yxm_q4, Pq4, opBytes{0x32}}, + {APMOVZXBW, yxm_q4, Pq4, opBytes{0x30}}, + {APMOVZXDQ, yxm_q4, Pq4, opBytes{0x35}}, + {APMOVZXWD, yxm_q4, Pq4, opBytes{0x33}}, + {APMOVZXWQ, yxm_q4, Pq4, opBytes{0x34}}, + {APMULDQ, yxm_q4, Pq4, opBytes{0x28}}, + {APMULHRSW, yxm_q4, Pq4, opBytes{0x0b}}, + {APMULHUW, ymm, Py1, opBytes{0xe4, Pe, 0xe4}}, + {APMULHW, ymm, Py1, opBytes{0xe5, Pe, 0xe5}}, + {APMULLD, yxm_q4, Pq4, opBytes{0x40}}, + {APMULLW, ymm, Py1, opBytes{0xd5, Pe, 0xd5}}, + {APMULULQ, ymm, Py1, opBytes{0xf4, Pe, 0xf4}}, + {APOPAL, ynone, P32, opBytes{0x61}}, + {APOPAW, ynone, Pe, opBytes{0x61}}, + {APOPCNTW, yml_rl, Pef3, opBytes{0xb8}}, + {APOPCNTL, yml_rl, Pf3, opBytes{0xb8}}, + {APOPCNTQ, yml_rl, Pfw, opBytes{0xb8}}, + {APOPFL, ynone, P32, opBytes{0x9d}}, + {APOPFQ, ynone, Py, opBytes{0x9d}}, + {APOPFW, ynone, Pe, opBytes{0x9d}}, + {APOPL, ypopl, P32, opBytes{0x58, 0x8f, 00}}, + {APOPQ, ypopl, Py, opBytes{0x58, 0x8f, 00}}, + {APOPW, ypopl, Pe, opBytes{0x58, 0x8f, 00}}, + {APOR, ymm, Py1, opBytes{0xeb, Pe, 0xeb}}, + {APSADBW, yxm, Pq, opBytes{0xf6}}, + {APSHUFHW, yxshuf, Pf3, opBytes{0x70, 00}}, + {APSHUFL, yxshuf, Pq, opBytes{0x70, 00}}, + {APSHUFLW, yxshuf, Pf2, opBytes{0x70, 00}}, + {APSHUFW, ymshuf, Pm, 
opBytes{0x70, 00}}, + {APSHUFB, ymshufb, Pq, opBytes{0x38, 0x00}}, + {APSIGNB, yxm_q4, Pq4, opBytes{0x08}}, + {APSIGND, yxm_q4, Pq4, opBytes{0x0a}}, + {APSIGNW, yxm_q4, Pq4, opBytes{0x09}}, + {APSLLO, ypsdq, Pq, opBytes{0x73, 07}}, + {APSLLL, yps, Py3, opBytes{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}}, + {APSLLQ, yps, Py3, opBytes{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}}, + {APSLLW, yps, Py3, opBytes{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}}, + {APSRAL, yps, Py3, opBytes{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}}, + {APSRAW, yps, Py3, opBytes{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}}, + {APSRLO, ypsdq, Pq, opBytes{0x73, 03}}, + {APSRLL, yps, Py3, opBytes{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}}, + {APSRLQ, yps, Py3, opBytes{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}}, + {APSRLW, yps, Py3, opBytes{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}}, + {APSUBB, yxm, Pe, opBytes{0xf8}}, + {APSUBL, yxm, Pe, opBytes{0xfa}}, + {APSUBQ, yxm, Pe, opBytes{0xfb}}, + {APSUBSB, yxm, Pe, opBytes{0xe8}}, + {APSUBSW, yxm, Pe, opBytes{0xe9}}, + {APSUBUSB, yxm, Pe, opBytes{0xd8}}, + {APSUBUSW, yxm, Pe, opBytes{0xd9}}, + {APSUBW, yxm, Pe, opBytes{0xf9}}, + {APTEST, yxm_q4, Pq4, opBytes{0x17}}, + {APUNPCKHBW, ymm, Py1, opBytes{0x68, Pe, 0x68}}, + {APUNPCKHLQ, ymm, Py1, opBytes{0x6a, Pe, 0x6a}}, + {APUNPCKHQDQ, yxm, Pe, opBytes{0x6d}}, + {APUNPCKHWL, ymm, Py1, opBytes{0x69, Pe, 0x69}}, + {APUNPCKLBW, ymm, Py1, opBytes{0x60, Pe, 0x60}}, + {APUNPCKLLQ, ymm, Py1, opBytes{0x62, Pe, 0x62}}, + {APUNPCKLQDQ, yxm, Pe, opBytes{0x6c}}, + {APUNPCKLWL, ymm, Py1, opBytes{0x61, Pe, 0x61}}, + {APUSHAL, ynone, P32, opBytes{0x60}}, + {APUSHAW, ynone, Pe, opBytes{0x60}}, + {APUSHFL, ynone, P32, opBytes{0x9c}}, + {APUSHFQ, ynone, Py, opBytes{0x9c}}, + {APUSHFW, ynone, Pe, opBytes{0x9c}}, + {APUSHL, ypushl, P32, opBytes{0x50, 0xff, 06, 0x6a, 0x68}}, + {APUSHQ, ypushl, Py, opBytes{0x50, 0xff, 06, 0x6a, 0x68}}, + {APUSHW, ypushl, Pe, opBytes{0x50, 0xff, 06, 0x6a, 0x68}}, + {APXOR, ymm, Py1, opBytes{0xef, Pe, 0xef}}, + 
{AQUAD, ybyte, Px, opBytes{8}}, + {ARCLB, yshb, Pb, opBytes{0xd0, 02, 0xc0, 02, 0xd2, 02}}, + {ARCLL, yshl, Px, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}}, + {ARCLQ, yshl, Pw, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}}, + {ARCLW, yshl, Pe, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}}, + {ARCPPS, yxm, Pm, opBytes{0x53}}, + {ARCPSS, yxm, Pf3, opBytes{0x53}}, + {ARCRB, yshb, Pb, opBytes{0xd0, 03, 0xc0, 03, 0xd2, 03}}, + {ARCRL, yshl, Px, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}}, + {ARCRQ, yshl, Pw, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}}, + {ARCRW, yshl, Pe, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}}, + {AREP, ynone, Px, opBytes{0xf3}}, + {AREPN, ynone, Px, opBytes{0xf2}}, + {obj.ARET, ynone, Px, opBytes{0xc3}}, + {ARETFW, yret, Pe, opBytes{0xcb, 0xca}}, + {ARETFL, yret, Px, opBytes{0xcb, 0xca}}, + {ARETFQ, yret, Pw, opBytes{0xcb, 0xca}}, + {AROLB, yshb, Pb, opBytes{0xd0, 00, 0xc0, 00, 0xd2, 00}}, + {AROLL, yshl, Px, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}}, + {AROLQ, yshl, Pw, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}}, + {AROLW, yshl, Pe, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}}, + {ARORB, yshb, Pb, opBytes{0xd0, 01, 0xc0, 01, 0xd2, 01}}, + {ARORL, yshl, Px, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}}, + {ARORQ, yshl, Pw, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}}, + {ARORW, yshl, Pe, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}}, + {ARSQRTPS, yxm, Pm, opBytes{0x52}}, + {ARSQRTSS, yxm, Pf3, opBytes{0x52}}, + {ASAHF, ynone, Px, opBytes{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, // XCHGB AH,AL; PUSH AX; POPFL + {ASALB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}}, + {ASALL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, + {ASALQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, + {ASALW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, + {ASARB, yshb, Pb, opBytes{0xd0, 07, 0xc0, 07, 0xd2, 07}}, + {ASARL, yshl, Px, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}}, 
+ {ASARQ, yshl, Pw, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}}, + {ASARW, yshl, Pe, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}}, + {ASBBB, yxorb, Pb, opBytes{0x1c, 0x80, 03, 0x18, 0x1a}}, + {ASBBL, yaddl, Px, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}}, + {ASBBQ, yaddl, Pw, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}}, + {ASBBW, yaddl, Pe, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}}, + {ASCASB, ynone, Pb, opBytes{0xae}}, + {ASCASL, ynone, Px, opBytes{0xaf}}, + {ASCASQ, ynone, Pw, opBytes{0xaf}}, + {ASCASW, ynone, Pe, opBytes{0xaf}}, + {ASETCC, yscond, Pb, opBytes{0x0f, 0x93, 00}}, + {ASETCS, yscond, Pb, opBytes{0x0f, 0x92, 00}}, + {ASETEQ, yscond, Pb, opBytes{0x0f, 0x94, 00}}, + {ASETGE, yscond, Pb, opBytes{0x0f, 0x9d, 00}}, + {ASETGT, yscond, Pb, opBytes{0x0f, 0x9f, 00}}, + {ASETHI, yscond, Pb, opBytes{0x0f, 0x97, 00}}, + {ASETLE, yscond, Pb, opBytes{0x0f, 0x9e, 00}}, + {ASETLS, yscond, Pb, opBytes{0x0f, 0x96, 00}}, + {ASETLT, yscond, Pb, opBytes{0x0f, 0x9c, 00}}, + {ASETMI, yscond, Pb, opBytes{0x0f, 0x98, 00}}, + {ASETNE, yscond, Pb, opBytes{0x0f, 0x95, 00}}, + {ASETOC, yscond, Pb, opBytes{0x0f, 0x91, 00}}, + {ASETOS, yscond, Pb, opBytes{0x0f, 0x90, 00}}, + {ASETPC, yscond, Pb, opBytes{0x0f, 0x9b, 00}}, + {ASETPL, yscond, Pb, opBytes{0x0f, 0x99, 00}}, + {ASETPS, yscond, Pb, opBytes{0x0f, 0x9a, 00}}, + {ASHLB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}}, + {ASHLL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, + {ASHLQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, + {ASHLW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, + {ASHRB, yshb, Pb, opBytes{0xd0, 05, 0xc0, 05, 0xd2, 05}}, + {ASHRL, yshl, Px, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}}, + {ASHRQ, yshl, Pw, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}}, + {ASHRW, yshl, Pe, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}}, + {ASHUFPD, yxshuf, Pq, opBytes{0xc6, 00}}, + {ASHUFPS, yxshuf, Pm, opBytes{0xc6, 00}}, + {ASQRTPD, yxm, Pe, 
opBytes{0x51}}, + {ASQRTPS, yxm, Pm, opBytes{0x51}}, + {ASQRTSD, yxm, Pf2, opBytes{0x51}}, + {ASQRTSS, yxm, Pf3, opBytes{0x51}}, + {ASTC, ynone, Px, opBytes{0xf9}}, + {ASTD, ynone, Px, opBytes{0xfd}}, + {ASTI, ynone, Px, opBytes{0xfb}}, + {ASTMXCSR, ysvrs_om, Pm, opBytes{0xae, 03, 0xae, 03}}, + {ASTOSB, ynone, Pb, opBytes{0xaa}}, + {ASTOSL, ynone, Px, opBytes{0xab}}, + {ASTOSQ, ynone, Pw, opBytes{0xab}}, + {ASTOSW, ynone, Pe, opBytes{0xab}}, + {ASUBB, yxorb, Pb, opBytes{0x2c, 0x80, 05, 0x28, 0x2a}}, + {ASUBL, yaddl, Px, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}}, + {ASUBPD, yxm, Pe, opBytes{0x5c}}, + {ASUBPS, yxm, Pm, opBytes{0x5c}}, + {ASUBQ, yaddl, Pw, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}}, + {ASUBSD, yxm, Pf2, opBytes{0x5c}}, + {ASUBSS, yxm, Pf3, opBytes{0x5c}}, + {ASUBW, yaddl, Pe, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}}, + {ASWAPGS, ynone, Pm, opBytes{0x01, 0xf8}}, + {ASYSCALL, ynone, Px, opBytes{0x0f, 0x05}}, // fast syscall + {ATESTB, yxorb, Pb, opBytes{0xa8, 0xf6, 00, 0x84, 0x84}}, + {ATESTL, ytestl, Px, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}}, + {ATESTQ, ytestl, Pw, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}}, + {ATESTW, ytestl, Pe, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}}, + {ATPAUSE, ywrfsbase, Pq, opBytes{0xae, 06}}, + {obj.ATEXT, ytext, Px, opBytes{}}, + {AUCOMISD, yxm, Pe, opBytes{0x2e}}, + {AUCOMISS, yxm, Pm, opBytes{0x2e}}, + {AUNPCKHPD, yxm, Pe, opBytes{0x15}}, + {AUNPCKHPS, yxm, Pm, opBytes{0x15}}, + {AUNPCKLPD, yxm, Pe, opBytes{0x14}}, + {AUNPCKLPS, yxm, Pm, opBytes{0x14}}, + {AUMONITOR, ywrfsbase, Pf3, opBytes{0xae, 06}}, + {AVERR, ydivl, Pm, opBytes{0x00, 04}}, + {AVERW, ydivl, Pm, opBytes{0x00, 05}}, + {AWAIT, ynone, Px, opBytes{0x9b}}, + {AWORD, ybyte, Px, opBytes{2}}, + {AXCHGB, yml_mb, Pb, opBytes{0x86, 0x86}}, + {AXCHGL, yxchg, Px, opBytes{0x90, 0x90, 0x87, 0x87}}, + {AXCHGQ, yxchg, Pw, opBytes{0x90, 0x90, 0x87, 0x87}}, + {AXCHGW, yxchg, Pe, opBytes{0x90, 0x90, 0x87, 0x87}}, + {AXLAT, ynone, Px, opBytes{0xd7}}, + {AXORB, yxorb, Pb, 
opBytes{0x34, 0x80, 06, 0x30, 0x32}}, + {AXORL, yaddl, Px, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}}, + {AXORPD, yxm, Pe, opBytes{0x57}}, + {AXORPS, yxm, Pm, opBytes{0x57}}, + {AXORQ, yaddl, Pw, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}}, + {AXORW, yaddl, Pe, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}}, + {AFMOVB, yfmvx, Px, opBytes{0xdf, 04}}, + {AFMOVBP, yfmvp, Px, opBytes{0xdf, 06}}, + {AFMOVD, yfmvd, Px, opBytes{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}}, + {AFMOVDP, yfmvdp, Px, opBytes{0xdd, 03, 0xdd, 03}}, + {AFMOVF, yfmvf, Px, opBytes{0xd9, 00, 0xd9, 02}}, + {AFMOVFP, yfmvp, Px, opBytes{0xd9, 03}}, + {AFMOVL, yfmvf, Px, opBytes{0xdb, 00, 0xdb, 02}}, + {AFMOVLP, yfmvp, Px, opBytes{0xdb, 03}}, + {AFMOVV, yfmvx, Px, opBytes{0xdf, 05}}, + {AFMOVVP, yfmvp, Px, opBytes{0xdf, 07}}, + {AFMOVW, yfmvf, Px, opBytes{0xdf, 00, 0xdf, 02}}, + {AFMOVWP, yfmvp, Px, opBytes{0xdf, 03}}, + {AFMOVX, yfmvx, Px, opBytes{0xdb, 05}}, + {AFMOVXP, yfmvp, Px, opBytes{0xdb, 07}}, + {AFCMOVCC, yfcmv, Px, opBytes{0xdb, 00}}, + {AFCMOVCS, yfcmv, Px, opBytes{0xda, 00}}, + {AFCMOVEQ, yfcmv, Px, opBytes{0xda, 01}}, + {AFCMOVHI, yfcmv, Px, opBytes{0xdb, 02}}, + {AFCMOVLS, yfcmv, Px, opBytes{0xda, 02}}, + {AFCMOVB, yfcmv, Px, opBytes{0xda, 00}}, + {AFCMOVBE, yfcmv, Px, opBytes{0xda, 02}}, + {AFCMOVNB, yfcmv, Px, opBytes{0xdb, 00}}, + {AFCMOVNBE, yfcmv, Px, opBytes{0xdb, 02}}, + {AFCMOVE, yfcmv, Px, opBytes{0xda, 01}}, + {AFCMOVNE, yfcmv, Px, opBytes{0xdb, 01}}, + {AFCMOVNU, yfcmv, Px, opBytes{0xdb, 03}}, + {AFCMOVU, yfcmv, Px, opBytes{0xda, 03}}, + {AFCMOVUN, yfcmv, Px, opBytes{0xda, 03}}, + {AFCOMD, yfadd, Px, opBytes{0xdc, 02, 0xd8, 02, 0xdc, 02}}, // botch + {AFCOMDP, yfadd, Px, opBytes{0xdc, 03, 0xd8, 03, 0xdc, 03}}, // botch + {AFCOMDPP, ycompp, Px, opBytes{0xde, 03}}, + {AFCOMF, yfmvx, Px, opBytes{0xd8, 02}}, + {AFCOMFP, yfmvx, Px, opBytes{0xd8, 03}}, + {AFCOMI, yfcmv, Px, opBytes{0xdb, 06}}, + {AFCOMIP, yfcmv, Px, opBytes{0xdf, 06}}, + {AFCOML, yfmvx, Px, opBytes{0xda, 02}}, + 
{AFCOMLP, yfmvx, Px, opBytes{0xda, 03}}, + {AFCOMW, yfmvx, Px, opBytes{0xde, 02}}, + {AFCOMWP, yfmvx, Px, opBytes{0xde, 03}}, + {AFUCOM, ycompp, Px, opBytes{0xdd, 04}}, + {AFUCOMI, ycompp, Px, opBytes{0xdb, 05}}, + {AFUCOMIP, ycompp, Px, opBytes{0xdf, 05}}, + {AFUCOMP, ycompp, Px, opBytes{0xdd, 05}}, + {AFUCOMPP, ycompp, Px, opBytes{0xda, 13}}, + {AFADDDP, ycompp, Px, opBytes{0xde, 00}}, + {AFADDW, yfmvx, Px, opBytes{0xde, 00}}, + {AFADDL, yfmvx, Px, opBytes{0xda, 00}}, + {AFADDF, yfmvx, Px, opBytes{0xd8, 00}}, + {AFADDD, yfadd, Px, opBytes{0xdc, 00, 0xd8, 00, 0xdc, 00}}, + {AFMULDP, ycompp, Px, opBytes{0xde, 01}}, + {AFMULW, yfmvx, Px, opBytes{0xde, 01}}, + {AFMULL, yfmvx, Px, opBytes{0xda, 01}}, + {AFMULF, yfmvx, Px, opBytes{0xd8, 01}}, + {AFMULD, yfadd, Px, opBytes{0xdc, 01, 0xd8, 01, 0xdc, 01}}, + {AFSUBDP, ycompp, Px, opBytes{0xde, 05}}, + {AFSUBW, yfmvx, Px, opBytes{0xde, 04}}, + {AFSUBL, yfmvx, Px, opBytes{0xda, 04}}, + {AFSUBF, yfmvx, Px, opBytes{0xd8, 04}}, + {AFSUBD, yfadd, Px, opBytes{0xdc, 04, 0xd8, 04, 0xdc, 05}}, + {AFSUBRDP, ycompp, Px, opBytes{0xde, 04}}, + {AFSUBRW, yfmvx, Px, opBytes{0xde, 05}}, + {AFSUBRL, yfmvx, Px, opBytes{0xda, 05}}, + {AFSUBRF, yfmvx, Px, opBytes{0xd8, 05}}, + {AFSUBRD, yfadd, Px, opBytes{0xdc, 05, 0xd8, 05, 0xdc, 04}}, + {AFDIVDP, ycompp, Px, opBytes{0xde, 07}}, + {AFDIVW, yfmvx, Px, opBytes{0xde, 06}}, + {AFDIVL, yfmvx, Px, opBytes{0xda, 06}}, + {AFDIVF, yfmvx, Px, opBytes{0xd8, 06}}, + {AFDIVD, yfadd, Px, opBytes{0xdc, 06, 0xd8, 06, 0xdc, 07}}, + {AFDIVRDP, ycompp, Px, opBytes{0xde, 06}}, + {AFDIVRW, yfmvx, Px, opBytes{0xde, 07}}, + {AFDIVRL, yfmvx, Px, opBytes{0xda, 07}}, + {AFDIVRF, yfmvx, Px, opBytes{0xd8, 07}}, + {AFDIVRD, yfadd, Px, opBytes{0xdc, 07, 0xd8, 07, 0xdc, 06}}, + {AFXCHD, yfxch, Px, opBytes{0xd9, 01, 0xd9, 01}}, + {AFFREE, nil, 0, opBytes{}}, + {AFLDCW, ysvrs_mo, Px, opBytes{0xd9, 05, 0xd9, 05}}, + {AFLDENV, ysvrs_mo, Px, opBytes{0xd9, 04, 0xd9, 04}}, + {AFRSTOR, ysvrs_mo, Px, opBytes{0xdd, 04, 0xdd, 04}}, 
+ {AFSAVE, ysvrs_om, Px, opBytes{0xdd, 06, 0xdd, 06}}, + {AFSTCW, ysvrs_om, Px, opBytes{0xd9, 07, 0xd9, 07}}, + {AFSTENV, ysvrs_om, Px, opBytes{0xd9, 06, 0xd9, 06}}, + {AFSTSW, ystsw, Px, opBytes{0xdd, 07, 0xdf, 0xe0}}, + {AF2XM1, ynone, Px, opBytes{0xd9, 0xf0}}, + {AFABS, ynone, Px, opBytes{0xd9, 0xe1}}, + {AFBLD, ysvrs_mo, Px, opBytes{0xdf, 04}}, + {AFBSTP, yclflush, Px, opBytes{0xdf, 06}}, + {AFCHS, ynone, Px, opBytes{0xd9, 0xe0}}, + {AFCLEX, ynone, Px, opBytes{0xdb, 0xe2}}, + {AFCOS, ynone, Px, opBytes{0xd9, 0xff}}, + {AFDECSTP, ynone, Px, opBytes{0xd9, 0xf6}}, + {AFINCSTP, ynone, Px, opBytes{0xd9, 0xf7}}, + {AFINIT, ynone, Px, opBytes{0xdb, 0xe3}}, + {AFLD1, ynone, Px, opBytes{0xd9, 0xe8}}, + {AFLDL2E, ynone, Px, opBytes{0xd9, 0xea}}, + {AFLDL2T, ynone, Px, opBytes{0xd9, 0xe9}}, + {AFLDLG2, ynone, Px, opBytes{0xd9, 0xec}}, + {AFLDLN2, ynone, Px, opBytes{0xd9, 0xed}}, + {AFLDPI, ynone, Px, opBytes{0xd9, 0xeb}}, + {AFLDZ, ynone, Px, opBytes{0xd9, 0xee}}, + {AFNOP, ynone, Px, opBytes{0xd9, 0xd0}}, + {AFPATAN, ynone, Px, opBytes{0xd9, 0xf3}}, + {AFPREM, ynone, Px, opBytes{0xd9, 0xf8}}, + {AFPREM1, ynone, Px, opBytes{0xd9, 0xf5}}, + {AFPTAN, ynone, Px, opBytes{0xd9, 0xf2}}, + {AFRNDINT, ynone, Px, opBytes{0xd9, 0xfc}}, + {AFSCALE, ynone, Px, opBytes{0xd9, 0xfd}}, + {AFSIN, ynone, Px, opBytes{0xd9, 0xfe}}, + {AFSINCOS, ynone, Px, opBytes{0xd9, 0xfb}}, + {AFSQRT, ynone, Px, opBytes{0xd9, 0xfa}}, + {AFTST, ynone, Px, opBytes{0xd9, 0xe4}}, + {AFXAM, ynone, Px, opBytes{0xd9, 0xe5}}, + {AFXTRACT, ynone, Px, opBytes{0xd9, 0xf4}}, + {AFYL2X, ynone, Px, opBytes{0xd9, 0xf1}}, + {AFYL2XP1, ynone, Px, opBytes{0xd9, 0xf9}}, + {ACMPXCHGB, yrb_mb, Pb, opBytes{0x0f, 0xb0}}, + {ACMPXCHGL, yrl_ml, Px, opBytes{0x0f, 0xb1}}, + {ACMPXCHGW, yrl_ml, Pe, opBytes{0x0f, 0xb1}}, + {ACMPXCHGQ, yrl_ml, Pw, opBytes{0x0f, 0xb1}}, + {ACMPXCHG8B, yscond, Pm, opBytes{0xc7, 01}}, + {ACMPXCHG16B, yscond, Pw, opBytes{0x0f, 0xc7, 01}}, + {AINVD, ynone, Pm, opBytes{0x08}}, + {AINVLPG, ydivb, Pm, 
opBytes{0x01, 07}}, + {AINVPCID, ycrc32l, Pe, opBytes{0x0f, 0x38, 0x82, 0}}, + {ALFENCE, ynone, Pm, opBytes{0xae, 0xe8}}, + {AMFENCE, ynone, Pm, opBytes{0xae, 0xf0}}, + {AMOVNTIL, yrl_ml, Pm, opBytes{0xc3}}, + {AMOVNTIQ, yrl_ml, Pw, opBytes{0x0f, 0xc3}}, + {ARDPKRU, ynone, Pm, opBytes{0x01, 0xee, 0}}, + {ARDMSR, ynone, Pm, opBytes{0x32}}, + {ARDPMC, ynone, Pm, opBytes{0x33}}, + {ARDTSC, ynone, Pm, opBytes{0x31}}, + {ARSM, ynone, Pm, opBytes{0xaa}}, + {ASFENCE, ynone, Pm, opBytes{0xae, 0xf8}}, + {ASYSRET, ynone, Pm, opBytes{0x07}}, + {AWBINVD, ynone, Pm, opBytes{0x09}}, + {AWRMSR, ynone, Pm, opBytes{0x30}}, + {AWRPKRU, ynone, Pm, opBytes{0x01, 0xef, 0}}, + {AXADDB, yrb_mb, Pb, opBytes{0x0f, 0xc0}}, + {AXADDL, yrl_ml, Px, opBytes{0x0f, 0xc1}}, + {AXADDQ, yrl_ml, Pw, opBytes{0x0f, 0xc1}}, + {AXADDW, yrl_ml, Pe, opBytes{0x0f, 0xc1}}, + {ACRC32B, ycrc32b, Px, opBytes{0xf2, 0x0f, 0x38, 0xf0, 0}}, + {ACRC32L, ycrc32l, Px, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}}, + {ACRC32Q, ycrc32l, Pw, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}}, + {ACRC32W, ycrc32l, Pe, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}}, + {APREFETCHT0, yprefetch, Pm, opBytes{0x18, 01}}, + {APREFETCHT1, yprefetch, Pm, opBytes{0x18, 02}}, + {APREFETCHT2, yprefetch, Pm, opBytes{0x18, 03}}, + {APREFETCHNTA, yprefetch, Pm, opBytes{0x18, 00}}, + {AMOVQL, yrl_ml, Px, opBytes{0x89}}, + {obj.AUNDEF, ynone, Px, opBytes{0x0f, 0x0b}}, + {AAESENC, yaes, Pq, opBytes{0x38, 0xdc, 0}}, + {AAESENCLAST, yaes, Pq, opBytes{0x38, 0xdd, 0}}, + {AAESDEC, yaes, Pq, opBytes{0x38, 0xde, 0}}, + {AAESDECLAST, yaes, Pq, opBytes{0x38, 0xdf, 0}}, + {AAESIMC, yaes, Pq, opBytes{0x38, 0xdb, 0}}, + {AAESKEYGENASSIST, yxshuf, Pq, opBytes{0x3a, 0xdf, 0}}, + {AROUNDPD, yxshuf, Pq, opBytes{0x3a, 0x09, 0}}, + {AROUNDPS, yxshuf, Pq, opBytes{0x3a, 0x08, 0}}, + {AROUNDSD, yxshuf, Pq, opBytes{0x3a, 0x0b, 0}}, + {AROUNDSS, yxshuf, Pq, opBytes{0x3a, 0x0a, 0}}, + {APSHUFD, yxshuf, Pq, opBytes{0x70, 0}}, + {APCLMULQDQ, yxshuf, Pq, opBytes{0x3a, 0x44, 0}}, + {APCMPESTRI, 
yxshuf, Pq, opBytes{0x3a, 0x61, 0}}, + {APCMPESTRM, yxshuf, Pq, opBytes{0x3a, 0x60, 0}}, + {AMOVDDUP, yxm, Pf2, opBytes{0x12}}, + {AMOVSHDUP, yxm, Pf3, opBytes{0x16}}, + {AMOVSLDUP, yxm, Pf3, opBytes{0x12}}, + {ARDTSCP, ynone, Pm, opBytes{0x01, 0xf9, 0}}, + {ASTAC, ynone, Pm, opBytes{0x01, 0xcb, 0}}, + {AUD1, ynone, Pm, opBytes{0xb9, 0}}, + {AUD2, ynone, Pm, opBytes{0x0b, 0}}, + {AUMWAIT, ywrfsbase, Pf2, opBytes{0xae, 06}}, + {ASYSENTER, ynone, Px, opBytes{0x0f, 0x34, 0}}, + {ASYSENTER64, ynone, Pw, opBytes{0x0f, 0x34, 0}}, + {ASYSEXIT, ynone, Px, opBytes{0x0f, 0x35, 0}}, + {ASYSEXIT64, ynone, Pw, opBytes{0x0f, 0x35, 0}}, + {ALMSW, ydivl, Pm, opBytes{0x01, 06}}, + {ALLDT, ydivl, Pm, opBytes{0x00, 02}}, + {ALIDT, ysvrs_mo, Pm, opBytes{0x01, 03}}, + {ALGDT, ysvrs_mo, Pm, opBytes{0x01, 02}}, + {ATZCNTW, ycrc32l, Pe, opBytes{0xf3, 0x0f, 0xbc, 0}}, + {ATZCNTL, ycrc32l, Px, opBytes{0xf3, 0x0f, 0xbc, 0}}, + {ATZCNTQ, ycrc32l, Pw, opBytes{0xf3, 0x0f, 0xbc, 0}}, + {AXRSTOR, ydivl, Px, opBytes{0x0f, 0xae, 05}}, + {AXRSTOR64, ydivl, Pw, opBytes{0x0f, 0xae, 05}}, + {AXRSTORS, ydivl, Px, opBytes{0x0f, 0xc7, 03}}, + {AXRSTORS64, ydivl, Pw, opBytes{0x0f, 0xc7, 03}}, + {AXSAVE, yclflush, Px, opBytes{0x0f, 0xae, 04}}, + {AXSAVE64, yclflush, Pw, opBytes{0x0f, 0xae, 04}}, + {AXSAVEOPT, yclflush, Px, opBytes{0x0f, 0xae, 06}}, + {AXSAVEOPT64, yclflush, Pw, opBytes{0x0f, 0xae, 06}}, + {AXSAVEC, yclflush, Px, opBytes{0x0f, 0xc7, 04}}, + {AXSAVEC64, yclflush, Pw, opBytes{0x0f, 0xc7, 04}}, + {AXSAVES, yclflush, Px, opBytes{0x0f, 0xc7, 05}}, + {AXSAVES64, yclflush, Pw, opBytes{0x0f, 0xc7, 05}}, + {ASGDT, yclflush, Pm, opBytes{0x01, 00}}, + {ASIDT, yclflush, Pm, opBytes{0x01, 01}}, + {ARDRANDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 06}}, + {ARDRANDL, yrdrand, Px, opBytes{0x0f, 0xc7, 06}}, + {ARDRANDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 06}}, + {ARDSEEDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 07}}, + {ARDSEEDL, yrdrand, Px, opBytes{0x0f, 0xc7, 07}}, + {ARDSEEDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 07}}, + 
{ASTRW, yincq, Pe, opBytes{0x0f, 0x00, 01}}, + {ASTRL, yincq, Px, opBytes{0x0f, 0x00, 01}}, + {ASTRQ, yincq, Pw, opBytes{0x0f, 0x00, 01}}, + {AXSETBV, ynone, Pm, opBytes{0x01, 0xd1, 0}}, + {AMOVBEW, ymovbe, Pq, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}}, + {AMOVBEL, ymovbe, Pm, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}}, + {AMOVBEQ, ymovbe, Pw, opBytes{0x0f, 0x38, 0xf0, 0, 0x0f, 0x38, 0xf1, 0}}, + {ANOPW, ydivl, Pe, opBytes{0x0f, 0x1f, 00}}, + {ANOPL, ydivl, Px, opBytes{0x0f, 0x1f, 00}}, + {ASLDTW, yincq, Pe, opBytes{0x0f, 0x00, 00}}, + {ASLDTL, yincq, Px, opBytes{0x0f, 0x00, 00}}, + {ASLDTQ, yincq, Pw, opBytes{0x0f, 0x00, 00}}, + {ASMSWW, yincq, Pe, opBytes{0x0f, 0x01, 04}}, + {ASMSWL, yincq, Px, opBytes{0x0f, 0x01, 04}}, + {ASMSWQ, yincq, Pw, opBytes{0x0f, 0x01, 04}}, + {ABLENDVPS, yblendvpd, Pq4, opBytes{0x14}}, + {ABLENDVPD, yblendvpd, Pq4, opBytes{0x15}}, + {APBLENDVB, yblendvpd, Pq4, opBytes{0x10}}, + {ASHA1MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xc9, 0}}, + {ASHA1MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xca, 0}}, + {ASHA1NEXTE, yaes, Px, opBytes{0x0f, 0x38, 0xc8, 0}}, + {ASHA256MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xcc, 0}}, + {ASHA256MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xcd, 0}}, + {ASHA1RNDS4, ysha1rnds4, Pm, opBytes{0x3a, 0xcc, 0}}, + {ASHA256RNDS2, ysha256rnds2, Px, opBytes{0x0f, 0x38, 0xcb, 0}}, + {ARDFSBASEL, yrdrand, Pf3, opBytes{0xae, 00}}, + {ARDFSBASEQ, yrdrand, Pfw, opBytes{0xae, 00}}, + {ARDGSBASEL, yrdrand, Pf3, opBytes{0xae, 01}}, + {ARDGSBASEQ, yrdrand, Pfw, opBytes{0xae, 01}}, + {AWRFSBASEL, ywrfsbase, Pf3, opBytes{0xae, 02}}, + {AWRFSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 02}}, + {AWRGSBASEL, ywrfsbase, Pf3, opBytes{0xae, 03}}, + {AWRGSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 03}}, + {ALFSW, ym_rl, Pe, opBytes{0x0f, 0xb4}}, + {ALFSL, ym_rl, Px, opBytes{0x0f, 0xb4}}, + {ALFSQ, ym_rl, Pw, opBytes{0x0f, 0xb4}}, + {ALGSW, ym_rl, Pe, opBytes{0x0f, 0xb5}}, + {ALGSL, ym_rl, Px, opBytes{0x0f, 0xb5}}, + {ALGSQ, ym_rl, Pw, opBytes{0x0f, 0xb5}}, + {ALSSW, ym_rl, Pe, 
opBytes{0x0f, 0xb2}},
	{ALSSL, ym_rl, Px, opBytes{0x0f, 0xb2}},
	{ALSSQ, ym_rl, Pw, opBytes{0x0f, 0xb2}},

	{ABLENDPD, yxshuf, Pq, opBytes{0x3a, 0x0d, 0}},
	{ABLENDPS, yxshuf, Pq, opBytes{0x3a, 0x0c, 0}},
	{AXACQUIRE, ynone, Px, opBytes{0xf2}},
	{AXRELEASE, ynone, Px, opBytes{0xf3}},
	{AXBEGIN, yxbegin, Px, opBytes{0xc7, 0xf8}},
	{AXABORT, yxabort, Px, opBytes{0xc6, 0xf8}},
	{AXEND, ynone, Px, opBytes{0x0f, 01, 0xd5}},
	{AXTEST, ynone, Px, opBytes{0x0f, 01, 0xd6}},
	{AXGETBV, ynone, Pm, opBytes{01, 0xd0}},
	{obj.AFUNCDATA, yfuncdata, Px, opBytes{0, 0}},
	{obj.APCDATA, ypcdata, Px, opBytes{0, 0}},
	{obj.ADUFFCOPY, yduff, Px, opBytes{0xe8}},
	{obj.ADUFFZERO, yduff, Px, opBytes{0xe8}},

	{obj.AEND, nil, 0, opBytes{}},
	{0, nil, 0, opBytes{}},
}

var opindex [(ALAST + 1) & obj.AMask]*Optab

// useAbs reports whether the symbol s has to be referenced without
// pc-relative addressing.
//
// When the target header type is Solaris, this is true exactly for symbols
// whose name starts with "libc_". The original authors note that Solaris calls
// .so functions instead of making system calls, and that this is probably a
// workaround for a Solaris-specific bug whose real cause is unknown.
// For every other header type it is true only for non-shared I386 builds.
func useAbs(ctxt *obj.Link, s *obj.LSym) bool {
	if ctxt.Headtype != objabi.Hsolaris {
		return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared
	}
	// All the Solaris dynamic imports from libc.so begin with "libc_".
	return strings.HasPrefix(s.Name, "libc_")
}

// single-instruction no-ops of various lengths.
// constructed by hand and disassembled with gdb to verify.
// see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
+var nop = [][16]uint8{ + {0x90}, + {0x66, 0x90}, + {0x0F, 0x1F, 0x00}, + {0x0F, 0x1F, 0x40, 0x00}, + {0x0F, 0x1F, 0x44, 0x00, 0x00}, + {0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00}, + {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00}, + {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, +} + +// Native Client rejects the repeated 0x66 prefix. +// {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, +func fillnop(p []byte, n int) { + var m int + + for n > 0 { + m = n + if m > len(nop) { + m = len(nop) + } + copy(p[:m], nop[m-1][:m]) + p = p[m:] + n -= m + } +} + +func noppad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 { + s.Grow(int64(c) + int64(pad)) + fillnop(s.P[c:], int(pad)) + return c + pad +} + +func spadjop(ctxt *obj.Link, l, q obj.As) obj.As { + if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 { + return l + } + return q +} + +// isJump returns whether p is a jump instruction. +// It is used to ensure that no standalone or macro-fused jump will straddle +// or end on a 32 byte boundary by inserting NOPs before the jumps. +func isJump(p *obj.Prog) bool { + return p.To.Target() != nil || p.As == obj.AJMP || p.As == obj.ACALL || + p.As == obj.ARET || p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO +} + +// lookForJCC returns the first real instruction starting from p, if that instruction is a conditional +// jump. Otherwise, nil is returned. 
+func lookForJCC(p *obj.Prog) *obj.Prog { + // Skip any PCDATA, FUNCDATA or NOP instructions + var q *obj.Prog + for q = p.Link; q != nil && (q.As == obj.APCDATA || q.As == obj.AFUNCDATA || q.As == obj.ANOP); q = q.Link { + } + + if q == nil || q.To.Target() == nil || p.As == obj.AJMP || p.As == obj.ACALL { + return nil + } + + switch q.As { + case AJOS, AJOC, AJCS, AJCC, AJEQ, AJNE, AJLS, AJHI, + AJMI, AJPL, AJPS, AJPC, AJLT, AJGE, AJLE, AJGT: + default: + return nil + } + + return q +} + +// fusedJump determines whether p can be fused with a subsequent conditional jump instruction. +// If it can, we return true followed by the total size of the fused jump. If it can't, we return false. +// Macro fusion rules are derived from the Intel Optimization Manual (April 2019) section 3.4.2.2. +func fusedJump(p *obj.Prog) (bool, uint8) { + var fusedSize uint8 + + // The first instruction in a macro fused pair may be preceded by the LOCK prefix, + // or possibly an XACQUIRE/XRELEASE prefix followed by a LOCK prefix. If it is, we + // need to be careful to insert any padding before the locks rather than directly after them. 
+ + if p.As == AXRELEASE || p.As == AXACQUIRE { + fusedSize += p.Isize + for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link { + } + if p == nil { + return false, 0 + } + } + if p.As == ALOCK { + fusedSize += p.Isize + for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link { + } + if p == nil { + return false, 0 + } + } + cmp := p.As == ACMPB || p.As == ACMPL || p.As == ACMPQ || p.As == ACMPW + + cmpAddSub := p.As == AADDB || p.As == AADDL || p.As == AADDW || p.As == AADDQ || + p.As == ASUBB || p.As == ASUBL || p.As == ASUBW || p.As == ASUBQ || cmp + + testAnd := p.As == ATESTB || p.As == ATESTL || p.As == ATESTQ || p.As == ATESTW || + p.As == AANDB || p.As == AANDL || p.As == AANDQ || p.As == AANDW + + incDec := p.As == AINCB || p.As == AINCL || p.As == AINCQ || p.As == AINCW || + p.As == ADECB || p.As == ADECL || p.As == ADECQ || p.As == ADECW + + if !cmpAddSub && !testAnd && !incDec { + return false, 0 + } + + if !incDec { + var argOne obj.AddrType + var argTwo obj.AddrType + if cmp { + argOne = p.From.Type + argTwo = p.To.Type + } else { + argOne = p.To.Type + argTwo = p.From.Type + } + if argOne == obj.TYPE_REG { + if argTwo != obj.TYPE_REG && argTwo != obj.TYPE_CONST && argTwo != obj.TYPE_MEM { + return false, 0 + } + } else if argOne == obj.TYPE_MEM { + if argTwo != obj.TYPE_REG { + return false, 0 + } + } else { + return false, 0 + } + } + + fusedSize += p.Isize + jmp := lookForJCC(p) + if jmp == nil { + return false, 0 + } + + fusedSize += jmp.Isize + + if testAnd { + return true, fusedSize + } + + if jmp.As == AJOC || jmp.As == AJOS || jmp.As == AJMI || + jmp.As == AJPL || jmp.As == AJPS || jmp.As == AJPC { + return false, 0 + } + + if cmpAddSub { + return true, fusedSize + } + + if jmp.As == AJCS || jmp.As == AJCC || jmp.As == AJHI || jmp.As == AJLS { + return false, 0 + } + + return true, fusedSize +} + +type padJumpsCtx int32 + +func makePjcCtx(ctxt *obj.Link) padJumpsCtx { + // Disable 
jump padding on 32 bit builds by settting + // padJumps to 0. + if ctxt.Arch.Family == sys.I386 { + return padJumpsCtx(0) + } + + // Disable jump padding for hand written assembly code. + if ctxt.IsAsm { + return padJumpsCtx(0) + } + + return padJumpsCtx(32) +} + +// padJump detects whether the instruction being assembled is a standalone or a macro-fused +// jump that needs to be padded. If it is, NOPs are inserted to ensure that the jump does +// not cross or end on a 32 byte boundary. +func (pjc padJumpsCtx) padJump(ctxt *obj.Link, s *obj.LSym, p *obj.Prog, c int32) int32 { + if pjc == 0 { + return c + } + + var toPad int32 + fj, fjSize := fusedJump(p) + mask := int32(pjc - 1) + if fj { + if (c&mask)+int32(fjSize) >= int32(pjc) { + toPad = int32(pjc) - (c & mask) + } + } else if isJump(p) { + if (c&mask)+int32(p.Isize) >= int32(pjc) { + toPad = int32(pjc) - (c & mask) + } + } + if toPad <= 0 { + return c + } + + return noppad(ctxt, s, c, toPad) +} + +// reAssemble is called if an instruction's size changes during assembly. If +// it does and the instruction is a standalone or a macro-fused jump we need to +// reassemble. 
func (pjc padJumpsCtx) reAssemble(p *obj.Prog) bool {
	// Padding disabled: a size change never forces another pass on its account.
	if pjc == 0 {
		return false
	}

	fj, _ := fusedJump(p)
	return fj || isJump(p)
}

// nopPad records padding that span6 inserted in front of an instruction so
// that it can later be spliced into the Prog list as an explicit NOP.
type nopPad struct {
	p *obj.Prog // Instruction before the pad
	n int32     // Size of the pad
}

// span6 assembles the function symbol s: it rewrites a few pseudo
// instructions (ADJSP, and indirect CALL/JMP when retpolines are enabled),
// then repeatedly lays out and encodes every instruction into s.P until a
// pass finishes without requesting re-assembly. Afterwards it splices the
// recorded padding into the Prog list, sets s.Size, marks the two-instruction
// TLS sequence as unsafe for preemption where applicable, and writes the
// jump table entries.
//
// It returns immediately if s.P is already non-nil.
func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
	if ctxt.Retpoline && ctxt.Arch.Family == sys.I386 {
		ctxt.Diag("-spectre=ret not supported on 386")
		ctxt.Retpoline = false // don't keep printing
	}

	pjc := makePjcCtx(ctxt)

	if s.P != nil {
		return
	}

	// ycover[0] is set by instinit (the diagonal entry for class 0).
	if ycover[0] == 0 {
		ctxt.Diag("x86 tables not initialized, call x86.instinit first")
	}

	// First pass: canonicalize instructions before any sizes are computed.
	for p := s.Func().Text; p != nil; p = p.Link {
		// A branch without a resolved target is made to point at itself.
		if p.To.Type == obj.TYPE_BRANCH && p.To.Target() == nil {
			p.To.SetTarget(p)
		}
		if p.As == AADJSP {
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_SP
			// Generate 'ADDQ $x, SP' or 'SUBQ $x, SP', with x positive.
			// One exception: It is smaller to encode $-0x80 than $0x80.
			// For that case, flip the sign and the op:
			// Instead of 'ADDQ $0x80, SP', generate 'SUBQ $-0x80, SP'.
			switch v := p.From.Offset; {
			case v == 0:
				p.As = obj.ANOP
			case v == 0x80 || (v < 0 && v != -0x80):
				p.As = spadjop(ctxt, AADDL, AADDQ)
				p.From.Offset *= -1
			default:
				p.As = spadjop(ctxt, ASUBL, ASUBQ)
			}
		}
		if ctxt.Retpoline && (p.As == obj.ACALL || p.As == obj.AJMP) && (p.To.Type == obj.TYPE_REG || p.To.Type == obj.TYPE_MEM) {
			// Only register-indirect forms can be redirected; memory-indirect
			// ones are reported and left untouched.
			if p.To.Type != obj.TYPE_REG {
				ctxt.Diag("non-retpoline-compatible: %v", p)
				continue
			}
			// Turn the indirect branch into a direct branch to the
			// per-register symbol runtime.retpoline<REG>.
			p.To.Type = obj.TYPE_BRANCH
			p.To.Name = obj.NAME_EXTERN
			p.To.Sym = ctxt.Lookup("runtime.retpoline" + obj.Rconv(int(p.To.Reg)))
			p.To.Reg = 0
			p.To.Offset = 0
		}
	}

	var count int64 // rough count of number of instructions
	for p := s.Func().Text; p != nil; p = p.Link {
		count++
		p.Back = branchShort // use short branches first time through
		// A target that already carries branchShort was visited earlier in
		// this loop, so the branch goes backwards and the target heads a loop.
		if q := p.To.Target(); q != nil && (q.Back&branchShort != 0) {
			p.Back |= branchBackwards
			q.Back |= branchLoopHead
		}
	}
	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction

	var ab AsmBuf
	var n int   // number of assembly passes so far
	var c int32 // current byte offset within s.P
	errors := ctxt.Errors
	var nops []nopPad // Padding for a particular assembly (reuse slice storage if multiple assemblies)
	nrelocs0 := len(s.R)
	for {
		// This loop continues while there are reasons to re-assemble
		// whole block, like the presence of long forward jumps.
		reAssemble := false
		// Clear and drop the relocations produced by the previous pass.
		for i := range s.R[nrelocs0:] {
			s.R[nrelocs0+i] = obj.Reloc{}
		}
		s.R = s.R[:nrelocs0] // preserve marker relocations generated by the compiler
		s.P = s.P[:0]
		c = 0
		var pPrev *obj.Prog
		nops = nops[:0]
		for p := s.Func().Text; p != nil; p = p.Link {
			c0 := c // offset before any padding for p
			c = pjc.padJump(ctxt, s, p, c)

			if maxLoopPad > 0 && p.Back&branchLoopHead != 0 && c&(loopAlign-1) != 0 {
				// pad with NOPs
				v := -c & (loopAlign - 1) // bytes needed to reach the next loopAlign multiple

				if v <= maxLoopPad {
					s.Grow(int64(c) + int64(v))
					fillnop(s.P[c:], int(v))
					c += v
				}
			}

			p.Pc = int64(c)

			// process forward jumps to p
			for q := p.Rel; q != nil; q = q.Forwd {
				// Displacement from the end of the jump q to p.
				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
				if q.Back&branchShort != 0 {
					// Does not fit in a signed byte: widen q on the next pass.
					if v > 127 {
						reAssemble = true
						q.Back ^= branchShort
					}

					// For JCXZL and XBEGIN the displacement byte is patched at
					// offset 2 of the instruction instead of offset 1.
					if q.As == AJCXZL || q.As == AXBEGIN {
						s.P[q.Pc+2] = byte(v)
					} else {
						s.P[q.Pc+1] = byte(v)
					}
				} else {
					// Long form: the 32-bit displacement is the last four bytes.
					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
				}
			}

			p.Rel = nil

			p.Pc = int64(c)
			ab.asmins(ctxt, s, p)
			m := ab.Len()
			if int(p.Isize) != m {
				p.Isize = uint8(m)
				if pjc.reAssemble(p) {
					// We need to re-assemble here to check for jumps and fused jumps
					// that span or end on 32 byte boundaries.
					reAssemble = true
				}
			}

			s.Grow(p.Pc + int64(m))
			copy(s.P[p.Pc:], ab.Bytes())
			// If there was padding, remember it.
			if pPrev != nil && !ctxt.IsAsm && c > c0 {
				nops = append(nops, nopPad{p: pPrev, n: c - c0})
			}
			c += int32(m)
			pPrev = p
		}

		// Guard against a layout that never converges.
		n++
		if n > 1000 {
			ctxt.Diag("span must be looping")
			log.Fatalf("loop")
		}
		if !reAssemble {
			break
		}
		// Give up (without finishing the symbol) if this pass reported errors.
		if ctxt.Errors > errors {
			return
		}
	}
	// splice padding nops into Progs
	for _, n := range nops {
		pp := n.p
		np := &obj.Prog{Link: pp.Link, Ctxt: pp.Ctxt, As: obj.ANOP, Pos: pp.Pos.WithNotStmt(), Pc: pp.Pc + int64(pp.Isize), Isize: uint8(n.n)}
		pp.Link = np
	}

	s.Size = int64(c)

	// Disabled debugging dump of the assembled bytes and relocations.
	if false { /* debug['a'] > 1 */
		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
		var i int
		for i = 0; i < len(s.P); i++ {
			fmt.Printf(" %.2x", s.P[i])
			if i%16 == 15 {
				fmt.Printf("\n %.6x", uint(i+1))
			}
		}

		if i%16 != 0 {
			fmt.Printf("\n")
		}

		for i := 0; i < len(s.R); i++ {
			r := &s.R[i]
			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
		}
	}

	// Mark nonpreemptible instruction sequences.
	// The 2-instruction TLS access sequence
	//	MOVQ TLS, BX
	//	MOVQ 0(BX)(TLS*1), BX
	// is not async preemptible, as if it is preempted and resumed on
	// a different thread, the TLS address may become invalid.
	if !CanUse1InsnTLS(ctxt) {
		useTLS := func(p *obj.Prog) bool {
			// Only need to mark the second instruction, which has
			// REG_TLS as Index. (It is okay to interrupt and restart
			// the first instruction.)
			return p.From.Index == REG_TLS
		}
		obj.MarkUnsafePoints(ctxt, s.Func().Text, newprog, useTLS, nil)
	}

	// Now that we know byte offsets, we can generate jump table entries.
	// TODO: could this live in obj instead of obj/$ARCH?
	for _, jt := range s.Func().JumpTables {
		for i, p := range jt.Targets {
			// The ith jumptable entry points to the p.Pc'th
			// byte in the function symbol s.
			jt.Sym.WriteAddr(ctxt, int64(i)*8, 8, s, p.Pc)
		}
	}
}

// instinit fills in the package-level assembler tables: opindex (opcode to
// Optab entry), ycover (operand class compatibility matrix), and reg/regrex
// (per-register encoding data). It does nothing if ycover[0] is already set.
func instinit(ctxt *obj.Link) {
	if ycover[0] != 0 {
		// Already initialized; stop now.
		// This happens in the cmd/asm tests,
		// each of which re-initializes the arch.
		return
	}

	switch ctxt.Headtype {
	case objabi.Hplan9:
		plan9privates = ctxt.Lookup("_privates")
	}

	// Index both opcode tables by opcode; a slot that is already occupied
	// means the same opcode appears twice and is reported.
	for i := range avxOptab {
		c := avxOptab[i].as
		if opindex[c&obj.AMask] != nil {
			ctxt.Diag("phase error in avxOptab: %d (%v)", i, c)
		}
		opindex[c&obj.AMask] = &avxOptab[i]
	}
	// optab is scanned from index 1 up to its terminating entry with as == 0.
	for i := 1; optab[i].as != 0; i++ {
		c := optab[i].as
		if opindex[c&obj.AMask] != nil {
			ctxt.Diag("phase error in optab: %d (%v)", i, c)
		}
		opindex[c&obj.AMask] = &optab[i]
	}

	// ycover is a Ymax x Ymax matrix stored row-major.
	// NOTE(review): ycover[a*Ymax+b] == 1 presumably means an operand of
	// class a is acceptable where class b is expected — confirm against the
	// code that reads ycover. Every class is marked against itself first.
	for i := 0; i < Ymax; i++ {
		ycover[i*Ymax+i] = 1
	}

	ycover[Yi0*Ymax+Yu2] = 1
	ycover[Yi1*Ymax+Yu2] = 1

	ycover[Yi0*Ymax+Yi8] = 1
	ycover[Yi1*Ymax+Yi8] = 1
	ycover[Yu2*Ymax+Yi8] = 1
	ycover[Yu7*Ymax+Yi8] = 1

	ycover[Yi0*Ymax+Yu7] = 1
	ycover[Yi1*Ymax+Yu7] = 1
	ycover[Yu2*Ymax+Yu7] = 1

	ycover[Yi0*Ymax+Yu8] = 1
	ycover[Yi1*Ymax+Yu8] = 1
	ycover[Yu2*Ymax+Yu8] = 1
	ycover[Yu7*Ymax+Yu8] = 1

	ycover[Yi0*Ymax+Ys32] = 1
	ycover[Yi1*Ymax+Ys32] = 1
	ycover[Yu2*Ymax+Ys32] = 1
	ycover[Yu7*Ymax+Ys32] = 1
	ycover[Yu8*Ymax+Ys32] = 1
	ycover[Yi8*Ymax+Ys32] = 1

	ycover[Yi0*Ymax+Yi32] = 1
	ycover[Yi1*Ymax+Yi32] = 1
	ycover[Yu2*Ymax+Yi32] = 1
	ycover[Yu7*Ymax+Yi32] = 1
	ycover[Yu8*Ymax+Yi32] = 1
	ycover[Yi8*Ymax+Yi32] = 1
	ycover[Ys32*Ymax+Yi32] = 1

	ycover[Yi0*Ymax+Yi64] = 1
	ycover[Yi1*Ymax+Yi64] = 1
	ycover[Yu7*Ymax+Yi64] = 1
	ycover[Yu2*Ymax+Yi64] = 1
	ycover[Yu8*Ymax+Yi64] = 1
	ycover[Yi8*Ymax+Yi64] = 1
	ycover[Ys32*Ymax+Yi64] = 1
	ycover[Yi32*Ymax+Yi64] = 1

	ycover[Yal*Ymax+Yrb] = 1
	ycover[Ycl*Ymax+Yrb] = 1
	ycover[Yax*Ymax+Yrb] = 1
	ycover[Ycx*Ymax+Yrb] = 1
	ycover[Yrx*Ymax+Yrb] = 1
	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32

	ycover[Ycl*Ymax+Ycx] = 1

	ycover[Yax*Ymax+Yrx] = 1
	ycover[Ycx*Ymax+Yrx] = 1

	ycover[Yax*Ymax+Yrl] = 1
	ycover[Ycx*Ymax+Yrl] = 1
	ycover[Yrx*Ymax+Yrl] = 1
	ycover[Yrl32*Ymax+Yrl] = 1

	ycover[Yf0*Ymax+Yrf] = 1

	ycover[Yal*Ymax+Ymb] = 1
	ycover[Ycl*Ymax+Ymb] = 1
	ycover[Yax*Ymax+Ymb] = 1
	ycover[Ycx*Ymax+Ymb] = 1
	ycover[Yrx*Ymax+Ymb] = 1
	ycover[Yrb*Ymax+Ymb] = 1
	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
	ycover[Ym*Ymax+Ymb] = 1

	ycover[Yax*Ymax+Yml] = 1
	ycover[Ycx*Ymax+Yml] = 1
	ycover[Yrx*Ymax+Yml] = 1
	ycover[Yrl*Ymax+Yml] = 1
	ycover[Yrl32*Ymax+Yml] = 1
	ycover[Ym*Ymax+Yml] = 1

	ycover[Yax*Ymax+Ymm] = 1
	ycover[Ycx*Ymax+Ymm] = 1
	ycover[Yrx*Ymax+Ymm] = 1
	ycover[Yrl*Ymax+Ymm] = 1
	ycover[Yrl32*Ymax+Ymm] = 1
	ycover[Ym*Ymax+Ymm] = 1
	ycover[Ymr*Ymax+Ymm] = 1

	ycover[Yxr0*Ymax+Yxr] = 1

	ycover[Ym*Ymax+Yxm] = 1
	ycover[Yxr0*Ymax+Yxm] = 1
	ycover[Yxr*Ymax+Yxm] = 1

	ycover[Ym*Ymax+Yym] = 1
	ycover[Yyr*Ymax+Yym] = 1

	ycover[Yxr0*Ymax+YxrEvex] = 1
	ycover[Yxr*Ymax+YxrEvex] = 1

	ycover[Ym*Ymax+YxmEvex] = 1
	ycover[Yxr0*Ymax+YxmEvex] = 1
	ycover[Yxr*Ymax+YxmEvex] = 1
	ycover[YxrEvex*Ymax+YxmEvex] = 1

	ycover[Yyr*Ymax+YyrEvex] = 1

	ycover[Ym*Ymax+YymEvex] = 1
	ycover[Yyr*Ymax+YymEvex] = 1
	ycover[YyrEvex*Ymax+YymEvex] = 1

	ycover[Ym*Ymax+Yzm] = 1
	ycover[Yzr*Ymax+Yzm] = 1

	ycover[Yk0*Ymax+Yk] = 1
	ycover[Yknot0*Ymax+Yk] = 1

	ycover[Yk0*Ymax+Ykm] = 1
	ycover[Yknot0*Ymax+Ykm] = 1
	ycover[Yk*Ymax+Ykm] = 1
	ycover[Ym*Ymax+Ykm] = 1

	ycover[Yxvm*Ymax+YxvmEvex] = 1

	ycover[Yyvm*Ymax+YyvmEvex] = 1

	// Per-register tables. reg[i] is -1 for registers outside every range
	// below; otherwise it is the register's position within its group
	// reduced to three bits. regrex[i] holds the extra prefix bits that a
	// register needs (0x40, Rxr|Rxx|Rxb, RxrEvex).
	for i := 0; i < MAXREG; i++ {
		reg[i] = -1
		if i >= REG_AL && i <= REG_R15B {
			reg[i] = (i - REG_AL) & 7
			if i >= REG_SPB && i <= REG_DIB {
				regrex[i] = 0x40
			}
			if i >= REG_R8B && i <= REG_R15B {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}

		// AH..BH are numbered starting at 4.
		if i >= REG_AH && i <= REG_BH {
			reg[i] = 4 + ((i - REG_AH) & 7)
		}
		if i >= REG_AX && i <= REG_R15 {
			reg[i] = (i - REG_AX) & 7
			if i >= REG_R8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}

		if i >= REG_F0 && i <= REG_F0+7 {
			reg[i] = (i - REG_F0) & 7
		}
		if i >= REG_M0 && i <= REG_M0+7 {
			reg[i] = (i - REG_M0) & 7
		}
		if i >= REG_K0 && i <= REG_K0+7 {
			reg[i] = (i - REG_K0) & 7
		}
		// Vector registers: 8..15 get Rxr|Rxx|Rxb, 16..23 get RxrEvex,
		// and 24..31 get both.
		if i >= REG_X0 && i <= REG_X0+15 {
			reg[i] = (i - REG_X0) & 7
			if i >= REG_X0+8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		if i >= REG_X16 && i <= REG_X16+15 {
			reg[i] = (i - REG_X16) & 7
			if i >= REG_X16+8 {
				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
			} else {
				regrex[i] = RxrEvex
			}
		}
		if i >= REG_Y0 && i <= REG_Y0+15 {
			reg[i] = (i - REG_Y0) & 7
			if i >= REG_Y0+8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		if i >= REG_Y16 && i <= REG_Y16+15 {
			reg[i] = (i - REG_Y16) & 7
			if i >= REG_Y16+8 {
				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
			} else {
				regrex[i] = RxrEvex
			}
		}
		if i >= REG_Z0 && i <= REG_Z0+15 {
			reg[i] = (i - REG_Z0) & 7
			if i > REG_Z0+7 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		if i >= REG_Z16 && i <= REG_Z16+15 {
			reg[i] = (i - REG_Z16) & 7
			if i >= REG_Z16+8 {
				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
			} else {
				regrex[i] = RxrEvex
			}
		}

		if i >= REG_CR+8 && i <= REG_CR+15 {
			regrex[i] = Rxr
		}
	}
}

// isAndroid records whether the configured target GOOS is "android".
var isAndroid = buildcfg.GOOS == "android"

// prefixof returns the segment override prefix byte that operand a needs
// (0x2e CS, 0x3e DS, 0x26 ES, 0x64 FS, 0x65 GS), or 0 when it needs none.
// For REG_TLS the prefix depends on the architecture family, the head type
// and whether shared mode is enabled; unsupported combinations are fatal.
func prefixof(ctxt *obj.Link, a *obj.Addr) int {
	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
		return 0
	}
	// Segment register (or TLS) used as the base of a plain memory operand.
	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
		switch a.Reg {
		case REG_CS:
			return 0x2e

		case REG_DS:
			return 0x3e

		case REG_ES:
			return 0x26

		case REG_FS:
			return 0x64

		case REG_GS:
			return 0x65

		case REG_TLS:
			// NOTE: Systems listed here should be only systems that
			// support direct TLS references like 8(TLS) implemented as
			// direct references from FS or GS. Systems that require
			// the initial-exec model, where you load the TLS base into
			// a register and then index from that register, do not reach
			// this code and should not be listed.
			if ctxt.Arch.Family == sys.I386 {
				switch ctxt.Headtype {
				default:
					if isAndroid {
						return 0x65 // GS
					}
					log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)

				case objabi.Hdarwin,
					objabi.Hdragonfly,
					objabi.Hfreebsd,
					objabi.Hnetbsd,
					objabi.Hopenbsd:
					return 0x65 // GS
				}
			}

			switch ctxt.Headtype {
			default:
				log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)

			case objabi.Hlinux:
				if isAndroid {
					return 0x64 // FS
				}

				if ctxt.Flag_shared {
					log.Fatalf("unknown TLS base register for linux with -shared")
				} else {
					return 0x64 // FS
				}

			case objabi.Hdragonfly,
				objabi.Hfreebsd,
				objabi.Hnetbsd,
				objabi.Hopenbsd,
				objabi.Hsolaris:
				return 0x64 // FS

			case objabi.Hdarwin:
				return 0x65 // GS
			}
		}
	}

	// On 386 the only index register that yields a prefix is TLS in shared mode.
	if ctxt.Arch.Family == sys.I386 {
		if a.Index == REG_TLS && ctxt.Flag_shared {
			// When building for inclusion into a shared library, an instruction of the form
			//	MOVL off(CX)(TLS*1), AX
			// becomes
			//	mov %gs:off(%ecx), %eax
			// which assumes that the correct TLS offset has been loaded into %ecx (today
			// there is only one TLS variable -- g -- so this is OK). When not building for
			// a shared library the instruction it becomes
			//	mov 0x0(%ecx), %eax
			// and a R_TLS_LE relocation, and so does not require a prefix.
			return 0x65 // GS
		}
		return 0
	}

	// Segment register (or TLS) used as the index.
	switch a.Index {
	case REG_CS:
		return 0x2e

	case REG_DS:
		return 0x3e

	case REG_ES:
		return 0x26

	case REG_TLS:
		if ctxt.Flag_shared && ctxt.Headtype != objabi.Hwindows {
			// When building for inclusion into a shared library, an instruction of the form
			//	MOV off(CX)(TLS*1), AX
			// becomes
			//	mov %fs:off(%rcx), %rax
			// which assumes that the correct TLS offset has been loaded into %rcx (today
			// there is only one TLS variable -- g -- so this is OK). When not building for
			// a shared library the instruction does not require a prefix.
			return 0x64
		}

	case REG_FS:
		return 0x64

	case REG_GS:
		return 0x65
	}

	return 0
}

// oclassRegList returns multisource operand class for addr.
// The register range is decoded from addr.Offset. Only ranges whose two
// ends are three register indexes apart and of the same X/Y/Z kind are
// accepted; everything else yields Yxxx, as does any register with index 8
// or above on 386.
func oclassRegList(ctxt *obj.Link, addr *obj.Addr) int {
	// TODO(quasilyte): when oclass register case is refactored into
	// lookup table, use it here to get register kind more easily.
	// Helper functions like regIsXmm should go away too (they will become redundant).

	regIsXmm := func(r int) bool { return r >= REG_X0 && r <= REG_X31 }
	regIsYmm := func(r int) bool { return r >= REG_Y0 && r <= REG_Y31 }
	regIsZmm := func(r int) bool { return r >= REG_Z0 && r <= REG_Z31 }

	reg0, reg1 := decodeRegisterRange(addr.Offset)
	low := regIndex(int16(reg0))
	high := regIndex(int16(reg1))

	if ctxt.Arch.Family == sys.I386 {
		if low >= 8 || high >= 8 {
			return Yxxx
		}
	}

	switch high - low {
	case 3:
		switch {
		case regIsXmm(reg0) && regIsXmm(reg1):
			return YxrEvexMulti4
		case regIsYmm(reg0) && regIsYmm(reg1):
			return YyrEvexMulti4
		case regIsZmm(reg0) && regIsZmm(reg1):
			return YzrMulti4
		default:
			return Yxxx
		}
	default:
		return Yxxx
	}
}

// oclassVMem returns V-mem (vector memory with VSIB) operand class.
// For addr that is not V-mem returns (Yxxx, false).
func oclassVMem(ctxt *obj.Link, addr *obj.Addr) (int, bool) {
	// The class is decided by the index register alone. A vector index that
	// is not allowed on 386 still reports ok == true, with class Yxxx.
	switch addr.Index {
	case REG_X0 + 0,
		REG_X0 + 1,
		REG_X0 + 2,
		REG_X0 + 3,
		REG_X0 + 4,
		REG_X0 + 5,
		REG_X0 + 6,
		REG_X0 + 7:
		return Yxvm, true
	case REG_X8 + 0,
		REG_X8 + 1,
		REG_X8 + 2,
		REG_X8 + 3,
		REG_X8 + 4,
		REG_X8 + 5,
		REG_X8 + 6,
		REG_X8 + 7:
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx, true
		}
		return Yxvm, true
	case REG_X16 + 0,
		REG_X16 + 1,
		REG_X16 + 2,
		REG_X16 + 3,
		REG_X16 + 4,
		REG_X16 + 5,
		REG_X16 + 6,
		REG_X16 + 7,
		REG_X16 + 8,
		REG_X16 + 9,
		REG_X16 + 10,
		REG_X16 + 11,
		REG_X16 + 12,
		REG_X16 + 13,
		REG_X16 + 14,
		REG_X16 + 15:
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx, true
		}
		return YxvmEvex, true

	case REG_Y0 + 0,
		REG_Y0 + 1,
		REG_Y0 + 2,
		REG_Y0 + 3,
		REG_Y0 + 4,
		REG_Y0 + 5,
		REG_Y0 + 6,
		REG_Y0 + 7:
		return Yyvm, true
	case REG_Y8 + 0,
		REG_Y8 + 1,
		REG_Y8 + 2,
		REG_Y8 + 3,
		REG_Y8 + 4,
		REG_Y8 + 5,
		REG_Y8 + 6,
		REG_Y8 + 7:
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx, true
		}
		return Yyvm, true
	case REG_Y16 + 0,
		REG_Y16 + 1,
		REG_Y16 + 2,
		REG_Y16 + 3,
		REG_Y16 + 4,
		REG_Y16 + 5,
		REG_Y16 + 6,
		REG_Y16 + 7,
		REG_Y16 + 8,
		REG_Y16 + 9,
		REG_Y16 + 10,
		REG_Y16 + 11,
		REG_Y16 + 12,
		REG_Y16 + 13,
		REG_Y16 + 14,
		REG_Y16 + 15:
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx, true
		}
		return YyvmEvex, true

	case REG_Z0 + 0,
		REG_Z0 + 1,
		REG_Z0 + 2,
		REG_Z0 + 3,
		REG_Z0 + 4,
		REG_Z0 + 5,
		REG_Z0 + 6,
		REG_Z0 + 7:
		return Yzvm, true
	case REG_Z8 + 0,
		REG_Z8 + 1,
		REG_Z8 + 2,
		REG_Z8 + 3,
		REG_Z8 + 4,
		REG_Z8 + 5,
		REG_Z8 + 6,
		REG_Z8 + 7,
		REG_Z8 + 8,
		REG_Z8 + 9,
		REG_Z8 + 10,
		REG_Z8 + 11,
		REG_Z8 + 12,
		REG_Z8 + 13,
		REG_Z8 + 14,
		REG_Z8 + 15,
		REG_Z8 + 16,
		REG_Z8 + 17,
		REG_Z8 + 18,
		REG_Z8 + 19,
		REG_Z8 + 20,
		REG_Z8 + 21,
		REG_Z8 + 22,
		REG_Z8 + 23:
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx, true
		}
		return Yzvm, true
	}

	return Yxxx, false
}

// oclass returns the operand class (one of the Y* constants) of operand a
// of instruction p. Operands that are not valid in the current
// configuration are classified as Yxxx; some inconsistent operands are
// additionally reported through ctxt.Diag.
func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
	switch a.Type {
	case obj.TYPE_REGLIST:
		return oclassRegList(ctxt, a)

	case obj.TYPE_NONE:
		return Ynone

	case obj.TYPE_BRANCH:
		return Ybr

	case obj.TYPE_INDIR:
		// Only a bare named symbol with no base, index or scale qualifies.
		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
			return Yindir
		}
		return Yxxx

	case obj.TYPE_MEM:
		// Pseudo registers have negative index, but SP is
		// not pseudo on x86, hence REG_SP check is not redundant.
		if a.Index == REG_SP || a.Index < 0 {
			// Can't use FP/SB/PC/SP as the index register.
			return Yxxx
		}

		// A vector index register makes this a VSIB operand.
		if vmem, ok := oclassVMem(ctxt, a); ok {
			return vmem
		}

		if ctxt.Arch.Family == sys.AMD64 {
			switch a.Name {
			case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
				// Global variables can't use index registers and their
				// base register is %rip (%rip is encoded as REG_NONE).
				if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
					return Yxxx
				}
			case obj.NAME_AUTO, obj.NAME_PARAM:
				// These names must have a base of SP. The old compiler
				// uses 0 for the base register. SSA uses REG_SP.
				if a.Reg != REG_SP && a.Reg != 0 {
					return Yxxx
				}
			case obj.NAME_NONE:
				// everything is ok
			default:
				// unknown name
				return Yxxx
			}
		}
		return Ym

	case obj.TYPE_ADDR:
		switch a.Name {
		case obj.NAME_GOTREF:
			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
			return Yxxx

		case obj.NAME_EXTERN,
			obj.NAME_STATIC:
			if a.Sym != nil && useAbs(ctxt, a.Sym) {
				return Yi32
			}
			return Yiauto // use pc-relative addressing

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			return Yiauto
		}

		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
		// and got Yi32 in an earlier version of this code.
		// Keep doing that until we fix yduff etc.
		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
			return Yi32
		}

		if a.Sym != nil || a.Name != obj.NAME_NONE {
			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
		}
		// An address with no name and no symbol is classified like a constant.
		fallthrough

	case obj.TYPE_CONST:
		if a.Sym != nil {
			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
		}

		v := a.Offset
		// On 386 only the low 32 bits count; reinterpret them as signed.
		if ctxt.Arch.Family == sys.I386 {
			v = int64(int32(v))
		}
		// The cases are ordered from the narrowest class to the widest, so
		// the first match is the tightest class that holds v.
		switch {
		case v == 0:
			return Yi0
		case v == 1:
			return Yi1
		case v >= 0 && v <= 3:
			return Yu2
		case v >= 0 && v <= 127:
			return Yu7
		case v >= 0 && v <= 255:
			return Yu8
		case v >= -128 && v <= 127:
			return Yi8
		}
		if ctxt.Arch.Family == sys.I386 {
			return Yi32
		}
		l := int32(v)
		if int64(l) == v {
			return Ys32 // can sign extend
		}
		if v>>32 == 0 {
			return Yi32 // unsigned
		}
		return Yi64

	case obj.TYPE_TEXTSIZE:
		return Ytextsize
	}

	// Every remaining valid operand is a register.
	if a.Type != obj.TYPE_REG {
		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
		return Yxxx
	}

	switch a.Reg {
	case REG_AL:
		return Yal

	case REG_AX:
		return Yax

		/*
			case REG_SPB:
		*/
	case REG_BPB,
		REG_SIB,
		REG_DIB,
		REG_R8B,
		REG_R9B,
		REG_R10B,
		REG_R11B,
		REG_R12B,
		REG_R13B,
		REG_R14B,
		REG_R15B:
		// These byte registers are rejected on 386.
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx
		}
		fallthrough

	case REG_DL,
		REG_BL,
		REG_AH,
		REG_CH,
		REG_DH,
		REG_BH:
		return Yrb

	case REG_CL:
		return Ycl

	case REG_CX:
		return Ycx

	case REG_DX, REG_BX:
		return Yrx

	case REG_R8, // not really Yrl
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15:
		// R8-R15 are rejected on 386.
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx
		}
		fallthrough

	case REG_SP, REG_BP, REG_SI, REG_DI:
		if ctxt.Arch.Family == sys.I386 {
			return Yrl32
		}
		return Yrl

	case REG_F0 + 0:
		return Yf0

	case REG_F0 + 1,
		REG_F0 + 2,
		REG_F0 + 3,
		REG_F0 + 4,
		REG_F0 + 5,
		REG_F0 + 6,
		REG_F0 + 7:
		return Yrf

	case REG_M0 + 0,
		REG_M0 + 1,
		REG_M0 + 2,
		REG_M0 + 3,
		REG_M0 + 4,
		REG_M0 + 5,
		REG_M0 + 6,
		REG_M0 + 7:
		return Ymr

	case REG_X0:
		return Yxr0

	case REG_X0 + 1,
		REG_X0 + 2,
		REG_X0 + 3,
		REG_X0 + 4,
		REG_X0 + 5,
		REG_X0 + 6,
		REG_X0 + 7,
		REG_X0 + 8,
		REG_X0 + 9,
		REG_X0 + 10,
		REG_X0 + 11,
		REG_X0 + 12,
		REG_X0 + 13,
		REG_X0 + 14,
		REG_X0 + 15:
		return Yxr

	case REG_X0 + 16,
		REG_X0 + 17,
		REG_X0 + 18,
		REG_X0 + 19,
		REG_X0 + 20,
		REG_X0 + 21,
		REG_X0 + 22,
		REG_X0 + 23,
		REG_X0 + 24,
		REG_X0 + 25,
		REG_X0 + 26,
		REG_X0 + 27,
		REG_X0 + 28,
		REG_X0 + 29,
		REG_X0 + 30,
		REG_X0 + 31:
		return YxrEvex

	case REG_Y0 + 0,
		REG_Y0 + 1,
		REG_Y0 + 2,
		REG_Y0 + 3,
		REG_Y0 + 4,
		REG_Y0 + 5,
		REG_Y0 + 6,
		REG_Y0 + 7,
		REG_Y0 + 8,
		REG_Y0 + 9,
		REG_Y0 + 10,
		REG_Y0 + 11,
		REG_Y0 + 12,
		REG_Y0 + 13,
		REG_Y0 + 14,
		REG_Y0 + 15:
		return Yyr

	case REG_Y0 + 16,
		REG_Y0 + 17,
		REG_Y0 + 18,
		REG_Y0 + 19,
		REG_Y0 + 20,
		REG_Y0 + 21,
		REG_Y0 + 22,
		REG_Y0 + 23,
		REG_Y0 + 24,
		REG_Y0 + 25,
		REG_Y0 + 26,
		REG_Y0 + 27,
		REG_Y0 + 28,
		REG_Y0 + 29,
		REG_Y0 + 30,
		REG_Y0 + 31:
		return YyrEvex

	case REG_Z0 + 0,
		REG_Z0 + 1,
		REG_Z0 + 2,
		REG_Z0 + 3,
		REG_Z0 + 4,
		REG_Z0 + 5,
		REG_Z0 + 6,
		REG_Z0 + 7:
		return Yzr

	case REG_Z0 + 8,
		REG_Z0 + 9,
		REG_Z0 + 10,
		REG_Z0 + 11,
		REG_Z0 + 12,
		REG_Z0 + 13,
		REG_Z0 + 14,
		REG_Z0 + 15,
		REG_Z0 + 16,
		REG_Z0 + 17,
		REG_Z0 + 18,
		REG_Z0 + 19,
		REG_Z0 + 20,
		REG_Z0 + 21,
		REG_Z0 + 22,
		REG_Z0 + 23,
		REG_Z0 + 24,
		REG_Z0 + 25,
		REG_Z0 + 26,
		REG_Z0 + 27,
		REG_Z0 + 28,
		REG_Z0 + 29,
		REG_Z0 + 30,
		REG_Z0 + 31:
		// Z8-Z31 are rejected on 386.
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx
		}
		return Yzr

	case REG_K0:
		return Yk0

	case REG_K0 + 1,
		REG_K0 + 2,
		REG_K0 + 3,
		REG_K0 + 4,
		REG_K0 + 5,
		REG_K0 + 6,
		REG_K0 + 7:
		return Yknot0

	case REG_CS:
		return Ycs
	case REG_SS:
		return Yss
	case REG_DS:
		return Yds
	case REG_ES:
		return Yes
	case REG_FS:
		return Yfs
	case REG_GS:
		return Ygs
	case REG_TLS:
		return Ytls

	case REG_GDTR:
		return Ygdtr
	case REG_IDTR:
		return Yidtr
	case REG_LDTR:
		return Yldtr
	case REG_MSW:
		return Ymsw
	case REG_TASK:
		return Ytask

	case REG_CR + 0:
		return Ycr0
	case REG_CR + 1:
		return Ycr1
	case REG_CR + 2:
		return Ycr2
	case REG_CR + 3:
		return Ycr3
	case REG_CR + 4:
		return Ycr4
	case REG_CR + 5:
		return Ycr5
	case REG_CR + 6:
		return Ycr6
	case REG_CR + 7:
		return Ycr7
	case REG_CR + 8:
		return Ycr8

	case REG_DR + 0:
		return Ydr0
	case REG_DR + 1:
		return Ydr1
	case REG_DR + 2:
		return Ydr2
	case REG_DR + 3:
		return Ydr3
	case REG_DR + 4:
		return Ydr4
	case REG_DR + 5:
		return Ydr5
	case REG_DR + 6:
		return Ydr6
	case REG_DR + 7:
		return Ydr7

	case REG_TR + 0:
		return Ytr0
	case REG_TR + 1:
		return Ytr1
	case REG_TR + 2:
		return Ytr2
	case REG_TR + 3:
		return Ytr3
	case REG_TR + 4:
		return Ytr4
	case REG_TR + 5:
		return Ytr5
	case REG_TR + 6:
		return Ytr6
	case REG_TR + 7:
		return Ytr7
	}

	return Yxxx
}

// AsmBuf is a simple buffer to assemble variable-length x86 instructions into
// and hold assembly state.
type AsmBuf struct {
	buf      [100]byte // encoded bytes; only buf[:off] is meaningful
	off      int       // number of bytes written so far
	rexflag  int
	vexflag  bool // Per inst: true for VEX-encoded
	evexflag bool // Per inst: true for EVEX-encoded
	rep      bool
	repn     bool
	lock     bool

	evex evexBits // Initialized when evexflag is true
}

// Put1 appends one byte to the end of the buffer.
func (ab *AsmBuf) Put1(x byte) {
	ab.buf[ab.off] = x
	ab.off++
}

// Put2 appends two bytes to the end of the buffer.
func (ab *AsmBuf) Put2(x, y byte) {
	ab.buf[ab.off+0] = x
	ab.buf[ab.off+1] = y
	ab.off += 2
}

// Put3 appends three bytes to the end of the buffer.
func (ab *AsmBuf) Put3(x, y, z byte) {
	ab.buf[ab.off+0] = x
	ab.buf[ab.off+1] = y
	ab.buf[ab.off+2] = z
	ab.off += 3
}

// Put4 appends four bytes to the end of the buffer.
+func (ab *AsmBuf) Put4(x, y, z, w byte) { + ab.buf[ab.off+0] = x + ab.buf[ab.off+1] = y + ab.buf[ab.off+2] = z + ab.buf[ab.off+3] = w + ab.off += 4 +} + +// PutInt16 writes v into the buffer using little-endian encoding. +func (ab *AsmBuf) PutInt16(v int16) { + ab.buf[ab.off+0] = byte(v) + ab.buf[ab.off+1] = byte(v >> 8) + ab.off += 2 +} + +// PutInt32 writes v into the buffer using little-endian encoding. +func (ab *AsmBuf) PutInt32(v int32) { + ab.buf[ab.off+0] = byte(v) + ab.buf[ab.off+1] = byte(v >> 8) + ab.buf[ab.off+2] = byte(v >> 16) + ab.buf[ab.off+3] = byte(v >> 24) + ab.off += 4 +} + +// PutInt64 writes v into the buffer using little-endian encoding. +func (ab *AsmBuf) PutInt64(v int64) { + ab.buf[ab.off+0] = byte(v) + ab.buf[ab.off+1] = byte(v >> 8) + ab.buf[ab.off+2] = byte(v >> 16) + ab.buf[ab.off+3] = byte(v >> 24) + ab.buf[ab.off+4] = byte(v >> 32) + ab.buf[ab.off+5] = byte(v >> 40) + ab.buf[ab.off+6] = byte(v >> 48) + ab.buf[ab.off+7] = byte(v >> 56) + ab.off += 8 +} + +// Put copies b into the buffer. +func (ab *AsmBuf) Put(b []byte) { + copy(ab.buf[ab.off:], b) + ab.off += len(b) +} + +// PutOpBytesLit writes zero terminated sequence of bytes from op, +// starting at specified offset (e.g. z counter value). +// Trailing 0 is not written. +// +// Intended to be used for literal Z cases. +// Literal Z cases usually have "Zlit" in their name (Zlit, Zlitr_m, Zlitm_r). +func (ab *AsmBuf) PutOpBytesLit(offset int, op *opBytes) { + for int(op[offset]) != 0 { + ab.Put1(byte(op[offset])) + offset++ + } +} + +// Insert inserts b at offset i. +func (ab *AsmBuf) Insert(i int, b byte) { + ab.off++ + copy(ab.buf[i+1:ab.off], ab.buf[i:ab.off-1]) + ab.buf[i] = b +} + +// Last returns the byte at the end of the buffer. +func (ab *AsmBuf) Last() byte { return ab.buf[ab.off-1] } + +// Len returns the length of the buffer. +func (ab *AsmBuf) Len() int { return ab.off } + +// Bytes returns the contents of the buffer. 
func (ab *AsmBuf) Bytes() []byte { return ab.buf[:ab.off] }

// Reset empties the buffer.
func (ab *AsmBuf) Reset() { ab.off = 0 }

// At returns the byte at offset i.
func (ab *AsmBuf) At(i int) byte { return ab.buf[i] }

// asmidx emits SIB byte.
//
// The byte is assembled from reg[index]<<3, a scale selector placed at
// bit 6 (scale 1, 2, 4, 8 select 0, 1, 2, 3) and reg[base] in the low bits.
// An index of REG_NONE is encoded as 4<<3 and skips the scale switch
// entirely; a base of REG_NONE is encoded as 5.
// R8-R15 and the vector registers numbered 8 and above are rejected
// when ctxt.Arch.Family is sys.I386.
// For any unsupported scale, index or base a diagnostic is reported and
// a zero byte is emitted in place of the SIB byte.
func (ab *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
	var i int

	// X/Y index register is used in VSIB.
	switch index {
	default:
		goto bad

	case REG_NONE:
		// No index register: scale is not inspected.
		i = 4 << 3
		goto bas

	case REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15,
		REG_X8, REG_X9, REG_X10, REG_X11, REG_X12, REG_X13, REG_X14, REG_X15,
		REG_X16, REG_X17, REG_X18, REG_X19, REG_X20, REG_X21, REG_X22, REG_X23,
		REG_X24, REG_X25, REG_X26, REG_X27, REG_X28, REG_X29, REG_X30, REG_X31,
		REG_Y8, REG_Y9, REG_Y10, REG_Y11, REG_Y12, REG_Y13, REG_Y14, REG_Y15,
		REG_Y16, REG_Y17, REG_Y18, REG_Y19, REG_Y20, REG_Y21, REG_Y22, REG_Y23,
		REG_Y24, REG_Y25, REG_Y26, REG_Y27, REG_Y28, REG_Y29, REG_Y30, REG_Y31,
		REG_Z8, REG_Z9, REG_Z10, REG_Z11, REG_Z12, REG_Z13, REG_Z14, REG_Z15,
		REG_Z16, REG_Z17, REG_Z18, REG_Z19, REG_Z20, REG_Z21, REG_Z22, REG_Z23,
		REG_Z24, REG_Z25, REG_Z26, REG_Z27, REG_Z28, REG_Z29, REG_Z30, REG_Z31:
		if ctxt.Arch.Family == sys.I386 {
			goto bad
		}
		fallthrough

	case REG_AX, REG_CX, REG_DX, REG_BX, REG_BP, REG_SI, REG_DI,
		REG_X0, REG_X1, REG_X2, REG_X3, REG_X4, REG_X5, REG_X6, REG_X7,
		REG_Y0, REG_Y1, REG_Y2, REG_Y3, REG_Y4, REG_Y5, REG_Y6, REG_Y7,
		REG_Z0, REG_Z1, REG_Z2, REG_Z3, REG_Z4, REG_Z5, REG_Z6, REG_Z7:
		i = reg[index] << 3
	}

	switch scale {
	default:
		goto bad

	case 1:
		break

	case 2:
		i |= 1 << 6

	case 4:
		i |= 2 << 6

	case 8:
		i |= 3 << 6
	}

bas:
	switch base {
	default:
		goto bad

	case REG_NONE: // must be mod=00
		i |= 5

	case REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15:
		if ctxt.Arch.Family == sys.I386 {
			goto bad
		}
		fallthrough

	case REG_AX, REG_CX, REG_DX, REG_BX, REG_SP, REG_BP, REG_SI, REG_DI:
		i |= reg[base]
	}

	ab.Put1(byte(i))
	return

bad:
	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
	ab.Put1(0)
}

// relput4 emits the value of a, as computed by vaddr, as a 32-bit
// little-endian integer.
// If vaddr filled in a relocation, a copy of it is added to cursym with
// its offset set to the position the value is about to be written at
// (p.Pc plus the current buffer length). A relocation size other than 4
// is reported as "bad reloc" but the relocation is still recorded.
func (ab *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
	var rel obj.Reloc

	v := vaddr(ctxt, p, a, &rel)
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad reloc")
		}
		r := obj.Addrel(cursym)
		*r = rel
		r.Off = int32(p.Pc + int64(ab.Len()))
	}

	ab.PutInt32(int32(v))
}

// vaddr returns the immediate value to encode for a and, when the value
// has to be supplied through a relocation, describes that relocation in *r.
//
// If r is non-nil it is reset to the zero Reloc first.
//
//   - For NAME_STATIC, NAME_GOTREF and NAME_EXTERN a 4-byte relocation
//     against a.Sym is filled in (R_GOTPCREL for NAME_GOTREF, R_ADDR when
//     useAbs reports true, R_PCREL otherwise) with a.Offset as the addend,
//     and 0 is returned.
//   - For a TYPE_MEM or TYPE_ADDR operand based on REG_TLS, 0 is returned;
//     an R_TLS_LE relocation is filled in only when the code is not built
//     shared, or the target is Android or Darwin.
//   - Otherwise a.Offset is returned and *r stays zero.
//
// In the first two cases a nil r is fatal: the process exits via log.Fatalf.
// r.Off is left at -1 where a relocation is produced; the caller is
// expected to set it.
func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
	if r != nil {
		*r = obj.Reloc{}
	}

	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		s := a.Sym
		if r == nil {
			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
			log.Fatalf("reloc")
		}

		if a.Name == obj.NAME_GOTREF {
			r.Siz = 4
			r.Type = objabi.R_GOTPCREL
		} else if useAbs(ctxt, s) {
			r.Siz = 4
			r.Type = objabi.R_ADDR
		} else {
			r.Siz = 4
			r.Type = objabi.R_PCREL
		}

		r.Off = -1 // caller must fill in
		r.Sym = s
		r.Add = a.Offset

		return 0
	}

	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
		if r == nil {
			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
			log.Fatalf("reloc")
		}

		if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
			r.Type = objabi.R_TLS_LE
			r.Siz = 4
			r.Off = -1 // caller must fill in
			r.Add = a.Offset
		}
		return 0
	}

	return a.Offset
}

// asmandsz emits the addressing bytes for operand a: a leading byte that
// combines a 2-bit mode (shifted left by 6), the r argument (shifted left
// by 3) and a register or SIB selector, followed where needed by the byte
// produced by asmidx and by an 8-bit or 32-bit displacement.
//
// Extension bits looked up in regrex for the registers involved, together
// with the masked rex argument, are OR-ed into ab.rexflag. When vaddr
// produces a relocation it is added to cursym at the position of the
// 32-bit displacement.
//
// Only TYPE_REG and TYPE_MEM operands are encoded. Every other operand
// type, TYPE_ADDR included, ends in the "asmand: bad address" diagnostic.
//
// NOTE(review): the m64 parameter is not referenced anywhere in this body.
func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
	var base int
	var rel obj.Reloc

	rex &= 0x40 | Rxr
	if a.Offset != int64(int32(a.Offset)) {
		// The rules are slightly different for 386 and AMD64,
		// mostly for historical reasons. We may unify them later,
		// but it must be discussed beforehand.
		//
		// For 64bit mode only LEAL is allowed to overflow.
		// It's how https://golang.org/cl/59630 made it.
		// crypto/sha1/sha1block_amd64.s depends on this feature.
		//
		// For 32bit mode rules are more permissive.
		// If offset fits uint32, it's permitted.
		// This is allowed for assembly that wants to use 32-bit hex
		// constants, e.g. LEAL 0x99999999(AX), AX.
		overflowOK := (ctxt.Arch.Family == sys.AMD64 && p.As == ALEAL) ||
			(ctxt.Arch.Family != sys.AMD64 &&
				int64(uint32(a.Offset)) == a.Offset &&
				ab.rexflag&Rxw == 0)
		if !overflowOK {
			ctxt.Diag("offset too large in %s", p)
		}
	}
	// From here on the displacement is the low 32 bits of a.Offset.
	v := int32(a.Offset)
	rel.Siz = 0

	switch a.Type {
	case obj.TYPE_ADDR:
		// Always rejected; the two checks only add a more specific diagnostic.
		if a.Name == obj.NAME_NONE {
			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
		}
		if a.Index == REG_TLS {
			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
		}
		goto bad

	case obj.TYPE_REG:
		const regFirst = REG_AL
		const regLast = REG_Z31
		if a.Reg < regFirst || regLast < a.Reg {
			goto bad
		}
		if v != 0 {
			goto bad
		}
		// Register-direct form: mode 3, no displacement.
		ab.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
		ab.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
		return
	}

	if a.Type != obj.TYPE_MEM {
		goto bad
	}

	// Memory operand with a real index register (not TLS, not a segment
	// register in REG_CS..REG_GS): always encoded with an asmidx byte.
	if a.Index != REG_NONE && a.Index != REG_TLS && !(REG_CS <= a.Index && a.Index <= REG_GS) {
		// Note: this base shadows the function-level base for the rest
		// of this block.
		base := int(a.Reg)
		switch a.Name {
		case obj.NAME_EXTERN,
			obj.NAME_GOTREF,
			obj.NAME_STATIC:
			if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 {
				goto bad
			}
			if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
				// The base register has already been set. It holds the PC
				// of this instruction returned by a PC-reading thunk.
				// See obj6.go:rewriteToPcrel.
			} else {
				base = REG_NONE
			}
			v = int32(vaddr(ctxt, p, a, &rel))

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			base = REG_SP
		}

		ab.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
		if base == REG_NONE {
			// No base register: mode 0 followed by a 32-bit displacement.
			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			goto putrelv
		}

		// Zero displacement can be omitted, except with BP/R13 as base.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			return
		}

		// One-byte displacement when toDisp8 accepts v and no relocation
		// is pending.
		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
			ab.Put1(byte(1<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			ab.Put1(disp8)
			return
		}

		ab.Put1(byte(2<<6 | 4<<0 | r<<3))
		ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
		goto putrelv
	}

	// Memory operand without a usable index register.
	base = int(a.Reg)
	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		if a.Sym == nil {
			ctxt.Diag("bad addr: %v", p)
		}
		if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
			// The base register has already been set. It holds the PC
			// of this instruction returned by a PC-reading thunk.
			// See obj6.go:rewriteToPcrel.
		} else {
			base = REG_NONE
		}
		v = int32(vaddr(ctxt, p, a, &rel))

	case obj.NAME_AUTO,
		obj.NAME_PARAM:
		base = REG_SP
	}

	if base == REG_TLS {
		v = int32(vaddr(ctxt, p, a, &rel))
	}

	ab.rexflag |= regrex[base]&Rxb | rex
	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
		// No general-purpose base register: the operand is a bare 32-bit
		// displacement, emitted either directly or behind an SIB byte.
		if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
				ctxt.Diag("%v has offset against gotref", p)
			}
			ab.Put1(byte(0<<6 | 5<<0 | r<<3))
			goto putrelv
		}

		// temporary
		ab.Put2(
			byte(0<<6|4<<0|r<<3), // sib present
			0<<6|4<<3|5<<0,       // DS:d32
		)
		goto putrelv
	}

	// SP and R12 as base always go through an asmidx byte with no index.
	if base == REG_SP || base == REG_R12 {
		if v == 0 {
			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			return
		}

		if disp8, ok := toDisp8(v, p, ab); ok {
			ab.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			ab.Put1(disp8)
			return
		}

		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
		goto putrelv
	}

	if REG_AX <= base && base <= REG_R15 {
		if a.Index == REG_TLS && !ctxt.Flag_shared && !isAndroid &&
			!(ctxt.Headtype == objabi.Hwindows && ctxt.Arch.Family == sys.AMD64) {
			// Move the displacement into the addend of an R_TLS_LE
			// relocation and encode a zero displacement in its place.
			rel = obj.Reloc{}
			rel.Type = objabi.R_TLS_LE
			rel.Siz = 4
			rel.Sym = nil
			rel.Add = int64(v)
			v = 0
		}

		// Zero displacement can be omitted, except with BP/R13 as base.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			return
		}

		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
			ab.Put2(byte(1<<6|reg[base]<<0|r<<3), disp8)
			return
		}

		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		goto putrelv
	}

	goto bad

putrelv:
	// Emit the 32-bit displacement, recording the pending relocation (if
	// any) at the position where the displacement is written.
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad rel")
			goto bad
		}

		r := obj.Addrel(cursym)
		*r = rel
		r.Off = int32(p.Pc + int64(ab.Len()))
	}

	ab.PutInt32(v)
	return

bad:
	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
}

// asmand encodes operand a via asmandsz, taking the r field and the
// extension bits from register operand ra (reg[ra.Reg], regrex[ra.Reg]).
func (ab *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
	ab.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
}

// asmando encodes operand a via asmandsz with the constant o in the r
// field and no additional extension bits.
func (ab *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
	ab.asmandsz(ctxt, cursym, p, a, o, 0, 0)
}

// bytereg rewrites a register operand in the range REG_AX..REG_R15 that
// has no index register to the register at the same distance from REG_AL,
// and sets *t to 0. Other operands are left untouched.
// Callers in this file pass the operand's class field (&p.Ft or &p.Tt) as t.
func bytereg(a *obj.Addr, t *uint8) {
	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
		a.Reg += REG_AL - REG_AX
		*t = 0
	}
}

// unbytereg is the inverse of bytereg: a register operand in the range
// REG_AL..REG_R15B that has no index register is rewritten to the register
// at the same distance from REG_AX, and *t is set to 0.
func unbytereg(a *obj.Addr, t *uint8) {
	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
		a.Reg += REG_AX - REG_AL
		*t = 0
	}
}

// Encoding kinds used in the fifth field of ymovtab entries.
const (
	movLit uint8 = iota // Like Zlit
	movRegMem
	movMemReg
	movRegMem2op
	movMemReg2op
	movFullPtr // Load full pointer, trash heap (unsupported)
	movDoubleShift
	movTLSReg
)

// ymovtab lists special-case encodings, grouped by the section comments
// below. Each entry holds an instruction, three operand classes, one of
// the mov* encoding kinds above and up to four opcode bytes.
// The table ends with an all-zero entry.
var ymovtab = []movtab{
	// push
	{APUSHL, Ycs, Ynone, Ynone, movLit, [4]uint8{0x0e, 0}},
	{APUSHL, Yss, Ynone, Ynone, movLit, [4]uint8{0x16, 0}},
	{APUSHL, Yds, Ynone, Ynone, movLit, [4]uint8{0x1e, 0}},
	{APUSHL, Yes, Ynone, Ynone, movLit, [4]uint8{0x06, 0}},
	{APUSHL, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHL, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHW, Ycs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0e, 0}},
	{APUSHW, Yss, Ynone, Ynone, movLit, [4]uint8{Pe, 0x16, 0}},
	{APUSHW, Yds, Ynone, Ynone, movLit, [4]uint8{Pe, 0x1e, 0}},
	{APUSHW, Yes, Ynone, Ynone, movLit, [4]uint8{Pe, 0x06, 0}},
	{APUSHW, Yfs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa0, 0}},
	{APUSHW, Ygs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa8, 0}},

	// pop
	{APOPL, Ynone, Ynone, Yds, movLit, [4]uint8{0x1f, 0}},
	{APOPL, Ynone, Ynone, Yes, movLit, [4]uint8{0x07, 0}},
	{APOPL, Ynone, Ynone, Yss, movLit, [4]uint8{0x17, 0}},
	{APOPL, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPL, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPQ, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPQ, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPW, Ynone, Ynone, Yds, movLit, [4]uint8{Pe, 0x1f, 0}},
	{APOPW, Ynone, Ynone, Yes, movLit, [4]uint8{Pe, 0x07, 0}},
	{APOPW, Ynone, Ynone, Yss, movLit, [4]uint8{Pe, 0x17, 0}},
	{APOPW, Ynone, Ynone, Yfs, movLit, [4]uint8{Pe, 0x0f, 0xa1, 0}},
	{APOPW, Ynone, Ynone, Ygs, movLit, [4]uint8{Pe, 0x0f, 0xa9, 0}},

	// mov seg
	{AMOVW, Yes, Ynone, Yml, movRegMem, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, movRegMem, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, movRegMem, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, movMemReg, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, movMemReg, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, movMemReg, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, movMemReg, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, movMemReg, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, movMemReg, [4]uint8{0x8e, 5, 0, 0}},

	// mov cr
	{AMOVL, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},

	// mov dr
	{AMOVL, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 2, 0}},
	{AMOVQ, Ydr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 3, 0}},
	{AMOVQ, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ydr2, movMemReg2op, [4]uint8{0x0f, 0x23, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ydr3, movMemReg2op, [4]uint8{0x0f, 0x23, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},

	// mov tr
	{AMOVL, Ytr6, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, movMemReg2op, [4]uint8{0x0f, 0x26, 6, 0xff}},
	{AMOVL, Yml, Ynone, Ytr7, movMemReg2op, [4]uint8{0x0f, 0x26, 7, 0xff}},

	// lgdt, sgdt, lidt, sidt
	{AMOVL, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},

	// lldt, sldt
	{AMOVW, Yml, Ynone, Yldtr, movMemReg2op, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 0, 0}},

	// lmsw, smsw
	{AMOVW, Yml, Ynone, Ymsw, movMemReg2op, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x01, 4, 0}},

	// ltr, str
	{AMOVW, Yml, Ynone, Ytask, movMemReg2op, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	{AMOVL, Yml, Ycol, movFullPtr, [4]uint8{0, 0, 0, 0}},
	{AMOVW, Yml, Ycol, movFullPtr, [4]uint8{Pe, 0, 0, 0}},
	*/

	// double shift
	{ASHLL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},

	// load TLS base
	{AMOVL, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	{0, 0, 0, 0, 0, [4]uint8{}},
}

// isax reports whether a refers to the A register: its Reg is REG_AX,
// REG_AL or REG_AH, or its Index is REG_AX.
func isax(a *obj.Addr) bool {
	switch a.Reg {
	case REG_AX, REG_AL, REG_AH:
		return true
	}

	return a.Index == REG_AX
}

// subreg replaces register from with register to wherever it appears as
// the Reg or Index of p.From or p.To. The operand class (p.Ft or p.Tt) of
// every operand that was changed is reset to 0.
func subreg(p *obj.Prog, from int, to int) {
	if false { /* debug['Q'] */
		fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
	}

	if int(p.From.Reg) == from {
		p.From.Reg = int16(to)
		p.Ft = 0
	}

	if int(p.To.Reg) == from {
		p.To.Reg = int16(to)
		p.Tt = 0
	}

	if int(p.From.Index) == from {
		p.From.Index = int16(to)
		p.Ft = 0
	}

	if int(p.To.Index) == from {
		p.To.Index = int16(to)
		p.Tt = 0
	}

	if false { /* debug['Q'] */
		fmt.Printf("%v\n", p)
	}
}

// mediaop emits the opcode bytes selected by op and returns the possibly
// advanced index z into o.op.
//
// When op is one of Pm, Pe, Pf2, Pf3 and osize is not 1, op is emitted
// (unless it is Pm itself), followed by Pm, and the opcode byte to emit
// last is taken from the next position in o.op; z is advanced to match.
// In every other case Pm is emitted unless the buffer already ends in Pm.
// The final opcode byte is always written last.
func (ab *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
	switch op {
	case Pm, Pe, Pf2, Pf3:
		if osize != 1 {
			if op != Pm {
				ab.Put1(byte(op))
			}
			ab.Put1(Pm)
			z++
			op = int(o.op[z])
			break
		}
		fallthrough

	default:
		if ab.Len() == 0 || ab.Last() != Pm {
			ab.Put1(Pm)
		}
	}

	ab.Put1(byte(op))
	return z
}

// bpduff1 holds pre-encoded machine code; the trailing comments give the
// instructions the bytes stand for.
var bpduff1 = []byte{
	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
}

// bpduff2 holds pre-encoded machine code; the trailing comment gives the
// instruction the bytes stand for.
var bpduff2 = []byte{
	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
}

// asmevex emits EVEX prefix and opcode byte.
// In addition to asmvex r/m, vvvv and reg fields also requires optional
// K-masking register.
//
// Any of rm, v, r and k may be nil, in which case the corresponding
// fields keep their default values.
// The bytes written are the escape byte 0x62, the three payload bytes
// P0, P1 and P2 assembled below, and evex.opcode.
//
// Expects asmbuf.evex to be properly initialized.
func (ab *AsmBuf) asmevex(ctxt *obj.Link, p *obj.Prog, rm, v, r, k *obj.Addr) {
	ab.evexflag = true
	evex := ab.evex

	// The R, X, B and R' bits default to 1 and are cleared when the
	// matching register extension bit is set.
	rexR := byte(1)
	evexR := byte(1)
	rexX := byte(1)
	rexB := byte(1)
	if r != nil {
		if regrex[r.Reg]&Rxr != 0 {
			rexR = 0 // "ModR/M.reg" selector 4th bit.
		}
		if regrex[r.Reg]&RxrEvex != 0 {
			evexR = 0 // "ModR/M.reg" selector 5th bit.
		}
	}
	if rm != nil {
		if rm.Index == REG_NONE && regrex[rm.Reg]&RxrEvex != 0 {
			rexX = 0
		} else if regrex[rm.Index]&Rxx != 0 {
			rexX = 0
		}
		if regrex[rm.Reg]&Rxb != 0 {
			rexB = 0
		}
	}
	// P0 = [R][X][B][R'][00][mm]
	p0 := (rexR << 7) |
		(rexX << 6) |
		(rexB << 5) |
		(evexR << 4) |
		(0 << 2) |
		(evex.M() << 0)

	vexV := byte(0)
	if v != nil {
		// 4bit-wide reg index.
		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
	}
	// The vvvv field is stored with all four bits inverted.
	vexV ^= 0x0F
	// P1 = [W][vvvv][1][pp]
	p1 := (evex.W() << 7) |
		(vexV << 3) |
		(1 << 2) |
		(evex.P() << 0)

	// The instruction suffix recorded in p.Scond selects zeroing,
	// rounding, broadcast or SAE.
	suffix := evexSuffixMap[p.Scond]
	evexZ := byte(0)
	evexLL := evex.L()
	evexB := byte(0)
	evexV := byte(1)
	evexA := byte(0)
	if suffix.zeroing {
		if !evex.ZeroingEnabled() {
			ctxt.Diag("unsupported zeroing: %v", p)
		}
		evexZ = 1
	}
	// Rounding, broadcast and SAE all set the b bit; rounding additionally
	// replaces the L'L field with the rounding mode.
	switch {
	case suffix.rounding != rcUnset:
		if rm != nil && rm.Type == obj.TYPE_MEM {
			ctxt.Diag("illegal rounding with memory argument: %v", p)
		} else if !evex.RoundingEnabled() {
			ctxt.Diag("unsupported rounding: %v", p)
		}
		evexB = 1
		evexLL = suffix.rounding
	case suffix.broadcast:
		if rm == nil || rm.Type != obj.TYPE_MEM {
			ctxt.Diag("illegal broadcast without memory argument: %v", p)
		} else if !evex.BroadcastEnabled() {
			ctxt.Diag("unsupported broadcast: %v", p)
		}
		evexB = 1
	case suffix.sae:
		if rm != nil && rm.Type == obj.TYPE_MEM {
			ctxt.Diag("illegal SAE with memory argument: %v", p)
		} else if !evex.SaeEnabled() {
			ctxt.Diag("unsupported SAE: %v", p)
		}
		evexB = 1
	}
	if rm != nil && regrex[rm.Index]&RxrEvex != 0 {
		evexV = 0
	} else if v != nil && regrex[v.Reg]&RxrEvex != 0 {
		evexV = 0 // VSR selector 5th bit.
	}
	if k != nil {
		evexA = byte(reg[k.Reg])
	}
	// P2 = [z][L'L][b][V'][aaa]
	p2 := (evexZ << 7) |
		(evexLL << 5) |
		(evexB << 4) |
		(evexV << 3) |
		(evexA << 0)

	const evexEscapeByte = 0x62
	ab.Put4(evexEscapeByte, p0, p1, p2)
	ab.Put1(evex.opcode)
}

// Emit VEX prefix and opcode byte.
// The three addresses are the r/m, vvvv, and reg fields.
// The reg and rm arguments appear in the same order as the
// arguments to asmand, which typically follows the call to asmvex.
// The final two arguments are the VEX prefix (see encoding above)
// and the opcode byte.
+// For details about vex prefix see: +// https://en.wikipedia.org/wiki/VEX_prefix#Technical_description +func (ab *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) { + ab.vexflag = true + rexR := 0 + if r != nil { + rexR = regrex[r.Reg] & Rxr + } + rexB := 0 + rexX := 0 + if rm != nil { + rexB = regrex[rm.Reg] & Rxb + rexX = regrex[rm.Index] & Rxx + } + vexM := (vex >> 3) & 0x7 + vexWLP := vex & 0x87 + vexV := byte(0) + if v != nil { + vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF + } + vexV ^= 0xF + if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 { + // Can use 2-byte encoding. + ab.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP) + } else { + // Must use 3-byte encoding. + ab.Put3(0xc4, + (byte(rexR|rexX|rexB)<<5)^0xE0|vexM, + vexV<<3|vexWLP, + ) + } + ab.Put1(opcode) +} + +// regIndex returns register index that fits in 5 bits. +// +// R : 3 bit | legacy instructions | N/A +// [R/V]EX.R : 1 bit | REX / VEX extension bit | Rxr +// EVEX.R : 1 bit | EVEX extension bit | RxrEvex +// +// Examples: +// +// REG_Z30 => 30 +// REG_X15 => 15 +// REG_R9 => 9 +// REG_AX => 0 +func regIndex(r int16) int { + lower3bits := reg[r] + high4bit := regrex[r] & Rxr << 1 + high5bit := regrex[r] & RxrEvex << 0 + return lower3bits | high4bit | high5bit +} + +// avx2gatherValid reports whether p satisfies AVX2 gather constraints. +// Reports errors via ctxt. +func avx2gatherValid(ctxt *obj.Link, p *obj.Prog) bool { + // If any pair of the index, mask, or destination registers + // are the same, illegal instruction trap (#UD) is triggered. + index := regIndex(p.GetFrom3().Index) + mask := regIndex(p.From.Reg) + dest := regIndex(p.To.Reg) + if dest == mask || dest == index || mask == index { + ctxt.Diag("mask, index, and destination registers should be distinct: %v", p) + return false + } + + return true +} + +// avx512gatherValid reports whether p satisfies AVX512 gather constraints. +// Reports errors via ctxt. 
+func avx512gatherValid(ctxt *obj.Link, p *obj.Prog) bool { + // Illegal instruction trap (#UD) is triggered if the destination vector + // register is the same as index vector in VSIB. + index := regIndex(p.From.Index) + dest := regIndex(p.To.Reg) + if dest == index { + ctxt.Diag("index and destination registers should be distinct: %v", p) + return false + } + + return true +} + +func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) { + o := opindex[p.As&obj.AMask] + + if o == nil { + ctxt.Diag("asmins: missing op %v", p) + return + } + + if pre := prefixof(ctxt, &p.From); pre != 0 { + ab.Put1(byte(pre)) + } + if pre := prefixof(ctxt, &p.To); pre != 0 { + ab.Put1(byte(pre)) + } + + // Checks to warn about instruction/arguments combinations that + // will unconditionally trigger illegal instruction trap (#UD). + switch p.As { + case AVGATHERDPD, + AVGATHERQPD, + AVGATHERDPS, + AVGATHERQPS, + AVPGATHERDD, + AVPGATHERQD, + AVPGATHERDQ, + AVPGATHERQQ: + // AVX512 gather requires explicit K mask. + if p.GetFrom3().Reg >= REG_K0 && p.GetFrom3().Reg <= REG_K7 { + if !avx512gatherValid(ctxt, p) { + return + } + } else { + if !avx2gatherValid(ctxt, p) { + return + } + } + } + + if p.Ft == 0 { + p.Ft = uint8(oclass(ctxt, p, &p.From)) + } + if p.Tt == 0 { + p.Tt = uint8(oclass(ctxt, p, &p.To)) + } + + ft := int(p.Ft) * Ymax + var f3t int + tt := int(p.Tt) * Ymax + + xo := obj.Bool2int(o.op[0] == 0x0f) + z := 0 + var a *obj.Addr + var l int + var op int + var q *obj.Prog + var r *obj.Reloc + var rel obj.Reloc + var v int64 + + args := make([]int, 0, argListMax) + if ft != Ynone*Ymax { + args = append(args, ft) + } + for i := range p.RestArgs { + args = append(args, oclass(ctxt, p, &p.RestArgs[i].Addr)*Ymax) + } + if tt != Ynone*Ymax { + args = append(args, tt) + } + + for _, yt := range o.ytab { + // ytab matching is purely args-based, + // but AVX512 suffixes like "Z" or "RU_SAE" will + // add EVEX-only filter that will reject non-EVEX matches. 
+ // + // Consider "VADDPD.BCST 2032(DX), X0, X0". + // Without this rule, operands will lead to VEX-encoded form + // and produce "c5b15813" encoding. + if !yt.match(args) { + // "xo" is always zero for VEX/EVEX encoded insts. + z += int(yt.zoffset) + xo + } else { + if p.Scond != 0 && !evexZcase(yt.zcase) { + // Do not signal error and continue to search + // for matching EVEX-encoded form. + z += int(yt.zoffset) + continue + } + + switch o.prefix { + case Px1: // first option valid only in 32-bit mode + if ctxt.Arch.Family == sys.AMD64 && z == 0 { + z += int(yt.zoffset) + xo + continue + } + case Pq: // 16 bit escape and opcode escape + ab.Put2(Pe, Pm) + + case Pq3: // 16 bit escape and opcode escape + REX.W + ab.rexflag |= Pw + ab.Put2(Pe, Pm) + + case Pq4: // 66 0F 38 + ab.Put3(0x66, 0x0F, 0x38) + + case Pq4w: // 66 0F 38 + REX.W + ab.rexflag |= Pw + ab.Put3(0x66, 0x0F, 0x38) + + case Pq5: // F3 0F 38 + ab.Put3(0xF3, 0x0F, 0x38) + + case Pq5w: // F3 0F 38 + REX.W + ab.rexflag |= Pw + ab.Put3(0xF3, 0x0F, 0x38) + + case Pf2, // xmm opcode escape + Pf3: + ab.Put2(o.prefix, Pm) + + case Pef3: + ab.Put3(Pe, Pf3, Pm) + + case Pfw: // xmm opcode escape + REX.W + ab.rexflag |= Pw + ab.Put2(Pf3, Pm) + + case Pm: // opcode escape + ab.Put1(Pm) + + case Pe: // 16 bit escape + ab.Put1(Pe) + + case Pw: // 64-bit escape + if ctxt.Arch.Family != sys.AMD64 { + ctxt.Diag("asmins: illegal 64: %v", p) + } + ab.rexflag |= Pw + + case Pw8: // 64-bit escape if z >= 8 + if z >= 8 { + if ctxt.Arch.Family != sys.AMD64 { + ctxt.Diag("asmins: illegal 64: %v", p) + } + ab.rexflag |= Pw + } + + case Pb: // botch + if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) { + goto bad + } + // NOTE(rsc): This is probably safe to do always, + // but when enabled it chooses different encodings + // than the old cmd/internal/obj/i386 code did, + // which breaks our "same bits out" checks. 
+ // In particular, CMPB AX, $0 encodes as 80 f8 00 + // in the original obj/i386, and it would encode + // (using a valid, shorter form) as 3c 00 if we enabled + // the call to bytereg here. + if ctxt.Arch.Family == sys.AMD64 { + bytereg(&p.From, &p.Ft) + bytereg(&p.To, &p.Tt) + } + + case P32: // 32 bit but illegal if 64-bit mode + if ctxt.Arch.Family == sys.AMD64 { + ctxt.Diag("asmins: illegal in 64-bit mode: %v", p) + } + + case Py: // 64-bit only, no prefix + if ctxt.Arch.Family != sys.AMD64 { + ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p) + } + + case Py1: // 64-bit only if z < 1, no prefix + if z < 1 && ctxt.Arch.Family != sys.AMD64 { + ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p) + } + + case Py3: // 64-bit only if z < 3, no prefix + if z < 3 && ctxt.Arch.Family != sys.AMD64 { + ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p) + } + } + + if z >= len(o.op) { + log.Fatalf("asmins bad table %v", p) + } + op = int(o.op[z]) + if op == 0x0f { + ab.Put1(byte(op)) + z++ + op = int(o.op[z]) + } + + switch yt.zcase { + default: + ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p) + return + + case Zpseudo: + break + + case Zlit: + ab.PutOpBytesLit(z, &o.op) + + case Zlitr_m: + ab.PutOpBytesLit(z, &o.op) + ab.asmand(ctxt, cursym, p, &p.To, &p.From) + + case Zlitm_r: + ab.PutOpBytesLit(z, &o.op) + ab.asmand(ctxt, cursym, p, &p.From, &p.To) + + case Zlit_m_r: + ab.PutOpBytesLit(z, &o.op) + ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) + + case Zmb_r: + bytereg(&p.From, &p.Ft) + fallthrough + + case Zm_r: + ab.Put1(byte(op)) + ab.asmand(ctxt, cursym, p, &p.From, &p.To) + + case Z_m_r: + ab.Put1(byte(op)) + ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) + + case Zm2_r: + ab.Put2(byte(op), o.op[z+1]) + ab.asmand(ctxt, cursym, p, &p.From, &p.To) + + case Zm_r_xm: + ab.mediaop(ctxt, o, op, int(yt.zoffset), z) + ab.asmand(ctxt, cursym, p, &p.From, &p.To) + + case Zm_r_xm_nr: + ab.rexflag = 0 + 
ab.mediaop(ctxt, o, op, int(yt.zoffset), z) + ab.asmand(ctxt, cursym, p, &p.From, &p.To) + + case Zm_r_i_xm: + ab.mediaop(ctxt, o, op, int(yt.zoffset), z) + ab.asmand(ctxt, cursym, p, &p.From, p.GetFrom3()) + ab.Put1(byte(p.To.Offset)) + + case Zibm_r, Zibr_m: + ab.PutOpBytesLit(z, &o.op) + if yt.zcase == Zibr_m { + ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3()) + } else { + ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) + } + switch { + default: + ab.Put1(byte(p.From.Offset)) + case yt.args[0] == Yi32 && o.prefix == Pe: + ab.PutInt16(int16(p.From.Offset)) + case yt.args[0] == Yi32: + ab.PutInt32(int32(p.From.Offset)) + } + + case Zaut_r: + ab.Put1(0x8d) // leal + if p.From.Type != obj.TYPE_ADDR { + ctxt.Diag("asmins: Zaut sb type ADDR") + } + p.From.Type = obj.TYPE_MEM + ab.asmand(ctxt, cursym, p, &p.From, &p.To) + p.From.Type = obj.TYPE_ADDR + + case Zm_o: + ab.Put1(byte(op)) + ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1])) + + case Zr_m: + ab.Put1(byte(op)) + ab.asmand(ctxt, cursym, p, &p.To, &p.From) + + case Zvex: + ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1]) + + case Zvex_rm_v_r: + ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1]) + ab.asmand(ctxt, cursym, p, &p.From, &p.To) + + case Zvex_rm_v_ro: + ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1]) + ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2])) + + case Zvex_i_rm_vo: + ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1]) + ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+2])) + ab.Put1(byte(p.From.Offset)) + + case Zvex_i_r_v: + ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1]) + regnum := byte(0x7) + if p.GetFrom3().Reg >= REG_X0 && p.GetFrom3().Reg <= REG_X15 { + regnum &= byte(p.GetFrom3().Reg - REG_X0) + } else { + regnum &= byte(p.GetFrom3().Reg - REG_Y0) + } + ab.Put1(o.op[z+2] | regnum) + ab.Put1(byte(p.From.Offset)) + + case Zvex_i_rm_v_r: + imm, from, from3, to := unpackOps4(p) + ab.asmvex(ctxt, from, 
from3, to, o.op[z], o.op[z+1]) + ab.asmand(ctxt, cursym, p, from, to) + ab.Put1(byte(imm.Offset)) + + case Zvex_i_rm_r: + ab.asmvex(ctxt, p.GetFrom3(), nil, &p.To, o.op[z], o.op[z+1]) + ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) + ab.Put1(byte(p.From.Offset)) + + case Zvex_v_rm_r: + ab.asmvex(ctxt, p.GetFrom3(), &p.From, &p.To, o.op[z], o.op[z+1]) + ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) + + case Zvex_r_v_rm: + ab.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1]) + ab.asmand(ctxt, cursym, p, &p.To, &p.From) + + case Zvex_rm_r_vo: + ab.asmvex(ctxt, &p.From, &p.To, p.GetFrom3(), o.op[z], o.op[z+1]) + ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2])) + + case Zvex_i_r_rm: + ab.asmvex(ctxt, &p.To, nil, p.GetFrom3(), o.op[z], o.op[z+1]) + ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3()) + ab.Put1(byte(p.From.Offset)) + + case Zvex_hr_rm_v_r: + hr, from, from3, to := unpackOps4(p) + ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1]) + ab.asmand(ctxt, cursym, p, from, to) + ab.Put1(byte(regIndex(hr.Reg) << 4)) + + case Zevex_k_rmo: + ab.evex = newEVEXBits(z, &o.op) + ab.asmevex(ctxt, p, &p.To, nil, nil, &p.From) + ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+3])) + + case Zevex_i_rm_vo: + ab.evex = newEVEXBits(z, &o.op) + ab.asmevex(ctxt, p, p.GetFrom3(), &p.To, nil, nil) + ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+3])) + ab.Put1(byte(p.From.Offset)) + + case Zevex_i_rm_k_vo: + imm, from, kmask, to := unpackOps4(p) + ab.evex = newEVEXBits(z, &o.op) + ab.asmevex(ctxt, p, from, to, nil, kmask) + ab.asmando(ctxt, cursym, p, from, int(o.op[z+3])) + ab.Put1(byte(imm.Offset)) + + case Zevex_i_r_rm: + ab.evex = newEVEXBits(z, &o.op) + ab.asmevex(ctxt, p, &p.To, nil, p.GetFrom3(), nil) + ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3()) + ab.Put1(byte(p.From.Offset)) + + case Zevex_i_r_k_rm: + imm, from, kmask, to := unpackOps4(p) + ab.evex = newEVEXBits(z, &o.op) + ab.asmevex(ctxt, p, to, nil, from, kmask) + ab.asmand(ctxt, cursym, p, to, 
from) + ab.Put1(byte(imm.Offset)) + + case Zevex_i_rm_r: + ab.evex = newEVEXBits(z, &o.op) + ab.asmevex(ctxt, p, p.GetFrom3(), nil, &p.To, nil) + ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) + ab.Put1(byte(p.From.Offset)) + + case Zevex_i_rm_k_r: + imm, from, kmask, to := unpackOps4(p) + ab.evex = newEVEXBits(z, &o.op) + ab.asmevex(ctxt, p, from, nil, to, kmask) + ab.asmand(ctxt, cursym, p, from, to) + ab.Put1(byte(imm.Offset)) + + case Zevex_i_rm_v_r: + imm, from, from3, to := unpackOps4(p) + ab.evex = newEVEXBits(z, &o.op) + ab.asmevex(ctxt, p, from, from3, to, nil) + ab.asmand(ctxt, cursym, p, from, to) + ab.Put1(byte(imm.Offset)) + + case Zevex_i_rm_v_k_r: + imm, from, from3, kmask, to := unpackOps5(p) + ab.evex = newEVEXBits(z, &o.op) + ab.asmevex(ctxt, p, from, from3, to, kmask) + ab.asmand(ctxt, cursym, p, from, to) + ab.Put1(byte(imm.Offset)) + + case Zevex_r_v_rm: + ab.evex = newEVEXBits(z, &o.op) + ab.asmevex(ctxt, p, &p.To, p.GetFrom3(), &p.From, nil) + ab.asmand(ctxt, cursym, p, &p.To, &p.From) + + case Zevex_rm_v_r: + ab.evex = newEVEXBits(z, &o.op) + ab.asmevex(ctxt, p, &p.From, p.GetFrom3(), &p.To, nil) + ab.asmand(ctxt, cursym, p, &p.From, &p.To) + + case Zevex_rm_k_r: + ab.evex = newEVEXBits(z, &o.op) + ab.asmevex(ctxt, p, &p.From, nil, &p.To, p.GetFrom3()) + ab.asmand(ctxt, cursym, p, &p.From, &p.To) + + case Zevex_r_k_rm: + ab.evex = newEVEXBits(z, &o.op) + ab.asmevex(ctxt, p, &p.To, nil, &p.From, p.GetFrom3()) + ab.asmand(ctxt, cursym, p, &p.To, &p.From) + + case Zevex_rm_v_k_r: + from, from3, kmask, to := unpackOps4(p) + ab.evex = newEVEXBits(z, &o.op) + ab.asmevex(ctxt, p, from, from3, to, kmask) + ab.asmand(ctxt, cursym, p, from, to) + + case Zevex_r_v_k_rm: + from, from3, kmask, to := unpackOps4(p) + ab.evex = newEVEXBits(z, &o.op) + ab.asmevex(ctxt, p, to, from3, from, kmask) + ab.asmand(ctxt, cursym, p, to, from) + + case Zr_m_xm: + ab.mediaop(ctxt, o, op, int(yt.zoffset), z) + ab.asmand(ctxt, cursym, p, &p.To, &p.From) + + case 
Zr_m_xm_nr: + ab.rexflag = 0 + ab.mediaop(ctxt, o, op, int(yt.zoffset), z) + ab.asmand(ctxt, cursym, p, &p.To, &p.From) + + case Zo_m: + ab.Put1(byte(op)) + ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1])) + + case Zcallindreg: + r = obj.Addrel(cursym) + r.Off = int32(p.Pc) + r.Type = objabi.R_CALLIND + r.Siz = 0 + fallthrough + + case Zo_m64: + ab.Put1(byte(op)) + ab.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1) + + case Zm_ibo: + ab.Put1(byte(op)) + ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1])) + ab.Put1(byte(vaddr(ctxt, p, &p.To, nil))) + + case Zibo_m: + ab.Put1(byte(op)) + ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1])) + ab.Put1(byte(vaddr(ctxt, p, &p.From, nil))) + + case Zibo_m_xm: + z = ab.mediaop(ctxt, o, op, int(yt.zoffset), z) + ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1])) + ab.Put1(byte(vaddr(ctxt, p, &p.From, nil))) + + case Z_ib, Zib_: + if yt.zcase == Zib_ { + a = &p.From + } else { + a = &p.To + } + ab.Put1(byte(op)) + if p.As == AXABORT { + ab.Put1(o.op[z+1]) + } + ab.Put1(byte(vaddr(ctxt, p, a, nil))) + + case Zib_rp: + ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40) + ab.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil))) + + case Zil_rp: + ab.rexflag |= regrex[p.To.Reg] & Rxb + ab.Put1(byte(op + reg[p.To.Reg])) + if o.prefix == Pe { + v = vaddr(ctxt, p, &p.From, nil) + ab.PutInt16(int16(v)) + } else { + ab.relput4(ctxt, cursym, p, &p.From) + } + + case Zo_iw: + ab.Put1(byte(op)) + if p.From.Type != obj.TYPE_NONE { + v = vaddr(ctxt, p, &p.From, nil) + ab.PutInt16(int16(v)) + } + + case Ziq_rp: + v = vaddr(ctxt, p, &p.From, &rel) + l = int(v >> 32) + if l == 0 && rel.Siz != 8 { + ab.rexflag &^= (0x40 | Rxw) + + ab.rexflag |= regrex[p.To.Reg] & Rxb + ab.Put1(byte(0xb8 + reg[p.To.Reg])) + if rel.Type != 0 { + r = obj.Addrel(cursym) + *r = rel + r.Off = int32(p.Pc + int64(ab.Len())) + } + + ab.PutInt32(int32(v)) + } else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { // sign extend + ab.Put1(0xc7) + 
ab.asmando(ctxt, cursym, p, &p.To, 0) + + ab.PutInt32(int32(v)) // need all 8 + } else { + ab.rexflag |= regrex[p.To.Reg] & Rxb + ab.Put1(byte(op + reg[p.To.Reg])) + if rel.Type != 0 { + r = obj.Addrel(cursym) + *r = rel + r.Off = int32(p.Pc + int64(ab.Len())) + } + + ab.PutInt64(v) + } + + case Zib_rr: + ab.Put1(byte(op)) + ab.asmand(ctxt, cursym, p, &p.To, &p.To) + ab.Put1(byte(vaddr(ctxt, p, &p.From, nil))) + + case Z_il, Zil_: + if yt.zcase == Zil_ { + a = &p.From + } else { + a = &p.To + } + ab.Put1(byte(op)) + if o.prefix == Pe { + v = vaddr(ctxt, p, a, nil) + ab.PutInt16(int16(v)) + } else { + ab.relput4(ctxt, cursym, p, a) + } + + case Zm_ilo, Zilo_m: + ab.Put1(byte(op)) + if yt.zcase == Zilo_m { + a = &p.From + ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1])) + } else { + a = &p.To + ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1])) + } + + if o.prefix == Pe { + v = vaddr(ctxt, p, a, nil) + ab.PutInt16(int16(v)) + } else { + ab.relput4(ctxt, cursym, p, a) + } + + case Zil_rr: + ab.Put1(byte(op)) + ab.asmand(ctxt, cursym, p, &p.To, &p.To) + if o.prefix == Pe { + v = vaddr(ctxt, p, &p.From, nil) + ab.PutInt16(int16(v)) + } else { + ab.relput4(ctxt, cursym, p, &p.From) + } + + case Z_rp: + ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40) + ab.Put1(byte(op + reg[p.To.Reg])) + + case Zrp_: + ab.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40) + ab.Put1(byte(op + reg[p.From.Reg])) + + case Zcallcon, Zjmpcon: + if yt.zcase == Zcallcon { + ab.Put1(byte(op)) + } else { + ab.Put1(o.op[z+1]) + } + r = obj.Addrel(cursym) + r.Off = int32(p.Pc + int64(ab.Len())) + r.Type = objabi.R_PCREL + r.Siz = 4 + r.Add = p.To.Offset + ab.PutInt32(0) + + case Zcallind: + ab.Put2(byte(op), o.op[z+1]) + r = obj.Addrel(cursym) + r.Off = int32(p.Pc + int64(ab.Len())) + if ctxt.Arch.Family == sys.AMD64 { + r.Type = objabi.R_PCREL + } else { + r.Type = objabi.R_ADDR + } + r.Siz = 4 + r.Add = p.To.Offset + r.Sym = p.To.Sym + ab.PutInt32(0) + + case Zcall, Zcallduff: + if p.To.Sym == nil { + 
ctxt.Diag("call without target") + ctxt.DiagFlush() + log.Fatalf("bad code") + } + + if yt.zcase == Zcallduff && ctxt.Flag_dynlink { + ctxt.Diag("directly calling duff when dynamically linking Go") + } + + if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 { + // Maintain BP around call, since duffcopy/duffzero can't do it + // (the call jumps into the middle of the function). + // This makes it possible to see call sites for duffcopy/duffzero in + // BP-based profiling tools like Linux perf (which is the + // whole point of maintaining frame pointers in Go). + // MOVQ BP, -16(SP) + // LEAQ -16(SP), BP + ab.Put(bpduff1) + } + ab.Put1(byte(op)) + r = obj.Addrel(cursym) + r.Off = int32(p.Pc + int64(ab.Len())) + r.Sym = p.To.Sym + r.Add = p.To.Offset + r.Type = objabi.R_CALL + r.Siz = 4 + ab.PutInt32(0) + + if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 { + // Pop BP pushed above. + // MOVQ 0(BP), BP + ab.Put(bpduff2) + } + + // TODO: jump across functions needs reloc + case Zbr, Zjmp, Zloop: + if p.As == AXBEGIN { + ab.Put1(byte(op)) + } + if p.To.Sym != nil { + if yt.zcase != Zjmp { + ctxt.Diag("branch to ATEXT") + ctxt.DiagFlush() + log.Fatalf("bad code") + } + + ab.Put1(o.op[z+1]) + r = obj.Addrel(cursym) + r.Off = int32(p.Pc + int64(ab.Len())) + r.Sym = p.To.Sym + // Note: R_CALL instead of R_PCREL. R_CALL is more permissive in that + // it can point to a trampoline instead of the destination itself. + r.Type = objabi.R_CALL + r.Siz = 4 + ab.PutInt32(0) + break + } + + // Assumes q is in this function. + // TODO: Check in input, preserve in brchain. + + // Fill in backward jump now. 
+ q = p.To.Target() + + if q == nil { + ctxt.Diag("jmp/branch/loop without target") + ctxt.DiagFlush() + log.Fatalf("bad code") + } + + if p.Back&branchBackwards != 0 { + v = q.Pc - (p.Pc + 2) + if v >= -128 && p.As != AXBEGIN { + if p.As == AJCXZL { + ab.Put1(0x67) + } + ab.Put2(byte(op), byte(v)) + } else if yt.zcase == Zloop { + ctxt.Diag("loop too far: %v", p) + } else { + v -= 5 - 2 + if p.As == AXBEGIN { + v-- + } + if yt.zcase == Zbr { + ab.Put1(0x0f) + v-- + } + + ab.Put1(o.op[z+1]) + ab.PutInt32(int32(v)) + } + + break + } + + // Annotate target; will fill in later. + p.Forwd = q.Rel + + q.Rel = p + if p.Back&branchShort != 0 && p.As != AXBEGIN { + if p.As == AJCXZL { + ab.Put1(0x67) + } + ab.Put2(byte(op), 0) + } else if yt.zcase == Zloop { + ctxt.Diag("loop too far: %v", p) + } else { + if yt.zcase == Zbr { + ab.Put1(0x0f) + } + ab.Put1(o.op[z+1]) + ab.PutInt32(0) + } + + case Zbyte: + v = vaddr(ctxt, p, &p.From, &rel) + if rel.Siz != 0 { + rel.Siz = uint8(op) + r = obj.Addrel(cursym) + *r = rel + r.Off = int32(p.Pc + int64(ab.Len())) + } + + ab.Put1(byte(v)) + if op > 1 { + ab.Put1(byte(v >> 8)) + if op > 2 { + ab.PutInt16(int16(v >> 16)) + if op > 4 { + ab.PutInt32(int32(v >> 32)) + } + } + } + } + + return + } + } + f3t = Ynone * Ymax + if p.GetFrom3() != nil { + f3t = oclass(ctxt, p, p.GetFrom3()) * Ymax + } + for mo := ymovtab; mo[0].as != 0; mo = mo[1:] { + var pp obj.Prog + var t []byte + if p.As == mo[0].as { + if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 { + t = mo[0].op[:] + switch mo[0].code { + default: + ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p) + + case movLit: + for z = 0; t[z] != 0; z++ { + ab.Put1(t[z]) + } + + case movRegMem: + ab.Put1(t[0]) + ab.asmando(ctxt, cursym, p, &p.To, int(t[1])) + + case movMemReg: + ab.Put1(t[0]) + ab.asmando(ctxt, cursym, p, &p.From, int(t[1])) + + case movRegMem2op: // r,m - 2op + ab.Put2(t[0], t[1]) + ab.asmando(ctxt, cursym, p, &p.To, 
int(t[2])) + ab.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40) + + case movMemReg2op: + ab.Put2(t[0], t[1]) + ab.asmando(ctxt, cursym, p, &p.From, int(t[2])) + ab.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40) + + case movFullPtr: + if t[0] != 0 { + ab.Put1(t[0]) + } + switch p.To.Index { + default: + goto bad + + case REG_DS: + ab.Put1(0xc5) + + case REG_SS: + ab.Put2(0x0f, 0xb2) + + case REG_ES: + ab.Put1(0xc4) + + case REG_FS: + ab.Put2(0x0f, 0xb4) + + case REG_GS: + ab.Put2(0x0f, 0xb5) + } + + ab.asmand(ctxt, cursym, p, &p.From, &p.To) + + case movDoubleShift: + if t[0] == Pw { + if ctxt.Arch.Family != sys.AMD64 { + ctxt.Diag("asmins: illegal 64: %v", p) + } + ab.rexflag |= Pw + t = t[1:] + } else if t[0] == Pe { + ab.Put1(Pe) + t = t[1:] + } + + switch p.From.Type { + default: + goto bad + + case obj.TYPE_CONST: + ab.Put2(0x0f, t[0]) + ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0) + ab.Put1(byte(p.From.Offset)) + + case obj.TYPE_REG: + switch p.From.Reg { + default: + goto bad + + case REG_CL, REG_CX: + ab.Put2(0x0f, t[1]) + ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0) + } + } + + // NOTE: The systems listed here are the ones that use the "TLS initial exec" model, + // where you load the TLS base register into a register and then index off that + // register to access the actual TLS variables. Systems that allow direct TLS access + // are handled in prefixof above and should not be listed here. + case movTLSReg: + if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL { + ctxt.Diag("invalid load of TLS: %v", p) + } + + if ctxt.Arch.Family == sys.I386 { + // NOTE: The systems listed here are the ones that use the "TLS initial exec" model, + // where you load the TLS base register into a register and then index off that + // register to access the actual TLS variables. 
Systems that allow direct TLS access + // are handled in prefixof above and should not be listed here. + switch ctxt.Headtype { + default: + log.Fatalf("unknown TLS base location for %v", ctxt.Headtype) + + case objabi.Hlinux, objabi.Hfreebsd: + if ctxt.Flag_shared { + // Note that this is not generating the same insns as the other cases. + // MOV TLS, dst + // becomes + // call __x86.get_pc_thunk.dst + // movl (gotpc + g@gotntpoff)(dst), dst + // which is encoded as + // call __x86.get_pc_thunk.dst + // movq 0(dst), dst + // and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access + // is g, which we can't check here, but will when we assemble the second + // instruction. + dst := p.To.Reg + ab.Put1(0xe8) + r = obj.Addrel(cursym) + r.Off = int32(p.Pc + int64(ab.Len())) + r.Type = objabi.R_CALL + r.Siz = 4 + r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst)))) + ab.PutInt32(0) + + ab.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3))) + r = obj.Addrel(cursym) + r.Off = int32(p.Pc + int64(ab.Len())) + r.Type = objabi.R_TLS_IE + r.Siz = 4 + r.Add = 2 + ab.PutInt32(0) + } else { + // ELF TLS base is 0(GS). + pp.From = p.From + + pp.From.Type = obj.TYPE_MEM + pp.From.Reg = REG_GS + pp.From.Offset = 0 + pp.From.Index = REG_NONE + pp.From.Scale = 0 + ab.Put2(0x65, // GS + 0x8B) + ab.asmand(ctxt, cursym, p, &pp.From, &p.To) + } + case objabi.Hplan9: + pp.From = obj.Addr{} + pp.From.Type = obj.TYPE_MEM + pp.From.Name = obj.NAME_EXTERN + pp.From.Sym = plan9privates + pp.From.Offset = 0 + pp.From.Index = REG_NONE + ab.Put1(0x8B) + ab.asmand(ctxt, cursym, p, &pp.From, &p.To) + + case objabi.Hwindows: + // Windows TLS base is always 0x14(FS). 
+ pp.From = p.From + + pp.From.Type = obj.TYPE_MEM + pp.From.Reg = REG_FS + pp.From.Offset = 0x14 + pp.From.Index = REG_NONE + pp.From.Scale = 0 + ab.Put2(0x64, // FS + 0x8B) + ab.asmand(ctxt, cursym, p, &pp.From, &p.To) + } + break + } + + switch ctxt.Headtype { + default: + log.Fatalf("unknown TLS base location for %v", ctxt.Headtype) + + case objabi.Hlinux, objabi.Hfreebsd: + if !ctxt.Flag_shared { + log.Fatalf("unknown TLS base location for linux/freebsd without -shared") + } + // Note that this is not generating the same insn as the other cases. + // MOV TLS, R_to + // becomes + // movq g@gottpoff(%rip), R_to + // which is encoded as + // movq 0(%rip), R_to + // and a R_TLS_IE reloc. This all assumes the only tls variable we access + // is g, which we can't check here, but will when we assemble the second + // instruction. + ab.rexflag = Pw | (regrex[p.To.Reg] & Rxr) + + ab.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3))) + r = obj.Addrel(cursym) + r.Off = int32(p.Pc + int64(ab.Len())) + r.Type = objabi.R_TLS_IE + r.Siz = 4 + r.Add = -4 + ab.PutInt32(0) + + case objabi.Hplan9: + pp.From = obj.Addr{} + pp.From.Type = obj.TYPE_MEM + pp.From.Name = obj.NAME_EXTERN + pp.From.Sym = plan9privates + pp.From.Offset = 0 + pp.From.Index = REG_NONE + ab.rexflag |= Pw + ab.Put1(0x8B) + ab.asmand(ctxt, cursym, p, &pp.From, &p.To) + + case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c. + // TLS base is 0(FS). + pp.From = p.From + + pp.From.Type = obj.TYPE_MEM + pp.From.Name = obj.NAME_NONE + pp.From.Reg = REG_NONE + pp.From.Offset = 0 + pp.From.Index = REG_NONE + pp.From.Scale = 0 + ab.rexflag |= Pw + ab.Put2(0x64, // FS + 0x8B) + ab.asmand(ctxt, cursym, p, &pp.From, &p.To) + } + } + return + } + } + } + goto bad + +bad: + if ctxt.Arch.Family != sys.AMD64 { + // here, the assembly has failed. 
+ // if it's a byte instruction that has + // unaddressable registers, try to + // exchange registers and reissue the + // instruction with the operands renamed. + pp := *p + + unbytereg(&pp.From, &pp.Ft) + unbytereg(&pp.To, &pp.Tt) + + z := int(p.From.Reg) + if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI { + // TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base. + // For now, different to keep bit-for-bit compatibility. + if ctxt.Arch.Family == sys.I386 { + breg := byteswapreg(ctxt, &p.To) + if breg != REG_AX { + ab.Put1(0x87) // xchg lhs,bx + ab.asmando(ctxt, cursym, p, &p.From, reg[breg]) + subreg(&pp, z, breg) + ab.doasm(ctxt, cursym, &pp) + ab.Put1(0x87) // xchg lhs,bx + ab.asmando(ctxt, cursym, p, &p.From, reg[breg]) + } else { + ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax + subreg(&pp, z, REG_AX) + ab.doasm(ctxt, cursym, &pp) + ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax + } + return + } + + if isax(&p.To) || p.To.Type == obj.TYPE_NONE { + // We certainly don't want to exchange + // with AX if the op is MUL or DIV. + ab.Put1(0x87) // xchg lhs,bx + ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX]) + subreg(&pp, z, REG_BX) + ab.doasm(ctxt, cursym, &pp) + ab.Put1(0x87) // xchg lhs,bx + ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX]) + } else { + ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax + subreg(&pp, z, REG_AX) + ab.doasm(ctxt, cursym, &pp) + ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax + } + return + } + + z = int(p.To.Reg) + if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI { + // TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base. + // For now, different to keep bit-for-bit compatibility. 
+ if ctxt.Arch.Family == sys.I386 { + breg := byteswapreg(ctxt, &p.From) + if breg != REG_AX { + ab.Put1(0x87) //xchg rhs,bx + ab.asmando(ctxt, cursym, p, &p.To, reg[breg]) + subreg(&pp, z, breg) + ab.doasm(ctxt, cursym, &pp) + ab.Put1(0x87) // xchg rhs,bx + ab.asmando(ctxt, cursym, p, &p.To, reg[breg]) + } else { + ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax + subreg(&pp, z, REG_AX) + ab.doasm(ctxt, cursym, &pp) + ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax + } + return + } + + if isax(&p.From) { + ab.Put1(0x87) // xchg rhs,bx + ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX]) + subreg(&pp, z, REG_BX) + ab.doasm(ctxt, cursym, &pp) + ab.Put1(0x87) // xchg rhs,bx + ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX]) + } else { + ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax + subreg(&pp, z, REG_AX) + ab.doasm(ctxt, cursym, &pp) + ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax + } + return + } + } + + ctxt.Diag("%s: invalid instruction: %v", cursym.Name, p) +} + +// byteswapreg returns a byte-addressable register (AX, BX, CX, DX) +// which is not referenced in a. +// If a is empty, it returns BX to account for MULB-like instructions +// that might use DX and AX. 
+func byteswapreg(ctxt *obj.Link, a *obj.Addr) int { + cana, canb, canc, cand := true, true, true, true + if a.Type == obj.TYPE_NONE { + cana, cand = false, false + } + + if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) { + switch a.Reg { + case REG_NONE: + cana, cand = false, false + case REG_AX, REG_AL, REG_AH: + cana = false + case REG_BX, REG_BL, REG_BH: + canb = false + case REG_CX, REG_CL, REG_CH: + canc = false + case REG_DX, REG_DL, REG_DH: + cand = false + } + } + + if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR { + switch a.Index { + case REG_AX: + cana = false + case REG_BX: + canb = false + case REG_CX: + canc = false + case REG_DX: + cand = false + } + } + + switch { + case cana: + return REG_AX + case canb: + return REG_BX + case canc: + return REG_CX + case cand: + return REG_DX + default: + ctxt.Diag("impossible byte register") + ctxt.DiagFlush() + log.Fatalf("bad code") + return 0 + } +} + +func isbadbyte(a *obj.Addr) bool { + return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB) +} + +func (ab *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) { + ab.Reset() + + ab.rexflag = 0 + ab.vexflag = false + ab.evexflag = false + mark := ab.Len() + ab.doasm(ctxt, cursym, p) + if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag { + // as befits the whole approach of the architecture, + // the rex prefix must appear before the first opcode byte + // (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but + // before the 0f opcode escape!), or it might be ignored. + // note that the handbook often misleadingly shows 66/f2/f3 in `opcode'. 
+ if ctxt.Arch.Family != sys.AMD64 { + ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt) + } + n := ab.Len() + var np int + for np = mark; np < n; np++ { + c := ab.At(np) + if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 { + break + } + } + ab.Insert(np, byte(0x40|ab.rexflag)) + } + + n := ab.Len() + for i := len(cursym.R) - 1; i >= 0; i-- { + r := &cursym.R[i] + if int64(r.Off) < p.Pc { + break + } + if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag { + r.Off++ + } + if r.Type == objabi.R_PCREL { + if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL { + // PC-relative addressing is relative to the end of the instruction, + // but the relocations applied by the linker are relative to the end + // of the relocation. Because immediate instruction + // arguments can follow the PC-relative memory reference in the + // instruction encoding, the two may not coincide. In this case, + // adjust addend so that linker can keep relocating relative to the + // end of the relocation. + r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz)) + } else if ctxt.Arch.Family == sys.I386 { + // On 386 PC-relative addressing (for non-call/jmp instructions) + // assumes that the previous instruction loaded the PC of the end + // of that instruction into CX, so the adjustment is relative to + // that. + r.Add += int64(r.Off) - p.Pc + int64(r.Siz) + } + } + if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 { + // On 386, R_GOTPCREL makes the same assumptions as R_PCREL. + r.Add += int64(r.Off) - p.Pc + int64(r.Siz) + } + + } +} + +// unpackOps4 extracts 4 operands from p. +func unpackOps4(p *obj.Prog) (arg0, arg1, arg2, dst *obj.Addr) { + return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.To +} + +// unpackOps5 extracts 5 operands from p. 
func unpackOps5(p *obj.Prog) (arg0, arg1, arg2, arg3, dst *obj.Addr) {
	// The three middle operands are taken from p.RestArgs in order.
	return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.RestArgs[2].Addr, &p.To
}
diff --git a/src/cmd/internal/obj/x86/asm_test.go b/src/cmd/internal/obj/x86/asm_test.go
new file mode 100644
index 0000000..36c8fce
--- /dev/null
+++ b/src/cmd/internal/obj/x86/asm_test.go
@@ -0,0 +1,291 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package x86

import (
	"cmd/internal/obj"
	"cmd/internal/objabi"
	"testing"
)

// oclassTest pairs an operand with the value oclass is expected to
// return for it.
type oclassTest struct {
	arg  *obj.Addr
	want int // Expected oclass return value for a given arg
}

// Filled inside init, because it's easier to do with helper functions.
var (
	oclassTestsAMD64 []*oclassTest
	oclassTests386   []*oclassTest
)

// init runs instinit and fills oclassTestsAMD64 and oclassTests386.
// Each table gets its architecture-specific cases followed by the
// shared cases from oclassTestsCommon.
func init() {
	// Required for tests that access any of
	// opindex/ycover/reg/regrex global tables.
	var ctxt obj.Link
	instinit(&ctxt)

	// Small constructors for the operand kinds used in the tables below.
	regAddr := func(reg int16) *obj.Addr {
		return &obj.Addr{Type: obj.TYPE_REG, Reg: reg}
	}
	immAddr := func(v int64) *obj.Addr {
		return &obj.Addr{Type: obj.TYPE_CONST, Offset: v}
	}
	regListAddr := func(regFrom, regTo int16) *obj.Addr {
		return &obj.Addr{Type: obj.TYPE_REGLIST, Offset: EncodeRegisterRange(regFrom, regTo)}
	}
	memAddr := func(base, index int16) *obj.Addr {
		return &obj.Addr{Type: obj.TYPE_MEM, Reg: base, Index: index}
	}

	// TODO(quasilyte): oclass doesn't return Yxxx for X/Y regs with
	// ID higher than 7. We don't encode such instructions, but this
	// behavior seems inconsistent. It should probably either
	// never check for arch or do it in all cases.

	// Cases appended to both the AMD64 and the 386 table.
	oclassTestsCommon := []*oclassTest{
		{&obj.Addr{Type: obj.TYPE_NONE}, Ynone},
		{&obj.Addr{Type: obj.TYPE_BRANCH}, Ybr},
		{&obj.Addr{Type: obj.TYPE_TEXTSIZE}, Ytextsize},

		{&obj.Addr{Type: obj.TYPE_INDIR, Name: obj.NAME_EXTERN}, Yindir},
		{&obj.Addr{Type: obj.TYPE_INDIR, Name: obj.NAME_GOTREF}, Yindir},

		{&obj.Addr{Type: obj.TYPE_ADDR, Name: obj.NAME_AUTO}, Yiauto},
		{&obj.Addr{Type: obj.TYPE_ADDR, Name: obj.NAME_PARAM}, Yiauto},
		{&obj.Addr{Type: obj.TYPE_ADDR, Name: obj.NAME_EXTERN}, Yiauto},
		{&obj.Addr{Type: obj.TYPE_ADDR, Sym: &obj.LSym{Name: "runtime.duff"}}, Yi32},
		{&obj.Addr{Type: obj.TYPE_ADDR, Offset: 4}, Yu7},
		{&obj.Addr{Type: obj.TYPE_ADDR, Offset: 255}, Yu8},

		{immAddr(0), Yi0},
		{immAddr(1), Yi1},
		{immAddr(2), Yu2},
		{immAddr(3), Yu2},
		{immAddr(4), Yu7},
		{immAddr(86), Yu7},
		{immAddr(127), Yu7},
		{immAddr(128), Yu8},
		{immAddr(200), Yu8},
		{immAddr(255), Yu8},
		{immAddr(-1), Yi8},
		{immAddr(-100), Yi8},
		{immAddr(-128), Yi8},

		{regAddr(REG_AL), Yal},
		{regAddr(REG_AX), Yax},
		{regAddr(REG_DL), Yrb},
		{regAddr(REG_DH), Yrb},
		{regAddr(REG_BH), Yrb},
		{regAddr(REG_CL), Ycl},
		{regAddr(REG_CX), Ycx},
		{regAddr(REG_DX), Yrx},
		{regAddr(REG_BX), Yrx},
		{regAddr(REG_F0), Yf0},
		{regAddr(REG_F3), Yrf},
		{regAddr(REG_F7), Yrf},
		{regAddr(REG_M0), Ymr},
		{regAddr(REG_M3), Ymr},
		{regAddr(REG_M7), Ymr},
		{regAddr(REG_X0), Yxr0},
		{regAddr(REG_X6), Yxr},
		{regAddr(REG_X13), Yxr},
		{regAddr(REG_X20), YxrEvex},
		{regAddr(REG_X31), YxrEvex},
		{regAddr(REG_Y0), Yyr},
		{regAddr(REG_Y6), Yyr},
		{regAddr(REG_Y13), Yyr},
		{regAddr(REG_Y20), YyrEvex},
		{regAddr(REG_Y31), YyrEvex},
		{regAddr(REG_Z0), Yzr},
		{regAddr(REG_Z6), Yzr},
		{regAddr(REG_K0), Yk0},
		{regAddr(REG_K5), Yknot0},
		{regAddr(REG_K7), Yknot0},
		{regAddr(REG_CS), Ycs},
		{regAddr(REG_SS), Yss},
		{regAddr(REG_DS), Yds},
		{regAddr(REG_ES), Yes},
		{regAddr(REG_FS), Yfs},
		{regAddr(REG_GS), Ygs},
		{regAddr(REG_TLS), Ytls},
		{regAddr(REG_GDTR), Ygdtr},
		{regAddr(REG_IDTR), Yidtr},
		{regAddr(REG_LDTR), Yldtr},
		{regAddr(REG_MSW), Ymsw},
		{regAddr(REG_TASK), Ytask},
		{regAddr(REG_CR0), Ycr0},
		{regAddr(REG_CR5), Ycr5},
		{regAddr(REG_CR8), Ycr8},
		{regAddr(REG_DR0), Ydr0},
		{regAddr(REG_DR5), Ydr5},
		{regAddr(REG_DR7), Ydr7},
		{regAddr(REG_TR0), Ytr0},
		{regAddr(REG_TR5), Ytr5},
		{regAddr(REG_TR7), Ytr7},

		{regListAddr(REG_X0, REG_X3), YxrEvexMulti4},
		{regListAddr(REG_X4, REG_X7), YxrEvexMulti4},
		{regListAddr(REG_Y0, REG_Y3), YyrEvexMulti4},
		{regListAddr(REG_Y4, REG_Y7), YyrEvexMulti4},
		{regListAddr(REG_Z0, REG_Z3), YzrMulti4},
		{regListAddr(REG_Z4, REG_Z7), YzrMulti4},

		{memAddr(REG_AL, REG_NONE), Ym},
		{memAddr(REG_AL, REG_SI), Ym},
		{memAddr(REG_SI, REG_CX), Ym},
		{memAddr(REG_DI, REG_X0), Yxvm},
		{memAddr(REG_DI, REG_X7), Yxvm},
		{memAddr(REG_DI, REG_Y0), Yyvm},
		{memAddr(REG_DI, REG_Y7), Yyvm},
		{memAddr(REG_DI, REG_Z0), Yzvm},
		{memAddr(REG_DI, REG_Z7), Yzvm},
	}

	// Cases checked only with the Linkamd64 context.
	oclassTestsAMD64 = []*oclassTest{
		{immAddr(-200), Ys32},
		{immAddr(500), Ys32},
		{immAddr(0x7FFFFFFF), Ys32},
		{immAddr(0x7FFFFFFF + 1), Yi32},
		{immAddr(0xFFFFFFFF), Yi32},
		{immAddr(0xFFFFFFFF + 1), Yi64},

		{regAddr(REG_BPB), Yrb},
		{regAddr(REG_SIB), Yrb},
		{regAddr(REG_DIB), Yrb},
		{regAddr(REG_R8B), Yrb},
		{regAddr(REG_R12B), Yrb},
		{regAddr(REG_R8), Yrl},
		{regAddr(REG_R13), Yrl},
		{regAddr(REG_R15), Yrl},
		{regAddr(REG_SP), Yrl},
		{regAddr(REG_SI), Yrl},
		{regAddr(REG_DI), Yrl},
		{regAddr(REG_Z13), Yzr},
		{regAddr(REG_Z20), Yzr},
		{regAddr(REG_Z31), Yzr},

		{regListAddr(REG_X10, REG_X13), YxrEvexMulti4},
		{regListAddr(REG_X24, REG_X27), YxrEvexMulti4},
		{regListAddr(REG_Y10, REG_Y13), YyrEvexMulti4},
		{regListAddr(REG_Y24, REG_Y27), YyrEvexMulti4},
		{regListAddr(REG_Z10, REG_Z13), YzrMulti4},
		{regListAddr(REG_Z24, REG_Z27), YzrMulti4},

		{memAddr(REG_DI, REG_X20), YxvmEvex},
		{memAddr(REG_DI, REG_X27), YxvmEvex},
		{memAddr(REG_DI, REG_Y20), YyvmEvex},
		{memAddr(REG_DI, REG_Y27), YyvmEvex},
		{memAddr(REG_DI, REG_Z20), Yzvm},
		{memAddr(REG_DI, REG_Z27), Yzvm},
	}

	// Cases checked only with the Link386 context.
	oclassTests386 = []*oclassTest{
		{&obj.Addr{Type: obj.TYPE_ADDR, Name: obj.NAME_EXTERN, Sym: &obj.LSym{}}, Yi32},

		{immAddr(-200), Yi32},

		{regAddr(REG_SP), Yrl32},
		{regAddr(REG_SI), Yrl32},
		{regAddr(REG_DI), Yrl32},
	}

	// Add tests that are arch-independent for all sets.
	oclassTestsAMD64 = append(oclassTestsAMD64, oclassTestsCommon...)
	oclassTests386 = append(oclassTests386, oclassTestsCommon...)
}

// TestOclass checks that oclass returns the expected value for every
// entry of the AMD64 and 386 tables, each run as its own subtest with a
// link context whose Headtype is set to Hlinux.
func TestOclass(t *testing.T) {
	// runTest reports an error for each table entry whose oclass result
	// differs from the expected one; a single zero Prog is shared by all
	// entries.
	runTest := func(t *testing.T, ctxt *obj.Link, tests []*oclassTest) {
		var p obj.Prog
		for _, test := range tests {
			have := oclass(ctxt, &p, test.arg)
			if have != test.want {
				t.Errorf("oclass(%q):\nhave: %d\nwant: %d",
					obj.Dconv(&p, test.arg), have, test.want)
			}
		}
	}

	// TODO(quasilyte): test edge cases for Hsolaris, etc?

	t.Run("linux/AMD64", func(t *testing.T) {
		ctxtAMD64 := obj.Linknew(&Linkamd64)
		ctxtAMD64.Headtype = objabi.Hlinux // See #32028
		runTest(t, ctxtAMD64, oclassTestsAMD64)
	})

	t.Run("linux/386", func(t *testing.T) {
		ctxt386 := obj.Linknew(&Link386)
		ctxt386.Headtype = objabi.Hlinux // See #32028
		runTest(t, ctxt386, oclassTests386)
	})
}

// TestRegisterListEncDec checks that a register pair encoded with
// EncodeRegisterRange is returned unchanged by decodeRegisterRange and
// that rlconv prints the encoded value as the expected string.
func TestRegisterListEncDec(t *testing.T) {
	tests := []struct {
		printed string
		reg0    int16
		reg1    int16
	}{
		{"[R10-R13]", REG_R10, REG_R13},
		{"[X0-AX]", REG_X0, REG_AX},

		{"[X0-X3]", REG_X0, REG_X3},
		{"[X21-X24]", REG_X21, REG_X24},

		{"[Y0-Y3]", REG_Y0, REG_Y3},
		{"[Y21-Y24]", REG_Y21, REG_Y24},

		{"[Z0-Z3]", REG_Z0, REG_Z3},
		{"[Z21-Z24]", REG_Z21, REG_Z24},
	}

	for _, test := range tests {
		// Round-trip the pair through the encoder and decoder.
		enc := EncodeRegisterRange(test.reg0, test.reg1)
		reg0, reg1 := decodeRegisterRange(enc)

		if int16(reg0) != test.reg0 {
			t.Errorf("%s reg0 mismatch: have %d, want %d",
				test.printed, reg0, test.reg0)
		}
		if int16(reg1) != test.reg1 {
			t.Errorf("%s reg1 mismatch: have %d, want %d",
				test.printed, reg1, test.reg1)
		}
		wantPrinted := test.printed
		if rlconv(enc) != wantPrinted {
			t.Errorf("%s string mismatch: have %s, want %s",
				test.printed, rlconv(enc), wantPrinted)
		}
	}
}

// TestRegIndex checks that, within each listed register range, regIndex
// returns the zero-based position of the register counted from the start
// of that range.
func TestRegIndex(t *testing.T) {
	tests := []struct {
		regFrom int
		regTo   int
	}{
		{REG_AL, REG_R15B},
		{REG_AX, REG_R15},
		{REG_M0, REG_M7},
		{REG_K0, REG_K7},
		{REG_X0, REG_X31},
		{REG_Y0, REG_Y31},
		{REG_Z0, REG_Z31},
	}

	for _, test := range tests {
		// index and reg advance together, so index is the expected result
		// for reg.
		for index, reg := 0, test.regFrom; reg <= test.regTo; index, reg = index+1, reg+1 {
			have := regIndex(int16(reg))
			want := index
			if have != want {
				regName := rconv(int(reg))
				t.Errorf("regIndex(%s):\nhave: %d\nwant: %d",
					regName, have, want)
			}
		}
	}
}
diff --git a/src/cmd/internal/obj/x86/avx_optabs.go b/src/cmd/internal/obj/x86/avx_optabs.go
new file mode 100644
index 0000000..b8ff469
--- /dev/null
+++ b/src/cmd/internal/obj/x86/avx_optabs.go
@@ -0,0 +1,4628 @@
// Code generated by x86avxgen. DO NOT EDIT.

package x86

// VEX instructions that come in two forms:
// VTHING xmm2/m128, xmmV, xmm1
// VTHING ymm2/m256, ymmV, ymm1
//
// The opcode array in the corresponding Optab entry
// should contain the (VEX prefixes, opcode byte) pair
// for each of the two forms.
+// For example, the entries for VPXOR are: +// +// VPXOR xmm2/m128, xmmV, xmm1 +// VEX.NDS.128.66.0F.WIG EF /r +// +// VPXOR ymm2/m256, ymmV, ymm1 +// VEX.NDS.256.66.0F.WIG EF /r +// +// Produce this optab entry: +// +// {AVPXOR, yvex_xy3, Pavx, opBytes{vex128|vex66|vex0F|vexWIG, 0xEF, vex256|vex66|vex0F|vexWIG, 0xEF}} +// +// VEX requires at least 2 bytes inside opBytes: +// - VEX prefixes (vex-prefixed constants) +// - Opcode byte +// +// EVEX instructions extend VEX form variety: +// VTHING zmm2/m512, zmmV, zmm1 -- implicit K0 (merging) +// VTHING zmm2/m512, zmmV, K, zmm1 -- explicit K mask (can't use K0) +// +// EVEX requires at least 3 bytes inside opBytes: +// - EVEX prefixes (evex-prefixed constants); similar to VEX +// - Displacement multiplier info (scale / broadcast scale) +// - Opcode byte; similar to VEX +// +// Both VEX and EVEX instructions may have opdigit (opcode extension) byte +// which follows the primary opcode byte. +// Because it can only have value of 0-7, it is written in octal notation. +// +// x86.csv can be very useful for figuring out proper [E]VEX parts. 
+ +var _yandnl = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yml, Yrl, Yrl}}, +} + +var _ybextrl = []ytab{ + {zcase: Zvex_v_rm_r, zoffset: 2, args: argList{Yrl, Yml, Yrl}}, +} + +var _yblsil = []ytab{ + {zcase: Zvex_rm_r_vo, zoffset: 3, args: argList{Yml, Yrl}}, +} + +var _ykaddb = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yk, Yk, Yk}}, +} + +var _ykmovb = []ytab{ + {zcase: Zvex_r_v_rm, zoffset: 2, args: argList{Yk, Ym}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yk, Yrl}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Ykm, Yk}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yrl, Yk}}, +} + +var _yknotb = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yk, Yk}}, +} + +var _ykshiftlb = []ytab{ + {zcase: Zvex_i_rm_r, zoffset: 2, args: argList{Yu8, Yk, Yk}}, +} + +var _yrorxl = []ytab{ + {zcase: Zvex_i_rm_r, zoffset: 0, args: argList{Yu8, Yml, Yrl}}, + {zcase: Zvex_i_rm_r, zoffset: 2, args: argList{Yi8, Yml, Yrl}}, +} + +var _yv4fmaddps = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Ym, YzrMulti4, Yzr}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{Ym, YzrMulti4, Yknot0, Yzr}}, +} + +var _yv4fmaddss = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Ym, YxrEvexMulti4, YxrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{Ym, YxrEvexMulti4, Yknot0, YxrEvex}}, +} + +var _yvaddpd = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yym, Yyr, Yyr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, Yzr, Yzr}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{Yzm, Yzr, Yknot0, Yzr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex, YxrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YxmEvex, YxrEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YyrEvex, YyrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YymEvex, YyrEvex, Yknot0, 
YyrEvex}}, +} + +var _yvaddsd = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr, Yxr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex, YxrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YxmEvex, YxrEvex, Yknot0, YxrEvex}}, +} + +var _yvaddsubpd = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yym, Yyr, Yyr}}, +} + +var _yvaesdec = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yym, Yyr, Yyr}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{YxmEvex, YxrEvex, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{YymEvex, YyrEvex, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{Yzm, Yzr, Yzr}}, +} + +var _yvaesimc = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr}}, +} + +var _yvaeskeygenassist = []ytab{ + {zcase: Zvex_i_rm_r, zoffset: 0, args: argList{Yu8, Yxm, Yxr}}, + {zcase: Zvex_i_rm_r, zoffset: 2, args: argList{Yi8, Yxm, Yxr}}, +} + +var _yvalignd = []ytab{ + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, YxmEvex, YxrEvex, YxrEvex}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, YxmEvex, YxrEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, YymEvex, YyrEvex, YyrEvex}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, YymEvex, YyrEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, Yzm, Yzr, Yzr}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, Yzm, Yzr, Yknot0, Yzr}}, +} + +var _yvandnpd = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yym, Yyr, Yyr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex, YxrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YxmEvex, YxrEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, 
zoffset: 0, args: argList{YymEvex, YyrEvex, YyrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YymEvex, YyrEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, Yzr, Yzr}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{Yzm, Yzr, Yknot0, Yzr}}, +} + +var _yvblendmpd = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex, YxrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YxmEvex, YxrEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YyrEvex, YyrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YymEvex, YyrEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, Yzr, Yzr}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{Yzm, Yzr, Yknot0, Yzr}}, +} + +var _yvblendpd = []ytab{ + {zcase: Zvex_i_rm_v_r, zoffset: 2, args: argList{Yu8, Yxm, Yxr, Yxr}}, + {zcase: Zvex_i_rm_v_r, zoffset: 2, args: argList{Yu8, Yym, Yyr, Yyr}}, +} + +var _yvblendvpd = []ytab{ + {zcase: Zvex_hr_rm_v_r, zoffset: 2, args: argList{Yxr, Yxm, Yxr, Yxr}}, + {zcase: Zvex_hr_rm_v_r, zoffset: 2, args: argList{Yyr, Yym, Yyr, Yyr}}, +} + +var _yvbroadcastf128 = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Ym, Yyr}}, +} + +var _yvbroadcastf32x2 = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, Yzr}}, +} + +var _yvbroadcastf32x4 = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Ym, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{Ym, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Ym, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{Ym, Yknot0, Yzr}}, +} + +var _yvbroadcastf32x8 = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Ym, Yzr}}, + {zcase: 
Zevex_rm_k_r, zoffset: 3, args: argList{Ym, Yknot0, Yzr}}, +} + +var _yvbroadcasti32x2 = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, Yzr}}, +} + +var _yvbroadcastsd = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yyr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, Yzr}}, +} + +var _yvbroadcastss = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yyr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, Yzr}}, +} + +var _yvcmppd = []ytab{ + {zcase: Zvex_i_rm_v_r, zoffset: 2, args: argList{Yu8, Yxm, Yxr, Yxr}}, + {zcase: Zvex_i_rm_v_r, zoffset: 2, args: argList{Yu8, Yym, Yyr, Yyr}}, + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, Yzm, Yzr, Yk}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, Yzm, Yzr, Yknot0, Yk}}, + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, YxmEvex, YxrEvex, Yk}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, YxmEvex, YxrEvex, 
Yknot0, Yk}}, + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, YymEvex, YyrEvex, Yk}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, YymEvex, YyrEvex, Yknot0, Yk}}, +} + +var _yvcmpsd = []ytab{ + {zcase: Zvex_i_rm_v_r, zoffset: 2, args: argList{Yu8, Yxm, Yxr, Yxr}}, + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, YxmEvex, YxrEvex, Yk}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, YxmEvex, YxrEvex, Yknot0, Yk}}, +} + +var _yvcomisd = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{YxmEvex, YxrEvex}}, +} + +var _yvcompresspd = []ytab{ + {zcase: Zevex_r_v_rm, zoffset: 0, args: argList{YxrEvex, YxmEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YxrEvex, Yknot0, YxmEvex}}, + {zcase: Zevex_r_v_rm, zoffset: 0, args: argList{YyrEvex, YymEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YyrEvex, Yknot0, YymEvex}}, + {zcase: Zevex_r_v_rm, zoffset: 0, args: argList{Yzr, Yzm}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{Yzr, Yknot0, Yzm}}, +} + +var _yvcvtdq2pd = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yyr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YymEvex, Yknot0, Yzr}}, +} + +var _yvcvtdq2ps = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yym, Yyr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{Yzm, Yknot0, Yzr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, 
args: argList{YxmEvex, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YymEvex, Yknot0, YyrEvex}}, +} + +var _yvcvtpd2dq = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{Yzm, Yknot0, YyrEvex}}, +} + +var _yvcvtpd2dqx = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YxrEvex}}, +} + +var _yvcvtpd2dqy = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yym, Yxr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YymEvex, Yknot0, YxrEvex}}, +} + +var _yvcvtpd2qq = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{Yzm, Yknot0, Yzr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YymEvex, Yknot0, YyrEvex}}, +} + +var _yvcvtpd2udqx = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YxrEvex}}, +} + +var _yvcvtpd2udqy = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YymEvex, Yknot0, YxrEvex}}, +} + +var _yvcvtph2ps = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yyr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, 
args: argList{YymEvex, Yknot0, Yzr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YyrEvex}}, +} + +var _yvcvtps2ph = []ytab{ + {zcase: Zvex_i_r_rm, zoffset: 0, args: argList{Yu8, Yxr, Yxm}}, + {zcase: Zvex_i_r_rm, zoffset: 2, args: argList{Yi8, Yxr, Yxm}}, + {zcase: Zvex_i_r_rm, zoffset: 0, args: argList{Yu8, Yyr, Yxm}}, + {zcase: Zvex_i_r_rm, zoffset: 2, args: argList{Yi8, Yyr, Yxm}}, + {zcase: Zevex_i_r_rm, zoffset: 0, args: argList{Yu8, Yzr, YymEvex}}, + {zcase: Zevex_i_r_k_rm, zoffset: 3, args: argList{Yu8, Yzr, Yknot0, YymEvex}}, + {zcase: Zevex_i_r_rm, zoffset: 0, args: argList{Yu8, YxrEvex, YxmEvex}}, + {zcase: Zevex_i_r_k_rm, zoffset: 3, args: argList{Yu8, YxrEvex, Yknot0, YxmEvex}}, + {zcase: Zevex_i_r_rm, zoffset: 0, args: argList{Yu8, YyrEvex, YxmEvex}}, + {zcase: Zevex_i_r_k_rm, zoffset: 3, args: argList{Yu8, YyrEvex, Yknot0, YxmEvex}}, +} + +var _yvcvtps2qq = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YymEvex, Yknot0, Yzr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YyrEvex}}, +} + +var _yvcvtsd2si = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yrl}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{YxmEvex, Yrl}}, +} + +var _yvcvtsd2usil = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{YxmEvex, Yrl}}, +} + +var _yvcvtsi2sdl = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yml, Yxr, Yxr}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{Yml, YxrEvex, 
YxrEvex}}, +} + +var _yvcvtudq2pd = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YymEvex, Yknot0, Yzr}}, +} + +var _yvcvtusi2sdl = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{Yml, YxrEvex, YxrEvex}}, +} + +var _yvdppd = []ytab{ + {zcase: Zvex_i_rm_v_r, zoffset: 2, args: argList{Yu8, Yxm, Yxr, Yxr}}, +} + +var _yvexp2pd = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{Yzm, Yknot0, Yzr}}, +} + +var _yvexpandpd = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YymEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{Yzm, Yknot0, Yzr}}, +} + +var _yvextractf128 = []ytab{ + {zcase: Zvex_i_r_rm, zoffset: 0, args: argList{Yu8, Yyr, Yxm}}, + {zcase: Zvex_i_r_rm, zoffset: 2, args: argList{Yi8, Yyr, Yxm}}, +} + +var _yvextractf32x4 = []ytab{ + {zcase: Zevex_i_r_rm, zoffset: 0, args: argList{Yu8, YyrEvex, YxmEvex}}, + {zcase: Zevex_i_r_k_rm, zoffset: 3, args: argList{Yu8, YyrEvex, Yknot0, YxmEvex}}, + {zcase: Zevex_i_r_rm, zoffset: 0, args: argList{Yu8, Yzr, YxmEvex}}, + {zcase: Zevex_i_r_k_rm, zoffset: 3, args: argList{Yu8, Yzr, Yknot0, YxmEvex}}, +} + +var _yvextractf32x8 = []ytab{ + {zcase: Zevex_i_r_rm, zoffset: 0, args: argList{Yu8, Yzr, YymEvex}}, + {zcase: Zevex_i_r_k_rm, zoffset: 3, args: argList{Yu8, Yzr, 
Yknot0, YymEvex}}, +} + +var _yvextractps = []ytab{ + {zcase: Zvex_i_r_rm, zoffset: 0, args: argList{Yu8, Yxr, Yml}}, + {zcase: Zvex_i_r_rm, zoffset: 2, args: argList{Yi8, Yxr, Yml}}, + {zcase: Zevex_i_r_rm, zoffset: 3, args: argList{Yu8, YxrEvex, Yml}}, +} + +var _yvfixupimmpd = []ytab{ + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, Yzm, Yzr, Yzr}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, Yzm, Yzr, Yknot0, Yzr}}, + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, YxmEvex, YxrEvex, YxrEvex}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, YxmEvex, YxrEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, YymEvex, YyrEvex, YyrEvex}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, YymEvex, YyrEvex, Yknot0, YyrEvex}}, +} + +var _yvfixupimmsd = []ytab{ + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, YxmEvex, YxrEvex, YxrEvex}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, YxmEvex, YxrEvex, Yknot0, YxrEvex}}, +} + +var _yvfpclasspdx = []ytab{ + {zcase: Zevex_i_rm_r, zoffset: 0, args: argList{Yu8, YxmEvex, Yk}}, + {zcase: Zevex_i_rm_k_r, zoffset: 3, args: argList{Yu8, YxmEvex, Yknot0, Yk}}, +} + +var _yvfpclasspdy = []ytab{ + {zcase: Zevex_i_rm_r, zoffset: 0, args: argList{Yu8, YymEvex, Yk}}, + {zcase: Zevex_i_rm_k_r, zoffset: 3, args: argList{Yu8, YymEvex, Yknot0, Yk}}, +} + +var _yvfpclasspdz = []ytab{ + {zcase: Zevex_i_rm_r, zoffset: 0, args: argList{Yu8, Yzm, Yk}}, + {zcase: Zevex_i_rm_k_r, zoffset: 3, args: argList{Yu8, Yzm, Yknot0, Yk}}, +} + +var _yvgatherdpd = []ytab{ + {zcase: Zvex_v_rm_r, zoffset: 2, args: argList{Yxr, Yxvm, Yxr}}, + {zcase: Zvex_v_rm_r, zoffset: 2, args: argList{Yyr, Yxvm, Yyr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxvmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxvmEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YyvmEvex, Yknot0, Yzr}}, +} + +var _yvgatherdps = 
[]ytab{ + {zcase: Zvex_v_rm_r, zoffset: 2, args: argList{Yxr, Yxvm, Yxr}}, + {zcase: Zvex_v_rm_r, zoffset: 2, args: argList{Yyr, Yyvm, Yyr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxvmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YyvmEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{Yzvm, Yknot0, Yzr}}, +} + +var _yvgatherpf0dpd = []ytab{ + {zcase: Zevex_k_rmo, zoffset: 4, args: argList{Yknot0, YyvmEvex}}, +} + +var _yvgatherpf0dps = []ytab{ + {zcase: Zevex_k_rmo, zoffset: 4, args: argList{Yknot0, Yzvm}}, +} + +var _yvgatherqps = []ytab{ + {zcase: Zvex_v_rm_r, zoffset: 2, args: argList{Yxr, Yxvm, Yxr}}, + {zcase: Zvex_v_rm_r, zoffset: 2, args: argList{Yxr, Yyvm, Yxr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxvmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YyvmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{Yzvm, Yknot0, YyrEvex}}, +} + +var _yvgetexpsd = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex, YxrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YxmEvex, YxrEvex, Yknot0, YxrEvex}}, +} + +var _yvgetmantpd = []ytab{ + {zcase: Zevex_i_rm_r, zoffset: 0, args: argList{Yu8, Yzm, Yzr}}, + {zcase: Zevex_i_rm_k_r, zoffset: 3, args: argList{Yu8, Yzm, Yknot0, Yzr}}, + {zcase: Zevex_i_rm_r, zoffset: 0, args: argList{Yu8, YxmEvex, YxrEvex}}, + {zcase: Zevex_i_rm_k_r, zoffset: 3, args: argList{Yu8, YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_i_rm_r, zoffset: 0, args: argList{Yu8, YymEvex, YyrEvex}}, + {zcase: Zevex_i_rm_k_r, zoffset: 3, args: argList{Yu8, YymEvex, Yknot0, YyrEvex}}, +} + +var _yvgf2p8affineinvqb = []ytab{ + {zcase: Zvex_i_rm_v_r, zoffset: 2, args: argList{Yu8, Yxm, Yxr, Yxr}}, + {zcase: Zvex_i_rm_v_r, zoffset: 2, args: argList{Yu8, Yym, Yyr, Yyr}}, + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, YxmEvex, YxrEvex, YxrEvex}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: 
argList{Yu8, YxmEvex, YxrEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, YymEvex, YyrEvex, YyrEvex}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, YymEvex, YyrEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, Yzm, Yzr, Yzr}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, Yzm, Yzr, Yknot0, Yzr}}, +} + +var _yvinsertf128 = []ytab{ + {zcase: Zvex_i_rm_v_r, zoffset: 2, args: argList{Yu8, Yxm, Yyr, Yyr}}, +} + +var _yvinsertf32x4 = []ytab{ + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, YxmEvex, YyrEvex, YyrEvex}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, YxmEvex, YyrEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, YxmEvex, Yzr, Yzr}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, YxmEvex, Yzr, Yknot0, Yzr}}, +} + +var _yvinsertf32x8 = []ytab{ + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, YymEvex, Yzr, Yzr}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, YymEvex, Yzr, Yknot0, Yzr}}, +} + +var _yvinsertps = []ytab{ + {zcase: Zvex_i_rm_v_r, zoffset: 2, args: argList{Yu8, Yxm, Yxr, Yxr}}, + {zcase: Zevex_i_rm_v_r, zoffset: 3, args: argList{Yu8, YxmEvex, YxrEvex, YxrEvex}}, +} + +var _yvlddqu = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Ym, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Ym, Yyr}}, +} + +var _yvldmxcsr = []ytab{ + {zcase: Zvex_rm_v_ro, zoffset: 3, args: argList{Ym}}, +} + +var _yvmaskmovdqu = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxr, Yxr}}, +} + +var _yvmaskmovpd = []ytab{ + {zcase: Zvex_r_v_rm, zoffset: 2, args: argList{Yxr, Yxr, Ym}}, + {zcase: Zvex_r_v_rm, zoffset: 2, args: argList{Yyr, Yyr, Ym}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Ym, Yxr, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Ym, Yyr, Yyr}}, +} + +var _yvmovapd = []ytab{ + {zcase: Zvex_r_v_rm, zoffset: 2, args: argList{Yxr, Yxm}}, + {zcase: 
Zvex_r_v_rm, zoffset: 2, args: argList{Yyr, Yym}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yym, Yyr}}, + {zcase: Zevex_r_v_rm, zoffset: 0, args: argList{YxrEvex, YxmEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YxrEvex, Yknot0, YxmEvex}}, + {zcase: Zevex_r_v_rm, zoffset: 0, args: argList{YyrEvex, YymEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YyrEvex, Yknot0, YymEvex}}, + {zcase: Zevex_r_v_rm, zoffset: 0, args: argList{Yzr, Yzm}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{Yzr, Yknot0, Yzm}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YymEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{Yzm, Yknot0, Yzr}}, +} + +var _yvmovd = []ytab{ + {zcase: Zvex_r_v_rm, zoffset: 2, args: argList{Yxr, Yml}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yml, Yxr}}, + {zcase: Zevex_r_v_rm, zoffset: 3, args: argList{YxrEvex, Yml}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{Yml, YxrEvex}}, +} + +var _yvmovddup = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yym, Yyr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YymEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{Yzm, Yknot0, Yzr}}, +} + +var _yvmovdqa = []ytab{ + {zcase: Zvex_r_v_rm, zoffset: 2, args: argList{Yxr, Yxm}}, + {zcase: 
Zvex_r_v_rm, zoffset: 2, args: argList{Yyr, Yym}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yym, Yyr}}, +} + +var _yvmovdqa32 = []ytab{ + {zcase: Zevex_r_v_rm, zoffset: 0, args: argList{YxrEvex, YxmEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YxrEvex, Yknot0, YxmEvex}}, + {zcase: Zevex_r_v_rm, zoffset: 0, args: argList{YyrEvex, YymEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YyrEvex, Yknot0, YymEvex}}, + {zcase: Zevex_r_v_rm, zoffset: 0, args: argList{Yzr, Yzm}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{Yzr, Yknot0, Yzm}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YymEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{Yzm, Yknot0, Yzr}}, +} + +var _yvmovhlps = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxr, Yxr, Yxr}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{YxrEvex, YxrEvex, YxrEvex}}, +} + +var _yvmovhpd = []ytab{ + {zcase: Zvex_r_v_rm, zoffset: 2, args: argList{Yxr, Ym}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Ym, Yxr, Yxr}}, + {zcase: Zevex_r_v_rm, zoffset: 3, args: argList{YxrEvex, Ym}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{Ym, YxrEvex, YxrEvex}}, +} + +var _yvmovmskpd = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxr, Yrl}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yyr, Yrl}}, +} + +var _yvmovntdq = []ytab{ + {zcase: Zvex_r_v_rm, zoffset: 2, args: argList{Yxr, Ym}}, + {zcase: Zvex_r_v_rm, zoffset: 2, args: argList{Yyr, Ym}}, + {zcase: Zevex_r_v_rm, zoffset: 3, args: argList{YxrEvex, Ym}}, + {zcase: Zevex_r_v_rm, zoffset: 3, args: argList{YyrEvex, Ym}}, + {zcase: Zevex_r_v_rm, zoffset: 
3, args: argList{Yzr, Ym}}, +} + +var _yvmovntdqa = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Ym, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Ym, Yyr}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{Ym, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{Ym, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{Ym, Yzr}}, +} + +var _yvmovq = []ytab{ + {zcase: Zvex_r_v_rm, zoffset: 2, args: argList{Yxr, Yml}}, + {zcase: Zvex_r_v_rm, zoffset: 2, args: argList{Yxr, Yxm}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yml, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr}}, + {zcase: Zevex_r_v_rm, zoffset: 3, args: argList{YxrEvex, Yml}}, + {zcase: Zevex_r_v_rm, zoffset: 3, args: argList{YxrEvex, YxmEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{Yml, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{YxmEvex, YxrEvex}}, +} + +var _yvmovsd = []ytab{ + {zcase: Zvex_r_v_rm, zoffset: 2, args: argList{Yxr, Yxr, Yxr}}, + {zcase: Zvex_r_v_rm, zoffset: 2, args: argList{Yxr, Ym}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Ym, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxr, Yxr, Yxr}}, + {zcase: Zevex_r_v_rm, zoffset: 0, args: argList{YxrEvex, YxrEvex, YxrEvex}}, + {zcase: Zevex_r_v_k_rm, zoffset: 3, args: argList{YxrEvex, YxrEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_r_v_rm, zoffset: 0, args: argList{YxrEvex, Ym}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YxrEvex, Yknot0, Ym}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Ym, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{Ym, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxrEvex, YxrEvex, YxrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YxrEvex, YxrEvex, Yknot0, YxrEvex}}, +} + +var _yvpbroadcastb = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yyr}}, + {zcase: Zevex_rm_v_r, zoffset: 
0, args: argList{Yrl, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{Yrl, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yrl, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{Yrl, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yrl, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{Yrl, Yknot0, Yzr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, Yzr}}, +} + +var _yvpbroadcastmb2q = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{Yk, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{Yk, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{Yk, Yzr}}, +} + +var _yvpclmulqdq = []ytab{ + {zcase: Zvex_i_rm_v_r, zoffset: 2, args: argList{Yu8, Yxm, Yxr, Yxr}}, + {zcase: Zvex_i_rm_v_r, zoffset: 2, args: argList{Yu8, Yym, Yyr, Yyr}}, + {zcase: Zevex_i_rm_v_r, zoffset: 3, args: argList{Yu8, YxmEvex, YxrEvex, YxrEvex}}, + {zcase: Zevex_i_rm_v_r, zoffset: 3, args: argList{Yu8, YymEvex, YyrEvex, YyrEvex}}, + {zcase: Zevex_i_rm_v_r, zoffset: 3, args: argList{Yu8, Yzm, Yzr, Yzr}}, +} + +var _yvpcmpb = []ytab{ + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, YxmEvex, YxrEvex, Yk}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, YxmEvex, YxrEvex, Yknot0, Yk}}, + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, YymEvex, YyrEvex, Yk}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, YymEvex, YyrEvex, Yknot0, Yk}}, + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, Yzm, Yzr, Yk}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, Yzm, Yzr, Yknot0, Yk}}, 
+} + +var _yvpcmpeqb = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yym, Yyr, Yyr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex, Yk}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YxmEvex, YxrEvex, Yknot0, Yk}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YyrEvex, Yk}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YymEvex, YyrEvex, Yknot0, Yk}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, Yzr, Yk}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{Yzm, Yzr, Yknot0, Yk}}, +} + +var _yvperm2f128 = []ytab{ + {zcase: Zvex_i_rm_v_r, zoffset: 2, args: argList{Yu8, Yym, Yyr, Yyr}}, +} + +var _yvpermd = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yym, Yyr, Yyr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YyrEvex, YyrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YymEvex, YyrEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, Yzr, Yzr}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{Yzm, Yzr, Yknot0, Yzr}}, +} + +var _yvpermilpd = []ytab{ + {zcase: Zvex_i_rm_r, zoffset: 0, args: argList{Yu8, Yxm, Yxr}}, + {zcase: Zvex_i_rm_r, zoffset: 2, args: argList{Yi8, Yxm, Yxr}}, + {zcase: Zvex_i_rm_r, zoffset: 0, args: argList{Yu8, Yym, Yyr}}, + {zcase: Zvex_i_rm_r, zoffset: 2, args: argList{Yi8, Yym, Yyr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yym, Yyr, Yyr}}, + {zcase: Zevex_i_rm_r, zoffset: 0, args: argList{Yu8, YxmEvex, YxrEvex}}, + {zcase: Zevex_i_rm_k_r, zoffset: 3, args: argList{Yu8, YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_i_rm_r, zoffset: 0, args: argList{Yu8, YymEvex, YyrEvex}}, + {zcase: Zevex_i_rm_k_r, zoffset: 3, args: argList{Yu8, YymEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_i_rm_r, zoffset: 0, args: argList{Yu8, Yzm, Yzr}}, + {zcase: Zevex_i_rm_k_r, zoffset: 3, args: 
argList{Yu8, Yzm, Yknot0, Yzr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex, YxrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YxmEvex, YxrEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YyrEvex, YyrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YymEvex, YyrEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, Yzr, Yzr}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{Yzm, Yzr, Yknot0, Yzr}}, +} + +var _yvpermpd = []ytab{ + {zcase: Zvex_i_rm_r, zoffset: 2, args: argList{Yu8, Yym, Yyr}}, + {zcase: Zevex_i_rm_r, zoffset: 0, args: argList{Yu8, YymEvex, YyrEvex}}, + {zcase: Zevex_i_rm_k_r, zoffset: 3, args: argList{Yu8, YymEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_i_rm_r, zoffset: 0, args: argList{Yu8, Yzm, Yzr}}, + {zcase: Zevex_i_rm_k_r, zoffset: 3, args: argList{Yu8, Yzm, Yknot0, Yzr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YyrEvex, YyrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YymEvex, YyrEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, Yzr, Yzr}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{Yzm, Yzr, Yknot0, Yzr}}, +} + +var _yvpermq = []ytab{ + {zcase: Zvex_i_rm_r, zoffset: 0, args: argList{Yu8, Yym, Yyr}}, + {zcase: Zvex_i_rm_r, zoffset: 2, args: argList{Yi8, Yym, Yyr}}, + {zcase: Zevex_i_rm_r, zoffset: 0, args: argList{Yu8, YymEvex, YyrEvex}}, + {zcase: Zevex_i_rm_k_r, zoffset: 3, args: argList{Yu8, YymEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_i_rm_r, zoffset: 0, args: argList{Yu8, Yzm, Yzr}}, + {zcase: Zevex_i_rm_k_r, zoffset: 3, args: argList{Yu8, Yzm, Yknot0, Yzr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YyrEvex, YyrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YymEvex, YyrEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, Yzr, Yzr}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{Yzm, Yzr, Yknot0, 
Yzr}}, +} + +var _yvpextrw = []ytab{ + {zcase: Zvex_i_r_rm, zoffset: 0, args: argList{Yu8, Yxr, Yml}}, + {zcase: Zvex_i_r_rm, zoffset: 2, args: argList{Yi8, Yxr, Yml}}, + {zcase: Zvex_i_rm_r, zoffset: 0, args: argList{Yu8, Yxr, Yrl}}, + {zcase: Zvex_i_rm_r, zoffset: 2, args: argList{Yi8, Yxr, Yrl}}, + {zcase: Zevex_i_r_rm, zoffset: 3, args: argList{Yu8, YxrEvex, Yml}}, + {zcase: Zevex_i_rm_r, zoffset: 3, args: argList{Yu8, YxrEvex, Yrl}}, +} + +var _yvpinsrb = []ytab{ + {zcase: Zvex_i_rm_v_r, zoffset: 2, args: argList{Yu8, Yml, Yxr, Yxr}}, + {zcase: Zevex_i_rm_v_r, zoffset: 3, args: argList{Yu8, Yml, YxrEvex, YxrEvex}}, +} + +var _yvpmovb2m = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{YxrEvex, Yk}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{YyrEvex, Yk}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{Yzr, Yk}}, +} + +var _yvpmovdb = []ytab{ + {zcase: Zevex_r_v_rm, zoffset: 0, args: argList{YxrEvex, YxmEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YxrEvex, Yknot0, YxmEvex}}, + {zcase: Zevex_r_v_rm, zoffset: 0, args: argList{YyrEvex, YxmEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YyrEvex, Yknot0, YxmEvex}}, + {zcase: Zevex_r_v_rm, zoffset: 0, args: argList{Yzr, YxmEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{Yzr, Yknot0, YxmEvex}}, +} + +var _yvpmovdw = []ytab{ + {zcase: Zevex_r_v_rm, zoffset: 0, args: argList{YxrEvex, YxmEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YxrEvex, Yknot0, YxmEvex}}, + {zcase: Zevex_r_v_rm, zoffset: 0, args: argList{YyrEvex, YxmEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YyrEvex, Yknot0, YxmEvex}}, + {zcase: Zevex_r_v_rm, zoffset: 0, args: argList{Yzr, YymEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{Yzr, Yknot0, YymEvex}}, +} + +var _yvprold = []ytab{ + {zcase: Zevex_i_rm_vo, zoffset: 0, args: argList{Yu8, YxmEvex, YxrEvex}}, + {zcase: Zevex_i_rm_k_vo, zoffset: 4, args: argList{Yu8, YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_i_rm_vo, 
zoffset: 0, args: argList{Yu8, YymEvex, YyrEvex}}, + {zcase: Zevex_i_rm_k_vo, zoffset: 4, args: argList{Yu8, YymEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_i_rm_vo, zoffset: 0, args: argList{Yu8, Yzm, Yzr}}, + {zcase: Zevex_i_rm_k_vo, zoffset: 4, args: argList{Yu8, Yzm, Yknot0, Yzr}}, +} + +var _yvpscatterdd = []ytab{ + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YxrEvex, Yknot0, YxvmEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YyrEvex, Yknot0, YyvmEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{Yzr, Yknot0, Yzvm}}, +} + +var _yvpscatterdq = []ytab{ + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YxrEvex, Yknot0, YxvmEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YyrEvex, Yknot0, YxvmEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{Yzr, Yknot0, YyvmEvex}}, +} + +var _yvpscatterqd = []ytab{ + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YxrEvex, Yknot0, YxvmEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YxrEvex, Yknot0, YyvmEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YyrEvex, Yknot0, Yzvm}}, +} + +var _yvpshufbitqmb = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex, Yk}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YxmEvex, YxrEvex, Yknot0, Yk}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YyrEvex, Yk}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YymEvex, YyrEvex, Yknot0, Yk}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, Yzr, Yk}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{Yzm, Yzr, Yknot0, Yk}}, +} + +var _yvpshufd = []ytab{ + {zcase: Zvex_i_rm_r, zoffset: 0, args: argList{Yu8, Yxm, Yxr}}, + {zcase: Zvex_i_rm_r, zoffset: 2, args: argList{Yi8, Yxm, Yxr}}, + {zcase: Zvex_i_rm_r, zoffset: 0, args: argList{Yu8, Yym, Yyr}}, + {zcase: Zvex_i_rm_r, zoffset: 2, args: argList{Yi8, Yym, Yyr}}, + {zcase: Zevex_i_rm_r, zoffset: 0, args: argList{Yu8, YxmEvex, YxrEvex}}, + {zcase: Zevex_i_rm_k_r, zoffset: 3, args: argList{Yu8, 
YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_i_rm_r, zoffset: 0, args: argList{Yu8, YymEvex, YyrEvex}}, + {zcase: Zevex_i_rm_k_r, zoffset: 3, args: argList{Yu8, YymEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_i_rm_r, zoffset: 0, args: argList{Yu8, Yzm, Yzr}}, + {zcase: Zevex_i_rm_k_r, zoffset: 3, args: argList{Yu8, Yzm, Yknot0, Yzr}}, +} + +var _yvpslld = []ytab{ + {zcase: Zvex_i_rm_vo, zoffset: 0, args: argList{Yu8, Yxr, Yxr}}, + {zcase: Zvex_i_rm_vo, zoffset: 3, args: argList{Yi8, Yxr, Yxr}}, + {zcase: Zvex_i_rm_vo, zoffset: 0, args: argList{Yu8, Yyr, Yyr}}, + {zcase: Zvex_i_rm_vo, zoffset: 3, args: argList{Yi8, Yyr, Yyr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yyr, Yyr}}, + {zcase: Zevex_i_rm_vo, zoffset: 0, args: argList{Yu8, YxmEvex, YxrEvex}}, + {zcase: Zevex_i_rm_k_vo, zoffset: 4, args: argList{Yu8, YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_i_rm_vo, zoffset: 0, args: argList{Yu8, YymEvex, YyrEvex}}, + {zcase: Zevex_i_rm_k_vo, zoffset: 4, args: argList{Yu8, YymEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_i_rm_vo, zoffset: 0, args: argList{Yu8, Yzm, Yzr}}, + {zcase: Zevex_i_rm_k_vo, zoffset: 4, args: argList{Yu8, Yzm, Yknot0, Yzr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex, YxrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YxmEvex, YxrEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YyrEvex, YyrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YxmEvex, YyrEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, Yzr, Yzr}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YxmEvex, Yzr, Yknot0, Yzr}}, +} + +var _yvpslldq = []ytab{ + {zcase: Zvex_i_rm_vo, zoffset: 0, args: argList{Yu8, Yxr, Yxr}}, + {zcase: Zvex_i_rm_vo, zoffset: 3, args: argList{Yi8, Yxr, Yxr}}, + {zcase: Zvex_i_rm_vo, zoffset: 0, args: argList{Yu8, Yyr, Yyr}}, + {zcase: Zvex_i_rm_vo, zoffset: 3, args: 
argList{Yi8, Yyr, Yyr}}, + {zcase: Zevex_i_rm_vo, zoffset: 4, args: argList{Yu8, YxmEvex, YxrEvex}}, + {zcase: Zevex_i_rm_vo, zoffset: 4, args: argList{Yu8, YymEvex, YyrEvex}}, + {zcase: Zevex_i_rm_vo, zoffset: 4, args: argList{Yu8, Yzm, Yzr}}, +} + +var _yvpsraq = []ytab{ + {zcase: Zevex_i_rm_vo, zoffset: 0, args: argList{Yu8, YxmEvex, YxrEvex}}, + {zcase: Zevex_i_rm_k_vo, zoffset: 4, args: argList{Yu8, YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_i_rm_vo, zoffset: 0, args: argList{Yu8, YymEvex, YyrEvex}}, + {zcase: Zevex_i_rm_k_vo, zoffset: 4, args: argList{Yu8, YymEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_i_rm_vo, zoffset: 0, args: argList{Yu8, Yzm, Yzr}}, + {zcase: Zevex_i_rm_k_vo, zoffset: 4, args: argList{Yu8, Yzm, Yknot0, Yzr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex, YxrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YxmEvex, YxrEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YyrEvex, YyrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YxmEvex, YyrEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, Yzr, Yzr}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YxmEvex, Yzr, Yknot0, Yzr}}, +} + +var _yvptest = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yym, Yyr}}, +} + +var _yvrcpss = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr, Yxr}}, +} + +var _yvroundpd = []ytab{ + {zcase: Zvex_i_rm_r, zoffset: 0, args: argList{Yu8, Yxm, Yxr}}, + {zcase: Zvex_i_rm_r, zoffset: 2, args: argList{Yi8, Yxm, Yxr}}, + {zcase: Zvex_i_rm_r, zoffset: 0, args: argList{Yu8, Yym, Yyr}}, + {zcase: Zvex_i_rm_r, zoffset: 2, args: argList{Yi8, Yym, Yyr}}, +} + +var _yvscalefpd = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, Yzr, Yzr}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{Yzm, Yzr, Yknot0, Yzr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: 
argList{YxmEvex, YxrEvex, YxrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YxmEvex, YxrEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YyrEvex, YyrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YymEvex, YyrEvex, Yknot0, YyrEvex}}, +} + +var _yvshuff32x4 = []ytab{ + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, YymEvex, YyrEvex, YyrEvex}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, YymEvex, YyrEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, Yzm, Yzr, Yzr}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, Yzm, Yzr, Yknot0, Yzr}}, +} + +var _yvzeroall = []ytab{ + {zcase: Zvex, zoffset: 2, args: argList{}}, +} + +var avxOptab = [...]Optab{ + {as: AANDNL, ytab: _yandnl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F38 | vexW0, 0xF2, + }}, + {as: AANDNQ, ytab: _yandnl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F38 | vexW1, 0xF2, + }}, + {as: ABEXTRL, ytab: _ybextrl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F38 | vexW0, 0xF7, + }}, + {as: ABEXTRQ, ytab: _ybextrl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F38 | vexW1, 0xF7, + }}, + {as: ABLSIL, ytab: _yblsil, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F38 | vexW0, 0xF3, 03, + }}, + {as: ABLSIQ, ytab: _yblsil, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F38 | vexW1, 0xF3, 03, + }}, + {as: ABLSMSKL, ytab: _yblsil, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F38 | vexW0, 0xF3, 02, + }}, + {as: ABLSMSKQ, ytab: _yblsil, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F38 | vexW1, 0xF3, 02, + }}, + {as: ABLSRL, ytab: _yblsil, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F38 | vexW0, 0xF3, 01, + }}, + {as: ABLSRQ, ytab: _yblsil, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F38 | vexW1, 0xF3, 01, + }}, + {as: ABZHIL, ytab: _ybextrl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F38 | vexW0, 
0xF5, + }}, + {as: ABZHIQ, ytab: _ybextrl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F38 | vexW1, 0xF5, + }}, + {as: AKADDB, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x4A, + }}, + {as: AKADDD, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F | vexW1, 0x4A, + }}, + {as: AKADDQ, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex0F | vexW1, 0x4A, + }}, + {as: AKADDW, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex0F | vexW0, 0x4A, + }}, + {as: AKANDB, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x41, + }}, + {as: AKANDD, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F | vexW1, 0x41, + }}, + {as: AKANDNB, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x42, + }}, + {as: AKANDND, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F | vexW1, 0x42, + }}, + {as: AKANDNQ, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex0F | vexW1, 0x42, + }}, + {as: AKANDNW, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex0F | vexW0, 0x42, + }}, + {as: AKANDQ, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex0F | vexW1, 0x41, + }}, + {as: AKANDW, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex0F | vexW0, 0x41, + }}, + {as: AKMOVB, ytab: _ykmovb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x91, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x93, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x90, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x92, + }}, + {as: AKMOVD, ytab: _ykmovb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW1, 0x91, + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x93, + avxEscape | vex128 | vex66 | vex0F | vexW1, 0x90, + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x92, + }}, 
+ {as: AKMOVQ, ytab: _ykmovb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW1, 0x91, + avxEscape | vex128 | vexF2 | vex0F | vexW1, 0x93, + avxEscape | vex128 | vex0F | vexW1, 0x90, + avxEscape | vex128 | vexF2 | vex0F | vexW1, 0x92, + }}, + {as: AKMOVW, ytab: _ykmovb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x91, + avxEscape | vex128 | vex0F | vexW0, 0x93, + avxEscape | vex128 | vex0F | vexW0, 0x90, + avxEscape | vex128 | vex0F | vexW0, 0x92, + }}, + {as: AKNOTB, ytab: _yknotb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x44, + }}, + {as: AKNOTD, ytab: _yknotb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW1, 0x44, + }}, + {as: AKNOTQ, ytab: _yknotb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW1, 0x44, + }}, + {as: AKNOTW, ytab: _yknotb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x44, + }}, + {as: AKORB, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x45, + }}, + {as: AKORD, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F | vexW1, 0x45, + }}, + {as: AKORQ, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex0F | vexW1, 0x45, + }}, + {as: AKORTESTB, ytab: _yknotb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x98, + }}, + {as: AKORTESTD, ytab: _yknotb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW1, 0x98, + }}, + {as: AKORTESTQ, ytab: _yknotb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW1, 0x98, + }}, + {as: AKORTESTW, ytab: _yknotb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x98, + }}, + {as: AKORW, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex0F | vexW0, 0x45, + }}, + {as: AKSHIFTLB, ytab: _ykshiftlb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x32, + }}, + {as: AKSHIFTLD, ytab: _ykshiftlb, 
prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x33, + }}, + {as: AKSHIFTLQ, ytab: _ykshiftlb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW1, 0x33, + }}, + {as: AKSHIFTLW, ytab: _ykshiftlb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW1, 0x32, + }}, + {as: AKSHIFTRB, ytab: _ykshiftlb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x30, + }}, + {as: AKSHIFTRD, ytab: _ykshiftlb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x31, + }}, + {as: AKSHIFTRQ, ytab: _ykshiftlb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW1, 0x31, + }}, + {as: AKSHIFTRW, ytab: _ykshiftlb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW1, 0x30, + }}, + {as: AKTESTB, ytab: _yknotb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x99, + }}, + {as: AKTESTD, ytab: _yknotb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW1, 0x99, + }}, + {as: AKTESTQ, ytab: _yknotb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW1, 0x99, + }}, + {as: AKTESTW, ytab: _yknotb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x99, + }}, + {as: AKUNPCKBW, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x4B, + }}, + {as: AKUNPCKDQ, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex0F | vexW1, 0x4B, + }}, + {as: AKUNPCKWD, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex0F | vexW0, 0x4B, + }}, + {as: AKXNORB, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x46, + }}, + {as: AKXNORD, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F | vexW1, 0x46, + }}, + {as: AKXNORQ, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex0F | vexW1, 0x46, + }}, + {as: AKXNORW, ytab: _ykaddb, prefix: Pavx, op: 
opBytes{ + avxEscape | vex256 | vex0F | vexW0, 0x46, + }}, + {as: AKXORB, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x47, + }}, + {as: AKXORD, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F | vexW1, 0x47, + }}, + {as: AKXORQ, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex0F | vexW1, 0x47, + }}, + {as: AKXORW, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex0F | vexW0, 0x47, + }}, + {as: AMULXL, ytab: _yandnl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F38 | vexW0, 0xF6, + }}, + {as: AMULXQ, ytab: _yandnl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F38 | vexW1, 0xF6, + }}, + {as: APDEPL, ytab: _yandnl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F38 | vexW0, 0xF5, + }}, + {as: APDEPQ, ytab: _yandnl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F38 | vexW1, 0xF5, + }}, + {as: APEXTL, ytab: _yandnl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F38 | vexW0, 0xF5, + }}, + {as: APEXTQ, ytab: _yandnl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F38 | vexW1, 0xF5, + }}, + {as: ARORXL, ytab: _yrorxl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F3A | vexW0, 0xF0, + }}, + {as: ARORXQ, ytab: _yrorxl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F3A | vexW1, 0xF0, + }}, + {as: ASARXL, ytab: _ybextrl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F38 | vexW0, 0xF7, + }}, + {as: ASARXQ, ytab: _ybextrl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F38 | vexW1, 0xF7, + }}, + {as: ASHLXL, ytab: _ybextrl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xF7, + }}, + {as: ASHLXQ, ytab: _ybextrl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xF7, + }}, + {as: ASHRXL, ytab: _ybextrl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | 
vex0F38 | vexW0, 0xF7, + }}, + {as: ASHRXQ, ytab: _ybextrl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F38 | vexW1, 0xF7, + }}, + {as: AV4FMADDPS, ytab: _yv4fmaddps, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evexF2 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x9A, + }}, + {as: AV4FMADDSS, ytab: _yv4fmaddss, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF2 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x9B, + }}, + {as: AV4FNMADDPS, ytab: _yv4fmaddps, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evexF2 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0xAA, + }}, + {as: AV4FNMADDSS, ytab: _yv4fmaddss, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF2 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0xAB, + }}, + {as: AVADDPD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x58, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x58, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0x58, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x58, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x58, + }}, + {as: AVADDPS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x58, + avxEscape | vex256 | vex0F | vexW0, 0x58, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x58, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x58, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x58, + }}, + {as: AVADDSD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x58, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0x58, + }}, + {as: AVADDSS, ytab: _yvaddsd, prefix: Pavx, op: 
opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x58, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0x58, + }}, + {as: AVADDSUBPD, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xD0, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xD0, + }}, + {as: AVADDSUBPS, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0xD0, + avxEscape | vex256 | vexF2 | vex0F | vexW0, 0xD0, + }}, + {as: AVAESDEC, ytab: _yvaesdec, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xDE, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0xDE, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16, 0xDE, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32, 0xDE, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64, 0xDE, + }}, + {as: AVAESDECLAST, ytab: _yvaesdec, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xDF, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0xDF, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16, 0xDF, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32, 0xDF, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64, 0xDF, + }}, + {as: AVAESENC, ytab: _yvaesdec, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xDC, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0xDC, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16, 0xDC, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32, 0xDC, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64, 0xDC, + }}, + {as: AVAESENCLAST, ytab: _yvaesdec, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xDD, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0xDD, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16, 0xDD, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32, 0xDD, + avxEscape | evex512 | evex66 | 
evex0F38 | evexW0, evexN64, 0xDD, + }}, + {as: AVAESIMC, ytab: _yvaesimc, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xDB, + }}, + {as: AVAESKEYGENASSIST, ytab: _yvaeskeygenassist, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0xDF, + }}, + {as: AVALIGND, ytab: _yvalignd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x03, + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x03, + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x03, + }}, + {as: AVALIGNQ, ytab: _yvalignd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x03, + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x03, + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x03, + }}, + {as: AVANDNPD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x55, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x55, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x55, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x55, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x55, + }}, + {as: AVANDNPS, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x55, + avxEscape | vex256 | vex0F | vexW0, 0x55, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x55, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x55, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x55, + }}, + {as: AVANDPD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + 
avxEscape | vex128 | vex66 | vex0F | vexW0, 0x54, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x54, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x54, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x54, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x54, + }}, + {as: AVANDPS, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x54, + avxEscape | vex256 | vex0F | vexW0, 0x54, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x54, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x54, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x54, + }}, + {as: AVBLENDMPD, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x65, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x65, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x65, + }}, + {as: AVBLENDMPS, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x65, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x65, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x65, + }}, + {as: AVBLENDPD, ytab: _yvblendpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x0D, + avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x0D, + }}, + {as: AVBLENDPS, ytab: _yvblendpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x0C, + avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x0C, + }}, + {as: AVBLENDVPD, ytab: _yvblendvpd, prefix: Pavx, op: opBytes{ + 
avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x4B, + avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x4B, + }}, + {as: AVBLENDVPS, ytab: _yvblendvpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x4A, + avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x4A, + }}, + {as: AVBROADCASTF128, ytab: _yvbroadcastf128, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x1A, + }}, + {as: AVBROADCASTF32X2, ytab: _yvbroadcastf32x2, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x19, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x19, + }}, + {as: AVBROADCASTF32X4, ytab: _yvbroadcastf32x4, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x1A, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x1A, + }}, + {as: AVBROADCASTF32X8, ytab: _yvbroadcastf32x8, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x1B, + }}, + {as: AVBROADCASTF64X2, ytab: _yvbroadcastf32x4, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN16 | evexZeroingEnabled, 0x1A, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN16 | evexZeroingEnabled, 0x1A, + }}, + {as: AVBROADCASTF64X4, ytab: _yvbroadcastf32x8, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN32 | evexZeroingEnabled, 0x1B, + }}, + {as: AVBROADCASTI128, ytab: _yvbroadcastf128, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x5A, + }}, + {as: AVBROADCASTI32X2, ytab: _yvbroadcasti32x2, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x59, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x59, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN8 | 
evexZeroingEnabled, 0x59, + }}, + {as: AVBROADCASTI32X4, ytab: _yvbroadcastf32x4, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x5A, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x5A, + }}, + {as: AVBROADCASTI32X8, ytab: _yvbroadcastf32x8, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x5B, + }}, + {as: AVBROADCASTI64X2, ytab: _yvbroadcastf32x4, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN16 | evexZeroingEnabled, 0x5A, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN16 | evexZeroingEnabled, 0x5A, + }}, + {as: AVBROADCASTI64X4, ytab: _yvbroadcastf32x8, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN32 | evexZeroingEnabled, 0x5B, + }}, + {as: AVBROADCASTSD, ytab: _yvbroadcastsd, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x19, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x19, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x19, + }}, + {as: AVBROADCASTSS, ytab: _yvbroadcastss, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x18, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x18, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x18, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x18, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x18, + }}, + {as: AVCMPPD, ytab: _yvcmppd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xC2, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xC2, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexSaeEnabled, 0xC2, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8, 0xC2, + avxEscape | 
evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8, 0xC2, + }}, + {as: AVCMPPS, ytab: _yvcmppd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0xC2, + avxEscape | vex256 | vex0F | vexW0, 0xC2, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexSaeEnabled, 0xC2, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4, 0xC2, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4, 0xC2, + }}, + {as: AVCMPSD, ytab: _yvcmpsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0xC2, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8 | evexSaeEnabled, 0xC2, + }}, + {as: AVCMPSS, ytab: _yvcmpsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0xC2, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN4 | evexSaeEnabled, 0xC2, + }}, + {as: AVCOMISD, ytab: _yvcomisd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x2F, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN8 | evexSaeEnabled, 0x2F, + }}, + {as: AVCOMISS, ytab: _yvcomisd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x2F, + avxEscape | evex128 | evex0F | evexW0, evexN4 | evexSaeEnabled, 0x2F, + }}, + {as: AVCOMPRESSPD, ytab: _yvcompresspd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x8A, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x8A, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x8A, + }}, + {as: AVCOMPRESSPS, ytab: _yvcompresspd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x8A, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x8A, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x8A, + }}, + {as: AVCVTDQ2PD, ytab: _yvcvtdq2pd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F 
| vexW0, 0xE6, + avxEscape | vex256 | vexF3 | vex0F | vexW0, 0xE6, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN8 | evexBcstN4 | evexZeroingEnabled, 0xE6, + avxEscape | evex256 | evexF3 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xE6, + avxEscape | evex512 | evexF3 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xE6, + }}, + {as: AVCVTDQ2PS, ytab: _yvcvtdq2ps, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x5B, + avxEscape | vex256 | vex0F | vexW0, 0x5B, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x5B, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x5B, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x5B, + }}, + {as: AVCVTPD2DQ, ytab: _yvcvtpd2dq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evexF2 | evex0F | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0xE6, + }}, + {as: AVCVTPD2DQX, ytab: _yvcvtpd2dqx, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0xE6, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xE6, + }}, + {as: AVCVTPD2DQY, ytab: _yvcvtpd2dqy, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vexF2 | vex0F | vexW0, 0xE6, + avxEscape | evex256 | evexF2 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xE6, + }}, + {as: AVCVTPD2PS, ytab: _yvcvtpd2dq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0x5A, + }}, + {as: AVCVTPD2PSX, ytab: _yvcvtpd2dqx, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x5A, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x5A, + }}, + {as: AVCVTPD2PSY, ytab: _yvcvtpd2dqy, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x5A, + 
avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x5A, + }}, + {as: AVCVTPD2QQ, ytab: _yvcvtpd2qq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0x7B, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x7B, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x7B, + }}, + {as: AVCVTPD2UDQ, ytab: _yvcvtpd2dq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex0F | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0x79, + }}, + {as: AVCVTPD2UDQX, ytab: _yvcvtpd2udqx, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x79, + }}, + {as: AVCVTPD2UDQY, ytab: _yvcvtpd2udqy, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x79, + }}, + {as: AVCVTPD2UQQ, ytab: _yvcvtpd2qq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0x79, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x79, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x79, + }}, + {as: AVCVTPH2PS, ytab: _yvcvtph2ps, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x13, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x13, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN32 | evexSaeEnabled | evexZeroingEnabled, 0x13, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x13, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x13, + }}, + {as: AVCVTPS2DQ, ytab: _yvcvtdq2ps, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x5B, + avxEscape | vex256 | 
vex66 | vex0F | vexW0, 0x5B, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x5B, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x5B, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x5B, + }}, + {as: AVCVTPS2PD, ytab: _yvcvtph2ps, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x5A, + avxEscape | vex256 | vex0F | vexW0, 0x5A, + avxEscape | evex512 | evex0F | evexW0, evexN32 | evexBcstN4 | evexSaeEnabled | evexZeroingEnabled, 0x5A, + avxEscape | evex128 | evex0F | evexW0, evexN8 | evexBcstN4 | evexZeroingEnabled, 0x5A, + avxEscape | evex256 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x5A, + }}, + {as: AVCVTPS2PH, ytab: _yvcvtps2ph, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x1D, + avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x1D, + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN32 | evexSaeEnabled | evexZeroingEnabled, 0x1D, + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN8 | evexZeroingEnabled, 0x1D, + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN16 | evexZeroingEnabled, 0x1D, + }}, + {as: AVCVTPS2QQ, ytab: _yvcvtps2qq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x7B, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN8 | evexBcstN4 | evexZeroingEnabled, 0x7B, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x7B, + }}, + {as: AVCVTPS2UDQ, ytab: _yvcvtpd2qq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x79, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x79, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 
0x79, + }}, + {as: AVCVTPS2UQQ, ytab: _yvcvtps2qq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x79, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN8 | evexBcstN4 | evexZeroingEnabled, 0x79, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x79, + }}, + {as: AVCVTQQ2PD, ytab: _yvcvtpd2qq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evexF3 | evex0F | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0xE6, + avxEscape | evex128 | evexF3 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xE6, + avxEscape | evex256 | evexF3 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xE6, + }}, + {as: AVCVTQQ2PS, ytab: _yvcvtpd2dq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex0F | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0x5B, + }}, + {as: AVCVTQQ2PSX, ytab: _yvcvtpd2udqx, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x5B, + }}, + {as: AVCVTQQ2PSY, ytab: _yvcvtpd2udqy, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x5B, + }}, + {as: AVCVTSD2SI, ytab: _yvcvtsd2si, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x2D, + avxEscape | evex128 | evexF2 | evex0F | evexW0, evexN8 | evexRoundingEnabled, 0x2D, + }}, + {as: AVCVTSD2SIQ, ytab: _yvcvtsd2si, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW1, 0x2D, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8 | evexRoundingEnabled, 0x2D, + }}, + {as: AVCVTSD2SS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x5A, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0x5A, + }}, + {as: AVCVTSD2USIL, ytab: _yvcvtsd2usil, prefix: 
Pavx, op: opBytes{ + avxEscape | evex128 | evexF2 | evex0F | evexW0, evexN8 | evexRoundingEnabled, 0x79, + }}, + {as: AVCVTSD2USIQ, ytab: _yvcvtsd2usil, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8 | evexRoundingEnabled, 0x79, + }}, + {as: AVCVTSI2SDL, ytab: _yvcvtsi2sdl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x2A, + avxEscape | evex128 | evexF2 | evex0F | evexW0, evexN4, 0x2A, + }}, + {as: AVCVTSI2SDQ, ytab: _yvcvtsi2sdl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW1, 0x2A, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8 | evexRoundingEnabled, 0x2A, + }}, + {as: AVCVTSI2SSL, ytab: _yvcvtsi2sdl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x2A, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN4 | evexRoundingEnabled, 0x2A, + }}, + {as: AVCVTSI2SSQ, ytab: _yvcvtsi2sdl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW1, 0x2A, + avxEscape | evex128 | evexF3 | evex0F | evexW1, evexN8 | evexRoundingEnabled, 0x2A, + }}, + {as: AVCVTSS2SD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x5A, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN4 | evexSaeEnabled | evexZeroingEnabled, 0x5A, + }}, + {as: AVCVTSS2SI, ytab: _yvcvtsd2si, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x2D, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN4 | evexRoundingEnabled, 0x2D, + }}, + {as: AVCVTSS2SIQ, ytab: _yvcvtsd2si, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW1, 0x2D, + avxEscape | evex128 | evexF3 | evex0F | evexW1, evexN4 | evexRoundingEnabled, 0x2D, + }}, + {as: AVCVTSS2USIL, ytab: _yvcvtsd2usil, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN4 | evexRoundingEnabled, 0x79, + }}, + {as: AVCVTSS2USIQ, ytab: _yvcvtsd2usil, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | 
evex0F | evexW1, evexN4 | evexRoundingEnabled, 0x79, + }}, + {as: AVCVTTPD2DQ, ytab: _yvcvtpd2dq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexSaeEnabled | evexZeroingEnabled, 0xE6, + }}, + {as: AVCVTTPD2DQX, ytab: _yvcvtpd2dqx, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xE6, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xE6, + }}, + {as: AVCVTTPD2DQY, ytab: _yvcvtpd2dqy, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xE6, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xE6, + }}, + {as: AVCVTTPD2QQ, ytab: _yvcvtpd2qq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexSaeEnabled | evexZeroingEnabled, 0x7A, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x7A, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x7A, + }}, + {as: AVCVTTPD2UDQ, ytab: _yvcvtpd2dq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex0F | evexW1, evexN64 | evexBcstN8 | evexSaeEnabled | evexZeroingEnabled, 0x78, + }}, + {as: AVCVTTPD2UDQX, ytab: _yvcvtpd2udqx, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x78, + }}, + {as: AVCVTTPD2UDQY, ytab: _yvcvtpd2udqy, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x78, + }}, + {as: AVCVTTPD2UQQ, ytab: _yvcvtpd2qq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexSaeEnabled | evexZeroingEnabled, 0x78, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x78, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x78, + }}, + {as: 
AVCVTTPS2DQ, ytab: _yvcvtdq2ps, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x5B, + avxEscape | vex256 | vexF3 | vex0F | vexW0, 0x5B, + avxEscape | evex512 | evexF3 | evex0F | evexW0, evexN64 | evexBcstN4 | evexSaeEnabled | evexZeroingEnabled, 0x5B, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x5B, + avxEscape | evex256 | evexF3 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x5B, + }}, + {as: AVCVTTPS2QQ, ytab: _yvcvtps2qq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexSaeEnabled | evexZeroingEnabled, 0x7A, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN8 | evexBcstN4 | evexZeroingEnabled, 0x7A, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x7A, + }}, + {as: AVCVTTPS2UDQ, ytab: _yvcvtpd2qq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexSaeEnabled | evexZeroingEnabled, 0x78, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x78, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x78, + }}, + {as: AVCVTTPS2UQQ, ytab: _yvcvtps2qq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexSaeEnabled | evexZeroingEnabled, 0x78, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN8 | evexBcstN4 | evexZeroingEnabled, 0x78, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x78, + }}, + {as: AVCVTTSD2SI, ytab: _yvcvtsd2si, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x2C, + avxEscape | evex128 | evexF2 | evex0F | evexW0, evexN8 | evexSaeEnabled, 0x2C, + }}, + {as: AVCVTTSD2SIQ, ytab: _yvcvtsd2si, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW1, 0x2C, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8 | 
evexSaeEnabled, 0x2C, + }}, + {as: AVCVTTSD2USIL, ytab: _yvcvtsd2usil, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF2 | evex0F | evexW0, evexN8 | evexSaeEnabled, 0x78, + }}, + {as: AVCVTTSD2USIQ, ytab: _yvcvtsd2usil, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8 | evexSaeEnabled, 0x78, + }}, + {as: AVCVTTSS2SI, ytab: _yvcvtsd2si, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x2C, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN4 | evexSaeEnabled, 0x2C, + }}, + {as: AVCVTTSS2SIQ, ytab: _yvcvtsd2si, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW1, 0x2C, + avxEscape | evex128 | evexF3 | evex0F | evexW1, evexN4 | evexSaeEnabled, 0x2C, + }}, + {as: AVCVTTSS2USIL, ytab: _yvcvtsd2usil, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN4 | evexSaeEnabled, 0x78, + }}, + {as: AVCVTTSS2USIQ, ytab: _yvcvtsd2usil, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F | evexW1, evexN4 | evexSaeEnabled, 0x78, + }}, + {as: AVCVTUDQ2PD, ytab: _yvcvtudq2pd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN8 | evexBcstN4 | evexZeroingEnabled, 0x7A, + avxEscape | evex256 | evexF3 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x7A, + avxEscape | evex512 | evexF3 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x7A, + }}, + {as: AVCVTUDQ2PS, ytab: _yvcvtpd2qq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evexF2 | evex0F | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x7A, + avxEscape | evex128 | evexF2 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x7A, + avxEscape | evex256 | evexF2 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x7A, + }}, + {as: AVCVTUQQ2PD, ytab: _yvcvtpd2qq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evexF3 | evex0F | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | 
evexZeroingEnabled, 0x7A, + avxEscape | evex128 | evexF3 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x7A, + avxEscape | evex256 | evexF3 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x7A, + }}, + {as: AVCVTUQQ2PS, ytab: _yvcvtpd2dq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evexF2 | evex0F | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0x7A, + }}, + {as: AVCVTUQQ2PSX, ytab: _yvcvtpd2udqx, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x7A, + }}, + {as: AVCVTUQQ2PSY, ytab: _yvcvtpd2udqy, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evexF2 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x7A, + }}, + {as: AVCVTUSI2SDL, ytab: _yvcvtusi2sdl, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF2 | evex0F | evexW0, evexN4, 0x7B, + }}, + {as: AVCVTUSI2SDQ, ytab: _yvcvtusi2sdl, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8 | evexRoundingEnabled, 0x7B, + }}, + {as: AVCVTUSI2SSL, ytab: _yvcvtusi2sdl, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN4 | evexRoundingEnabled, 0x7B, + }}, + {as: AVCVTUSI2SSQ, ytab: _yvcvtusi2sdl, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F | evexW1, evexN8 | evexRoundingEnabled, 0x7B, + }}, + {as: AVDBPSADBW, ytab: _yvalignd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16 | evexZeroingEnabled, 0x42, + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32 | evexZeroingEnabled, 0x42, + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64 | evexZeroingEnabled, 0x42, + }}, + {as: AVDIVPD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x5E, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x5E, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | 
evexZeroingEnabled, 0x5E, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x5E, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x5E, + }}, + {as: AVDIVPS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x5E, + avxEscape | vex256 | vex0F | vexW0, 0x5E, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x5E, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x5E, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x5E, + }}, + {as: AVDIVSD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x5E, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0x5E, + }}, + {as: AVDIVSS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x5E, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0x5E, + }}, + {as: AVDPPD, ytab: _yvdppd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x41, + }}, + {as: AVDPPS, ytab: _yvblendpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x40, + avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x40, + }}, + {as: AVEXP2PD, ytab: _yvexp2pd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexSaeEnabled | evexZeroingEnabled, 0xC8, + }}, + {as: AVEXP2PS, ytab: _yvexp2pd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexSaeEnabled | evexZeroingEnabled, 0xC8, + }}, + {as: AVEXPANDPD, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x88, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN8 | 
evexZeroingEnabled, 0x88, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x88, + }}, + {as: AVEXPANDPS, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x88, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x88, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x88, + }}, + {as: AVEXTRACTF128, ytab: _yvextractf128, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x19, + }}, + {as: AVEXTRACTF32X4, ytab: _yvextractf32x4, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN16 | evexZeroingEnabled, 0x19, + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN16 | evexZeroingEnabled, 0x19, + }}, + {as: AVEXTRACTF32X8, ytab: _yvextractf32x8, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN32 | evexZeroingEnabled, 0x1B, + }}, + {as: AVEXTRACTF64X2, ytab: _yvextractf32x4, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN16 | evexZeroingEnabled, 0x19, + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN16 | evexZeroingEnabled, 0x19, + }}, + {as: AVEXTRACTF64X4, ytab: _yvextractf32x8, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN32 | evexZeroingEnabled, 0x1B, + }}, + {as: AVEXTRACTI128, ytab: _yvextractf128, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x39, + }}, + {as: AVEXTRACTI32X4, ytab: _yvextractf32x4, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN16 | evexZeroingEnabled, 0x39, + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN16 | evexZeroingEnabled, 0x39, + }}, + {as: AVEXTRACTI32X8, ytab: _yvextractf32x8, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN32 | evexZeroingEnabled, 0x3B, + }}, + {as: 
AVEXTRACTI64X2, ytab: _yvextractf32x4, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN16 | evexZeroingEnabled, 0x39, + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN16 | evexZeroingEnabled, 0x39, + }}, + {as: AVEXTRACTI64X4, ytab: _yvextractf32x8, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN32 | evexZeroingEnabled, 0x3B, + }}, + {as: AVEXTRACTPS, ytab: _yvextractps, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x17, + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN4, 0x17, + }}, + {as: AVFIXUPIMMPD, ytab: _yvfixupimmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8 | evexSaeEnabled | evexZeroingEnabled, 0x54, + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x54, + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x54, + }}, + {as: AVFIXUPIMMPS, ytab: _yvfixupimmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64 | evexBcstN4 | evexSaeEnabled | evexZeroingEnabled, 0x54, + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x54, + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x54, + }}, + {as: AVFIXUPIMMSD, ytab: _yvfixupimmsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN8 | evexSaeEnabled | evexZeroingEnabled, 0x55, + }}, + {as: AVFIXUPIMMSS, ytab: _yvfixupimmsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN4 | evexSaeEnabled | evexZeroingEnabled, 0x55, + }}, + {as: AVFMADD132PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x98, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0x98, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | 
evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0x98, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x98, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x98, + }}, + {as: AVFMADD132PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x98, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x98, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x98, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x98, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x98, + }}, + {as: AVFMADD132SD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x99, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0x99, + }}, + {as: AVFMADD132SS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x99, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0x99, + }}, + {as: AVFMADD213PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xA8, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0xA8, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0xA8, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xA8, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xA8, + }}, + {as: AVFMADD213PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xA8, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0xA8, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | 
evexRoundingEnabled | evexZeroingEnabled, 0xA8, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xA8, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xA8, + }}, + {as: AVFMADD213SD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xA9, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0xA9, + }}, + {as: AVFMADD213SS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xA9, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0xA9, + }}, + {as: AVFMADD231PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xB8, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0xB8, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0xB8, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xB8, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xB8, + }}, + {as: AVFMADD231PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xB8, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0xB8, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0xB8, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xB8, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xB8, + }}, + {as: AVFMADD231SD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xB9, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0xB9, + }}, + {as: AVFMADD231SS, ytab: _yvaddsd, 
prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xB9, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0xB9, + }}, + {as: AVFMADDSUB132PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x96, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0x96, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0x96, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x96, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x96, + }}, + {as: AVFMADDSUB132PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x96, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x96, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x96, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x96, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x96, + }}, + {as: AVFMADDSUB213PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xA6, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0xA6, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0xA6, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xA6, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xA6, + }}, + {as: AVFMADDSUB213PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xA6, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0xA6, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | 
evexZeroingEnabled, 0xA6, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xA6, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xA6, + }}, + {as: AVFMADDSUB231PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xB6, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0xB6, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0xB6, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xB6, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xB6, + }}, + {as: AVFMADDSUB231PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xB6, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0xB6, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0xB6, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xB6, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xB6, + }}, + {as: AVFMSUB132PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x9A, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0x9A, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0x9A, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x9A, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x9A, + }}, + {as: AVFMSUB132PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x9A, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x9A, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | 
evexRoundingEnabled | evexZeroingEnabled, 0x9A, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x9A, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x9A, + }}, + {as: AVFMSUB132SD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x9B, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0x9B, + }}, + {as: AVFMSUB132SS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x9B, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0x9B, + }}, + {as: AVFMSUB213PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xAA, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0xAA, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0xAA, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xAA, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xAA, + }}, + {as: AVFMSUB213PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xAA, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0xAA, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0xAA, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xAA, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xAA, + }}, + {as: AVFMSUB213SD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xAB, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0xAB, + }}, + {as: AVFMSUB213SS, ytab: _yvaddsd, 
prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xAB, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0xAB, + }}, + {as: AVFMSUB231PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xBA, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0xBA, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0xBA, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xBA, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xBA, + }}, + {as: AVFMSUB231PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xBA, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0xBA, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0xBA, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xBA, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xBA, + }}, + {as: AVFMSUB231SD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xBB, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0xBB, + }}, + {as: AVFMSUB231SS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xBB, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0xBB, + }}, + {as: AVFMSUBADD132PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x97, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0x97, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0x97, + avxEscape | evex128 | evex66 | 
evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x97, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x97, + }}, + {as: AVFMSUBADD132PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x97, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x97, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x97, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x97, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x97, + }}, + {as: AVFMSUBADD213PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xA7, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0xA7, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0xA7, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xA7, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xA7, + }}, + {as: AVFMSUBADD213PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xA7, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0xA7, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0xA7, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xA7, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xA7, + }}, + {as: AVFMSUBADD231PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xB7, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0xB7, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0xB7, + 
avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xB7, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xB7, + }}, + {as: AVFMSUBADD231PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xB7, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0xB7, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0xB7, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xB7, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xB7, + }}, + {as: AVFNMADD132PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x9C, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0x9C, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0x9C, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x9C, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x9C, + }}, + {as: AVFNMADD132PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x9C, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x9C, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x9C, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x9C, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x9C, + }}, + {as: AVFNMADD132SD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x9D, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0x9D, + }}, + {as: AVFNMADD132SS, ytab: _yvaddsd, prefix: 
Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x9D, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0x9D, + }}, + {as: AVFNMADD213PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xAC, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0xAC, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0xAC, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xAC, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xAC, + }}, + {as: AVFNMADD213PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xAC, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0xAC, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0xAC, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xAC, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xAC, + }}, + {as: AVFNMADD213SD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xAD, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0xAD, + }}, + {as: AVFNMADD213SS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xAD, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0xAD, + }}, + {as: AVFNMADD231PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xBC, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0xBC, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0xBC, + avxEscape | evex128 | evex66 | 
evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xBC, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xBC, + }}, + {as: AVFNMADD231PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xBC, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0xBC, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0xBC, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xBC, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xBC, + }}, + {as: AVFNMADD231SD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xBD, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0xBD, + }}, + {as: AVFNMADD231SS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xBD, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0xBD, + }}, + {as: AVFNMSUB132PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x9E, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0x9E, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0x9E, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x9E, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x9E, + }}, + {as: AVFNMSUB132PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x9E, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x9E, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x9E, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, 
evexN16 | evexBcstN4 | evexZeroingEnabled, 0x9E, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x9E, + }}, + {as: AVFNMSUB132SD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x9F, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0x9F, + }}, + {as: AVFNMSUB132SS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x9F, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0x9F, + }}, + {as: AVFNMSUB213PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xAE, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0xAE, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0xAE, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xAE, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xAE, + }}, + {as: AVFNMSUB213PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xAE, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0xAE, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0xAE, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xAE, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xAE, + }}, + {as: AVFNMSUB213SD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xAF, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0xAF, + }}, + {as: AVFNMSUB213SS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xAF, + avxEscape | 
evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0xAF, + }}, + {as: AVFNMSUB231PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xBE, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0xBE, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0xBE, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xBE, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xBE, + }}, + {as: AVFNMSUB231PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xBE, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0xBE, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0xBE, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xBE, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xBE, + }}, + {as: AVFNMSUB231SD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xBF, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0xBF, + }}, + {as: AVFNMSUB231SS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xBF, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0xBF, + }}, + {as: AVFPCLASSPDX, ytab: _yvfpclasspdx, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16 | evexBcstN8, 0x66, + }}, + {as: AVFPCLASSPDY, ytab: _yvfpclasspdy, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8, 0x66, + }}, + {as: AVFPCLASSPDZ, ytab: _yvfpclasspdz, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | 
evexW1, evexN64 | evexBcstN8, 0x66, + }}, + {as: AVFPCLASSPSX, ytab: _yvfpclasspdx, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16 | evexBcstN4, 0x66, + }}, + {as: AVFPCLASSPSY, ytab: _yvfpclasspdy, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32 | evexBcstN4, 0x66, + }}, + {as: AVFPCLASSPSZ, ytab: _yvfpclasspdz, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64 | evexBcstN4, 0x66, + }}, + {as: AVFPCLASSSD, ytab: _yvfpclasspdx, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN8, 0x67, + }}, + {as: AVFPCLASSSS, ytab: _yvfpclasspdx, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN4, 0x67, + }}, + {as: AVGATHERDPD, ytab: _yvgatherdpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x92, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0x92, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8, 0x92, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN8, 0x92, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8, 0x92, + }}, + {as: AVGATHERDPS, ytab: _yvgatherdps, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x92, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x92, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4, 0x92, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN4, 0x92, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4, 0x92, + }}, + {as: AVGATHERPF0DPD, ytab: _yvgatherpf0dpd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8, 0xC6, 01, + }}, + {as: AVGATHERPF0DPS, ytab: _yvgatherpf0dps, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4, 0xC6, 01, + }}, + {as: AVGATHERPF0QPD, ytab: _yvgatherpf0dps, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8, 0xC7, 01, + }}, + {as: 
AVGATHERPF0QPS, ytab: _yvgatherpf0dps, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4, 0xC7, 01, + }}, + {as: AVGATHERPF1DPD, ytab: _yvgatherpf0dpd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8, 0xC6, 02, + }}, + {as: AVGATHERPF1DPS, ytab: _yvgatherpf0dps, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4, 0xC6, 02, + }}, + {as: AVGATHERPF1QPD, ytab: _yvgatherpf0dps, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8, 0xC7, 02, + }}, + {as: AVGATHERPF1QPS, ytab: _yvgatherpf0dps, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4, 0xC7, 02, + }}, + {as: AVGATHERQPD, ytab: _yvgatherdps, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x93, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0x93, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8, 0x93, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN8, 0x93, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8, 0x93, + }}, + {as: AVGATHERQPS, ytab: _yvgatherqps, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x93, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x93, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4, 0x93, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN4, 0x93, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4, 0x93, + }}, + {as: AVGETEXPPD, ytab: _yvcvtpd2qq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexSaeEnabled | evexZeroingEnabled, 0x42, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x42, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x42, + }}, + {as: AVGETEXPPS, ytab: _yvcvtpd2qq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | 
evexW0, evexN64 | evexBcstN4 | evexSaeEnabled | evexZeroingEnabled, 0x42, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x42, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x42, + }}, + {as: AVGETEXPSD, ytab: _yvgetexpsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexSaeEnabled | evexZeroingEnabled, 0x43, + }}, + {as: AVGETEXPSS, ytab: _yvgetexpsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexSaeEnabled | evexZeroingEnabled, 0x43, + }}, + {as: AVGETMANTPD, ytab: _yvgetmantpd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8 | evexSaeEnabled | evexZeroingEnabled, 0x26, + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x26, + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x26, + }}, + {as: AVGETMANTPS, ytab: _yvgetmantpd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64 | evexBcstN4 | evexSaeEnabled | evexZeroingEnabled, 0x26, + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x26, + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x26, + }}, + {as: AVGETMANTSD, ytab: _yvfixupimmsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN8 | evexSaeEnabled | evexZeroingEnabled, 0x27, + }}, + {as: AVGETMANTSS, ytab: _yvfixupimmsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN4 | evexSaeEnabled | evexZeroingEnabled, 0x27, + }}, + {as: AVGF2P8AFFINEINVQB, ytab: _yvgf2p8affineinvqb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW1, 0xCF, + avxEscape | vex256 | vex66 | vex0F3A | vexW1, 0xCF, + avxEscape | evex128 | evex66 | 
evex0F3A | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xCF, + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xCF, + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0xCF, + }}, + {as: AVGF2P8AFFINEQB, ytab: _yvgf2p8affineinvqb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW1, 0xCE, + avxEscape | vex256 | vex66 | vex0F3A | vexW1, 0xCE, + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xCE, + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xCE, + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0xCE, + }}, + {as: AVGF2P8MULB, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xCF, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0xCF, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0xCF, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0xCF, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexZeroingEnabled, 0xCF, + }}, + {as: AVHADDPD, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x7C, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x7C, + }}, + {as: AVHADDPS, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x7C, + avxEscape | vex256 | vexF2 | vex0F | vexW0, 0x7C, + }}, + {as: AVHSUBPD, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x7D, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x7D, + }}, + {as: AVHSUBPS, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x7D, + avxEscape | vex256 | vexF2 | vex0F | vexW0, 0x7D, + }}, + {as: AVINSERTF128, ytab: _yvinsertf128, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | 
vex66 | vex0F3A | vexW0, 0x18, + }}, + {as: AVINSERTF32X4, ytab: _yvinsertf32x4, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN16 | evexZeroingEnabled, 0x18, + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN16 | evexZeroingEnabled, 0x18, + }}, + {as: AVINSERTF32X8, ytab: _yvinsertf32x8, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN32 | evexZeroingEnabled, 0x1A, + }}, + {as: AVINSERTF64X2, ytab: _yvinsertf32x4, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN16 | evexZeroingEnabled, 0x18, + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN16 | evexZeroingEnabled, 0x18, + }}, + {as: AVINSERTF64X4, ytab: _yvinsertf32x8, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN32 | evexZeroingEnabled, 0x1A, + }}, + {as: AVINSERTI128, ytab: _yvinsertf128, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x38, + }}, + {as: AVINSERTI32X4, ytab: _yvinsertf32x4, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN16 | evexZeroingEnabled, 0x38, + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN16 | evexZeroingEnabled, 0x38, + }}, + {as: AVINSERTI32X8, ytab: _yvinsertf32x8, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN32 | evexZeroingEnabled, 0x3A, + }}, + {as: AVINSERTI64X2, ytab: _yvinsertf32x4, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN16 | evexZeroingEnabled, 0x38, + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN16 | evexZeroingEnabled, 0x38, + }}, + {as: AVINSERTI64X4, ytab: _yvinsertf32x8, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN32 | evexZeroingEnabled, 0x3A, + }}, + {as: AVINSERTPS, ytab: _yvinsertps, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x21, + avxEscape | evex128 | evex66 | evex0F3A 
| evexW0, evexN4, 0x21, + }}, + {as: AVLDDQU, ytab: _yvlddqu, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0xF0, + avxEscape | vex256 | vexF2 | vex0F | vexW0, 0xF0, + }}, + {as: AVLDMXCSR, ytab: _yvldmxcsr, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0xAE, 02, + }}, + {as: AVMASKMOVDQU, ytab: _yvmaskmovdqu, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xF7, + }}, + {as: AVMASKMOVPD, ytab: _yvmaskmovpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x2F, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x2F, + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x2D, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x2D, + }}, + {as: AVMASKMOVPS, ytab: _yvmaskmovpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x2E, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x2E, + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x2C, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x2C, + }}, + {as: AVMAXPD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x5F, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x5F, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexSaeEnabled | evexZeroingEnabled, 0x5F, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x5F, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x5F, + }}, + {as: AVMAXPS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x5F, + avxEscape | vex256 | vex0F | vexW0, 0x5F, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexSaeEnabled | evexZeroingEnabled, 0x5F, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x5F, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x5F, + }}, + {as: AVMAXSD, ytab: _yvaddsd, prefix: Pavx, op: 
opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x5F, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8 | evexSaeEnabled | evexZeroingEnabled, 0x5F, + }}, + {as: AVMAXSS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x5F, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN4 | evexSaeEnabled | evexZeroingEnabled, 0x5F, + }}, + {as: AVMINPD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x5D, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x5D, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexSaeEnabled | evexZeroingEnabled, 0x5D, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x5D, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x5D, + }}, + {as: AVMINPS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x5D, + avxEscape | vex256 | vex0F | vexW0, 0x5D, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexSaeEnabled | evexZeroingEnabled, 0x5D, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x5D, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x5D, + }}, + {as: AVMINSD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x5D, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8 | evexSaeEnabled | evexZeroingEnabled, 0x5D, + }}, + {as: AVMINSS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x5D, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN4 | evexSaeEnabled | evexZeroingEnabled, 0x5D, + }}, + {as: AVMOVAPD, ytab: _yvmovapd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x29, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x29, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x28, + avxEscape | vex256 | vex66 | 
vex0F | vexW0, 0x28, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0x29, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexZeroingEnabled, 0x29, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexZeroingEnabled, 0x29, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0x28, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexZeroingEnabled, 0x28, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexZeroingEnabled, 0x28, + }}, + {as: AVMOVAPS, ytab: _yvmovapd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x29, + avxEscape | vex256 | vex0F | vexW0, 0x29, + avxEscape | vex128 | vex0F | vexW0, 0x28, + avxEscape | vex256 | vex0F | vexW0, 0x28, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x29, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x29, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x29, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x28, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x28, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x28, + }}, + {as: AVMOVD, ytab: _yvmovd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x7E, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x6E, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN4, 0x7E, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN4, 0x6E, + }}, + {as: AVMOVDDUP, ytab: _yvmovddup, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x12, + avxEscape | vex256 | vexF2 | vex0F | vexW0, 0x12, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8 | evexZeroingEnabled, 0x12, + avxEscape | evex256 | evexF2 | evex0F | evexW1, evexN32 | evexZeroingEnabled, 0x12, + avxEscape | evex512 | evexF2 | evex0F | evexW1, evexN64 | evexZeroingEnabled, 0x12, + }}, + {as: AVMOVDQA, ytab: 
_yvmovdqa, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x7F, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x7F, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x6F, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x6F, + }}, + {as: AVMOVDQA32, ytab: _yvmovdqa32, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x7F, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x7F, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x7F, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x6F, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x6F, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x6F, + }}, + {as: AVMOVDQA64, ytab: _yvmovdqa32, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0x7F, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexZeroingEnabled, 0x7F, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexZeroingEnabled, 0x7F, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0x6F, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexZeroingEnabled, 0x6F, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexZeroingEnabled, 0x6F, + }}, + {as: AVMOVDQU, ytab: _yvmovdqa, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x7F, + avxEscape | vex256 | vexF3 | vex0F | vexW0, 0x7F, + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x6F, + avxEscape | vex256 | vexF3 | vex0F | vexW0, 0x6F, + }}, + {as: AVMOVDQU16, ytab: _yvmovdqa32, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0x7F, + avxEscape | evex256 | evexF2 | evex0F | evexW1, evexN32 | evexZeroingEnabled, 0x7F, + avxEscape | evex512 | evexF2 | evex0F | evexW1, evexN64 | 
evexZeroingEnabled, 0x7F, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0x6F, + avxEscape | evex256 | evexF2 | evex0F | evexW1, evexN32 | evexZeroingEnabled, 0x6F, + avxEscape | evex512 | evexF2 | evex0F | evexW1, evexN64 | evexZeroingEnabled, 0x6F, + }}, + {as: AVMOVDQU32, ytab: _yvmovdqa32, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x7F, + avxEscape | evex256 | evexF3 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x7F, + avxEscape | evex512 | evexF3 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x7F, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x6F, + avxEscape | evex256 | evexF3 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x6F, + avxEscape | evex512 | evexF3 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x6F, + }}, + {as: AVMOVDQU64, ytab: _yvmovdqa32, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0x7F, + avxEscape | evex256 | evexF3 | evex0F | evexW1, evexN32 | evexZeroingEnabled, 0x7F, + avxEscape | evex512 | evexF3 | evex0F | evexW1, evexN64 | evexZeroingEnabled, 0x7F, + avxEscape | evex128 | evexF3 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0x6F, + avxEscape | evex256 | evexF3 | evex0F | evexW1, evexN32 | evexZeroingEnabled, 0x6F, + avxEscape | evex512 | evexF3 | evex0F | evexW1, evexN64 | evexZeroingEnabled, 0x6F, + }}, + {as: AVMOVDQU8, ytab: _yvmovdqa32, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF2 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x7F, + avxEscape | evex256 | evexF2 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x7F, + avxEscape | evex512 | evexF2 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x7F, + avxEscape | evex128 | evexF2 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x6F, + avxEscape | evex256 | evexF2 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x6F, + avxEscape | evex512 | evexF2 | evex0F | 
evexW0, evexN64 | evexZeroingEnabled, 0x6F, + }}, + {as: AVMOVHLPS, ytab: _yvmovhlps, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x12, + avxEscape | evex128 | evex0F | evexW0, 0, 0x12, + }}, + {as: AVMOVHPD, ytab: _yvmovhpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x17, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x16, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN8, 0x17, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN8, 0x16, + }}, + {as: AVMOVHPS, ytab: _yvmovhpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x17, + avxEscape | vex128 | vex0F | vexW0, 0x16, + avxEscape | evex128 | evex0F | evexW0, evexN8, 0x17, + avxEscape | evex128 | evex0F | evexW0, evexN8, 0x16, + }}, + {as: AVMOVLHPS, ytab: _yvmovhlps, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x16, + avxEscape | evex128 | evex0F | evexW0, 0, 0x16, + }}, + {as: AVMOVLPD, ytab: _yvmovhpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x13, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x12, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN8, 0x13, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN8, 0x12, + }}, + {as: AVMOVLPS, ytab: _yvmovhpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x13, + avxEscape | vex128 | vex0F | vexW0, 0x12, + avxEscape | evex128 | evex0F | evexW0, evexN8, 0x13, + avxEscape | evex128 | evex0F | evexW0, evexN8, 0x12, + }}, + {as: AVMOVMSKPD, ytab: _yvmovmskpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x50, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x50, + }}, + {as: AVMOVMSKPS, ytab: _yvmovmskpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x50, + avxEscape | vex256 | vex0F | vexW0, 0x50, + }}, + {as: AVMOVNTDQ, ytab: _yvmovntdq, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xE7, + avxEscape | vex256 | vex66 | vex0F | vexW0, 
0xE7, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16, 0xE7, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32, 0xE7, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64, 0xE7, + }}, + {as: AVMOVNTDQA, ytab: _yvmovntdqa, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x2A, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x2A, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16, 0x2A, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32, 0x2A, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64, 0x2A, + }}, + {as: AVMOVNTPD, ytab: _yvmovntdq, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x2B, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x2B, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16, 0x2B, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32, 0x2B, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64, 0x2B, + }}, + {as: AVMOVNTPS, ytab: _yvmovntdq, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x2B, + avxEscape | vex256 | vex0F | vexW0, 0x2B, + avxEscape | evex128 | evex0F | evexW0, evexN16, 0x2B, + avxEscape | evex256 | evex0F | evexW0, evexN32, 0x2B, + avxEscape | evex512 | evex0F | evexW0, evexN64, 0x2B, + }}, + {as: AVMOVQ, ytab: _yvmovq, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW1, 0x7E, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xD6, + avxEscape | vex128 | vex66 | vex0F | vexW1, 0x6E, + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x7E, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN8, 0x7E, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN8, 0xD6, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN8, 0x6E, + avxEscape | evex128 | evexF3 | evex0F | evexW1, evexN8, 0x7E, + }}, + {as: AVMOVSD, ytab: _yvmovsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x11, + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x11, + avxEscape | vex128 | vexF2 
| vex0F | vexW0, 0x10, + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x10, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexZeroingEnabled, 0x11, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8, 0x11, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8 | evexZeroingEnabled, 0x10, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexZeroingEnabled, 0x10, + }}, + {as: AVMOVSHDUP, ytab: _yvmovddup, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x16, + avxEscape | vex256 | vexF3 | vex0F | vexW0, 0x16, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x16, + avxEscape | evex256 | evexF3 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x16, + avxEscape | evex512 | evexF3 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x16, + }}, + {as: AVMOVSLDUP, ytab: _yvmovddup, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x12, + avxEscape | vex256 | vexF3 | vex0F | vexW0, 0x12, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x12, + avxEscape | evex256 | evexF3 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x12, + avxEscape | evex512 | evexF3 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x12, + }}, + {as: AVMOVSS, ytab: _yvmovsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x11, + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x11, + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x10, + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x10, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexZeroingEnabled, 0x11, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN4, 0x11, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN4 | evexZeroingEnabled, 0x10, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexZeroingEnabled, 0x10, + }}, + {as: AVMOVUPD, ytab: _yvmovapd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x11, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x11, + avxEscape | 
vex128 | vex66 | vex0F | vexW0, 0x10, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x10, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0x11, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexZeroingEnabled, 0x11, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexZeroingEnabled, 0x11, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0x10, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexZeroingEnabled, 0x10, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexZeroingEnabled, 0x10, + }}, + {as: AVMOVUPS, ytab: _yvmovapd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x11, + avxEscape | vex256 | vex0F | vexW0, 0x11, + avxEscape | vex128 | vex0F | vexW0, 0x10, + avxEscape | vex256 | vex0F | vexW0, 0x10, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x11, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x11, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x11, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x10, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x10, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x10, + }}, + {as: AVMPSADBW, ytab: _yvblendpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x42, + avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x42, + }}, + {as: AVMULPD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x59, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x59, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0x59, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x59, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x59, + }}, + {as: AVMULPS, 
ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x59, + avxEscape | vex256 | vex0F | vexW0, 0x59, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x59, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x59, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x59, + }}, + {as: AVMULSD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x59, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0x59, + }}, + {as: AVMULSS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x59, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0x59, + }}, + {as: AVORPD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x56, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x56, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x56, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x56, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x56, + }}, + {as: AVORPS, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x56, + avxEscape | vex256 | vex0F | vexW0, 0x56, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x56, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x56, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x56, + }}, + {as: AVP4DPWSSD, ytab: _yv4fmaddps, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evexF2 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x52, + }}, + {as: AVP4DPWSSDS, ytab: _yv4fmaddps, prefix: Pavx, op: opBytes{ + 
avxEscape | evex512 | evexF2 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x53, + }}, + {as: AVPABSB, ytab: _yvmovddup, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x1C, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x1C, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x1C, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x1C, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexZeroingEnabled, 0x1C, + }}, + {as: AVPABSD, ytab: _yvmovddup, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x1E, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x1E, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x1E, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x1E, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x1E, + }}, + {as: AVPABSQ, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x1F, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x1F, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x1F, + }}, + {as: AVPABSW, ytab: _yvmovddup, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x1D, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x1D, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x1D, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x1D, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexZeroingEnabled, 0x1D, + }}, + {as: AVPACKSSDW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x6B, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x6B, + avxEscape | 
evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x6B, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x6B, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x6B, + }}, + {as: AVPACKSSWB, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x63, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x63, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x63, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x63, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x63, + }}, + {as: AVPACKUSDW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x2B, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x2B, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x2B, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x2B, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x2B, + }}, + {as: AVPACKUSWB, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x67, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x67, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x67, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x67, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x67, + }}, + {as: AVPADDB, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xFC, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xFC, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xFC, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xFC, + avxEscape | evex512 | evex66 | 
evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xFC, + }}, + {as: AVPADDD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xFE, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xFE, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xFE, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xFE, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0xFE, + }}, + {as: AVPADDQ, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xD4, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xD4, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xD4, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xD4, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0xD4, + }}, + {as: AVPADDSB, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xEC, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xEC, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xEC, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xEC, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xEC, + }}, + {as: AVPADDSW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xED, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xED, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xED, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xED, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xED, + }}, + {as: AVPADDUSB, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xDC, + avxEscape | vex256 | vex66 | 
vex0F | vexW0, 0xDC, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xDC, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xDC, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xDC, + }}, + {as: AVPADDUSW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xDD, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xDD, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xDD, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xDD, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xDD, + }}, + {as: AVPADDW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xFD, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xFD, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xFD, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xFD, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xFD, + }}, + {as: AVPALIGNR, ytab: _yvgf2p8affineinvqb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x0F, + avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x0F, + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16 | evexZeroingEnabled, 0x0F, + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32 | evexZeroingEnabled, 0x0F, + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64 | evexZeroingEnabled, 0x0F, + }}, + {as: AVPAND, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xDB, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xDB, + }}, + {as: AVPANDD, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xDB, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | 
evexZeroingEnabled, 0xDB, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0xDB, + }}, + {as: AVPANDN, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xDF, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xDF, + }}, + {as: AVPANDND, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xDF, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xDF, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0xDF, + }}, + {as: AVPANDNQ, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xDF, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xDF, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0xDF, + }}, + {as: AVPANDQ, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xDB, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xDB, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0xDB, + }}, + {as: AVPAVGB, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xE0, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xE0, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xE0, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xE0, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xE0, + }}, + {as: AVPAVGW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xE3, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xE3, + 
avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xE3, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xE3, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xE3, + }}, + {as: AVPBLENDD, ytab: _yvblendpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x02, + avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x02, + }}, + {as: AVPBLENDMB, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x66, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x66, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexZeroingEnabled, 0x66, + }}, + {as: AVPBLENDMD, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x64, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x64, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x64, + }}, + {as: AVPBLENDMQ, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x64, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x64, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x64, + }}, + {as: AVPBLENDMW, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexZeroingEnabled, 0x66, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexZeroingEnabled, 0x66, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexZeroingEnabled, 0x66, + }}, + {as: AVPBLENDVB, ytab: _yvblendvpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x4C, + avxEscape | vex256 | 
vex66 | vex0F3A | vexW0, 0x4C, + }}, + {as: AVPBLENDW, ytab: _yvblendpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x0E, + avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x0E, + }}, + {as: AVPBROADCASTB, ytab: _yvpbroadcastb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x78, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x78, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexZeroingEnabled, 0x7A, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexZeroingEnabled, 0x7A, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexZeroingEnabled, 0x7A, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN1 | evexZeroingEnabled, 0x78, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN1 | evexZeroingEnabled, 0x78, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN1 | evexZeroingEnabled, 0x78, + }}, + {as: AVPBROADCASTD, ytab: _yvpbroadcastb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x58, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x58, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexZeroingEnabled, 0x7C, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexZeroingEnabled, 0x7C, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexZeroingEnabled, 0x7C, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x58, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x58, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x58, + }}, + {as: AVPBROADCASTMB2Q, ytab: _yvpbroadcastmb2q, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F38 | evexW1, 0, 0x2A, + avxEscape | evex256 | evexF3 | evex0F38 | evexW1, 0, 0x2A, + avxEscape | evex512 | evexF3 | evex0F38 | evexW1, 0, 0x2A, + }}, + {as: AVPBROADCASTMW2D, ytab: _yvpbroadcastmb2q, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F38 | evexW0, 0, 0x3A, + avxEscape | evex256 
| evexF3 | evex0F38 | evexW0, 0, 0x3A, + avxEscape | evex512 | evexF3 | evex0F38 | evexW0, 0, 0x3A, + }}, + {as: AVPBROADCASTQ, ytab: _yvpbroadcastb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x59, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x59, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexZeroingEnabled, 0x7C, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexZeroingEnabled, 0x7C, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexZeroingEnabled, 0x7C, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x59, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x59, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x59, + }}, + {as: AVPBROADCASTW, ytab: _yvpbroadcastb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x79, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x79, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexZeroingEnabled, 0x7B, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexZeroingEnabled, 0x7B, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexZeroingEnabled, 0x7B, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN2 | evexZeroingEnabled, 0x79, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN2 | evexZeroingEnabled, 0x79, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN2 | evexZeroingEnabled, 0x79, + }}, + {as: AVPCLMULQDQ, ytab: _yvpclmulqdq, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x44, + avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x44, + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16, 0x44, + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32, 0x44, + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64, 0x44, + }}, + {as: AVPCMPB, ytab: _yvpcmpb, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16, 0x3F, + avxEscape | evex256 | 
evex66 | evex0F3A | evexW0, evexN32, 0x3F, + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64, 0x3F, + }}, + {as: AVPCMPD, ytab: _yvpcmpb, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16 | evexBcstN4, 0x1F, + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32 | evexBcstN4, 0x1F, + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64 | evexBcstN4, 0x1F, + }}, + {as: AVPCMPEQB, ytab: _yvpcmpeqb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x74, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x74, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16, 0x74, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32, 0x74, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64, 0x74, + }}, + {as: AVPCMPEQD, ytab: _yvpcmpeqb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x76, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x76, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4, 0x76, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4, 0x76, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4, 0x76, + }}, + {as: AVPCMPEQQ, ytab: _yvpcmpeqb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x29, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x29, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8, 0x29, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8, 0x29, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8, 0x29, + }}, + {as: AVPCMPEQW, ytab: _yvpcmpeqb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x75, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x75, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16, 0x75, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32, 0x75, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64, 0x75, + }}, + {as: 
AVPCMPESTRI, ytab: _yvaeskeygenassist, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexWIG, 0x61, + }}, + {as: AVPCMPESTRM, ytab: _yvaeskeygenassist, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexWIG, 0x60, + }}, + {as: AVPCMPGTB, ytab: _yvpcmpeqb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x64, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x64, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16, 0x64, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32, 0x64, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64, 0x64, + }}, + {as: AVPCMPGTD, ytab: _yvpcmpeqb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x66, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x66, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4, 0x66, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4, 0x66, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4, 0x66, + }}, + {as: AVPCMPGTQ, ytab: _yvpcmpeqb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x37, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x37, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8, 0x37, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8, 0x37, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8, 0x37, + }}, + {as: AVPCMPGTW, ytab: _yvpcmpeqb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x65, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x65, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16, 0x65, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32, 0x65, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64, 0x65, + }}, + {as: AVPCMPISTRI, ytab: _yvaeskeygenassist, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexWIG, 0x63, + }}, + {as: AVPCMPISTRM, ytab: 
_yvaeskeygenassist, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x62, + }}, + {as: AVPCMPQ, ytab: _yvpcmpb, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16 | evexBcstN8, 0x1F, + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8, 0x1F, + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8, 0x1F, + }}, + {as: AVPCMPUB, ytab: _yvpcmpb, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16, 0x3E, + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32, 0x3E, + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64, 0x3E, + }}, + {as: AVPCMPUD, ytab: _yvpcmpb, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16 | evexBcstN4, 0x1E, + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32 | evexBcstN4, 0x1E, + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64 | evexBcstN4, 0x1E, + }}, + {as: AVPCMPUQ, ytab: _yvpcmpb, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16 | evexBcstN8, 0x1E, + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8, 0x1E, + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8, 0x1E, + }}, + {as: AVPCMPUW, ytab: _yvpcmpb, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16, 0x3E, + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32, 0x3E, + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64, 0x3E, + }}, + {as: AVPCMPW, ytab: _yvpcmpb, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16, 0x3F, + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32, 0x3F, + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64, 0x3F, + }}, + {as: AVPCOMPRESSB, ytab: _yvcompresspd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN1 | evexZeroingEnabled, 0x63, + avxEscape | 
evex256 | evex66 | evex0F38 | evexW0, evexN1 | evexZeroingEnabled, 0x63, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN1 | evexZeroingEnabled, 0x63, + }}, + {as: AVPCOMPRESSD, ytab: _yvcompresspd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x8B, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x8B, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x8B, + }}, + {as: AVPCOMPRESSQ, ytab: _yvcompresspd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x8B, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x8B, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x8B, + }}, + {as: AVPCOMPRESSW, ytab: _yvcompresspd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN2 | evexZeroingEnabled, 0x63, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN2 | evexZeroingEnabled, 0x63, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN2 | evexZeroingEnabled, 0x63, + }}, + {as: AVPCONFLICTD, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xC4, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xC4, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0xC4, + }}, + {as: AVPCONFLICTQ, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xC4, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xC4, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0xC4, + }}, + {as: AVPDPBUSD, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + 
avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x50, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x50, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x50, + }}, + {as: AVPDPBUSDS, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x51, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x51, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x51, + }}, + {as: AVPDPWSSD, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x52, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x52, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x52, + }}, + {as: AVPDPWSSDS, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x53, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x53, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x53, + }}, + {as: AVPERM2F128, ytab: _yvperm2f128, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x06, + }}, + {as: AVPERM2I128, ytab: _yvperm2f128, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x46, + }}, + {as: AVPERMB, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x8D, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x8D, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | 
evexZeroingEnabled, 0x8D, + }}, + {as: AVPERMD, ytab: _yvpermd, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x36, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x36, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x36, + }}, + {as: AVPERMI2B, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x75, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x75, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexZeroingEnabled, 0x75, + }}, + {as: AVPERMI2D, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x76, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x76, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x76, + }}, + {as: AVPERMI2PD, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x77, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x77, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x77, + }}, + {as: AVPERMI2PS, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x77, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x77, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x77, + }}, + {as: AVPERMI2Q, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x76, + avxEscape | evex256 | 
evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x76, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x76, + }}, + {as: AVPERMI2W, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexZeroingEnabled, 0x75, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexZeroingEnabled, 0x75, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexZeroingEnabled, 0x75, + }}, + {as: AVPERMILPD, ytab: _yvpermilpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x05, + avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x05, + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x0D, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x0D, + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x05, + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x05, + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x05, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x0D, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x0D, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x0D, + }}, + {as: AVPERMILPS, ytab: _yvpermilpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x04, + avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x04, + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x0C, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x0C, + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x04, + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x04, + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x04, + 
avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x0C, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x0C, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x0C, + }}, + {as: AVPERMPD, ytab: _yvpermq, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F3A | vexW1, 0x01, + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x01, + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x01, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x16, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x16, + }}, + {as: AVPERMPS, ytab: _yvpermd, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x16, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x16, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x16, + }}, + {as: AVPERMQ, ytab: _yvpermq, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F3A | vexW1, 0x00, + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x00, + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x00, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x36, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x36, + }}, + {as: AVPERMT2B, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x7D, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x7D, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexZeroingEnabled, 
0x7D, + }}, + {as: AVPERMT2D, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x7E, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x7E, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x7E, + }}, + {as: AVPERMT2PD, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x7F, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x7F, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x7F, + }}, + {as: AVPERMT2PS, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x7F, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x7F, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x7F, + }}, + {as: AVPERMT2Q, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x7E, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x7E, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x7E, + }}, + {as: AVPERMT2W, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexZeroingEnabled, 0x7D, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexZeroingEnabled, 0x7D, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexZeroingEnabled, 0x7D, + }}, + {as: AVPERMW, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexZeroingEnabled, 0x8D, + 
avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexZeroingEnabled, 0x8D, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexZeroingEnabled, 0x8D, + }}, + {as: AVPEXPANDB, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN1 | evexZeroingEnabled, 0x62, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN1 | evexZeroingEnabled, 0x62, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN1 | evexZeroingEnabled, 0x62, + }}, + {as: AVPEXPANDD, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x89, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x89, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x89, + }}, + {as: AVPEXPANDQ, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x89, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x89, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x89, + }}, + {as: AVPEXPANDW, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN2 | evexZeroingEnabled, 0x62, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN2 | evexZeroingEnabled, 0x62, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN2 | evexZeroingEnabled, 0x62, + }}, + {as: AVPEXTRB, ytab: _yvextractps, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x14, + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN1, 0x14, + }}, + {as: AVPEXTRD, ytab: _yvextractps, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x16, + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN4, 0x16, + }}, + {as: AVPEXTRQ, ytab: _yvextractps, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW1, 
0x16, + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN8, 0x16, + }}, + {as: AVPEXTRW, ytab: _yvpextrw, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x15, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xC5, + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN2, 0x15, + avxEscape | evex128 | evex66 | evex0F | evexW0, 0, 0xC5, + }}, + {as: AVPGATHERDD, ytab: _yvgatherdps, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x90, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x90, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4, 0x90, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN4, 0x90, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4, 0x90, + }}, + {as: AVPGATHERDQ, ytab: _yvgatherdpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x90, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0x90, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8, 0x90, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN8, 0x90, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8, 0x90, + }}, + {as: AVPGATHERQD, ytab: _yvgatherqps, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x91, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x91, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4, 0x91, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN4, 0x91, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4, 0x91, + }}, + {as: AVPGATHERQQ, ytab: _yvgatherdps, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x91, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0x91, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8, 0x91, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN8, 0x91, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8, 0x91, + }}, + {as: AVPHADDD, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | 
vexW0, 0x02, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x02, + }}, + {as: AVPHADDSW, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x03, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x03, + }}, + {as: AVPHADDW, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x01, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x01, + }}, + {as: AVPHMINPOSUW, ytab: _yvaesimc, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x41, + }}, + {as: AVPHSUBD, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x06, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x06, + }}, + {as: AVPHSUBSW, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x07, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x07, + }}, + {as: AVPHSUBW, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x05, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x05, + }}, + {as: AVPINSRB, ytab: _yvpinsrb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x20, + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN1, 0x20, + }}, + {as: AVPINSRD, ytab: _yvpinsrb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x22, + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN4, 0x22, + }}, + {as: AVPINSRQ, ytab: _yvpinsrb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW1, 0x22, + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN8, 0x22, + }}, + {as: AVPINSRW, ytab: _yvpinsrb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xC4, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN2, 0xC4, + }}, + {as: AVPLZCNTD, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x44, + 
avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x44, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x44, + }}, + {as: AVPLZCNTQ, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x44, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x44, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x44, + }}, + {as: AVPMADD52HUQ, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xB5, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xB5, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0xB5, + }}, + {as: AVPMADD52LUQ, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xB4, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xB4, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0xB4, + }}, + {as: AVPMADDUBSW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x04, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x04, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x04, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x04, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexZeroingEnabled, 0x04, + }}, + {as: AVPMADDWD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xF5, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xF5, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | 
evexZeroingEnabled, 0xF5, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xF5, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xF5, + }}, + {as: AVPMASKMOVD, ytab: _yvmaskmovpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x8E, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x8E, + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x8C, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x8C, + }}, + {as: AVPMASKMOVQ, ytab: _yvmaskmovpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x8E, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0x8E, + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x8C, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0x8C, + }}, + {as: AVPMAXSB, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x3C, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x3C, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x3C, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x3C, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexZeroingEnabled, 0x3C, + }}, + {as: AVPMAXSD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x3D, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x3D, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x3D, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x3D, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x3D, + }}, + {as: AVPMAXSQ, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x3D, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x3D, + avxEscape | evex512 | evex66 | 
evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x3D, + }}, + {as: AVPMAXSW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xEE, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xEE, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xEE, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xEE, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xEE, + }}, + {as: AVPMAXUB, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xDE, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xDE, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xDE, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xDE, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xDE, + }}, + {as: AVPMAXUD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x3F, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x3F, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x3F, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x3F, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x3F, + }}, + {as: AVPMAXUQ, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x3F, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x3F, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x3F, + }}, + {as: AVPMAXUW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x3E, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x3E, + avxEscape | evex128 | evex66 | evex0F38 | 
evexW0, evexN16 | evexZeroingEnabled, 0x3E, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x3E, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexZeroingEnabled, 0x3E, + }}, + {as: AVPMINSB, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x38, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x38, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x38, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x38, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexZeroingEnabled, 0x38, + }}, + {as: AVPMINSD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x39, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x39, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x39, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x39, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x39, + }}, + {as: AVPMINSQ, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x39, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x39, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x39, + }}, + {as: AVPMINSW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xEA, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xEA, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xEA, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xEA, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xEA, + }}, + {as: AVPMINUB, ytab: _yvandnpd, prefix: Pavx, op: 
opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xDA, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xDA, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xDA, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xDA, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xDA, + }}, + {as: AVPMINUD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x3B, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x3B, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x3B, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x3B, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x3B, + }}, + {as: AVPMINUQ, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x3B, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x3B, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x3B, + }}, + {as: AVPMINUW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x3A, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x3A, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x3A, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x3A, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexZeroingEnabled, 0x3A, + }}, + {as: AVPMOVB2M, ytab: _yvpmovb2m, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F38 | evexW0, 0, 0x29, + avxEscape | evex256 | evexF3 | evex0F38 | evexW0, 0, 0x29, + avxEscape | evex512 | evexF3 | evex0F38 | evexW0, 0, 0x29, + }}, + {as: AVPMOVD2M, ytab: _yvpmovb2m, prefix: Pavx, op: opBytes{ + avxEscape | 
evex128 | evexF3 | evex0F38 | evexW0, 0, 0x39, + avxEscape | evex256 | evexF3 | evex0F38 | evexW0, 0, 0x39, + avxEscape | evex512 | evexF3 | evex0F38 | evexW0, 0, 0x39, + }}, + {as: AVPMOVDB, ytab: _yvpmovdb, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x31, + avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x31, + avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x31, + }}, + {as: AVPMOVDW, ytab: _yvpmovdw, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x33, + avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x33, + avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x33, + }}, + {as: AVPMOVM2B, ytab: _yvpbroadcastmb2q, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F38 | evexW0, 0, 0x28, + avxEscape | evex256 | evexF3 | evex0F38 | evexW0, 0, 0x28, + avxEscape | evex512 | evexF3 | evex0F38 | evexW0, 0, 0x28, + }}, + {as: AVPMOVM2D, ytab: _yvpbroadcastmb2q, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F38 | evexW0, 0, 0x38, + avxEscape | evex256 | evexF3 | evex0F38 | evexW0, 0, 0x38, + avxEscape | evex512 | evexF3 | evex0F38 | evexW0, 0, 0x38, + }}, + {as: AVPMOVM2Q, ytab: _yvpbroadcastmb2q, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F38 | evexW1, 0, 0x38, + avxEscape | evex256 | evexF3 | evex0F38 | evexW1, 0, 0x38, + avxEscape | evex512 | evexF3 | evex0F38 | evexW1, 0, 0x38, + }}, + {as: AVPMOVM2W, ytab: _yvpbroadcastmb2q, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F38 | evexW1, 0, 0x28, + avxEscape | evex256 | evexF3 | evex0F38 | evexW1, 0, 0x28, + avxEscape | evex512 | evexF3 | evex0F38 | evexW1, 0, 0x28, + }}, + {as: AVPMOVMSKB, ytab: _yvmovmskpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xD7, + avxEscape 
| vex256 | vex66 | vex0F | vexW0, 0xD7, + }}, + {as: AVPMOVQ2M, ytab: _yvpmovb2m, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F38 | evexW1, 0, 0x39, + avxEscape | evex256 | evexF3 | evex0F38 | evexW1, 0, 0x39, + avxEscape | evex512 | evexF3 | evex0F38 | evexW1, 0, 0x39, + }}, + {as: AVPMOVQB, ytab: _yvpmovdb, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN2 | evexZeroingEnabled, 0x32, + avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x32, + avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x32, + }}, + {as: AVPMOVQD, ytab: _yvpmovdw, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x35, + avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x35, + avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x35, + }}, + {as: AVPMOVQW, ytab: _yvpmovdb, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x34, + avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x34, + avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x34, + }}, + {as: AVPMOVSDB, ytab: _yvpmovdb, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x21, + avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x21, + avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x21, + }}, + {as: AVPMOVSDW, ytab: _yvpmovdw, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x23, + avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x23, + avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x23, + }}, + {as: AVPMOVSQB, ytab: _yvpmovdb, prefix: Pavx, op: 
opBytes{ + avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN2 | evexZeroingEnabled, 0x22, + avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x22, + avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x22, + }}, + {as: AVPMOVSQD, ytab: _yvpmovdw, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x25, + avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x25, + avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x25, + }}, + {as: AVPMOVSQW, ytab: _yvpmovdb, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x24, + avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x24, + avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x24, + }}, + {as: AVPMOVSWB, ytab: _yvpmovdw, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x20, + avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x20, + avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x20, + }}, + {as: AVPMOVSXBD, ytab: _yvbroadcastss, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x21, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x21, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x21, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x21, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x21, + }}, + {as: AVPMOVSXBQ, ytab: _yvbroadcastss, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x22, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x22, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN2 | evexZeroingEnabled, 0x22, + avxEscape | evex256 | 
evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x22, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x22, + }}, + {as: AVPMOVSXBW, ytab: _yvcvtdq2pd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x20, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x20, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x20, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x20, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x20, + }}, + {as: AVPMOVSXDQ, ytab: _yvcvtdq2pd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x25, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x25, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x25, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x25, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x25, + }}, + {as: AVPMOVSXWD, ytab: _yvcvtdq2pd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x23, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x23, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x23, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x23, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x23, + }}, + {as: AVPMOVSXWQ, ytab: _yvbroadcastss, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x24, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x24, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x24, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x24, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x24, + }}, + {as: AVPMOVUSDB, ytab: _yvpmovdb, prefix: Pavx, op: opBytes{ + 
avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x11, + avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x11, + avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x11, + }}, + {as: AVPMOVUSDW, ytab: _yvpmovdw, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x13, + avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x13, + avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x13, + }}, + {as: AVPMOVUSQB, ytab: _yvpmovdb, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN2 | evexZeroingEnabled, 0x12, + avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x12, + avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x12, + }}, + {as: AVPMOVUSQD, ytab: _yvpmovdw, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x15, + avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x15, + avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x15, + }}, + {as: AVPMOVUSQW, ytab: _yvpmovdb, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x14, + avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x14, + avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x14, + }}, + {as: AVPMOVUSWB, ytab: _yvpmovdw, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x10, + avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x10, + avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x10, + }}, + {as: AVPMOVW2M, ytab: _yvpmovb2m, prefix: Pavx, op: opBytes{ + avxEscape | 
evex128 | evexF3 | evex0F38 | evexW1, 0, 0x29, + avxEscape | evex256 | evexF3 | evex0F38 | evexW1, 0, 0x29, + avxEscape | evex512 | evexF3 | evex0F38 | evexW1, 0, 0x29, + }}, + {as: AVPMOVWB, ytab: _yvpmovdw, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x30, + avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x30, + avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x30, + }}, + {as: AVPMOVZXBD, ytab: _yvbroadcastss, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x31, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x31, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x31, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x31, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x31, + }}, + {as: AVPMOVZXBQ, ytab: _yvbroadcastss, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x32, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x32, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN2 | evexZeroingEnabled, 0x32, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x32, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x32, + }}, + {as: AVPMOVZXBW, ytab: _yvcvtdq2pd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x30, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x30, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x30, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x30, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x30, + }}, + {as: AVPMOVZXDQ, ytab: _yvcvtdq2pd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x35, + avxEscape | vex256 | vex66 | vex0F38 | 
vexW0, 0x35, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x35, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x35, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x35, + }}, + {as: AVPMOVZXWD, ytab: _yvcvtdq2pd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x33, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x33, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x33, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x33, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x33, + }}, + {as: AVPMOVZXWQ, ytab: _yvbroadcastss, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x34, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x34, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x34, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x34, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x34, + }}, + {as: AVPMULDQ, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x28, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x28, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x28, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x28, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x28, + }}, + {as: AVPMULHRSW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x0B, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x0B, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x0B, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x0B, + 
avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexZeroingEnabled, 0x0B, + }}, + {as: AVPMULHUW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xE4, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xE4, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xE4, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xE4, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xE4, + }}, + {as: AVPMULHW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xE5, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xE5, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xE5, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xE5, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xE5, + }}, + {as: AVPMULLD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x40, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x40, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x40, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x40, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x40, + }}, + {as: AVPMULLQ, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x40, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x40, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x40, + }}, + {as: AVPMULLW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xD5, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xD5, + avxEscape | evex128 | evex66 | 
evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xD5, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xD5, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xD5, + }}, + {as: AVPMULTISHIFTQB, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x83, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x83, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x83, + }}, + {as: AVPMULUDQ, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xF4, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xF4, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xF4, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xF4, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0xF4, + }}, + {as: AVPOPCNTB, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x54, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x54, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexZeroingEnabled, 0x54, + }}, + {as: AVPOPCNTD, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x55, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x55, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x55, + }}, + {as: AVPOPCNTQ, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x55, + avxEscape | evex256 | evex66 | evex0F38 | 
evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x55, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x55, + }}, + {as: AVPOPCNTW, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexZeroingEnabled, 0x54, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexZeroingEnabled, 0x54, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexZeroingEnabled, 0x54, + }}, + {as: AVPOR, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xEB, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xEB, + }}, + {as: AVPORD, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xEB, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xEB, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0xEB, + }}, + {as: AVPORQ, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xEB, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xEB, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0xEB, + }}, + {as: AVPROLD, ytab: _yvprold, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x72, 01, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x72, 01, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x72, 01, + }}, + {as: AVPROLQ, ytab: _yvprold, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x72, 01, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | 
evexBcstN8 | evexZeroingEnabled, 0x72, 01, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x72, 01, + }}, + {as: AVPROLVD, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x15, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x15, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x15, + }}, + {as: AVPROLVQ, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x15, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x15, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x15, + }}, + {as: AVPRORD, ytab: _yvprold, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x72, 00, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x72, 00, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x72, 00, + }}, + {as: AVPRORQ, ytab: _yvprold, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x72, 00, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x72, 00, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x72, 00, + }}, + {as: AVPRORVD, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x14, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x14, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | 
evexZeroingEnabled, 0x14, + }}, + {as: AVPRORVQ, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x14, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x14, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x14, + }}, + {as: AVPSADBW, ytab: _yvaesdec, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xF6, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xF6, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16, 0xF6, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32, 0xF6, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64, 0xF6, + }}, + {as: AVPSCATTERDD, ytab: _yvpscatterdd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4, 0xA0, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN4, 0xA0, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4, 0xA0, + }}, + {as: AVPSCATTERDQ, ytab: _yvpscatterdq, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8, 0xA0, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN8, 0xA0, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8, 0xA0, + }}, + {as: AVPSCATTERQD, ytab: _yvpscatterqd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4, 0xA1, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN4, 0xA1, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4, 0xA1, + }}, + {as: AVPSCATTERQQ, ytab: _yvpscatterdd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8, 0xA1, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN8, 0xA1, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8, 0xA1, + }}, + {as: AVPSHLDD, ytab: _yvalignd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16 | evexBcstN4 | 
evexZeroingEnabled, 0x71, + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x71, + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x71, + }}, + {as: AVPSHLDQ, ytab: _yvalignd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x71, + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x71, + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x71, + }}, + {as: AVPSHLDVD, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x71, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x71, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x71, + }}, + {as: AVPSHLDVQ, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x71, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x71, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x71, + }}, + {as: AVPSHLDVW, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexZeroingEnabled, 0x70, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexZeroingEnabled, 0x70, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexZeroingEnabled, 0x70, + }}, + {as: AVPSHLDW, ytab: _yvalignd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16 | evexZeroingEnabled, 0x70, + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexZeroingEnabled, 0x70, + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | 
evexZeroingEnabled, 0x70, + }}, + {as: AVPSHRDD, ytab: _yvalignd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x73, + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x73, + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x73, + }}, + {as: AVPSHRDQ, ytab: _yvalignd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x73, + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x73, + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x73, + }}, + {as: AVPSHRDVD, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x73, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x73, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x73, + }}, + {as: AVPSHRDVQ, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x73, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x73, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x73, + }}, + {as: AVPSHRDVW, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexZeroingEnabled, 0x72, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexZeroingEnabled, 0x72, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexZeroingEnabled, 0x72, + }}, + {as: AVPSHRDW, ytab: _yvalignd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16 | evexZeroingEnabled, 
0x72, + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexZeroingEnabled, 0x72, + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexZeroingEnabled, 0x72, + }}, + {as: AVPSHUFB, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x00, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x00, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x00, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x00, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexZeroingEnabled, 0x00, + }}, + {as: AVPSHUFBITQMB, ytab: _yvpshufbitqmb, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16, 0x8F, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32, 0x8F, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64, 0x8F, + }}, + {as: AVPSHUFD, ytab: _yvpshufd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x70, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x70, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x70, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x70, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x70, + }}, + {as: AVPSHUFHW, ytab: _yvpshufd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x70, + avxEscape | vex256 | vexF3 | vex0F | vexW0, 0x70, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x70, + avxEscape | evex256 | evexF3 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x70, + avxEscape | evex512 | evexF3 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x70, + }}, + {as: AVPSHUFLW, ytab: _yvpshufd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x70, + avxEscape | vex256 | vexF2 | vex0F | vexW0, 0x70, + avxEscape | evex128 | evexF2 
| evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x70, + avxEscape | evex256 | evexF2 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x70, + avxEscape | evex512 | evexF2 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x70, + }}, + {as: AVPSIGNB, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x08, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x08, + }}, + {as: AVPSIGND, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x0A, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x0A, + }}, + {as: AVPSIGNW, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x09, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x09, + }}, + {as: AVPSLLD, ytab: _yvpslld, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x72, 06, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x72, 06, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xF2, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xF2, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x72, 06, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x72, 06, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x72, 06, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xF2, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xF2, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xF2, + }}, + {as: AVPSLLDQ, ytab: _yvpslldq, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x73, 07, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x73, 07, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16, 0x73, 07, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32, 0x73, 07, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64, 0x73, 07, + }}, + 
{as: AVPSLLQ, ytab: _yvpslld, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x73, 06, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x73, 06, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xF3, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xF3, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x73, 06, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x73, 06, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x73, 06, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0xF3, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0xF3, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0xF3, + }}, + {as: AVPSLLVD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x47, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x47, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x47, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x47, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x47, + }}, + {as: AVPSLLVQ, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x47, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0x47, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x47, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x47, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x47, + }}, + {as: AVPSLLVW, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexZeroingEnabled, 0x12, + avxEscape | evex256 | evex66 | evex0F38 | 
evexW1, evexN32 | evexZeroingEnabled, 0x12, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexZeroingEnabled, 0x12, + }}, + {as: AVPSLLW, ytab: _yvpslld, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x71, 06, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x71, 06, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xF1, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xF1, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x71, 06, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x71, 06, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x71, 06, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xF1, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xF1, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xF1, + }}, + {as: AVPSRAD, ytab: _yvpslld, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x72, 04, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x72, 04, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xE2, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xE2, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x72, 04, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x72, 04, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x72, 04, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xE2, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xE2, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xE2, + }}, + {as: AVPSRAQ, ytab: _yvpsraq, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x72, 04, + avxEscape | evex256 | evex66 | evex0F | 
evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x72, 04, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x72, 04, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0xE2, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0xE2, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0xE2, + }}, + {as: AVPSRAVD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x46, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x46, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x46, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x46, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x46, + }}, + {as: AVPSRAVQ, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x46, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x46, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x46, + }}, + {as: AVPSRAVW, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexZeroingEnabled, 0x11, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexZeroingEnabled, 0x11, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexZeroingEnabled, 0x11, + }}, + {as: AVPSRAW, ytab: _yvpslld, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x71, 04, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x71, 04, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xE1, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xE1, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x71, 04, + avxEscape | evex256 | 
evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x71, 04, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x71, 04, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xE1, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xE1, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xE1, + }}, + {as: AVPSRLD, ytab: _yvpslld, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x72, 02, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x72, 02, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xD2, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xD2, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x72, 02, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x72, 02, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x72, 02, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xD2, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xD2, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xD2, + }}, + {as: AVPSRLDQ, ytab: _yvpslldq, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x73, 03, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x73, 03, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16, 0x73, 03, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32, 0x73, 03, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64, 0x73, 03, + }}, + {as: AVPSRLQ, ytab: _yvpslld, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x73, 02, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x73, 02, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xD3, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xD3, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 
| evexZeroingEnabled, 0x73, 02, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x73, 02, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x73, 02, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0xD3, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0xD3, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0xD3, + }}, + {as: AVPSRLVD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x45, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x45, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x45, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x45, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x45, + }}, + {as: AVPSRLVQ, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x45, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0x45, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x45, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x45, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x45, + }}, + {as: AVPSRLVW, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexZeroingEnabled, 0x10, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexZeroingEnabled, 0x10, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexZeroingEnabled, 0x10, + }}, + {as: AVPSRLW, ytab: _yvpslld, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x71, 02, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x71, 02, + avxEscape | vex128 | vex66 | vex0F 
| vexW0, 0xD1, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xD1, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x71, 02, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x71, 02, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x71, 02, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xD1, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xD1, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xD1, + }}, + {as: AVPSUBB, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xF8, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xF8, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xF8, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xF8, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xF8, + }}, + {as: AVPSUBD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xFA, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xFA, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xFA, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xFA, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0xFA, + }}, + {as: AVPSUBQ, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xFB, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xFB, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xFB, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xFB, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0xFB, + }}, + {as: AVPSUBSB, ytab: _yvandnpd, 
prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xE8, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xE8, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xE8, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xE8, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xE8, + }}, + {as: AVPSUBSW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xE9, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xE9, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xE9, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xE9, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xE9, + }}, + {as: AVPSUBUSB, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xD8, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xD8, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xD8, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xD8, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xD8, + }}, + {as: AVPSUBUSW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xD9, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xD9, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xD9, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xD9, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xD9, + }}, + {as: AVPSUBW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xF9, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xF9, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xF9, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | 
evexZeroingEnabled, 0xF9, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xF9, + }}, + {as: AVPTERNLOGD, ytab: _yvalignd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x25, + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x25, + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x25, + }}, + {as: AVPTERNLOGQ, ytab: _yvalignd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x25, + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x25, + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x25, + }}, + {as: AVPTEST, ytab: _yvptest, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x17, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x17, + }}, + {as: AVPTESTMB, ytab: _yvpshufbitqmb, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16, 0x26, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32, 0x26, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64, 0x26, + }}, + {as: AVPTESTMD, ytab: _yvpshufbitqmb, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4, 0x27, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4, 0x27, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4, 0x27, + }}, + {as: AVPTESTMQ, ytab: _yvpshufbitqmb, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8, 0x27, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8, 0x27, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8, 0x27, + }}, + {as: AVPTESTMW, ytab: _yvpshufbitqmb, prefix: Pavx, op: 
opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16, 0x26, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32, 0x26, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64, 0x26, + }}, + {as: AVPTESTNMB, ytab: _yvpshufbitqmb, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN16, 0x26, + avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN32, 0x26, + avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN64, 0x26, + }}, + {as: AVPTESTNMD, ytab: _yvpshufbitqmb, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN16 | evexBcstN4, 0x27, + avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN32 | evexBcstN4, 0x27, + avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN64 | evexBcstN4, 0x27, + }}, + {as: AVPTESTNMQ, ytab: _yvpshufbitqmb, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F38 | evexW1, evexN16 | evexBcstN8, 0x27, + avxEscape | evex256 | evexF3 | evex0F38 | evexW1, evexN32 | evexBcstN8, 0x27, + avxEscape | evex512 | evexF3 | evex0F38 | evexW1, evexN64 | evexBcstN8, 0x27, + }}, + {as: AVPTESTNMW, ytab: _yvpshufbitqmb, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F38 | evexW1, evexN16, 0x26, + avxEscape | evex256 | evexF3 | evex0F38 | evexW1, evexN32, 0x26, + avxEscape | evex512 | evexF3 | evex0F38 | evexW1, evexN64, 0x26, + }}, + {as: AVPUNPCKHBW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x68, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x68, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x68, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x68, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x68, + }}, + {as: AVPUNPCKHDQ, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x6A, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x6A, + 
avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x6A, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x6A, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x6A, + }}, + {as: AVPUNPCKHQDQ, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x6D, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x6D, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x6D, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x6D, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x6D, + }}, + {as: AVPUNPCKHWD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x69, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x69, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x69, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x69, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x69, + }}, + {as: AVPUNPCKLBW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x60, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x60, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x60, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x60, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x60, + }}, + {as: AVPUNPCKLDQ, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x62, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x62, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x62, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 
0x62, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x62, + }}, + {as: AVPUNPCKLQDQ, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x6C, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x6C, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x6C, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x6C, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x6C, + }}, + {as: AVPUNPCKLWD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x61, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x61, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x61, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x61, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x61, + }}, + {as: AVPXOR, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xEF, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xEF, + }}, + {as: AVPXORD, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xEF, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xEF, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0xEF, + }}, + {as: AVPXORQ, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xEF, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xEF, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0xEF, + }}, + {as: AVRANGEPD, ytab: _yvfixupimmpd, prefix: Pavx, op: opBytes{ + avxEscape | 
evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8 | evexSaeEnabled | evexZeroingEnabled, 0x50, + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x50, + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x50, + }}, + {as: AVRANGEPS, ytab: _yvfixupimmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64 | evexBcstN4 | evexSaeEnabled | evexZeroingEnabled, 0x50, + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x50, + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x50, + }}, + {as: AVRANGESD, ytab: _yvfixupimmsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN8 | evexSaeEnabled | evexZeroingEnabled, 0x51, + }}, + {as: AVRANGESS, ytab: _yvfixupimmsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN4 | evexSaeEnabled | evexZeroingEnabled, 0x51, + }}, + {as: AVRCP14PD, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x4C, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x4C, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x4C, + }}, + {as: AVRCP14PS, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x4C, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x4C, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x4C, + }}, + {as: AVRCP14SD, ytab: _yvgetexpsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x4D, + }}, + {as: AVRCP14SS, ytab: _yvgetexpsd, 
prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x4D, + }}, + {as: AVRCP28PD, ytab: _yvexp2pd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexSaeEnabled | evexZeroingEnabled, 0xCA, + }}, + {as: AVRCP28PS, ytab: _yvexp2pd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexSaeEnabled | evexZeroingEnabled, 0xCA, + }}, + {as: AVRCP28SD, ytab: _yvgetexpsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexSaeEnabled | evexZeroingEnabled, 0xCB, + }}, + {as: AVRCP28SS, ytab: _yvgetexpsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexSaeEnabled | evexZeroingEnabled, 0xCB, + }}, + {as: AVRCPPS, ytab: _yvptest, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x53, + avxEscape | vex256 | vex0F | vexW0, 0x53, + }}, + {as: AVRCPSS, ytab: _yvrcpss, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x53, + }}, + {as: AVREDUCEPD, ytab: _yvgetmantpd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8 | evexSaeEnabled | evexZeroingEnabled, 0x56, + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x56, + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x56, + }}, + {as: AVREDUCEPS, ytab: _yvgetmantpd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64 | evexBcstN4 | evexSaeEnabled | evexZeroingEnabled, 0x56, + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x56, + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x56, + }}, + {as: AVREDUCESD, ytab: _yvfixupimmsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | 
evex0F3A | evexW1, evexN8 | evexSaeEnabled | evexZeroingEnabled, 0x57, + }}, + {as: AVREDUCESS, ytab: _yvfixupimmsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN4 | evexSaeEnabled | evexZeroingEnabled, 0x57, + }}, + {as: AVRNDSCALEPD, ytab: _yvgetmantpd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8 | evexSaeEnabled | evexZeroingEnabled, 0x09, + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x09, + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x09, + }}, + {as: AVRNDSCALEPS, ytab: _yvgetmantpd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64 | evexBcstN4 | evexSaeEnabled | evexZeroingEnabled, 0x08, + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x08, + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x08, + }}, + {as: AVRNDSCALESD, ytab: _yvfixupimmsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN8 | evexSaeEnabled | evexZeroingEnabled, 0x0B, + }}, + {as: AVRNDSCALESS, ytab: _yvfixupimmsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN4 | evexSaeEnabled | evexZeroingEnabled, 0x0A, + }}, + {as: AVROUNDPD, ytab: _yvroundpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x09, + avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x09, + }}, + {as: AVROUNDPS, ytab: _yvroundpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x08, + avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x08, + }}, + {as: AVROUNDSD, ytab: _yvdppd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x0B, + }}, + {as: AVROUNDSS, ytab: _yvdppd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x0A, + }}, + 
{as: AVRSQRT14PD, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x4E, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x4E, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x4E, + }}, + {as: AVRSQRT14PS, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x4E, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x4E, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x4E, + }}, + {as: AVRSQRT14SD, ytab: _yvgetexpsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x4F, + }}, + {as: AVRSQRT14SS, ytab: _yvgetexpsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x4F, + }}, + {as: AVRSQRT28PD, ytab: _yvexp2pd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexSaeEnabled | evexZeroingEnabled, 0xCC, + }}, + {as: AVRSQRT28PS, ytab: _yvexp2pd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexSaeEnabled | evexZeroingEnabled, 0xCC, + }}, + {as: AVRSQRT28SD, ytab: _yvgetexpsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexSaeEnabled | evexZeroingEnabled, 0xCD, + }}, + {as: AVRSQRT28SS, ytab: _yvgetexpsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexSaeEnabled | evexZeroingEnabled, 0xCD, + }}, + {as: AVRSQRTPS, ytab: _yvptest, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x52, + avxEscape | vex256 | vex0F | vexW0, 0x52, + }}, + {as: AVRSQRTSS, ytab: _yvrcpss, prefix: Pavx, 
op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x52, + }}, + {as: AVSCALEFPD, ytab: _yvscalefpd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0x2C, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x2C, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x2C, + }}, + {as: AVSCALEFPS, ytab: _yvscalefpd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x2C, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x2C, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x2C, + }}, + {as: AVSCALEFSD, ytab: _yvgetexpsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0x2D, + }}, + {as: AVSCALEFSS, ytab: _yvgetexpsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0x2D, + }}, + {as: AVSCATTERDPD, ytab: _yvpscatterdq, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8, 0xA2, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN8, 0xA2, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8, 0xA2, + }}, + {as: AVSCATTERDPS, ytab: _yvpscatterdd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4, 0xA2, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN4, 0xA2, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4, 0xA2, + }}, + {as: AVSCATTERPF0DPD, ytab: _yvgatherpf0dpd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8, 0xC6, 05, + }}, + {as: AVSCATTERPF0DPS, ytab: _yvgatherpf0dps, prefix: Pavx, op: opBytes{ + 
avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4, 0xC6, 05, + }}, + {as: AVSCATTERPF0QPD, ytab: _yvgatherpf0dps, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8, 0xC7, 05, + }}, + {as: AVSCATTERPF0QPS, ytab: _yvgatherpf0dps, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4, 0xC7, 05, + }}, + {as: AVSCATTERPF1DPD, ytab: _yvgatherpf0dpd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8, 0xC6, 06, + }}, + {as: AVSCATTERPF1DPS, ytab: _yvgatherpf0dps, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4, 0xC6, 06, + }}, + {as: AVSCATTERPF1QPD, ytab: _yvgatherpf0dps, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8, 0xC7, 06, + }}, + {as: AVSCATTERPF1QPS, ytab: _yvgatherpf0dps, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4, 0xC7, 06, + }}, + {as: AVSCATTERQPD, ytab: _yvpscatterdd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8, 0xA3, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN8, 0xA3, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8, 0xA3, + }}, + {as: AVSCATTERQPS, ytab: _yvpscatterqd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4, 0xA3, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN4, 0xA3, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4, 0xA3, + }}, + {as: AVSHUFF32X4, ytab: _yvshuff32x4, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x23, + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x23, + }}, + {as: AVSHUFF64X2, ytab: _yvshuff32x4, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x23, + avxEscape | evex512 | evex66 | 
evex0F3A | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x23, + }}, + {as: AVSHUFI32X4, ytab: _yvshuff32x4, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x43, + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x43, + }}, + {as: AVSHUFI64X2, ytab: _yvshuff32x4, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x43, + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x43, + }}, + {as: AVSHUFPD, ytab: _yvgf2p8affineinvqb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xC6, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xC6, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xC6, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xC6, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0xC6, + }}, + {as: AVSHUFPS, ytab: _yvgf2p8affineinvqb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0xC6, + avxEscape | vex256 | vex0F | vexW0, 0xC6, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xC6, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xC6, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0xC6, + }}, + {as: AVSQRTPD, ytab: _yvcvtdq2ps, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x51, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x51, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0x51, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x51, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | 
evexZeroingEnabled, 0x51, + }}, + {as: AVSQRTPS, ytab: _yvcvtdq2ps, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x51, + avxEscape | vex256 | vex0F | vexW0, 0x51, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x51, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x51, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x51, + }}, + {as: AVSQRTSD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x51, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0x51, + }}, + {as: AVSQRTSS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x51, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0x51, + }}, + {as: AVSTMXCSR, ytab: _yvldmxcsr, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0xAE, 03, + }}, + {as: AVSUBPD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x5C, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x5C, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0x5C, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x5C, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x5C, + }}, + {as: AVSUBPS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x5C, + avxEscape | vex256 | vex0F | vexW0, 0x5C, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x5C, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x5C, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x5C, + }}, + {as: AVSUBSD, 
ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x5C, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0x5C, + }}, + {as: AVSUBSS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x5C, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0x5C, + }}, + {as: AVTESTPD, ytab: _yvptest, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x0F, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x0F, + }}, + {as: AVTESTPS, ytab: _yvptest, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x0E, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x0E, + }}, + {as: AVUCOMISD, ytab: _yvcomisd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x2E, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN8 | evexSaeEnabled, 0x2E, + }}, + {as: AVUCOMISS, ytab: _yvcomisd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x2E, + avxEscape | evex128 | evex0F | evexW0, evexN4 | evexSaeEnabled, 0x2E, + }}, + {as: AVUNPCKHPD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x15, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x15, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x15, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x15, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x15, + }}, + {as: AVUNPCKHPS, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x15, + avxEscape | vex256 | vex0F | vexW0, 0x15, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x15, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x15, + avxEscape | evex512 | evex0F | 
evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x15, + }}, + {as: AVUNPCKLPD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x14, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x14, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x14, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x14, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x14, + }}, + {as: AVUNPCKLPS, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x14, + avxEscape | vex256 | vex0F | vexW0, 0x14, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x14, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x14, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x14, + }}, + {as: AVXORPD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x57, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x57, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x57, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x57, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x57, + }}, + {as: AVXORPS, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x57, + avxEscape | vex256 | vex0F | vexW0, 0x57, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x57, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x57, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x57, + }}, + {as: AVZEROALL, ytab: _yvzeroall, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex0F | vexW0, 0x77, + }}, + {as: AVZEROUPPER, ytab: 
_yvzeroall, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x77, + }}, +} diff --git a/src/cmd/internal/obj/x86/evex.go b/src/cmd/internal/obj/x86/evex.go new file mode 100644 index 0000000..aa93cd8 --- /dev/null +++ b/src/cmd/internal/obj/x86/evex.go @@ -0,0 +1,383 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package x86 + +import ( + "cmd/internal/obj" + "errors" + "fmt" + "strings" +) + +// evexBits stores EVEX prefix info that is used during instruction encoding. +type evexBits struct { + b1 byte // [W1mmLLpp] + b2 byte // [NNNbbZRS] + + // Associated instruction opcode. + opcode byte +} + +// newEVEXBits creates evexBits object from enc bytes at z position. +func newEVEXBits(z int, enc *opBytes) evexBits { + return evexBits{ + b1: enc[z+0], + b2: enc[z+1], + opcode: enc[z+2], + } +} + +// P returns EVEX.pp value. +func (evex evexBits) P() byte { return (evex.b1 & evexP) >> 0 } + +// L returns EVEX.L'L value. +func (evex evexBits) L() byte { return (evex.b1 & evexL) >> 2 } + +// M returns EVEX.mm value. +func (evex evexBits) M() byte { return (evex.b1 & evexM) >> 4 } + +// W returns EVEX.W value. +func (evex evexBits) W() byte { return (evex.b1 & evexW) >> 7 } + +// BroadcastEnabled reports whether BCST suffix is permitted. +func (evex evexBits) BroadcastEnabled() bool { + return evex.b2&evexBcst != 0 +} + +// ZeroingEnabled reports whether Z suffix is permitted. +func (evex evexBits) ZeroingEnabled() bool { + return (evex.b2&evexZeroing)>>2 != 0 +} + +// RoundingEnabled reports whether RN_SAE, RZ_SAE, RD_SAE and RU_SAE suffixes +// are permitted. +func (evex evexBits) RoundingEnabled() bool { + return (evex.b2&evexRounding)>>1 != 0 +} + +// SaeEnabled reports whether SAE suffix is permitted. 
+func (evex evexBits) SaeEnabled() bool { + return (evex.b2&evexSae)>>0 != 0 +} + +// DispMultiplier returns displacement multiplier that is calculated +// based on tuple type, EVEX.W and input size. +// If embedded broadcast is used, bcst should be true. +func (evex evexBits) DispMultiplier(bcst bool) int32 { + if bcst { + switch evex.b2 & evexBcst { + case evexBcstN4: + return 4 + case evexBcstN8: + return 8 + } + return 1 + } + + switch evex.b2 & evexN { + case evexN1: + return 1 + case evexN2: + return 2 + case evexN4: + return 4 + case evexN8: + return 8 + case evexN16: + return 16 + case evexN32: + return 32 + case evexN64: + return 64 + case evexN128: + return 128 + } + return 1 +} + +// EVEX is described by using 2-byte sequence. +// See evexBits for more details. +const ( + evexW = 0x80 // b1[W... ....] + evexWIG = 0 << 7 + evexW0 = 0 << 7 + evexW1 = 1 << 7 + + evexM = 0x30 // b2[..mm ...] + evex0F = 1 << 4 + evex0F38 = 2 << 4 + evex0F3A = 3 << 4 + + evexL = 0x0C // b1[.... LL..] + evexLIG = 0 << 2 + evex128 = 0 << 2 + evex256 = 1 << 2 + evex512 = 2 << 2 + + evexP = 0x03 // b1[.... ..pp] + evex66 = 1 << 0 + evexF3 = 2 << 0 + evexF2 = 3 << 0 + + // Precalculated Disp8 N value. + // N acts like a multiplier for 8bit displacement. + // Note that some N are not used, but their bits are reserved. + evexN = 0xE0 // b2[NNN. ....] + evexN1 = 0 << 5 + evexN2 = 1 << 5 + evexN4 = 2 << 5 + evexN8 = 3 << 5 + evexN16 = 4 << 5 + evexN32 = 5 << 5 + evexN64 = 6 << 5 + evexN128 = 7 << 5 + + // Disp8 for broadcasts. + evexBcst = 0x18 // b2[...b b...] + evexBcstN4 = 1 << 3 + evexBcstN8 = 2 << 3 + + // Flags that permit certain AVX512 features. + // It's semantically illegal to combine evexZeroing and evexSae. + evexZeroing = 0x4 // b2[.... .Z..] + evexZeroingEnabled = 1 << 2 + evexRounding = 0x2 // b2[.... ..R.] + evexRoundingEnabled = 1 << 1 + evexSae = 0x1 // b2[.... 
...S] + evexSaeEnabled = 1 << 0 +) + +// compressedDisp8 calculates EVEX compressed displacement, if applicable. +func compressedDisp8(disp, elemSize int32) (disp8 byte, ok bool) { + if disp%elemSize == 0 { + v := disp / elemSize + if v >= -128 && v <= 127 { + return byte(v), true + } + } + return 0, false +} + +// evexZcase reports whether given Z-case belongs to EVEX group. +func evexZcase(zcase uint8) bool { + return zcase > Zevex_first && zcase < Zevex_last +} + +// evexSuffixBits carries instruction EVEX suffix set flags. +// +// Examples: +// +// "RU_SAE.Z" => {rounding: 3, zeroing: true} +// "Z" => {zeroing: true} +// "BCST" => {broadcast: true} +// "SAE.Z" => {sae: true, zeroing: true} +type evexSuffix struct { + rounding byte + sae bool + zeroing bool + broadcast bool +} + +// Rounding control values. +// Match exact value for EVEX.L'L field (with exception of rcUnset). +const ( + rcRNSAE = 0 // Round towards nearest + rcRDSAE = 1 // Round towards -Inf + rcRUSAE = 2 // Round towards +Inf + rcRZSAE = 3 // Round towards zero + rcUnset = 4 +) + +// newEVEXSuffix returns proper zero value for evexSuffix. +func newEVEXSuffix() evexSuffix { + return evexSuffix{rounding: rcUnset} +} + +// evexSuffixMap maps obj.X86suffix to its decoded version. +// Filled during init(). +var evexSuffixMap [255]evexSuffix + +func init() { + // Decode all valid suffixes for later use. + for i := range opSuffixTable { + suffix := newEVEXSuffix() + parts := strings.Split(opSuffixTable[i], ".") + for j := range parts { + switch parts[j] { + case "Z": + suffix.zeroing = true + case "BCST": + suffix.broadcast = true + case "SAE": + suffix.sae = true + + case "RN_SAE": + suffix.rounding = rcRNSAE + case "RD_SAE": + suffix.rounding = rcRDSAE + case "RU_SAE": + suffix.rounding = rcRUSAE + case "RZ_SAE": + suffix.rounding = rcRZSAE + } + } + evexSuffixMap[i] = suffix + } +} + +// toDisp8 tries to convert disp to proper 8-bit displacement value. 
// inferSuffixError builds a non-nil error explaining why cond could not be
// parsed as an opcode suffix.
//
// It only runs once assembly has already failed, so it favours a helpful
// message over speed. cond is split on "." and each part is examined in
// order; the findings are joined with "; ". Diagnosed problems:
//   - unknown suffixes (reported once per distinct name)
//   - duplicated suffixes (reported on every repeat)
//   - a Z suffix that is not in the last position
//   - rounding/SAE combined with broadcast
//
// When none of these apply, a generic "bad suffix combination" is returned.
func inferSuffixError(cond string) error {
	var (
		problems    []string // Message parts, in discovery order.
		sawBcst     bool
		sawRoundSae bool
	)
	// Every part encountered so far. For an unknown part, "already seen"
	// also means "already reported as unknown".
	seen := make(map[string]struct{})

	parts := strings.Split(cond, ".")
	final := len(parts) - 1
	for pos, part := range parts {
		_, repeated := seen[part]
		seen[part] = struct{}{}

		switch part {
		case "Z":
			if pos != final {
				problems = append(problems, "Z suffix should be the last")
			}
		case "BCST":
			sawBcst = true
		case "SAE", "RN_SAE", "RZ_SAE", "RD_SAE", "RU_SAE":
			sawRoundSae = true
		default:
			if !repeated {
				problems = append(problems, fmt.Sprintf("unknown suffix %q", part))
			}
		}

		if repeated {
			problems = append(problems, fmt.Sprintf("duplicate suffix %q", part))
		}
	}

	if sawBcst && sawRoundSae {
		problems = append(problems, "can't combine rounding/SAE and broadcast")
	}

	if len(problems) > 0 {
		return errors.New(strings.Join(problems, "; "))
	}
	return errors.New("bad suffix combination")
}
+func newOpSuffix(suffixes string) opSuffix { + for i := range opSuffixTable { + if opSuffixTable[i] == suffixes { + return opSuffix(i) + } + } + return badOpSuffix +} + +// IsValid reports whether suffix is valid. +// Empty suffixes are valid. +func (suffix opSuffix) IsValid() bool { + return suffix != badOpSuffix +} + +// String returns suffix printed representation. +// +// It matches the string that was used to create suffix with NewX86Suffix() +// for valid suffixes. +// For all invalid suffixes, special marker is returned. +func (suffix opSuffix) String() string { + return opSuffixTable[suffix] +} diff --git a/src/cmd/internal/obj/x86/list6.go b/src/cmd/internal/obj/x86/list6.go new file mode 100644 index 0000000..6028031 --- /dev/null +++ b/src/cmd/internal/obj/x86/list6.go @@ -0,0 +1,264 @@ +// Inferno utils/6c/list.c +// https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6c/list.c +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +package x86 + +import ( + "cmd/internal/obj" + "fmt" +) + +var Register = []string{ + "AL", // [D_AL] + "CL", + "DL", + "BL", + "SPB", + "BPB", + "SIB", + "DIB", + "R8B", + "R9B", + "R10B", + "R11B", + "R12B", + "R13B", + "R14B", + "R15B", + "AX", // [D_AX] + "CX", + "DX", + "BX", + "SP", + "BP", + "SI", + "DI", + "R8", + "R9", + "R10", + "R11", + "R12", + "R13", + "R14", + "R15", + "AH", + "CH", + "DH", + "BH", + "F0", // [D_F0] + "F1", + "F2", + "F3", + "F4", + "F5", + "F6", + "F7", + "M0", + "M1", + "M2", + "M3", + "M4", + "M5", + "M6", + "M7", + "K0", + "K1", + "K2", + "K3", + "K4", + "K5", + "K6", + "K7", + "X0", + "X1", + "X2", + "X3", + "X4", + "X5", + "X6", + "X7", + "X8", + "X9", + "X10", + "X11", + "X12", + "X13", + "X14", + "X15", + "X16", + "X17", + "X18", + "X19", + "X20", + "X21", + "X22", + "X23", + "X24", + "X25", + "X26", + "X27", + "X28", + "X29", + "X30", + "X31", + "Y0", + "Y1", + "Y2", + "Y3", + "Y4", + "Y5", + "Y6", + "Y7", + "Y8", + "Y9", + "Y10", + "Y11", + "Y12", + "Y13", + "Y14", + "Y15", + "Y16", + "Y17", + "Y18", + "Y19", + "Y20", + "Y21", + "Y22", + "Y23", + "Y24", + "Y25", + "Y26", + "Y27", + "Y28", + "Y29", + "Y30", + "Y31", + "Z0", + "Z1", + "Z2", + "Z3", + "Z4", + "Z5", + "Z6", + "Z7", + "Z8", + "Z9", + "Z10", + "Z11", + "Z12", + "Z13", + "Z14", + "Z15", + "Z16", + "Z17", + "Z18", + "Z19", + "Z20", + "Z21", + "Z22", + "Z23", + "Z24", + "Z25", + "Z26", + "Z27", + "Z28", + "Z29", + "Z30", + "Z31", + "CS", // [D_CS] + "SS", + "DS", + "ES", + "FS", + "GS", + "GDTR", // [D_GDTR] + "IDTR", // [D_IDTR] + "LDTR", // [D_LDTR] + "MSW", // [D_MSW] + "TASK", // [D_TASK] + "CR0", // [D_CR] + "CR1", + "CR2", + "CR3", + "CR4", + "CR5", + "CR6", + "CR7", + "CR8", + "CR9", + "CR10", + "CR11", + "CR12", + "CR13", + "CR14", + "CR15", + "DR0", // [D_DR] + "DR1", + "DR2", + "DR3", + "DR4", + "DR5", + "DR6", + "DR7", + "TR0", // [D_TR] + "TR1", + "TR2", + "TR3", + "TR4", + "TR5", + "TR6", + "TR7", + "TLS", // [D_TLS] + "MAXREG", // [MAXREG] +} + +func 
init() { + obj.RegisterRegister(REG_AL, REG_AL+len(Register), rconv) + obj.RegisterOpcode(obj.ABaseAMD64, Anames) + obj.RegisterRegisterList(obj.RegListX86Lo, obj.RegListX86Hi, rlconv) + obj.RegisterOpSuffix("386", opSuffixString) + obj.RegisterOpSuffix("amd64", opSuffixString) +} + +func rconv(r int) string { + if REG_AL <= r && r-REG_AL < len(Register) { + return Register[r-REG_AL] + } + return fmt.Sprintf("Rgok(%d)", r-obj.RBaseAMD64) +} + +func rlconv(bits int64) string { + reg0, reg1 := decodeRegisterRange(bits) + return fmt.Sprintf("[%s-%s]", rconv(reg0), rconv(reg1)) +} + +func opSuffixString(s uint8) string { + return "." + opSuffix(s).String() +} diff --git a/src/cmd/internal/obj/x86/obj6.go b/src/cmd/internal/obj/x86/obj6.go new file mode 100644 index 0000000..85a4260 --- /dev/null +++ b/src/cmd/internal/obj/x86/obj6.go @@ -0,0 +1,1457 @@ +// Inferno utils/6l/pass.c +// https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/pass.c +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package x86 + +import ( + "cmd/internal/obj" + "cmd/internal/objabi" + "cmd/internal/src" + "cmd/internal/sys" + "log" + "math" + "path" + "strings" +) + +func CanUse1InsnTLS(ctxt *obj.Link) bool { + if isAndroid { + // Android uses a global variable for the tls offset. + return false + } + + if ctxt.Arch.Family == sys.I386 { + switch ctxt.Headtype { + case objabi.Hlinux, + objabi.Hplan9, + objabi.Hwindows: + return false + } + + return true + } + + switch ctxt.Headtype { + case objabi.Hplan9, objabi.Hwindows: + return false + case objabi.Hlinux, objabi.Hfreebsd: + return !ctxt.Flag_shared + } + + return true +} + +func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { + // Thread-local storage references use the TLS pseudo-register. 
+ // As a register, TLS refers to the thread-local storage base, and it + // can only be loaded into another register: + // + // MOVQ TLS, AX + // + // An offset from the thread-local storage base is written off(reg)(TLS*1). + // Semantically it is off(reg), but the (TLS*1) annotation marks this as + // indexing from the loaded TLS base. This emits a relocation so that + // if the linker needs to adjust the offset, it can. For example: + // + // MOVQ TLS, AX + // MOVQ 0(AX)(TLS*1), CX // load g into CX + // + // On systems that support direct access to the TLS memory, this + // pair of instructions can be reduced to a direct TLS memory reference: + // + // MOVQ 0(TLS), CX // load g into CX + // + // The 2-instruction and 1-instruction forms correspond to the two code + // sequences for loading a TLS variable in the local exec model given in "ELF + // Handling For Thread-Local Storage". + // + // We apply this rewrite on systems that support the 1-instruction form. + // The decision is made using only the operating system and the -shared flag, + // not the link mode. If some link modes on a particular operating system + // require the 2-instruction form, then all builds for that operating system + // will use the 2-instruction form, so that the link mode decision can be + // delayed to link time. + // + // In this way, all supported systems use identical instructions to + // access TLS, and they are rewritten appropriately first here in + // liblink and then finally using relocations in the linker. + // + // When -shared is passed, we leave the code in the 2-instruction form but + // assemble (and relocate) them in different ways to generate the initial + // exec code sequence. It's a bit of a fluke that this is possible without + // rewriting the instructions more comprehensively, and it only does because + // we only support a single TLS variable (g). + + if CanUse1InsnTLS(ctxt) { + // Reduce 2-instruction sequence to 1-instruction sequence. 
+ // Sequences like + // MOVQ TLS, BX + // ... off(BX)(TLS*1) ... + // become + // NOP + // ... off(TLS) ... + // + // TODO(rsc): Remove the Hsolaris special case. It exists only to + // guarantee we are producing byte-identical binaries as before this code. + // But it should be unnecessary. + if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 && ctxt.Headtype != objabi.Hsolaris { + obj.Nopout(p) + } + if p.From.Type == obj.TYPE_MEM && p.From.Index == REG_TLS && REG_AX <= p.From.Reg && p.From.Reg <= REG_R15 { + p.From.Reg = REG_TLS + p.From.Scale = 0 + p.From.Index = REG_NONE + } + + if p.To.Type == obj.TYPE_MEM && p.To.Index == REG_TLS && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 { + p.To.Reg = REG_TLS + p.To.Scale = 0 + p.To.Index = REG_NONE + } + } else { + // load_g, below, always inserts the 1-instruction sequence. Rewrite it + // as the 2-instruction sequence if necessary. + // MOVQ 0(TLS), BX + // becomes + // MOVQ TLS, BX + // MOVQ 0(BX)(TLS*1), BX + if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 { + q := obj.Appendp(p, newprog) + q.As = p.As + q.From = p.From + q.From.Type = obj.TYPE_MEM + q.From.Reg = p.To.Reg + q.From.Index = REG_TLS + q.From.Scale = 2 // TODO: use 1 + q.To = p.To + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_TLS + p.From.Index = REG_NONE + p.From.Offset = 0 + } + } + + // Android and Win64 use a tls offset determined at runtime. 
Rewrite + // MOVQ TLS, BX + // to + // MOVQ runtime.tls_g(SB), BX + if (isAndroid || (ctxt.Headtype == objabi.Hwindows && ctxt.Arch.Family == sys.AMD64)) && + (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 { + p.From.Type = obj.TYPE_MEM + p.From.Name = obj.NAME_EXTERN + p.From.Reg = REG_NONE + p.From.Sym = ctxt.Lookup("runtime.tls_g") + p.From.Index = REG_NONE + if ctxt.Headtype == objabi.Hwindows { + // Win64 requires an additional indirection + // to retrieve the TLS pointer, + // as runtime.tls_g contains the TLS offset from GS. + // add + // MOVQ 0(BX)(GS*1), BX + q := obj.Appendp(p, newprog) + q.As = p.As + q.From = obj.Addr{} + q.From.Type = obj.TYPE_MEM + q.From.Reg = p.To.Reg + q.From.Index = REG_GS + q.From.Scale = 1 + q.From.Offset = 0 + q.To = p.To + } + } + + // TODO: Remove. + if ctxt.Headtype == objabi.Hwindows && ctxt.Arch.Family == sys.AMD64 || ctxt.Headtype == objabi.Hplan9 { + if p.From.Scale == 1 && p.From.Index == REG_TLS { + p.From.Scale = 2 + } + if p.To.Scale == 1 && p.To.Index == REG_TLS { + p.To.Scale = 2 + } + } + + // Rewrite 0 to $0 in 3rd argument to CMPPS etc. + // That's what the tables expect. + switch p.As { + case ACMPPD, ACMPPS, ACMPSD, ACMPSS: + if p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_NONE && p.To.Reg == REG_NONE && p.To.Index == REG_NONE && p.To.Sym == nil { + p.To.Type = obj.TYPE_CONST + } + } + + // Rewrite CALL/JMP/RET to symbol as TYPE_BRANCH. + switch p.As { + case obj.ACALL, obj.AJMP, obj.ARET: + if p.To.Type == obj.TYPE_MEM && (p.To.Name == obj.NAME_EXTERN || p.To.Name == obj.NAME_STATIC) && p.To.Sym != nil { + p.To.Type = obj.TYPE_BRANCH + } + } + + // Rewrite MOVL/MOVQ $XXX(FP/SP) as LEAL/LEAQ. 
+ if p.From.Type == obj.TYPE_ADDR && (ctxt.Arch.Family == sys.AMD64 || p.From.Name != obj.NAME_EXTERN && p.From.Name != obj.NAME_STATIC) { + switch p.As { + case AMOVL: + p.As = ALEAL + p.From.Type = obj.TYPE_MEM + case AMOVQ: + p.As = ALEAQ + p.From.Type = obj.TYPE_MEM + } + } + + // Rewrite float constants to values stored in memory. + switch p.As { + // Convert AMOVSS $(0), Xx to AXORPS Xx, Xx + case AMOVSS: + if p.From.Type == obj.TYPE_FCONST { + // f == 0 can't be used here due to -0, so use Float64bits + if f := p.From.Val.(float64); math.Float64bits(f) == 0 { + if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 { + p.As = AXORPS + p.From = p.To + break + } + } + } + fallthrough + + case AFMOVF, + AFADDF, + AFSUBF, + AFSUBRF, + AFMULF, + AFDIVF, + AFDIVRF, + AFCOMF, + AFCOMFP, + AADDSS, + ASUBSS, + AMULSS, + ADIVSS, + ACOMISS, + AUCOMISS: + if p.From.Type == obj.TYPE_FCONST { + f32 := float32(p.From.Val.(float64)) + p.From.Type = obj.TYPE_MEM + p.From.Name = obj.NAME_EXTERN + p.From.Sym = ctxt.Float32Sym(f32) + p.From.Offset = 0 + } + + case AMOVSD: + // Convert AMOVSD $(0), Xx to AXORPS Xx, Xx + if p.From.Type == obj.TYPE_FCONST { + // f == 0 can't be used here due to -0, so use Float64bits + if f := p.From.Val.(float64); math.Float64bits(f) == 0 { + if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 { + p.As = AXORPS + p.From = p.To + break + } + } + } + fallthrough + + case AFMOVD, + AFADDD, + AFSUBD, + AFSUBRD, + AFMULD, + AFDIVD, + AFDIVRD, + AFCOMD, + AFCOMDP, + AADDSD, + ASUBSD, + AMULSD, + ADIVSD, + ACOMISD, + AUCOMISD: + if p.From.Type == obj.TYPE_FCONST { + f64 := p.From.Val.(float64) + p.From.Type = obj.TYPE_MEM + p.From.Name = obj.NAME_EXTERN + p.From.Sym = ctxt.Float64Sym(f64) + p.From.Offset = 0 + } + } + + if ctxt.Flag_dynlink { + rewriteToUseGot(ctxt, p, newprog) + } + + if ctxt.Flag_shared && ctxt.Arch.Family == sys.I386 { + rewriteToPcrel(ctxt, p, newprog) + } +} + +// Rewrite p, if necessary, to 
access global data via the global offset table. +func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { + var lea, mov obj.As + var reg int16 + if ctxt.Arch.Family == sys.AMD64 { + lea = ALEAQ + mov = AMOVQ + reg = REG_R15 + } else { + lea = ALEAL + mov = AMOVL + reg = REG_CX + if p.As == ALEAL && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index { + // Special case: clobber the destination register with + // the PC so we don't have to clobber CX. + // The SSA backend depends on CX not being clobbered across LEAL. + // See cmd/compile/internal/ssa/gen/386.rules (search for Flag_shared). + reg = p.To.Reg + } + } + + if p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO { + // ADUFFxxx $offset + // becomes + // $MOV runtime.duffxxx@GOT, $reg + // $LEA $offset($reg), $reg + // CALL $reg + // (we use LEAx rather than ADDx because ADDx clobbers + // flags and duffzero on 386 does not otherwise do so). + var sym *obj.LSym + if p.As == obj.ADUFFZERO { + sym = ctxt.LookupABI("runtime.duffzero", obj.ABIInternal) + } else { + sym = ctxt.LookupABI("runtime.duffcopy", obj.ABIInternal) + } + offset := p.To.Offset + p.As = mov + p.From.Type = obj.TYPE_MEM + p.From.Name = obj.NAME_GOTREF + p.From.Sym = sym + p.To.Type = obj.TYPE_REG + p.To.Reg = reg + p.To.Offset = 0 + p.To.Sym = nil + p1 := obj.Appendp(p, newprog) + p1.As = lea + p1.From.Type = obj.TYPE_MEM + p1.From.Offset = offset + p1.From.Reg = reg + p1.To.Type = obj.TYPE_REG + p1.To.Reg = reg + p2 := obj.Appendp(p1, newprog) + p2.As = obj.ACALL + p2.To.Type = obj.TYPE_REG + p2.To.Reg = reg + } + + // We only care about global data: NAME_EXTERN means a global + // symbol in the Go sense, and p.Sym.Local is true for a few + // internally defined symbols. 
+ if p.As == lea && p.From.Type == obj.TYPE_MEM && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() { + // $LEA sym, Rx becomes $MOV $sym, Rx which will be rewritten below + p.As = mov + p.From.Type = obj.TYPE_ADDR + } + if p.From.Type == obj.TYPE_ADDR && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() { + // $MOV $sym, Rx becomes $MOV sym@GOT, Rx + // $MOV $sym+<off>, Rx becomes $MOV sym@GOT, Rx; $LEA <off>(Rx), Rx + // On 386 only, more complicated things like PUSHL $sym become $MOV sym@GOT, CX; PUSHL CX + cmplxdest := false + pAs := p.As + var dest obj.Addr + if p.To.Type != obj.TYPE_REG || pAs != mov { + if ctxt.Arch.Family == sys.AMD64 { + ctxt.Diag("do not know how to handle LEA-type insn to non-register in %v with -dynlink", p) + } + cmplxdest = true + dest = p.To + p.As = mov + p.To.Type = obj.TYPE_REG + p.To.Reg = reg + p.To.Sym = nil + p.To.Name = obj.NAME_NONE + } + p.From.Type = obj.TYPE_MEM + p.From.Name = obj.NAME_GOTREF + q := p + if p.From.Offset != 0 { + q = obj.Appendp(p, newprog) + q.As = lea + q.From.Type = obj.TYPE_MEM + q.From.Reg = p.To.Reg + q.From.Offset = p.From.Offset + q.To = p.To + p.From.Offset = 0 + } + if cmplxdest { + q = obj.Appendp(q, newprog) + q.As = pAs + q.To = dest + q.From.Type = obj.TYPE_REG + q.From.Reg = reg + } + } + if p.GetFrom3() != nil && p.GetFrom3().Name == obj.NAME_EXTERN { + ctxt.Diag("don't know how to handle %v with -dynlink", p) + } + var source *obj.Addr + // MOVx sym, Ry becomes $MOV sym@GOT, R15; MOVx (R15), Ry + // MOVx Ry, sym becomes $MOV sym@GOT, R15; MOVx Ry, (R15) + // An addition may be inserted between the two MOVs if there is an offset. 
+ if p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() { + if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() { + ctxt.Diag("cannot handle NAME_EXTERN on both sides in %v with -dynlink", p) + } + source = &p.From + } else if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() { + source = &p.To + } else { + return + } + if p.As == obj.ACALL { + // When dynlinking on 386, almost any call might end up being a call + // to a PLT, so make sure the GOT pointer is loaded into BX. + // RegTo2 is set on the replacement call insn to stop it being + // processed when it is in turn passed to progedit. + // + // We disable open-coded defers in buildssa() on 386 ONLY with shared + // libraries because of this extra code added before deferreturn calls. + if ctxt.Arch.Family == sys.AMD64 || (p.To.Sym != nil && p.To.Sym.Local()) || p.RegTo2 != 0 { + return + } + p1 := obj.Appendp(p, newprog) + p2 := obj.Appendp(p1, newprog) + + p1.As = ALEAL + p1.From.Type = obj.TYPE_MEM + p1.From.Name = obj.NAME_STATIC + p1.From.Sym = ctxt.Lookup("_GLOBAL_OFFSET_TABLE_") + p1.To.Type = obj.TYPE_REG + p1.To.Reg = REG_BX + + p2.As = p.As + p2.Scond = p.Scond + p2.From = p.From + if p.RestArgs != nil { + p2.RestArgs = append(p2.RestArgs, p.RestArgs...) + } + p2.Reg = p.Reg + p2.To = p.To + // p.To.Type was set to TYPE_BRANCH above, but that makes checkaddr + // in ../pass.go complain, so set it back to TYPE_MEM here, until p2 + // itself gets passed to progedit. 
// rewriteToPcrel rewrites p, if necessary, so that operands naming a symbol
// are addressed through a base register set up by a call to a
// __x86.get_pc_thunk.<reg> symbol.
//
// When p has such an operand (see isName below), the function appends a CALL
// to the thunk and then a copy of p whose symbolic operands use <reg> as
// their base register, and finally turns the original p into a NOP with
// obj.Nopout. Progs that need no rewriting are left untouched.
//
// progedit calls it only when ctxt.Flag_shared is set and the architecture
// family is sys.I386.
func rewriteToPcrel(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
	// RegTo2 is set on the instructions we insert here so they don't get
	// processed twice.
	if p.RegTo2 != 0 {
		return
	}
	// These instructions are never rewritten here.
	if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ACALL || p.As == obj.ARET || p.As == obj.AJMP {
		return
	}
	// Any Prog (aside from the above special cases) with an Addr with Name ==
	// NAME_EXTERN, NAME_STATIC or NAME_GOTREF has a CALL __x86.get_pc_thunk.XX
	// inserted before it.
	//
	// isName reports whether a is such an operand: it must carry a symbol,
	// be of type TYPE_MEM or TYPE_ADDR, have no base register yet, and the
	// symbol must not be of type STLSBSS.
	isName := func(a *obj.Addr) bool {
		if a.Sym == nil || (a.Type != obj.TYPE_MEM && a.Type != obj.TYPE_ADDR) || a.Reg != 0 {
			return false
		}
		if a.Sym.Type == objabi.STLSBSS {
			return false
		}
		return a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_STATIC || a.Name == obj.NAME_GOTREF
	}

	if isName(&p.From) && p.From.Type == obj.TYPE_ADDR {
		// Handle things like "MOVL $sym, (SP)" or "PUSHL $sym" by rewriting
		// to "MOVL $sym, CX; MOVL CX, (SP)" or "MOVL $sym, CX; PUSHL CX"
		// respectively.
		if p.To.Type != obj.TYPE_REG {
			// q performs the original operation with CX as its source;
			// p itself becomes the MOVL of the address into CX.
			q := obj.Appendp(p, newprog)
			q.As = p.As
			q.From.Type = obj.TYPE_REG
			q.From.Reg = REG_CX
			q.To = p.To
			p.As = AMOVL
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_CX
			p.To.Sym = nil
			p.To.Name = obj.NAME_NONE
		}
	}

	// Nothing to do unless From, To or the third operand names a symbol.
	if !isName(&p.From) && !isName(&p.To) && (p.GetFrom3() == nil || !isName(p.GetFrom3())) {
		return
	}
	// dst is the register the thunk is named after and that the rewritten
	// operands use as base; CX unless the destination register can be reused.
	var dst int16 = REG_CX
	if (p.As == ALEAL || p.As == AMOVL) && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
		dst = p.To.Reg
		// Why? See the comment near the top of rewriteToUseGot above.
		// AMOVLs might be introduced by the GOT rewrites.
	}
	// q becomes the CALL to the thunk and r the rewritten copy of p.
	// Both are marked with RegTo2 so the check at the top skips them.
	q := obj.Appendp(p, newprog)
	q.RegTo2 = 1
	r := obj.Appendp(q, newprog)
	r.RegTo2 = 1
	q.As = obj.ACALL
	// The thunk symbol name ends in the lower-cased name of dst.
	thunkname := "__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst)))
	q.To.Sym = ctxt.LookupInit(thunkname, func(s *obj.LSym) { s.Set(obj.AttrLocal, true) })
	q.To.Type = obj.TYPE_MEM
	q.To.Name = obj.NAME_EXTERN
	r.As = p.As
	r.Scond = p.Scond
	r.From = p.From
	// r shares p's RestArgs slice; no copy is made here.
	r.RestArgs = p.RestArgs
	r.Reg = p.Reg
	r.To = p.To
	// Give every symbolic operand of r the base register dst.
	if isName(&p.From) {
		r.From.Reg = dst
	}
	if isName(&p.To) {
		r.To.Reg = dst
	}
	if p.GetFrom3() != nil && isName(p.GetFrom3()) {
		r.GetFrom3().Reg = dst
	}
	// The original instruction is replaced by q and r.
	obj.Nopout(p)
}
base pointer. + // There are 2 cases we must avoid: + // 1) If noframe is set (which we do for functions which tail call). + // 2) Scary runtime internals which would be all messed up by frame pointers. + // We detect these using a heuristic: frameless nosplit functions. + // TODO: Maybe someday we label them all with NOFRAME and get rid of this heuristic. + // For performance, we also want to avoid: + // 3) Frameless leaf functions + bpsize = ctxt.Arch.PtrSize + autoffset += int32(bpsize) + p.To.Offset += int64(bpsize) + } else { + bpsize = 0 + } + + textarg := int64(p.To.Val.(int32)) + cursym.Func().Args = int32(textarg) + cursym.Func().Locals = int32(p.To.Offset) + + // TODO(rsc): Remove. + if ctxt.Arch.Family == sys.I386 && cursym.Func().Locals < 0 { + cursym.Func().Locals = 0 + } + + // TODO(rsc): Remove 'ctxt.Arch.Family == sys.AMD64 &&'. + if ctxt.Arch.Family == sys.AMD64 && autoffset < objabi.StackSmall && !p.From.Sym.NoSplit() { + leaf := true + LeafSearch: + for q := p; q != nil; q = q.Link { + switch q.As { + case obj.ACALL: + // Treat common runtime calls that take no arguments + // the same as duffcopy and duffzero. 
+ if !isZeroArgRuntimeCall(q.To.Sym) { + leaf = false + break LeafSearch + } + fallthrough + case obj.ADUFFCOPY, obj.ADUFFZERO: + if autoffset >= objabi.StackSmall-8 { + leaf = false + break LeafSearch + } + } + } + + if leaf { + p.From.Sym.Set(obj.AttrNoSplit, true) + } + } + + var regEntryTmp0, regEntryTmp1 int16 + if ctxt.Arch.Family == sys.AMD64 { + regEntryTmp0, regEntryTmp1 = REGENTRYTMP0, REGENTRYTMP1 + } else { + regEntryTmp0, regEntryTmp1 = REG_BX, REG_DI + } + + var regg int16 + if !p.From.Sym.NoSplit() { + // Emit split check and load G register + p, regg = stacksplit(ctxt, cursym, p, newprog, autoffset, int32(textarg)) + } else if p.From.Sym.Wrapper() { + // Load G register for the wrapper code + p, regg = loadG(ctxt, cursym, p, newprog) + } + + // Delve debugger would like the next instruction to be noted as the end of the function prologue. + // TODO: are there other cases (e.g., wrapper functions) that need marking? + markedPrologue := false + + if autoffset != 0 { + if autoffset%int32(ctxt.Arch.RegSize) != 0 { + ctxt.Diag("unaligned stack size %d", autoffset) + } + p = obj.Appendp(p, newprog) + p.As = AADJSP + p.From.Type = obj.TYPE_CONST + p.From.Offset = int64(autoffset) + p.Spadj = autoffset + p.Pos = p.Pos.WithXlogue(src.PosPrologueEnd) + markedPrologue = true + } + + if bpsize > 0 { + // Save caller's BP + p = obj.Appendp(p, newprog) + + p.As = AMOVQ + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_BP + p.To.Type = obj.TYPE_MEM + p.To.Reg = REG_SP + p.To.Scale = 1 + p.To.Offset = int64(autoffset) - int64(bpsize) + if !markedPrologue { + p.Pos = p.Pos.WithXlogue(src.PosPrologueEnd) + } + + // Move current frame to BP + p = obj.Appendp(p, newprog) + + p.As = ALEAQ + p.From.Type = obj.TYPE_MEM + p.From.Reg = REG_SP + p.From.Scale = 1 + p.From.Offset = int64(autoffset) - int64(bpsize) + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_BP + } + + if cursym.Func().Text.From.Sym.Wrapper() { + // if g._panic != nil && g._panic.argp == FP { + // g._panic.argp = 
bottom-of-frame + // } + // + // MOVQ g_panic(g), regEntryTmp0 + // TESTQ regEntryTmp0, regEntryTmp0 + // JNE checkargp + // end: + // NOP + // ... rest of function ... + // checkargp: + // LEAQ (autoffset+8)(SP), regEntryTmp1 + // CMPQ panic_argp(regEntryTmp0), regEntryTmp1 + // JNE end + // MOVQ SP, panic_argp(regEntryTmp0) + // JMP end + // + // The NOP is needed to give the jumps somewhere to land. + // It is a liblink NOP, not an x86 NOP: it encodes to 0 instruction bytes. + // + // The layout is chosen to help static branch prediction: + // Both conditional jumps are unlikely, so they are arranged to be forward jumps. + + // MOVQ g_panic(g), regEntryTmp0 + p = obj.Appendp(p, newprog) + p.As = AMOVQ + p.From.Type = obj.TYPE_MEM + p.From.Reg = regg + p.From.Offset = 4 * int64(ctxt.Arch.PtrSize) // g_panic + p.To.Type = obj.TYPE_REG + p.To.Reg = regEntryTmp0 + if ctxt.Arch.Family == sys.I386 { + p.As = AMOVL + } + + // TESTQ regEntryTmp0, regEntryTmp0 + p = obj.Appendp(p, newprog) + p.As = ATESTQ + p.From.Type = obj.TYPE_REG + p.From.Reg = regEntryTmp0 + p.To.Type = obj.TYPE_REG + p.To.Reg = regEntryTmp0 + if ctxt.Arch.Family == sys.I386 { + p.As = ATESTL + } + + // JNE checkargp (checkargp to be resolved later) + jne := obj.Appendp(p, newprog) + jne.As = AJNE + jne.To.Type = obj.TYPE_BRANCH + + // end: + // NOP + end := obj.Appendp(jne, newprog) + end.As = obj.ANOP + + // Fast forward to end of function. + var last *obj.Prog + for last = end; last.Link != nil; last = last.Link { + } + + // LEAQ (autoffset+8)(SP), regEntryTmp1 + p = obj.Appendp(last, newprog) + p.As = ALEAQ + p.From.Type = obj.TYPE_MEM + p.From.Reg = REG_SP + p.From.Offset = int64(autoffset) + int64(ctxt.Arch.RegSize) + p.To.Type = obj.TYPE_REG + p.To.Reg = regEntryTmp1 + if ctxt.Arch.Family == sys.I386 { + p.As = ALEAL + } + + // Set jne branch target. 
+ jne.To.SetTarget(p) + + // CMPQ panic_argp(regEntryTmp0), regEntryTmp1 + p = obj.Appendp(p, newprog) + p.As = ACMPQ + p.From.Type = obj.TYPE_MEM + p.From.Reg = regEntryTmp0 + p.From.Offset = 0 // Panic.argp + p.To.Type = obj.TYPE_REG + p.To.Reg = regEntryTmp1 + if ctxt.Arch.Family == sys.I386 { + p.As = ACMPL + } + + // JNE end + p = obj.Appendp(p, newprog) + p.As = AJNE + p.To.Type = obj.TYPE_BRANCH + p.To.SetTarget(end) + + // MOVQ SP, panic_argp(regEntryTmp0) + p = obj.Appendp(p, newprog) + p.As = AMOVQ + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_SP + p.To.Type = obj.TYPE_MEM + p.To.Reg = regEntryTmp0 + p.To.Offset = 0 // Panic.argp + if ctxt.Arch.Family == sys.I386 { + p.As = AMOVL + } + + // JMP end + p = obj.Appendp(p, newprog) + p.As = obj.AJMP + p.To.Type = obj.TYPE_BRANCH + p.To.SetTarget(end) + + // Reset p for following code. + p = end + } + + var deltasp int32 + for p = cursym.Func().Text; p != nil; p = p.Link { + pcsize := ctxt.Arch.RegSize + switch p.From.Name { + case obj.NAME_AUTO: + p.From.Offset += int64(deltasp) - int64(bpsize) + case obj.NAME_PARAM: + p.From.Offset += int64(deltasp) + int64(pcsize) + } + if p.GetFrom3() != nil { + switch p.GetFrom3().Name { + case obj.NAME_AUTO: + p.GetFrom3().Offset += int64(deltasp) - int64(bpsize) + case obj.NAME_PARAM: + p.GetFrom3().Offset += int64(deltasp) + int64(pcsize) + } + } + switch p.To.Name { + case obj.NAME_AUTO: + p.To.Offset += int64(deltasp) - int64(bpsize) + case obj.NAME_PARAM: + p.To.Offset += int64(deltasp) + int64(pcsize) + } + + switch p.As { + default: + if p.To.Type == obj.TYPE_REG && p.To.Reg == REG_SP && p.As != ACMPL && p.As != ACMPQ { + f := cursym.Func() + if f.FuncFlag&objabi.FuncFlag_SPWRITE == 0 { + f.FuncFlag |= objabi.FuncFlag_SPWRITE + if ctxt.Debugvlog || !ctxt.IsAsm { + ctxt.Logf("auto-SPWRITE: %s %v\n", cursym.Name, p) + if !ctxt.IsAsm { + ctxt.Diag("invalid auto-SPWRITE in non-assembly") + ctxt.DiagFlush() + log.Fatalf("bad SPWRITE") + } + } + } + } + continue + + 
case APUSHL, APUSHFL: + deltasp += 4 + p.Spadj = 4 + continue + + case APUSHQ, APUSHFQ: + deltasp += 8 + p.Spadj = 8 + continue + + case APUSHW, APUSHFW: + deltasp += 2 + p.Spadj = 2 + continue + + case APOPL, APOPFL: + deltasp -= 4 + p.Spadj = -4 + continue + + case APOPQ, APOPFQ: + deltasp -= 8 + p.Spadj = -8 + continue + + case APOPW, APOPFW: + deltasp -= 2 + p.Spadj = -2 + continue + + case AADJSP: + p.Spadj = int32(p.From.Offset) + deltasp += int32(p.From.Offset) + continue + + case obj.ARET: + // do nothing + } + + if autoffset != deltasp { + ctxt.Diag("%s: unbalanced PUSH/POP", cursym) + } + + if autoffset != 0 { + to := p.To // Keep To attached to RET for retjmp below + p.To = obj.Addr{} + if bpsize > 0 { + // Restore caller's BP + p.As = AMOVQ + + p.From.Type = obj.TYPE_MEM + p.From.Reg = REG_SP + p.From.Scale = 1 + p.From.Offset = int64(autoffset) - int64(bpsize) + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_BP + p = obj.Appendp(p, newprog) + } + + p.As = AADJSP + p.From.Type = obj.TYPE_CONST + p.From.Offset = int64(-autoffset) + p.Spadj = -autoffset + p = obj.Appendp(p, newprog) + p.As = obj.ARET + p.To = to + + // If there are instructions following + // this ARET, they come from a branch + // with the same stackframe, so undo + // the cleanup. + p.Spadj = +autoffset + } + + if p.To.Sym != nil { // retjmp + p.As = obj.AJMP + } + } +} + +func isZeroArgRuntimeCall(s *obj.LSym) bool { + if s == nil { + return false + } + switch s.Name { + case "runtime.panicdivide", "runtime.panicwrap", "runtime.panicshift": + return true + } + if strings.HasPrefix(s.Name, "runtime.panicIndex") || strings.HasPrefix(s.Name, "runtime.panicSlice") { + // These functions do take arguments (in registers), + // but use no stack before they do a stack check. We + // should include them. See issue 31219. 
+ return true + } + return false +} + +func indir_cx(ctxt *obj.Link, a *obj.Addr) { + a.Type = obj.TYPE_MEM + a.Reg = REG_CX +} + +// loadG ensures the G is loaded into a register (either CX or REGG), +// appending instructions to p if necessary. It returns the new last +// instruction and the G register. +func loadG(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgAlloc) (*obj.Prog, int16) { + if ctxt.Arch.Family == sys.AMD64 && cursym.ABI() == obj.ABIInternal { + // Use the G register directly in ABIInternal + return p, REGG + } + + var regg int16 = REG_CX + if ctxt.Arch.Family == sys.AMD64 { + regg = REGG // == REG_R14 + } + + p = obj.Appendp(p, newprog) + p.As = AMOVQ + if ctxt.Arch.PtrSize == 4 { + p.As = AMOVL + } + p.From.Type = obj.TYPE_MEM + p.From.Reg = REG_TLS + p.From.Offset = 0 + p.To.Type = obj.TYPE_REG + p.To.Reg = regg + + // Rewrite TLS instruction if necessary. + next := p.Link + progedit(ctxt, p, newprog) + for p.Link != next { + p = p.Link + progedit(ctxt, p, newprog) + } + + if p.From.Index == REG_TLS { + p.From.Scale = 2 + } + + return p, regg +} + +// Append code to p to check for stack split. +// Appends to (does not overwrite) p. +// Assumes g is in rg. +// Returns last new instruction and G register. +func stacksplit(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgAlloc, framesize int32, textarg int32) (*obj.Prog, int16) { + cmp := ACMPQ + lea := ALEAQ + mov := AMOVQ + sub := ASUBQ + push, pop := APUSHQ, APOPQ + + if ctxt.Arch.Family == sys.I386 { + cmp = ACMPL + lea = ALEAL + mov = AMOVL + sub = ASUBL + push, pop = APUSHL, APOPL + } + + tmp := int16(REG_AX) // use AX for 32-bit + if ctxt.Arch.Family == sys.AMD64 { + // Avoid register parameters. 
+ tmp = int16(REGENTRYTMP0) + } + + if ctxt.Flag_maymorestack != "" { + p = cursym.Func().SpillRegisterArgs(p, newprog) + + if cursym.Func().Text.From.Sym.NeedCtxt() { + p = obj.Appendp(p, newprog) + p.As = push + p.From.Type = obj.TYPE_REG + p.From.Reg = REGCTXT + } + + // We call maymorestack with an ABI matching the + // caller's ABI. Since this is the first thing that + // happens in the function, we have to be consistent + // with the caller about CPU state (notably, + // fixed-meaning registers). + + p = obj.Appendp(p, newprog) + p.As = obj.ACALL + p.To.Type = obj.TYPE_BRANCH + p.To.Name = obj.NAME_EXTERN + p.To.Sym = ctxt.LookupABI(ctxt.Flag_maymorestack, cursym.ABI()) + + if cursym.Func().Text.From.Sym.NeedCtxt() { + p = obj.Appendp(p, newprog) + p.As = pop + p.To.Type = obj.TYPE_REG + p.To.Reg = REGCTXT + } + + p = cursym.Func().UnspillRegisterArgs(p, newprog) + } + + // Jump back to here after morestack returns. + startPred := p + + // Load G register + var rg int16 + p, rg = loadG(ctxt, cursym, p, newprog) + + var q1 *obj.Prog + if framesize <= objabi.StackSmall { + // small stack: SP <= stackguard + // CMPQ SP, stackguard + p = obj.Appendp(p, newprog) + + p.As = cmp + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_SP + p.To.Type = obj.TYPE_MEM + p.To.Reg = rg + p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0 + if cursym.CFunc() { + p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1 + } + + // Mark the stack bound check and morestack call async nonpreemptible. + // If we get preempted here, when resumed the preemption request is + // cleared, but we'll still call morestack, which will double the stack + // unnecessarily. See issue #35470. 
+ p = ctxt.StartUnsafePoint(p, newprog) + } else if framesize <= objabi.StackBig { + // large stack: SP-framesize <= stackguard-StackSmall + // LEAQ -xxx(SP), tmp + // CMPQ tmp, stackguard + p = obj.Appendp(p, newprog) + + p.As = lea + p.From.Type = obj.TYPE_MEM + p.From.Reg = REG_SP + p.From.Offset = -(int64(framesize) - objabi.StackSmall) + p.To.Type = obj.TYPE_REG + p.To.Reg = tmp + + p = obj.Appendp(p, newprog) + p.As = cmp + p.From.Type = obj.TYPE_REG + p.From.Reg = tmp + p.To.Type = obj.TYPE_MEM + p.To.Reg = rg + p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0 + if cursym.CFunc() { + p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1 + } + + p = ctxt.StartUnsafePoint(p, newprog) // see the comment above + } else { + // Such a large stack we need to protect against underflow. + // The runtime guarantees SP > objabi.StackBig, but + // framesize is large enough that SP-framesize may + // underflow, causing a direct comparison with the + // stack guard to incorrectly succeed. We explicitly + // guard against underflow. + // + // MOVQ SP, tmp + // SUBQ $(framesize - StackSmall), tmp + // // If subtraction wrapped (carry set), morestack. 
+ // JCS label-of-call-to-morestack + // CMPQ tmp, stackguard + + p = obj.Appendp(p, newprog) + + p.As = mov + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_SP + p.To.Type = obj.TYPE_REG + p.To.Reg = tmp + + p = ctxt.StartUnsafePoint(p, newprog) // see the comment above + + p = obj.Appendp(p, newprog) + p.As = sub + p.From.Type = obj.TYPE_CONST + p.From.Offset = int64(framesize) - objabi.StackSmall + p.To.Type = obj.TYPE_REG + p.To.Reg = tmp + + p = obj.Appendp(p, newprog) + p.As = AJCS + p.To.Type = obj.TYPE_BRANCH + q1 = p + + p = obj.Appendp(p, newprog) + p.As = cmp + p.From.Type = obj.TYPE_REG + p.From.Reg = tmp + p.To.Type = obj.TYPE_MEM + p.To.Reg = rg + p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0 + if cursym.CFunc() { + p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1 + } + } + + // common + jls := obj.Appendp(p, newprog) + jls.As = AJLS + jls.To.Type = obj.TYPE_BRANCH + + end := ctxt.EndUnsafePoint(jls, newprog, -1) + + var last *obj.Prog + for last = cursym.Func().Text; last.Link != nil; last = last.Link { + } + + // Now we are at the end of the function, but logically + // we are still in function prologue. We need to fix the + // SP data and PCDATA. + spfix := obj.Appendp(last, newprog) + spfix.As = obj.ANOP + spfix.Spadj = -framesize + + pcdata := ctxt.EmitEntryStackMap(cursym, spfix, newprog) + spill := ctxt.StartUnsafePoint(pcdata, newprog) + pcdata = cursym.Func().SpillRegisterArgs(spill, newprog) + + call := obj.Appendp(pcdata, newprog) + call.Pos = cursym.Func().Text.Pos + call.As = obj.ACALL + call.To.Type = obj.TYPE_BRANCH + call.To.Name = obj.NAME_EXTERN + morestack := "runtime.morestack" + switch { + case cursym.CFunc(): + morestack = "runtime.morestackc" + case !cursym.Func().Text.From.Sym.NeedCtxt(): + morestack = "runtime.morestack_noctxt" + } + call.To.Sym = ctxt.Lookup(morestack) + // When compiling 386 code for dynamic linking, the call needs to be adjusted + // to follow PIC rules. 
This in turn can insert more instructions, so we need + // to keep track of the start of the call (where the jump will be to) and the + // end (which following instructions are appended to). + callend := call + progedit(ctxt, callend, newprog) + for ; callend.Link != nil; callend = callend.Link { + progedit(ctxt, callend.Link, newprog) + } + + pcdata = cursym.Func().UnspillRegisterArgs(callend, newprog) + pcdata = ctxt.EndUnsafePoint(pcdata, newprog, -1) + + jmp := obj.Appendp(pcdata, newprog) + jmp.As = obj.AJMP + jmp.To.Type = obj.TYPE_BRANCH + jmp.To.SetTarget(startPred.Link) + jmp.Spadj = +framesize + + jls.To.SetTarget(spill) + if q1 != nil { + q1.To.SetTarget(spill) + } + + return end, rg +} + +func isR15(r int16) bool { + return r == REG_R15 || r == REG_R15B +} +func addrMentionsR15(a *obj.Addr) bool { + if a == nil { + return false + } + return isR15(a.Reg) || isR15(a.Index) +} +func progMentionsR15(p *obj.Prog) bool { + return addrMentionsR15(&p.From) || addrMentionsR15(&p.To) || isR15(p.Reg) || addrMentionsR15(p.GetFrom3()) +} + +// progOverwritesR15 reports whether p writes to R15 and does not depend on +// the previous value of R15. +func progOverwritesR15(p *obj.Prog) bool { + if !(p.To.Type == obj.TYPE_REG && isR15(p.To.Reg)) { + // Not writing to R15. + return false + } + if (p.As == AXORL || p.As == AXORQ) && p.From.Type == obj.TYPE_REG && isR15(p.From.Reg) { + // These look like uses of R15, but aren't, so we must detect these + // before the use check below. + return true + } + if addrMentionsR15(&p.From) || isR15(p.Reg) || addrMentionsR15(p.GetFrom3()) { + // use before overwrite + return false + } + if p.As == AMOVL || p.As == AMOVQ || p.As == APOPQ { + return true + // TODO: MOVB might be ok if we only ever use R15B. 
+ } + return false +} + +func addrUsesGlobal(a *obj.Addr) bool { + if a == nil { + return false + } + return a.Name == obj.NAME_EXTERN && !a.Sym.Local() +} +func progUsesGlobal(p *obj.Prog) bool { + if p.As == obj.ACALL || p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ARET || p.As == obj.AJMP { + // These opcodes don't use a GOT to access their argument (see rewriteToUseGot), + // or R15 would be dead at them anyway. + return false + } + if p.As == ALEAQ { + // The GOT entry is placed directly in the destination register; R15 is not used. + return false + } + return addrUsesGlobal(&p.From) || addrUsesGlobal(&p.To) || addrUsesGlobal(p.GetFrom3()) +} + +func errorCheck(ctxt *obj.Link, s *obj.LSym) { + // When dynamic linking, R15 is used to access globals. Reject code that + // uses R15 after a global variable access. + if !ctxt.Flag_dynlink { + return + } + + // Flood fill all the instructions where R15's value is junk. + // If there are any uses of R15 in that set, report an error. + var work []*obj.Prog + var mentionsR15 bool + for p := s.Func().Text; p != nil; p = p.Link { + if progUsesGlobal(p) { + work = append(work, p) + p.Mark |= markBit + } + if progMentionsR15(p) { + mentionsR15 = true + } + } + if mentionsR15 { + for len(work) > 0 { + p := work[len(work)-1] + work = work[:len(work)-1] + if q := p.To.Target(); q != nil && q.Mark&markBit == 0 { + q.Mark |= markBit + work = append(work, q) + } + if p.As == obj.AJMP || p.As == obj.ARET { + continue // no fallthrough + } + if progMentionsR15(p) { + if progOverwritesR15(p) { + // R15 is overwritten by this instruction. Its value is not junk any more. 
// unaryDst is the set of single-operand x86 instructions listed as having a
// destination operand. Per the ytab documentation in this package, it is
// consulted for unary instructions to decide whether the single argument is
// a source or a destination. It is handed to the rest of the toolchain
// through the UnaryDst field of the LinkArch values for this architecture.
//
// Only membership matters: every entry maps to true.
var unaryDst = map[obj.As]bool{
	ABSWAPL:     true,
	ABSWAPQ:     true,
	ACLDEMOTE:   true,
	ACLFLUSH:    true,
	ACLFLUSHOPT: true,
	ACLWB:       true,
	ACMPXCHG16B: true,
	ACMPXCHG8B:  true,
	ADECB:       true,
	ADECL:       true,
	ADECQ:       true,
	ADECW:       true,
	AFBSTP:      true,
	AFFREE:      true,
	AFLDENV:     true,
	AFSAVE:      true,
	AFSTCW:      true,
	AFSTENV:     true,
	AFSTSW:      true,
	AFXSAVE64:   true,
	AFXSAVE:     true,
	AINCB:       true,
	AINCL:       true,
	AINCQ:       true,
	AINCW:       true,
	ANEGB:       true,
	ANEGL:       true,
	ANEGQ:       true,
	ANEGW:       true,
	ANOTB:       true,
	ANOTL:       true,
	ANOTQ:       true,
	ANOTW:       true,
	APOPL:       true,
	APOPQ:       true,
	APOPW:       true,
	ARDFSBASEL:  true,
	ARDFSBASEQ:  true,
	ARDGSBASEL:  true,
	ARDGSBASEQ:  true,
	ARDRANDL:    true,
	ARDRANDQ:    true,
	ARDRANDW:    true,
	ARDSEEDL:    true,
	ARDSEEDQ:    true,
	ARDSEEDW:    true,
	ASETCC:      true,
	ASETCS:      true,
	ASETEQ:      true,
	ASETGE:      true,
	ASETGT:      true,
	ASETHI:      true,
	ASETLE:      true,
	ASETLS:      true,
	ASETLT:      true,
	ASETMI:      true,
	ASETNE:      true,
	ASETOC:      true,
	ASETOS:      true,
	ASETPC:      true,
	ASETPL:      true,
	ASETPS:      true,
	ASGDT:       true,
	ASIDT:       true,
	ASLDTL:      true,
	ASLDTQ:      true,
	ASLDTW:      true,
	ASMSWL:      true,
	ASMSWQ:      true,
	ASMSWW:      true,
	ASTMXCSR:    true,
	ASTRL:       true,
	ASTRQ:       true,
	ASTRW:       true,
	AXSAVE64:    true,
	AXSAVE:      true,
	AXSAVEC64:   true,
	AXSAVEC:     true,
	AXSAVEOPT64: true,
	AXSAVEOPT:   true,
	AXSAVES64:   true,
	AXSAVES:     true,
}
Progedit: progedit, + UnaryDst: unaryDst, + DWARFRegisters: AMD64DWARFRegisters, +} + +var Link386 = obj.LinkArch{ + Arch: sys.Arch386, + Init: instinit, + Preprocess: preprocess, + Assemble: span6, + Progedit: progedit, + UnaryDst: unaryDst, + DWARFRegisters: X86DWARFRegisters, +} diff --git a/src/cmd/internal/obj/x86/obj6_test.go b/src/cmd/internal/obj/x86/obj6_test.go new file mode 100644 index 0000000..d1246be --- /dev/null +++ b/src/cmd/internal/obj/x86/obj6_test.go @@ -0,0 +1,167 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package x86_test + +import ( + "bufio" + "bytes" + "fmt" + "internal/testenv" + "os" + "path/filepath" + "regexp" + "strconv" + "strings" + "testing" +) + +const testdata = ` +MOVQ AX, AX -> MOVQ AX, AX + +LEAQ name(SB), AX -> MOVQ name@GOT(SB), AX +LEAQ name+10(SB), AX -> MOVQ name@GOT(SB), AX; LEAQ 10(AX), AX +MOVQ $name(SB), AX -> MOVQ name@GOT(SB), AX +MOVQ $name+10(SB), AX -> MOVQ name@GOT(SB), AX; LEAQ 10(AX), AX + +MOVQ name(SB), AX -> NOP; MOVQ name@GOT(SB), R15; MOVQ (R15), AX +MOVQ name+10(SB), AX -> NOP; MOVQ name@GOT(SB), R15; MOVQ 10(R15), AX + +CMPQ name(SB), $0 -> NOP; MOVQ name@GOT(SB), R15; CMPQ (R15), $0 + +MOVQ $1, name(SB) -> NOP; MOVQ name@GOT(SB), R15; MOVQ $1, (R15) +MOVQ $1, name+10(SB) -> NOP; MOVQ name@GOT(SB), R15; MOVQ $1, 10(R15) +` + +type ParsedTestData struct { + input string + marks []int + marker_to_input map[int][]string + marker_to_expected map[int][]string + marker_to_output map[int][]string +} + +const marker_start = 1234 + +func parseTestData(t *testing.T) *ParsedTestData { + r := &ParsedTestData{} + scanner := bufio.NewScanner(strings.NewReader(testdata)) + r.marker_to_input = make(map[int][]string) + r.marker_to_expected = make(map[int][]string) + marker := marker_start + input_insns := []string{} + for scanner.Scan() { + line := scanner.Text() + if len(strings.TrimSpace(line)) 
== 0 { + continue + } + parts := strings.Split(line, "->") + if len(parts) != 2 { + t.Fatalf("malformed line %v", line) + } + r.marks = append(r.marks, marker) + marker_insn := fmt.Sprintf("MOVQ $%d, AX", marker) + input_insns = append(input_insns, marker_insn) + for _, input_insn := range strings.Split(parts[0], ";") { + input_insns = append(input_insns, input_insn) + r.marker_to_input[marker] = append(r.marker_to_input[marker], normalize(input_insn)) + } + for _, expected_insn := range strings.Split(parts[1], ";") { + r.marker_to_expected[marker] = append(r.marker_to_expected[marker], normalize(expected_insn)) + } + marker++ + } + r.input = "TEXT ·foo(SB),$0\n" + strings.Join(input_insns, "\n") + "\n" + return r +} + +var spaces_re *regexp.Regexp = regexp.MustCompile(`\s+`) + +func normalize(s string) string { + return spaces_re.ReplaceAllLiteralString(strings.TrimSpace(s), " ") +} + +func asmOutput(t *testing.T, s string) []byte { + tmpdir, err := os.MkdirTemp("", "progedittest") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(tmpdir) + tmpfile, err := os.Create(filepath.Join(tmpdir, "input.s")) + if err != nil { + t.Fatal(err) + } + defer tmpfile.Close() + _, err = tmpfile.WriteString(s) + if err != nil { + t.Fatal(err) + } + cmd := testenv.Command(t, + testenv.GoToolPath(t), "tool", "asm", "-S", "-dynlink", + "-o", filepath.Join(tmpdir, "output.6"), tmpfile.Name()) + + cmd.Env = append(os.Environ(), + "GOARCH=amd64", "GOOS=linux", "GOPATH="+filepath.Join(tmpdir, "_gopath")) + asmout, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("error %s output %s", err, asmout) + } + return asmout +} + +func parseOutput(t *testing.T, td *ParsedTestData, asmout []byte) { + scanner := bufio.NewScanner(bytes.NewReader(asmout)) + marker := regexp.MustCompile(`MOVQ \$([0-9]+), AX`) + mark := -1 + td.marker_to_output = make(map[int][]string) + for scanner.Scan() { + line := scanner.Text() + if line[0] != '\t' { + continue + } + parts := strings.SplitN(line, 
"\t", 3) + if len(parts) != 3 { + continue + } + n := normalize(parts[2]) + mark_matches := marker.FindStringSubmatch(n) + if mark_matches != nil { + mark, _ = strconv.Atoi(mark_matches[1]) + if _, ok := td.marker_to_input[mark]; !ok { + t.Fatalf("unexpected marker %d", mark) + } + } else if mark != -1 { + td.marker_to_output[mark] = append(td.marker_to_output[mark], n) + } + } +} + +func TestDynlink(t *testing.T) { + testenv.MustHaveGoBuild(t) + + if os.Getenv("GOHOSTARCH") != "" { + // TODO: make this work? It was failing due to the + // GOARCH= filtering above and skipping is easiest for + // now. + t.Skip("skipping when GOHOSTARCH is set") + } + + testdata := parseTestData(t) + asmout := asmOutput(t, testdata.input) + parseOutput(t, testdata, asmout) + for _, m := range testdata.marks { + i := strings.Join(testdata.marker_to_input[m], "; ") + o := strings.Join(testdata.marker_to_output[m], "; ") + e := strings.Join(testdata.marker_to_expected[m], "; ") + if o != e { + if o == i { + t.Errorf("%s was unchanged; should have become %s", i, e) + } else { + t.Errorf("%s became %s; should have become %s", i, o, e) + } + } else if i != e { + t.Logf("%s correctly became %s", i, o) + } + } +} diff --git a/src/cmd/internal/obj/x86/pcrelative_test.go b/src/cmd/internal/obj/x86/pcrelative_test.go new file mode 100644 index 0000000..3827100 --- /dev/null +++ b/src/cmd/internal/obj/x86/pcrelative_test.go @@ -0,0 +1,105 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package x86_test + +import ( + "bytes" + "fmt" + "internal/testenv" + "os" + "path/filepath" + "testing" +) + +const asmData = ` +GLOBL zeros<>(SB),8,$64 +TEXT ·testASM(SB),4,$0 +VMOVUPS zeros<>(SB), %s // PC relative relocation is off by 1, for Y8-Y15, Z8-15 and Z24-Z31 +RET +` + +const goData = ` +package main + +func testASM() + +func main() { + testASM() +} +` + +func objdumpOutput(t *testing.T, mname, source string) []byte { + tmpdir, err := os.MkdirTemp("", mname) + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(tmpdir) + err = os.WriteFile(filepath.Join(tmpdir, "go.mod"), []byte(fmt.Sprintf("module %s\n", mname)), 0666) + if err != nil { + t.Fatal(err) + } + tmpfile, err := os.Create(filepath.Join(tmpdir, "input.s")) + if err != nil { + t.Fatal(err) + } + defer tmpfile.Close() + _, err = tmpfile.WriteString(source) + if err != nil { + t.Fatal(err) + } + tmpfile2, err := os.Create(filepath.Join(tmpdir, "input.go")) + if err != nil { + t.Fatal(err) + } + defer tmpfile2.Close() + _, err = tmpfile2.WriteString(goData) + if err != nil { + t.Fatal(err) + } + + cmd := testenv.Command(t, + testenv.GoToolPath(t), "build", "-o", + filepath.Join(tmpdir, "output")) + + cmd.Env = append(os.Environ(), + "GOARCH=amd64", "GOOS=linux", "GOPATH="+filepath.Join(tmpdir, "_gopath")) + cmd.Dir = tmpdir + + out, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("error %s output %s", err, out) + } + cmd2 := testenv.Command(t, + testenv.GoToolPath(t), "tool", "objdump", "-s", "testASM", + filepath.Join(tmpdir, "output")) + cmd2.Env = cmd.Env + cmd2.Dir = tmpdir + objout, err := cmd2.CombinedOutput() + if err != nil { + t.Fatalf("error %s output %s", err, objout) + } + + return objout +} + +func TestVexEvexPCrelative(t *testing.T) { + testenv.MustHaveGoBuild(t) +LOOP: + for _, reg := range []string{"Y0", "Y8", "Z0", "Z8", "Z16", "Z24"} { + asm := fmt.Sprintf(asmData, reg) + objout := objdumpOutput(t, "pcrelative", asm) + data := bytes.Split(objout, []byte("\n")) + 
for idx := len(data) - 1; idx >= 0; idx-- { + // check that RET wasn't overwritten. + if bytes.Index(data[idx], []byte("RET")) != -1 { + if testing.Short() { + break LOOP + } + continue LOOP + } + } + t.Errorf("VMOVUPS zeros<>(SB), %s overwrote RET", reg) + } +} diff --git a/src/cmd/internal/obj/x86/ytab.go b/src/cmd/internal/obj/x86/ytab.go new file mode 100644 index 0000000..7d0b75b --- /dev/null +++ b/src/cmd/internal/obj/x86/ytab.go @@ -0,0 +1,44 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package x86 + +// argListMax specifies upper arg count limit expected to be carried by obj.Prog. +// Max len(obj.Prog.RestArgs) can be inferred from this to be 4. +const argListMax int = 6 + +type argList [argListMax]uint8 + +type ytab struct { + zcase uint8 + zoffset uint8 + + // Last arg is usually destination. + // For unary instructions unaryDst is used to determine + // if single argument is a source or destination. + args argList +} + +// Returns true if yt is compatible with args. +// +// Elements from args and yt.args are used +// to index ycover table like `ycover[args[i]+yt.args[i]]`. +// This means that args should contain values that already +// multiplied by Ymax. +func (yt *ytab) match(args []int) bool { + // Trailing Yxxx check is required to avoid a case + // where shorter arg list is matched. + // If we had exact yt.args length, it could be `yt.argc != len(args)`. + if len(args) < len(yt.args) && yt.args[len(args)] != Yxxx { + return false + } + + for i := range args { + if ycover[args[i]+int(yt.args[i])] == 0 { + return false + } + } + + return true +} |