From 43a123c1ae6613b3efeed291fa552ecd909d3acf Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Tue, 16 Apr 2024 21:23:18 +0200 Subject: Adding upstream version 1.20.14. Signed-off-by: Daniel Baumann --- src/cmd/internal/obj/abi_string.go | 16 + src/cmd/internal/obj/addrtype_string.go | 37 + src/cmd/internal/obj/arm/a.out.go | 410 ++ src/cmd/internal/obj/arm/anames.go | 144 + src/cmd/internal/obj/arm/anames5.go | 77 + src/cmd/internal/obj/arm/asm5.go | 3094 ++++++++ src/cmd/internal/obj/arm/list5.go | 124 + src/cmd/internal/obj/arm/obj5.go | 838 +++ src/cmd/internal/obj/arm64/a.out.go | 1213 ++++ src/cmd/internal/obj/arm64/anames.go | 544 ++ src/cmd/internal/obj/arm64/anames7.go | 124 + src/cmd/internal/obj/arm64/asm7.go | 7657 ++++++++++++++++++++ src/cmd/internal/obj/arm64/asm_arm64_test.go | 171 + src/cmd/internal/obj/arm64/asm_arm64_test.s | 21 + src/cmd/internal/obj/arm64/doc.go | 296 + src/cmd/internal/obj/arm64/list7.go | 255 + src/cmd/internal/obj/arm64/obj7.go | 1095 +++ .../internal/obj/arm64/specialoperand_string.go | 166 + src/cmd/internal/obj/arm64/sysRegEnc.go | 895 +++ src/cmd/internal/obj/data.go | 207 + src/cmd/internal/obj/dwarf.go | 709 ++ src/cmd/internal/obj/go.go | 16 + src/cmd/internal/obj/inl.go | 132 + src/cmd/internal/obj/ld.go | 85 + src/cmd/internal/obj/line.go | 35 + src/cmd/internal/obj/line_test.go | 40 + src/cmd/internal/obj/link.go | 1000 +++ src/cmd/internal/obj/loong64/a.out.go | 419 ++ src/cmd/internal/obj/loong64/anames.go | 134 + src/cmd/internal/obj/loong64/asm.go | 1980 +++++ src/cmd/internal/obj/loong64/cnames.go | 43 + src/cmd/internal/obj/loong64/list.go | 46 + src/cmd/internal/obj/loong64/obj.go | 716 ++ src/cmd/internal/obj/mips/a.out.go | 483 ++ src/cmd/internal/obj/mips/anames.go | 137 + src/cmd/internal/obj/mips/anames0.go | 45 + src/cmd/internal/obj/mips/asm0.go | 2121 ++++++ src/cmd/internal/obj/mips/list0.go | 83 + src/cmd/internal/obj/mips/obj0.go | 1536 ++++ src/cmd/internal/obj/objfile.go | 897 +++ 
src/cmd/internal/obj/objfile_test.go | 144 + src/cmd/internal/obj/pass.go | 181 + src/cmd/internal/obj/pcln.go | 423 ++ src/cmd/internal/obj/plist.go | 382 + src/cmd/internal/obj/ppc64/a.out.go | 1092 +++ src/cmd/internal/obj/ppc64/anames.go | 611 ++ src/cmd/internal/obj/ppc64/anames9.go | 55 + src/cmd/internal/obj/ppc64/asm9.go | 5342 ++++++++++++++ src/cmd/internal/obj/ppc64/asm9_gtables.go | 1660 +++++ src/cmd/internal/obj/ppc64/asm_test.go | 555 ++ src/cmd/internal/obj/ppc64/doc.go | 254 + src/cmd/internal/obj/ppc64/list9.go | 117 + src/cmd/internal/obj/ppc64/obj9.go | 1414 ++++ src/cmd/internal/obj/riscv/anames.go | 260 + src/cmd/internal/obj/riscv/asm_test.go | 213 + src/cmd/internal/obj/riscv/cpu.go | 650 ++ src/cmd/internal/obj/riscv/inst.go | 459 ++ src/cmd/internal/obj/riscv/list.go | 33 + src/cmd/internal/obj/riscv/obj.go | 2248 ++++++ .../obj/riscv/testdata/testbranch/branch_test.go | 133 + .../obj/riscv/testdata/testbranch/branch_test.s | 156 + src/cmd/internal/obj/s390x/a.out.go | 1003 +++ src/cmd/internal/obj/s390x/anames.go | 720 ++ src/cmd/internal/obj/s390x/anamesz.go | 39 + src/cmd/internal/obj/s390x/asmz.go | 5047 +++++++++++++ src/cmd/internal/obj/s390x/condition_code.go | 128 + src/cmd/internal/obj/s390x/listz.go | 73 + src/cmd/internal/obj/s390x/objz.go | 802 ++ src/cmd/internal/obj/s390x/rotate.go | 117 + src/cmd/internal/obj/s390x/rotate_test.go | 122 + src/cmd/internal/obj/s390x/vector.go | 1069 +++ src/cmd/internal/obj/sizeof_test.go | 38 + src/cmd/internal/obj/stringer.go | 104 + src/cmd/internal/obj/sym.go | 452 ++ src/cmd/internal/obj/textflag.go | 58 + src/cmd/internal/obj/util.go | 673 ++ src/cmd/internal/obj/wasm/a.out.go | 342 + src/cmd/internal/obj/wasm/anames.go | 218 + src/cmd/internal/obj/wasm/wasmobj.go | 1201 +++ src/cmd/internal/obj/x86/a.out.go | 426 ++ src/cmd/internal/obj/x86/aenum.go | 1609 ++++ src/cmd/internal/obj/x86/anames.go | 1607 ++++ src/cmd/internal/obj/x86/asm6.go | 5441 ++++++++++++++ 
src/cmd/internal/obj/x86/asm_test.go | 291 + src/cmd/internal/obj/x86/avx_optabs.go | 4628 ++++++++++++ src/cmd/internal/obj/x86/evex.go | 383 + src/cmd/internal/obj/x86/list6.go | 264 + src/cmd/internal/obj/x86/obj6.go | 1457 ++++ src/cmd/internal/obj/x86/obj6_test.go | 167 + src/cmd/internal/obj/x86/pcrelative_test.go | 105 + src/cmd/internal/obj/x86/ytab.go | 44 + 91 files changed, 73051 insertions(+) create mode 100644 src/cmd/internal/obj/abi_string.go create mode 100644 src/cmd/internal/obj/addrtype_string.go create mode 100644 src/cmd/internal/obj/arm/a.out.go create mode 100644 src/cmd/internal/obj/arm/anames.go create mode 100644 src/cmd/internal/obj/arm/anames5.go create mode 100644 src/cmd/internal/obj/arm/asm5.go create mode 100644 src/cmd/internal/obj/arm/list5.go create mode 100644 src/cmd/internal/obj/arm/obj5.go create mode 100644 src/cmd/internal/obj/arm64/a.out.go create mode 100644 src/cmd/internal/obj/arm64/anames.go create mode 100644 src/cmd/internal/obj/arm64/anames7.go create mode 100644 src/cmd/internal/obj/arm64/asm7.go create mode 100644 src/cmd/internal/obj/arm64/asm_arm64_test.go create mode 100644 src/cmd/internal/obj/arm64/asm_arm64_test.s create mode 100644 src/cmd/internal/obj/arm64/doc.go create mode 100644 src/cmd/internal/obj/arm64/list7.go create mode 100644 src/cmd/internal/obj/arm64/obj7.go create mode 100644 src/cmd/internal/obj/arm64/specialoperand_string.go create mode 100644 src/cmd/internal/obj/arm64/sysRegEnc.go create mode 100644 src/cmd/internal/obj/data.go create mode 100644 src/cmd/internal/obj/dwarf.go create mode 100644 src/cmd/internal/obj/go.go create mode 100644 src/cmd/internal/obj/inl.go create mode 100644 src/cmd/internal/obj/ld.go create mode 100644 src/cmd/internal/obj/line.go create mode 100644 src/cmd/internal/obj/line_test.go create mode 100644 src/cmd/internal/obj/link.go create mode 100644 src/cmd/internal/obj/loong64/a.out.go create mode 100644 src/cmd/internal/obj/loong64/anames.go create mode 100644 
src/cmd/internal/obj/loong64/asm.go create mode 100644 src/cmd/internal/obj/loong64/cnames.go create mode 100644 src/cmd/internal/obj/loong64/list.go create mode 100644 src/cmd/internal/obj/loong64/obj.go create mode 100644 src/cmd/internal/obj/mips/a.out.go create mode 100644 src/cmd/internal/obj/mips/anames.go create mode 100644 src/cmd/internal/obj/mips/anames0.go create mode 100644 src/cmd/internal/obj/mips/asm0.go create mode 100644 src/cmd/internal/obj/mips/list0.go create mode 100644 src/cmd/internal/obj/mips/obj0.go create mode 100644 src/cmd/internal/obj/objfile.go create mode 100644 src/cmd/internal/obj/objfile_test.go create mode 100644 src/cmd/internal/obj/pass.go create mode 100644 src/cmd/internal/obj/pcln.go create mode 100644 src/cmd/internal/obj/plist.go create mode 100644 src/cmd/internal/obj/ppc64/a.out.go create mode 100644 src/cmd/internal/obj/ppc64/anames.go create mode 100644 src/cmd/internal/obj/ppc64/anames9.go create mode 100644 src/cmd/internal/obj/ppc64/asm9.go create mode 100644 src/cmd/internal/obj/ppc64/asm9_gtables.go create mode 100644 src/cmd/internal/obj/ppc64/asm_test.go create mode 100644 src/cmd/internal/obj/ppc64/doc.go create mode 100644 src/cmd/internal/obj/ppc64/list9.go create mode 100644 src/cmd/internal/obj/ppc64/obj9.go create mode 100644 src/cmd/internal/obj/riscv/anames.go create mode 100644 src/cmd/internal/obj/riscv/asm_test.go create mode 100644 src/cmd/internal/obj/riscv/cpu.go create mode 100644 src/cmd/internal/obj/riscv/inst.go create mode 100644 src/cmd/internal/obj/riscv/list.go create mode 100644 src/cmd/internal/obj/riscv/obj.go create mode 100644 src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.go create mode 100644 src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.s create mode 100644 src/cmd/internal/obj/s390x/a.out.go create mode 100644 src/cmd/internal/obj/s390x/anames.go create mode 100644 src/cmd/internal/obj/s390x/anamesz.go create mode 100644 src/cmd/internal/obj/s390x/asmz.go 
create mode 100644 src/cmd/internal/obj/s390x/condition_code.go create mode 100644 src/cmd/internal/obj/s390x/listz.go create mode 100644 src/cmd/internal/obj/s390x/objz.go create mode 100644 src/cmd/internal/obj/s390x/rotate.go create mode 100644 src/cmd/internal/obj/s390x/rotate_test.go create mode 100644 src/cmd/internal/obj/s390x/vector.go create mode 100644 src/cmd/internal/obj/sizeof_test.go create mode 100644 src/cmd/internal/obj/stringer.go create mode 100644 src/cmd/internal/obj/sym.go create mode 100644 src/cmd/internal/obj/textflag.go create mode 100644 src/cmd/internal/obj/util.go create mode 100644 src/cmd/internal/obj/wasm/a.out.go create mode 100644 src/cmd/internal/obj/wasm/anames.go create mode 100644 src/cmd/internal/obj/wasm/wasmobj.go create mode 100644 src/cmd/internal/obj/x86/a.out.go create mode 100644 src/cmd/internal/obj/x86/aenum.go create mode 100644 src/cmd/internal/obj/x86/anames.go create mode 100644 src/cmd/internal/obj/x86/asm6.go create mode 100644 src/cmd/internal/obj/x86/asm_test.go create mode 100644 src/cmd/internal/obj/x86/avx_optabs.go create mode 100644 src/cmd/internal/obj/x86/evex.go create mode 100644 src/cmd/internal/obj/x86/list6.go create mode 100644 src/cmd/internal/obj/x86/obj6.go create mode 100644 src/cmd/internal/obj/x86/obj6_test.go create mode 100644 src/cmd/internal/obj/x86/pcrelative_test.go create mode 100644 src/cmd/internal/obj/x86/ytab.go (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/internal/obj/abi_string.go b/src/cmd/internal/obj/abi_string.go new file mode 100644 index 0000000..a439da3 --- /dev/null +++ b/src/cmd/internal/obj/abi_string.go @@ -0,0 +1,16 @@ +// Code generated by "stringer -type ABI"; DO NOT EDIT. 
+ +package obj + +import "strconv" + +const _ABI_name = "ABI0ABIInternalABICount" + +var _ABI_index = [...]uint8{0, 4, 15, 23} + +func (i ABI) String() string { + if i >= ABI(len(_ABI_index)-1) { + return "ABI(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _ABI_name[_ABI_index[i]:_ABI_index[i+1]] +} diff --git a/src/cmd/internal/obj/addrtype_string.go b/src/cmd/internal/obj/addrtype_string.go new file mode 100644 index 0000000..e6277d3 --- /dev/null +++ b/src/cmd/internal/obj/addrtype_string.go @@ -0,0 +1,37 @@ +// Code generated by "stringer -type AddrType"; DO NOT EDIT. + +package obj + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[TYPE_NONE-0] + _ = x[TYPE_BRANCH-1] + _ = x[TYPE_TEXTSIZE-2] + _ = x[TYPE_MEM-3] + _ = x[TYPE_CONST-4] + _ = x[TYPE_FCONST-5] + _ = x[TYPE_SCONST-6] + _ = x[TYPE_REG-7] + _ = x[TYPE_ADDR-8] + _ = x[TYPE_SHIFT-9] + _ = x[TYPE_REGREG-10] + _ = x[TYPE_REGREG2-11] + _ = x[TYPE_INDIR-12] + _ = x[TYPE_REGLIST-13] + _ = x[TYPE_SPECIAL-14] +} + +const _AddrType_name = "TYPE_NONETYPE_BRANCHTYPE_TEXTSIZETYPE_MEMTYPE_CONSTTYPE_FCONSTTYPE_SCONSTTYPE_REGTYPE_ADDRTYPE_SHIFTTYPE_REGREGTYPE_REGREG2TYPE_INDIRTYPE_REGLISTTYPE_SPECIAL" + +var _AddrType_index = [...]uint8{0, 9, 20, 33, 41, 51, 62, 73, 81, 90, 100, 111, 123, 133, 145, 157} + +func (i AddrType) String() string { + if i >= AddrType(len(_AddrType_index)-1) { + return "AddrType(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _AddrType_name[_AddrType_index[i]:_AddrType_index[i+1]] +} diff --git a/src/cmd/internal/obj/arm/a.out.go b/src/cmd/internal/obj/arm/a.out.go new file mode 100644 index 0000000..fd695ad --- /dev/null +++ b/src/cmd/internal/obj/arm/a.out.go @@ -0,0 +1,410 @@ +// Inferno utils/5c/5.out.h +// https://bitbucket.org/inferno-os/inferno-os/src/master/utils/5c/5.out.h +// +// Copyright © 1994-1999 Lucent 
Technologies Inc. All rights reserved. +// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +package arm + +import "cmd/internal/obj" + +//go:generate go run ../stringer.go -i $GOFILE -o anames.go -p arm + +const ( + NSNAME = 8 + NSYM = 50 + NREG = 16 +) + +/* -1 disables use of REGARG */ +const ( + REGARG = -1 +) + +const ( + REG_R0 = obj.RBaseARM + iota // must be 16-aligned + REG_R1 + REG_R2 + REG_R3 + REG_R4 + REG_R5 + REG_R6 + REG_R7 + REG_R8 + REG_R9 + REG_R10 + REG_R11 + REG_R12 + REG_R13 + REG_R14 + REG_R15 + + REG_F0 // must be 16-aligned + REG_F1 + REG_F2 + REG_F3 + REG_F4 + REG_F5 + REG_F6 + REG_F7 + REG_F8 + REG_F9 + REG_F10 + REG_F11 + REG_F12 + REG_F13 + REG_F14 + REG_F15 + + REG_FPSR // must be 2-aligned + REG_FPCR + + REG_CPSR // must be 2-aligned + REG_SPSR + + REGRET = REG_R0 + /* compiler allocates R1 up as temps */ + /* compiler allocates register variables R3 up */ + /* compiler allocates external registers R10 down */ + REGEXT = REG_R10 + /* these two registers are declared in runtime.h */ + REGG = REGEXT - 0 + REGM = REGEXT - 1 + + REGCTXT = REG_R7 + REGTMP = REG_R11 + REGSP = REG_R13 + REGLINK = REG_R14 + REGPC = REG_R15 + + NFREG = 16 + /* compiler allocates register variables F0 up */ + /* compiler allocates external registers F7 down */ + FREGRET = REG_F0 + FREGEXT = REG_F7 + FREGTMP = REG_F15 +) + +// http://infocenter.arm.com/help/topic/com.arm.doc.ihi0040b/IHI0040B_aadwarf.pdf +var ARMDWARFRegisters = map[int16]int16{} + +func init() { + // f assigns dwarfregisters[from:to] = (base):(step*(to-from)+base) + f := func(from, to, base, step int16) { + for r := int16(from); r <= to; r++ { + ARMDWARFRegisters[r] = step*(r-from) + base + } + } + f(REG_R0, REG_R15, 0, 1) + f(REG_F0, REG_F15, 64, 2) // Use d0 through D15, aka S0, S2, ..., S30 +} + +// Special registers, after subtracting obj.RBaseARM, bit 9 indicates +// a special register and the low bits select the register. 
+const ( + REG_SPECIAL = obj.RBaseARM + 1<<9 + iota + REG_MB_SY + REG_MB_ST + REG_MB_ISH + REG_MB_ISHST + REG_MB_NSH + REG_MB_NSHST + REG_MB_OSH + REG_MB_OSHST + + MAXREG +) + +const ( + C_NONE = iota + C_REG + C_REGREG + C_REGREG2 + C_REGLIST + C_SHIFT /* register shift R>>x */ + C_SHIFTADDR /* memory address with shifted offset R>>x(R) */ + C_FREG + C_PSR + C_FCR + C_SPR /* REG_MB_SY */ + + C_RCON /* 0xff rotated */ + C_NCON /* ~RCON */ + C_RCON2A /* OR of two disjoint C_RCON constants */ + C_RCON2S /* subtraction of two disjoint C_RCON constants */ + C_SCON /* 0xffff */ + C_LCON + C_LCONADDR + C_ZFCON + C_SFCON + C_LFCON + + C_RACON /* <=0xff rotated constant offset from auto */ + C_LACON /* Large Auto CONstant, i.e. large offset from SP */ + + C_SBRA + C_LBRA + + C_HAUTO /* halfword insn offset (-0xff to 0xff) */ + C_FAUTO /* float insn offset (0 to 0x3fc, word aligned) */ + C_HFAUTO /* both H and F */ + C_SAUTO /* -0xfff to 0xfff */ + C_LAUTO + + C_HOREG + C_FOREG + C_HFOREG + C_SOREG + C_ROREG + C_SROREG /* both nil and R */ + C_LOREG + + C_PC + C_SP + C_HREG + + C_ADDR /* reference to relocatable address */ + + // TLS "var" in local exec mode: will become a constant offset from + // thread local base that is ultimately chosen by the program linker. + C_TLS_LE + + // TLS "var" in initial exec mode: will become a memory address (chosen + // by the program linker) that the dynamic linker will fill with the + // offset from the thread local base. 
+ C_TLS_IE + + C_TEXTSIZE + + C_GOK + + C_NCLASS /* must be the last */ +) + +const ( + AAND = obj.ABaseARM + obj.A_ARCHSPECIFIC + iota + AEOR + ASUB + ARSB + AADD + AADC + ASBC + ARSC + ATST + ATEQ + ACMP + ACMN + AORR + ABIC + + AMVN + + /* + * Do not reorder or fragment the conditional branch + * opcodes, or the predication code will break + */ + ABEQ + ABNE + ABCS + ABHS + ABCC + ABLO + ABMI + ABPL + ABVS + ABVC + ABHI + ABLS + ABGE + ABLT + ABGT + ABLE + + AMOVWD + AMOVWF + AMOVDW + AMOVFW + AMOVFD + AMOVDF + AMOVF + AMOVD + + ACMPF + ACMPD + AADDF + AADDD + ASUBF + ASUBD + AMULF + AMULD + ANMULF + ANMULD + AMULAF + AMULAD + ANMULAF + ANMULAD + AMULSF + AMULSD + ANMULSF + ANMULSD + AFMULAF + AFMULAD + AFNMULAF + AFNMULAD + AFMULSF + AFMULSD + AFNMULSF + AFNMULSD + ADIVF + ADIVD + ASQRTF + ASQRTD + AABSF + AABSD + ANEGF + ANEGD + + ASRL + ASRA + ASLL + AMULU + ADIVU + AMUL + AMMUL + ADIV + AMOD + AMODU + ADIVHW + ADIVUHW + + AMOVB + AMOVBS + AMOVBU + AMOVH + AMOVHS + AMOVHU + AMOVW + AMOVM + ASWPBU + ASWPW + + ARFE + ASWI + AMULA + AMULS + AMMULA + AMMULS + + AWORD + + AMULL + AMULAL + AMULLU + AMULALU + + ABX + ABXRET + ADWORD + + ALDREX + ASTREX + ALDREXD + ASTREXD + + ADMB + + APLD + + ACLZ + AREV + AREV16 + AREVSH + ARBIT + + AXTAB + AXTAH + AXTABU + AXTAHU + + ABFX + ABFXU + ABFC + ABFI + + AMULWT + AMULWB + AMULBB + AMULAWT + AMULAWB + AMULABB + + AMRC // MRC/MCR + + ALAST + + // aliases + AB = obj.AJMP + ABL = obj.ACALL +) + +/* scond byte */ +const ( + C_SCOND = (1 << 4) - 1 + C_SBIT = 1 << 4 + C_PBIT = 1 << 5 + C_WBIT = 1 << 6 + C_FBIT = 1 << 7 /* psr flags-only */ + C_UBIT = 1 << 7 /* up bit, unsigned bit */ + + // These constants are the ARM condition codes encodings, + // XORed with 14 so that C_SCOND_NONE has value 0, + // so that a zeroed Prog.scond means "always execute". 
+ C_SCOND_XOR = 14 + + C_SCOND_EQ = 0 ^ C_SCOND_XOR + C_SCOND_NE = 1 ^ C_SCOND_XOR + C_SCOND_HS = 2 ^ C_SCOND_XOR + C_SCOND_LO = 3 ^ C_SCOND_XOR + C_SCOND_MI = 4 ^ C_SCOND_XOR + C_SCOND_PL = 5 ^ C_SCOND_XOR + C_SCOND_VS = 6 ^ C_SCOND_XOR + C_SCOND_VC = 7 ^ C_SCOND_XOR + C_SCOND_HI = 8 ^ C_SCOND_XOR + C_SCOND_LS = 9 ^ C_SCOND_XOR + C_SCOND_GE = 10 ^ C_SCOND_XOR + C_SCOND_LT = 11 ^ C_SCOND_XOR + C_SCOND_GT = 12 ^ C_SCOND_XOR + C_SCOND_LE = 13 ^ C_SCOND_XOR + C_SCOND_NONE = 14 ^ C_SCOND_XOR + C_SCOND_NV = 15 ^ C_SCOND_XOR + + /* D_SHIFT type */ + SHIFT_LL = 0 << 5 + SHIFT_LR = 1 << 5 + SHIFT_AR = 2 << 5 + SHIFT_RR = 3 << 5 +) diff --git a/src/cmd/internal/obj/arm/anames.go b/src/cmd/internal/obj/arm/anames.go new file mode 100644 index 0000000..f5e92de --- /dev/null +++ b/src/cmd/internal/obj/arm/anames.go @@ -0,0 +1,144 @@ +// Code generated by stringer -i a.out.go -o anames.go -p arm; DO NOT EDIT. + +package arm + +import "cmd/internal/obj" + +var Anames = []string{ + obj.A_ARCHSPECIFIC: "AND", + "EOR", + "SUB", + "RSB", + "ADD", + "ADC", + "SBC", + "RSC", + "TST", + "TEQ", + "CMP", + "CMN", + "ORR", + "BIC", + "MVN", + "BEQ", + "BNE", + "BCS", + "BHS", + "BCC", + "BLO", + "BMI", + "BPL", + "BVS", + "BVC", + "BHI", + "BLS", + "BGE", + "BLT", + "BGT", + "BLE", + "MOVWD", + "MOVWF", + "MOVDW", + "MOVFW", + "MOVFD", + "MOVDF", + "MOVF", + "MOVD", + "CMPF", + "CMPD", + "ADDF", + "ADDD", + "SUBF", + "SUBD", + "MULF", + "MULD", + "NMULF", + "NMULD", + "MULAF", + "MULAD", + "NMULAF", + "NMULAD", + "MULSF", + "MULSD", + "NMULSF", + "NMULSD", + "FMULAF", + "FMULAD", + "FNMULAF", + "FNMULAD", + "FMULSF", + "FMULSD", + "FNMULSF", + "FNMULSD", + "DIVF", + "DIVD", + "SQRTF", + "SQRTD", + "ABSF", + "ABSD", + "NEGF", + "NEGD", + "SRL", + "SRA", + "SLL", + "MULU", + "DIVU", + "MUL", + "MMUL", + "DIV", + "MOD", + "MODU", + "DIVHW", + "DIVUHW", + "MOVB", + "MOVBS", + "MOVBU", + "MOVH", + "MOVHS", + "MOVHU", + "MOVW", + "MOVM", + "SWPBU", + "SWPW", + "RFE", + "SWI", + "MULA", + 
"MULS", + "MMULA", + "MMULS", + "WORD", + "MULL", + "MULAL", + "MULLU", + "MULALU", + "BX", + "BXRET", + "DWORD", + "LDREX", + "STREX", + "LDREXD", + "STREXD", + "DMB", + "PLD", + "CLZ", + "REV", + "REV16", + "REVSH", + "RBIT", + "XTAB", + "XTAH", + "XTABU", + "XTAHU", + "BFX", + "BFXU", + "BFC", + "BFI", + "MULWT", + "MULWB", + "MULBB", + "MULAWT", + "MULAWB", + "MULABB", + "MRC", + "LAST", +} diff --git a/src/cmd/internal/obj/arm/anames5.go b/src/cmd/internal/obj/arm/anames5.go new file mode 100644 index 0000000..78fcd55 --- /dev/null +++ b/src/cmd/internal/obj/arm/anames5.go @@ -0,0 +1,77 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package arm + +var cnames5 = []string{ + "NONE", + "REG", + "REGREG", + "REGREG2", + "REGLIST", + "SHIFT", + "SHIFTADDR", + "FREG", + "PSR", + "FCR", + "SPR", + "RCON", + "NCON", + "RCON2A", + "RCON2S", + "SCON", + "LCON", + "LCONADDR", + "ZFCON", + "SFCON", + "LFCON", + "RACON", + "LACON", + "SBRA", + "LBRA", + "HAUTO", + "FAUTO", + "HFAUTO", + "SAUTO", + "LAUTO", + "HOREG", + "FOREG", + "HFOREG", + "SOREG", + "ROREG", + "SROREG", + "LOREG", + "PC", + "SP", + "HREG", + "ADDR", + "C_TLS_LE", + "C_TLS_IE", + "TEXTSIZE", + "GOK", + "NCLASS", + "SCOND = (1<<4)-1", + "SBIT = 1<<4", + "PBIT = 1<<5", + "WBIT = 1<<6", + "FBIT = 1<<7", + "UBIT = 1<<7", + "SCOND_XOR = 14", + "SCOND_EQ = 0 ^ C_SCOND_XOR", + "SCOND_NE = 1 ^ C_SCOND_XOR", + "SCOND_HS = 2 ^ C_SCOND_XOR", + "SCOND_LO = 3 ^ C_SCOND_XOR", + "SCOND_MI = 4 ^ C_SCOND_XOR", + "SCOND_PL = 5 ^ C_SCOND_XOR", + "SCOND_VS = 6 ^ C_SCOND_XOR", + "SCOND_VC = 7 ^ C_SCOND_XOR", + "SCOND_HI = 8 ^ C_SCOND_XOR", + "SCOND_LS = 9 ^ C_SCOND_XOR", + "SCOND_GE = 10 ^ C_SCOND_XOR", + "SCOND_LT = 11 ^ C_SCOND_XOR", + "SCOND_GT = 12 ^ C_SCOND_XOR", + "SCOND_LE = 13 ^ C_SCOND_XOR", + "SCOND_NONE = 14 ^ C_SCOND_XOR", + "SCOND_NV = 15 ^ C_SCOND_XOR", +} diff --git 
a/src/cmd/internal/obj/arm/asm5.go b/src/cmd/internal/obj/arm/asm5.go new file mode 100644 index 0000000..7b16827 --- /dev/null +++ b/src/cmd/internal/obj/arm/asm5.go @@ -0,0 +1,3094 @@ +// Inferno utils/5l/span.c +// https://bitbucket.org/inferno-os/inferno-os/src/master/utils/5l/span.c +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +package arm + +import ( + "cmd/internal/obj" + "cmd/internal/objabi" + "fmt" + "internal/buildcfg" + "log" + "math" + "sort" +) + +// ctxt5 holds state while assembling a single function. +// Each function gets a fresh ctxt5. +// This allows for multiple functions to be safely concurrently assembled. +type ctxt5 struct { + ctxt *obj.Link + newprog obj.ProgAlloc + cursym *obj.LSym + printp *obj.Prog + blitrl *obj.Prog + elitrl *obj.Prog + autosize int64 + instoffset int64 + pc int64 + pool struct { + start uint32 + size uint32 + extra uint32 + } +} + +type Optab struct { + as obj.As + a1 uint8 + a2 int8 + a3 uint8 + type_ uint8 + size int8 + param int16 + flag int8 + pcrelsiz uint8 + scond uint8 // optional flags accepted by the instruction +} + +type Opcross [32][2][32]uint8 + +const ( + LFROM = 1 << 0 + LTO = 1 << 1 + LPOOL = 1 << 2 + LPCREL = 1 << 3 +) + +var optab = []Optab{ + /* struct Optab: + OPCODE, from, prog->reg, to, type, size, param, flag, extra data size, optional suffix */ + {obj.ATEXT, C_ADDR, C_NONE, C_TEXTSIZE, 0, 0, 0, 0, 0, 0}, + {AADD, C_REG, C_REG, C_REG, 1, 4, 0, 0, 0, C_SBIT}, + {AADD, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0, C_SBIT}, + {AAND, C_REG, C_REG, C_REG, 1, 4, 0, 0, 0, C_SBIT}, + {AAND, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0, C_SBIT}, + {AORR, C_REG, C_REG, C_REG, 1, 4, 0, 0, 0, C_SBIT}, + {AORR, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0, C_SBIT}, + {AMOVW, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0, C_SBIT}, + {AMVN, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0, C_SBIT}, + {ACMP, C_REG, C_REG, C_NONE, 1, 4, 0, 0, 0, 0}, + {AADD, C_RCON, C_REG, C_REG, 2, 4, 0, 0, 0, C_SBIT}, + {AADD, C_RCON, C_NONE, C_REG, 2, 4, 0, 0, 0, C_SBIT}, + {AAND, C_RCON, C_REG, C_REG, 2, 4, 0, 0, 0, C_SBIT}, + {AAND, C_RCON, C_NONE, C_REG, 2, 4, 0, 0, 0, C_SBIT}, + {AORR, C_RCON, C_REG, C_REG, 2, 4, 0, 0, 0, C_SBIT}, + {AORR, C_RCON, C_NONE, C_REG, 2, 4, 0, 0, 0, C_SBIT}, + {AMOVW, C_RCON, C_NONE, C_REG, 2, 4, 0, 0, 0, 0}, + {AMVN, C_RCON, C_NONE, C_REG, 2, 4, 0, 0, 0, 0}, + {ACMP, 
C_RCON, C_REG, C_NONE, 2, 4, 0, 0, 0, 0}, + {AADD, C_SHIFT, C_REG, C_REG, 3, 4, 0, 0, 0, C_SBIT}, + {AADD, C_SHIFT, C_NONE, C_REG, 3, 4, 0, 0, 0, C_SBIT}, + {AAND, C_SHIFT, C_REG, C_REG, 3, 4, 0, 0, 0, C_SBIT}, + {AAND, C_SHIFT, C_NONE, C_REG, 3, 4, 0, 0, 0, C_SBIT}, + {AORR, C_SHIFT, C_REG, C_REG, 3, 4, 0, 0, 0, C_SBIT}, + {AORR, C_SHIFT, C_NONE, C_REG, 3, 4, 0, 0, 0, C_SBIT}, + {AMVN, C_SHIFT, C_NONE, C_REG, 3, 4, 0, 0, 0, C_SBIT}, + {ACMP, C_SHIFT, C_REG, C_NONE, 3, 4, 0, 0, 0, 0}, + {AMOVW, C_RACON, C_NONE, C_REG, 4, 4, REGSP, 0, 0, C_SBIT}, + {AB, C_NONE, C_NONE, C_SBRA, 5, 4, 0, LPOOL, 0, 0}, + {ABL, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0, 0}, + {ABX, C_NONE, C_NONE, C_SBRA, 74, 20, 0, 0, 0, 0}, + {ABEQ, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0, 0}, + {ABEQ, C_RCON, C_NONE, C_SBRA, 5, 4, 0, 0, 0, 0}, // prediction hinted form, hint ignored + {AB, C_NONE, C_NONE, C_ROREG, 6, 4, 0, LPOOL, 0, 0}, + {ABL, C_NONE, C_NONE, C_ROREG, 7, 4, 0, 0, 0, 0}, + {ABL, C_REG, C_NONE, C_ROREG, 7, 4, 0, 0, 0, 0}, + {ABX, C_NONE, C_NONE, C_ROREG, 75, 12, 0, 0, 0, 0}, + {ABXRET, C_NONE, C_NONE, C_ROREG, 76, 4, 0, 0, 0, 0}, + {ASLL, C_RCON, C_REG, C_REG, 8, 4, 0, 0, 0, C_SBIT}, + {ASLL, C_RCON, C_NONE, C_REG, 8, 4, 0, 0, 0, C_SBIT}, + {ASLL, C_REG, C_NONE, C_REG, 9, 4, 0, 0, 0, C_SBIT}, + {ASLL, C_REG, C_REG, C_REG, 9, 4, 0, 0, 0, C_SBIT}, + {ASWI, C_NONE, C_NONE, C_NONE, 10, 4, 0, 0, 0, 0}, + {ASWI, C_NONE, C_NONE, C_LCON, 10, 4, 0, 0, 0, 0}, + {AWORD, C_NONE, C_NONE, C_LCON, 11, 4, 0, 0, 0, 0}, + {AWORD, C_NONE, C_NONE, C_LCONADDR, 11, 4, 0, 0, 0, 0}, + {AWORD, C_NONE, C_NONE, C_ADDR, 11, 4, 0, 0, 0, 0}, + {AWORD, C_NONE, C_NONE, C_TLS_LE, 103, 4, 0, 0, 0, 0}, + {AWORD, C_NONE, C_NONE, C_TLS_IE, 104, 4, 0, 0, 0, 0}, + {AMOVW, C_NCON, C_NONE, C_REG, 12, 4, 0, 0, 0, 0}, + {AMOVW, C_SCON, C_NONE, C_REG, 12, 4, 0, 0, 0, 0}, + {AMOVW, C_LCON, C_NONE, C_REG, 12, 4, 0, LFROM, 0, 0}, + {AMOVW, C_LCONADDR, C_NONE, C_REG, 12, 4, 0, LFROM | LPCREL, 4, 0}, + {AMVN, C_NCON, C_NONE, C_REG, 12, 4, 
0, 0, 0, 0}, + {AADD, C_NCON, C_REG, C_REG, 13, 8, 0, 0, 0, C_SBIT}, + {AADD, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0, C_SBIT}, + {AAND, C_NCON, C_REG, C_REG, 13, 8, 0, 0, 0, C_SBIT}, + {AAND, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0, C_SBIT}, + {AORR, C_NCON, C_REG, C_REG, 13, 8, 0, 0, 0, C_SBIT}, + {AORR, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0, C_SBIT}, + {ACMP, C_NCON, C_REG, C_NONE, 13, 8, 0, 0, 0, 0}, + {AADD, C_SCON, C_REG, C_REG, 13, 8, 0, 0, 0, C_SBIT}, + {AADD, C_SCON, C_NONE, C_REG, 13, 8, 0, 0, 0, C_SBIT}, + {AAND, C_SCON, C_REG, C_REG, 13, 8, 0, 0, 0, C_SBIT}, + {AAND, C_SCON, C_NONE, C_REG, 13, 8, 0, 0, 0, C_SBIT}, + {AORR, C_SCON, C_REG, C_REG, 13, 8, 0, 0, 0, C_SBIT}, + {AORR, C_SCON, C_NONE, C_REG, 13, 8, 0, 0, 0, C_SBIT}, + {AMVN, C_SCON, C_NONE, C_REG, 13, 8, 0, 0, 0, 0}, + {ACMP, C_SCON, C_REG, C_NONE, 13, 8, 0, 0, 0, 0}, + {AADD, C_RCON2A, C_REG, C_REG, 106, 8, 0, 0, 0, 0}, + {AADD, C_RCON2A, C_NONE, C_REG, 106, 8, 0, 0, 0, 0}, + {AORR, C_RCON2A, C_REG, C_REG, 106, 8, 0, 0, 0, 0}, + {AORR, C_RCON2A, C_NONE, C_REG, 106, 8, 0, 0, 0, 0}, + {AADD, C_RCON2S, C_REG, C_REG, 107, 8, 0, 0, 0, 0}, + {AADD, C_RCON2S, C_NONE, C_REG, 107, 8, 0, 0, 0, 0}, + {AADD, C_LCON, C_REG, C_REG, 13, 8, 0, LFROM, 0, C_SBIT}, + {AADD, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM, 0, C_SBIT}, + {AAND, C_LCON, C_REG, C_REG, 13, 8, 0, LFROM, 0, C_SBIT}, + {AAND, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM, 0, C_SBIT}, + {AORR, C_LCON, C_REG, C_REG, 13, 8, 0, LFROM, 0, C_SBIT}, + {AORR, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM, 0, C_SBIT}, + {AMVN, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM, 0, 0}, + {ACMP, C_LCON, C_REG, C_NONE, 13, 8, 0, LFROM, 0, 0}, + {AMOVB, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0, 0}, + {AMOVBS, C_REG, C_NONE, C_REG, 14, 8, 0, 0, 0, 0}, + {AMOVBU, C_REG, C_NONE, C_REG, 58, 4, 0, 0, 0, 0}, + {AMOVH, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0, 0}, + {AMOVHS, C_REG, C_NONE, C_REG, 14, 8, 0, 0, 0, 0}, + {AMOVHU, C_REG, C_NONE, C_REG, 14, 8, 0, 0, 0, 0}, + {AMUL, C_REG, C_REG, C_REG, 15, 4, 
0, 0, 0, C_SBIT}, + {AMUL, C_REG, C_NONE, C_REG, 15, 4, 0, 0, 0, C_SBIT}, + {ADIV, C_REG, C_REG, C_REG, 16, 4, 0, 0, 0, 0}, + {ADIV, C_REG, C_NONE, C_REG, 16, 4, 0, 0, 0, 0}, + {ADIVHW, C_REG, C_REG, C_REG, 105, 4, 0, 0, 0, 0}, + {ADIVHW, C_REG, C_NONE, C_REG, 105, 4, 0, 0, 0, 0}, + {AMULL, C_REG, C_REG, C_REGREG, 17, 4, 0, 0, 0, C_SBIT}, + {ABFX, C_LCON, C_REG, C_REG, 18, 4, 0, 0, 0, 0}, // width in From, LSB in From3 + {ABFX, C_LCON, C_NONE, C_REG, 18, 4, 0, 0, 0, 0}, // width in From, LSB in From3 + {AMOVW, C_REG, C_NONE, C_SAUTO, 20, 4, REGSP, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVW, C_REG, C_NONE, C_SOREG, 20, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVB, C_REG, C_NONE, C_SAUTO, 20, 4, REGSP, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVB, C_REG, C_NONE, C_SOREG, 20, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBS, C_REG, C_NONE, C_SAUTO, 20, 4, REGSP, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBS, C_REG, C_NONE, C_SOREG, 20, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBU, C_REG, C_NONE, C_SAUTO, 20, 4, REGSP, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBU, C_REG, C_NONE, C_SOREG, 20, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVW, C_SAUTO, C_NONE, C_REG, 21, 4, REGSP, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVW, C_SOREG, C_NONE, C_REG, 21, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBU, C_SAUTO, C_NONE, C_REG, 21, 4, REGSP, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBU, C_SOREG, C_NONE, C_REG, 21, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AXTAB, C_SHIFT, C_REG, C_REG, 22, 4, 0, 0, 0, 0}, + {AXTAB, C_SHIFT, C_NONE, C_REG, 22, 4, 0, 0, 0, 0}, + {AMOVW, C_SHIFT, C_NONE, C_REG, 23, 4, 0, 0, 0, C_SBIT}, + {AMOVB, C_SHIFT, C_NONE, C_REG, 23, 4, 0, 0, 0, 0}, + {AMOVBS, C_SHIFT, C_NONE, C_REG, 23, 4, 0, 0, 0, 0}, + {AMOVBU, C_SHIFT, C_NONE, C_REG, 23, 4, 0, 0, 0, 0}, + {AMOVH, C_SHIFT, C_NONE, C_REG, 23, 4, 0, 0, 0, 0}, + {AMOVHS, C_SHIFT, C_NONE, C_REG, 23, 4, 0, 0, 0, 0}, + {AMOVHU, C_SHIFT, C_NONE, C_REG, 23, 4, 0, 0, 0, 0}, + {AMOVW, C_REG, C_NONE, C_LAUTO, 30, 8, 
REGSP, LTO, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVW, C_REG, C_NONE, C_LOREG, 30, 8, 0, LTO, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVW, C_REG, C_NONE, C_ADDR, 64, 8, 0, LTO | LPCREL, 4, C_PBIT | C_WBIT | C_UBIT}, + {AMOVB, C_REG, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVB, C_REG, C_NONE, C_LOREG, 30, 8, 0, LTO, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVB, C_REG, C_NONE, C_ADDR, 64, 8, 0, LTO | LPCREL, 4, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBS, C_REG, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBS, C_REG, C_NONE, C_LOREG, 30, 8, 0, LTO, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBS, C_REG, C_NONE, C_ADDR, 64, 8, 0, LTO | LPCREL, 4, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBU, C_REG, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBU, C_REG, C_NONE, C_LOREG, 30, 8, 0, LTO, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBU, C_REG, C_NONE, C_ADDR, 64, 8, 0, LTO | LPCREL, 4, C_PBIT | C_WBIT | C_UBIT}, + {AMOVW, C_TLS_LE, C_NONE, C_REG, 101, 4, 0, LFROM, 0, 0}, + {AMOVW, C_TLS_IE, C_NONE, C_REG, 102, 8, 0, LFROM, 0, 0}, + {AMOVW, C_LAUTO, C_NONE, C_REG, 31, 8, REGSP, LFROM, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVW, C_LOREG, C_NONE, C_REG, 31, 8, 0, LFROM, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVW, C_ADDR, C_NONE, C_REG, 65, 8, 0, LFROM | LPCREL, 4, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBU, C_LAUTO, C_NONE, C_REG, 31, 8, REGSP, LFROM, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBU, C_LOREG, C_NONE, C_REG, 31, 8, 0, LFROM, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBU, C_ADDR, C_NONE, C_REG, 65, 8, 0, LFROM | LPCREL, 4, C_PBIT | C_WBIT | C_UBIT}, + {AMOVW, C_LACON, C_NONE, C_REG, 34, 8, REGSP, LFROM, 0, C_SBIT}, + {AMOVW, C_PSR, C_NONE, C_REG, 35, 4, 0, 0, 0, 0}, + {AMOVW, C_REG, C_NONE, C_PSR, 36, 4, 0, 0, 0, 0}, + {AMOVW, C_RCON, C_NONE, C_PSR, 37, 4, 0, 0, 0, 0}, + {AMOVM, C_REGLIST, C_NONE, C_SOREG, 38, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVM, C_SOREG, C_NONE, C_REGLIST, 39, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {ASWPW, C_SOREG, 
C_REG, C_REG, 40, 4, 0, 0, 0, 0}, + {ARFE, C_NONE, C_NONE, C_NONE, 41, 4, 0, 0, 0, 0}, + {AMOVF, C_FREG, C_NONE, C_FAUTO, 50, 4, REGSP, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVF, C_FREG, C_NONE, C_FOREG, 50, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVF, C_FAUTO, C_NONE, C_FREG, 51, 4, REGSP, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVF, C_FOREG, C_NONE, C_FREG, 51, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVF, C_FREG, C_NONE, C_LAUTO, 52, 12, REGSP, LTO, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVF, C_FREG, C_NONE, C_LOREG, 52, 12, 0, LTO, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVF, C_LAUTO, C_NONE, C_FREG, 53, 12, REGSP, LFROM, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVF, C_LOREG, C_NONE, C_FREG, 53, 12, 0, LFROM, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVF, C_FREG, C_NONE, C_ADDR, 68, 8, 0, LTO | LPCREL, 4, C_PBIT | C_WBIT | C_UBIT}, + {AMOVF, C_ADDR, C_NONE, C_FREG, 69, 8, 0, LFROM | LPCREL, 4, C_PBIT | C_WBIT | C_UBIT}, + {AADDF, C_FREG, C_NONE, C_FREG, 54, 4, 0, 0, 0, 0}, + {AADDF, C_FREG, C_FREG, C_FREG, 54, 4, 0, 0, 0, 0}, + {AMOVF, C_FREG, C_NONE, C_FREG, 55, 4, 0, 0, 0, 0}, + {ANEGF, C_FREG, C_NONE, C_FREG, 55, 4, 0, 0, 0, 0}, + {AMOVW, C_REG, C_NONE, C_FCR, 56, 4, 0, 0, 0, 0}, + {AMOVW, C_FCR, C_NONE, C_REG, 57, 4, 0, 0, 0, 0}, + {AMOVW, C_SHIFTADDR, C_NONE, C_REG, 59, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBU, C_SHIFTADDR, C_NONE, C_REG, 59, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVB, C_SHIFTADDR, C_NONE, C_REG, 60, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBS, C_SHIFTADDR, C_NONE, C_REG, 60, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVH, C_SHIFTADDR, C_NONE, C_REG, 60, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHS, C_SHIFTADDR, C_NONE, C_REG, 60, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHU, C_SHIFTADDR, C_NONE, C_REG, 60, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVW, C_REG, C_NONE, C_SHIFTADDR, 61, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVB, C_REG, C_NONE, C_SHIFTADDR, 61, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBS, C_REG, C_NONE, 
C_SHIFTADDR, 61, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBU, C_REG, C_NONE, C_SHIFTADDR, 61, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVH, C_REG, C_NONE, C_SHIFTADDR, 62, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHS, C_REG, C_NONE, C_SHIFTADDR, 62, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHU, C_REG, C_NONE, C_SHIFTADDR, 62, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVH, C_REG, C_NONE, C_HAUTO, 70, 4, REGSP, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVH, C_REG, C_NONE, C_HOREG, 70, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHS, C_REG, C_NONE, C_HAUTO, 70, 4, REGSP, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHS, C_REG, C_NONE, C_HOREG, 70, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHU, C_REG, C_NONE, C_HAUTO, 70, 4, REGSP, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHU, C_REG, C_NONE, C_HOREG, 70, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVB, C_HAUTO, C_NONE, C_REG, 71, 4, REGSP, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVB, C_HOREG, C_NONE, C_REG, 71, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBS, C_HAUTO, C_NONE, C_REG, 71, 4, REGSP, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBS, C_HOREG, C_NONE, C_REG, 71, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVH, C_HAUTO, C_NONE, C_REG, 71, 4, REGSP, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVH, C_HOREG, C_NONE, C_REG, 71, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHS, C_HAUTO, C_NONE, C_REG, 71, 4, REGSP, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHS, C_HOREG, C_NONE, C_REG, 71, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHU, C_HAUTO, C_NONE, C_REG, 71, 4, REGSP, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHU, C_HOREG, C_NONE, C_REG, 71, 4, 0, 0, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVH, C_REG, C_NONE, C_LAUTO, 72, 8, REGSP, LTO, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVH, C_REG, C_NONE, C_LOREG, 72, 8, 0, LTO, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVH, C_REG, C_NONE, C_ADDR, 94, 8, 0, LTO | LPCREL, 4, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHS, C_REG, C_NONE, C_LAUTO, 72, 8, REGSP, LTO, 0, C_PBIT | C_WBIT | C_UBIT}, + 
{AMOVHS, C_REG, C_NONE, C_LOREG, 72, 8, 0, LTO, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHS, C_REG, C_NONE, C_ADDR, 94, 8, 0, LTO | LPCREL, 4, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHU, C_REG, C_NONE, C_LAUTO, 72, 8, REGSP, LTO, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHU, C_REG, C_NONE, C_LOREG, 72, 8, 0, LTO, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHU, C_REG, C_NONE, C_ADDR, 94, 8, 0, LTO | LPCREL, 4, C_PBIT | C_WBIT | C_UBIT}, + {AMOVB, C_LAUTO, C_NONE, C_REG, 73, 8, REGSP, LFROM, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVB, C_LOREG, C_NONE, C_REG, 73, 8, 0, LFROM, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVB, C_ADDR, C_NONE, C_REG, 93, 8, 0, LFROM | LPCREL, 4, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBS, C_LAUTO, C_NONE, C_REG, 73, 8, REGSP, LFROM, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBS, C_LOREG, C_NONE, C_REG, 73, 8, 0, LFROM, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVBS, C_ADDR, C_NONE, C_REG, 93, 8, 0, LFROM | LPCREL, 4, C_PBIT | C_WBIT | C_UBIT}, + {AMOVH, C_LAUTO, C_NONE, C_REG, 73, 8, REGSP, LFROM, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVH, C_LOREG, C_NONE, C_REG, 73, 8, 0, LFROM, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVH, C_ADDR, C_NONE, C_REG, 93, 8, 0, LFROM | LPCREL, 4, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHS, C_LAUTO, C_NONE, C_REG, 73, 8, REGSP, LFROM, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHS, C_LOREG, C_NONE, C_REG, 73, 8, 0, LFROM, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHS, C_ADDR, C_NONE, C_REG, 93, 8, 0, LFROM | LPCREL, 4, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHU, C_LAUTO, C_NONE, C_REG, 73, 8, REGSP, LFROM, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHU, C_LOREG, C_NONE, C_REG, 73, 8, 0, LFROM, 0, C_PBIT | C_WBIT | C_UBIT}, + {AMOVHU, C_ADDR, C_NONE, C_REG, 93, 8, 0, LFROM | LPCREL, 4, C_PBIT | C_WBIT | C_UBIT}, + {ALDREX, C_SOREG, C_NONE, C_REG, 77, 4, 0, 0, 0, 0}, + {ASTREX, C_SOREG, C_REG, C_REG, 78, 4, 0, 0, 0, 0}, + {ADMB, C_NONE, C_NONE, C_NONE, 110, 4, 0, 0, 0, 0}, + {ADMB, C_LCON, C_NONE, C_NONE, 110, 4, 0, 0, 0, 0}, + {ADMB, C_SPR, C_NONE, C_NONE, 110, 4, 0, 0, 0, 0}, + {AMOVF, C_ZFCON, C_NONE, 
C_FREG, 80, 8, 0, 0, 0, 0}, + {AMOVF, C_SFCON, C_NONE, C_FREG, 81, 4, 0, 0, 0, 0}, + {ACMPF, C_FREG, C_FREG, C_NONE, 82, 8, 0, 0, 0, 0}, + {ACMPF, C_FREG, C_NONE, C_NONE, 83, 8, 0, 0, 0, 0}, + {AMOVFW, C_FREG, C_NONE, C_FREG, 84, 4, 0, 0, 0, C_UBIT}, + {AMOVWF, C_FREG, C_NONE, C_FREG, 85, 4, 0, 0, 0, C_UBIT}, + {AMOVFW, C_FREG, C_NONE, C_REG, 86, 8, 0, 0, 0, C_UBIT}, + {AMOVWF, C_REG, C_NONE, C_FREG, 87, 8, 0, 0, 0, C_UBIT}, + {AMOVW, C_REG, C_NONE, C_FREG, 88, 4, 0, 0, 0, 0}, + {AMOVW, C_FREG, C_NONE, C_REG, 89, 4, 0, 0, 0, 0}, + {ALDREXD, C_SOREG, C_NONE, C_REG, 91, 4, 0, 0, 0, 0}, + {ASTREXD, C_SOREG, C_REG, C_REG, 92, 4, 0, 0, 0, 0}, + {APLD, C_SOREG, C_NONE, C_NONE, 95, 4, 0, 0, 0, 0}, + {obj.AUNDEF, C_NONE, C_NONE, C_NONE, 96, 4, 0, 0, 0, 0}, + {ACLZ, C_REG, C_NONE, C_REG, 97, 4, 0, 0, 0, 0}, + {AMULWT, C_REG, C_REG, C_REG, 98, 4, 0, 0, 0, 0}, + {AMULA, C_REG, C_REG, C_REGREG2, 99, 4, 0, 0, 0, C_SBIT}, + {AMULAWT, C_REG, C_REG, C_REGREG2, 99, 4, 0, 0, 0, 0}, + {obj.APCDATA, C_LCON, C_NONE, C_LCON, 0, 0, 0, 0, 0, 0}, + {obj.AFUNCDATA, C_LCON, C_NONE, C_ADDR, 0, 0, 0, 0, 0, 0}, + {obj.ANOP, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0, 0}, + {obj.ANOP, C_LCON, C_NONE, C_NONE, 0, 0, 0, 0, 0, 0}, // nop variants, see #40689 + {obj.ANOP, C_REG, C_NONE, C_NONE, 0, 0, 0, 0, 0, 0}, + {obj.ANOP, C_FREG, C_NONE, C_NONE, 0, 0, 0, 0, 0, 0}, + {obj.ADUFFZERO, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0, 0}, // same as ABL + {obj.ADUFFCOPY, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0, 0}, // same as ABL + {obj.AXXX, C_NONE, C_NONE, C_NONE, 0, 4, 0, 0, 0, 0}, +} + +var mbOp = []struct { + reg int16 + enc uint32 +}{ + {REG_MB_SY, 15}, + {REG_MB_ST, 14}, + {REG_MB_ISH, 11}, + {REG_MB_ISHST, 10}, + {REG_MB_NSH, 7}, + {REG_MB_NSHST, 6}, + {REG_MB_OSH, 3}, + {REG_MB_OSHST, 2}, +} + +var oprange [ALAST & obj.AMask][]Optab + +var xcmp [C_GOK + 1][C_GOK + 1]bool + +var ( + symdiv *obj.LSym + symdivu *obj.LSym + symmod *obj.LSym + symmodu *obj.LSym +) + +// Note about encoding: Prog.scond holds the 
// Note about encoding: Prog.scond holds the condition encoding,
// but XOR'ed with C_SCOND_XOR, so that C_SCOND_NONE == 0.
// The code that shifts the value << 28 has the responsibility
// for XORing with C_SCOND_XOR too.

// checkSuffix reports a diagnostic for every suffix bit (.S, .P, .W, .U)
// that is set in p.Scond but not permitted by the scond field of the
// matched optab entry o.
func checkSuffix(c *ctxt5, p *obj.Prog, o *Optab) {
	if p.Scond&C_SBIT != 0 && o.scond&C_SBIT == 0 {
		c.ctxt.Diag("invalid .S suffix: %v", p)
	}
	if p.Scond&C_PBIT != 0 && o.scond&C_PBIT == 0 {
		c.ctxt.Diag("invalid .P suffix: %v", p)
	}
	if p.Scond&C_WBIT != 0 && o.scond&C_WBIT == 0 {
		c.ctxt.Diag("invalid .W suffix: %v", p)
	}
	if p.Scond&C_UBIT != 0 && o.scond&C_UBIT == 0 {
		c.ctxt.Diag("invalid .U suffix: %v", p)
	}
}

// span5 lays out and encodes the instructions of cursym.
//
// It works in three stages:
//
//  1. Walk the Prog list, assign each instruction a PC from the size in
//     its optab entry, collect literal-pool entries (addpool) and splice
//     the pool into the list when checkpool/flushpool decide it is time.
//  2. Re-walk the list from PC 0, raising any p.Pc that is behind the
//     running PC, and record the final size in cursym.Size.
//  3. Grow cursym.P to that size and emit every instruction through
//     asmout, least-significant byte first, filling any gap between the
//     running PC and p.Pc with 0xe1a00000 words.
//
// Problems are reported through ctxt.Diag; the function returns early only
// for symbols that have no instructions after the TEXT Prog.
func span5(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
	if ctxt.Retpoline {
		ctxt.Diag("-spectre=ret not supported on arm")
		ctxt.Retpoline = false // don't keep printing
	}

	var p *obj.Prog
	var op *obj.Prog

	p = cursym.Func().Text
	if p == nil || p.Link == nil { // handle external functions and ELF section symbols
		return
	}

	// buildop fills oprange; an empty AAND slot means it was never run.
	if oprange[AAND&obj.AMask] == nil {
		ctxt.Diag("arm ops not initialized, call arm.buildop first")
	}

	// p is still the TEXT Prog here, so p.To.Offset is the declared frame size.
	c := ctxt5{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: p.To.Offset + 4}
	pc := int32(0)

	// Stage 1: op trails p by one instruction so a pool can be spliced in
	// after op, i.e. just before p.
	op = p
	p = p.Link
	var m int
	var o *Optab
	// The loop keeps running after the last instruction while literals are
	// still pending (c.blitrl != nil) so that the final pool gets emitted.
	for ; p != nil || c.blitrl != nil; op, p = p, p.Link {
		if p == nil {
			if c.checkpool(op, pc) {
				// The pool now follows op; restart from op so the post
				// statement steps onto the first pool entry.
				p = op
				continue
			}

			// can't happen: blitrl is not nil, but checkpool didn't flushpool
			ctxt.Diag("internal inconsistency")

			break
		}

		p.Pc = int64(pc)
		o = c.oplook(p)
		m = int(o.size)

		if m%4 != 0 || p.Pc%4 != 0 {
			ctxt.Diag("!pc invalid: %v size=%d", p, m)
		}

		// must check literal pool here in case p generates many instructions
		if c.blitrl != nil {
			// Emit the constant pool just before p if p
			// would push us over the immediate size limit.
			if c.checkpool(op, pc+int32(m)) {
				// Back up to the instruction just
				// before the pool and continue with
				// the first instruction of the pool.
				p = op
				continue
			}
		}

		if m == 0 && (p.As != obj.AFUNCDATA && p.As != obj.APCDATA && p.As != obj.ANOP) {
			ctxt.Diag("zero-width instruction\n%v", p)
			continue
		}

		// The optab flag says which operand, if any, needs a literal-pool
		// word, or whether this instruction is a point where the pool may
		// be flushed.
		switch o.flag & (LFROM | LTO | LPOOL) {
		case LFROM:
			c.addpool(p, &p.From)

		case LTO:
			c.addpool(p, &p.To)

		case LPOOL:
			if p.Scond&C_SCOND == C_SCOND_NONE {
				c.flushpool(p, 0, 0)
			}
		}

		// An unconditional MOVW into the PC register is also treated as a
		// flush point.
		if p.As == AMOVW && p.To.Type == obj.TYPE_REG && p.To.Reg == REGPC && p.Scond&C_SCOND == C_SCOND_NONE {
			c.flushpool(p, 0, 0)
		}

		pc += int32(m)
	}

	c.cursym.Size = int64(pc)

	/*
	 * if any procedure is large enough to
	 * generate a large SBRA branch, then
	 * generate extra passes putting branches
	 * around jmps to fix. this is rare.
	 */
	times := 0

	// Stage 2.
	var bflag int
	var opc int32
	var out [6 + 3]uint32 // scratch buffer handed to asmout in stage 3
	for {
		bflag = 0
		pc = 0
		times++
		c.cursym.Func().Text.Pc = 0 // force re-layout the code.
		for p = c.cursym.Func().Text; p != nil; p = p.Link {
			o = c.oplook(p)
			// PCs are only ever moved forward here.
			if int64(pc) > p.Pc {
				p.Pc = int64(pc)
			}

			/* very large branches
			if(o->type == 6 && p->pcond) {
				otxt = p->pcond->pc - c;
				if(otxt < 0)
					otxt = -otxt;
				if(otxt >= (1L<<17) - 10) {
					q = emallocz(sizeof(Prog));
					q->link = p->link;
					p->link = q;
					q->as = AB;
					q->to.type = TYPE_BRANCH;
					q->pcond = p->pcond;
					p->pcond = q;
					q = emallocz(sizeof(Prog));
					q->link = p->link;
					p->link = q;
					q->as = AB;
					q->to.type = TYPE_BRANCH;
					q->pcond = q->link->link;
					bflag = 1;
				}
			}
			*/
			opc = int32(p.Pc)
			m = int(o.size)
			// NOTE(review): opc was assigned from p.Pc two statements above
			// and nothing modifies p.Pc in between, so as written this test
			// cannot be true unless the int32 conversion truncates; bflag
			// therefore stays 0 and the outer loop runs once. Presumably a
			// leftover of the disabled branch-expansion code above — confirm
			// before relying on the re-layout loop.
			if p.Pc != int64(opc) {
				bflag = 1
			}

			//print("%v pc changed %d to %d in iter. %d\n", p, opc, (int32)p->pc, times);
			pc = int32(p.Pc + int64(m))

			if m%4 != 0 || p.Pc%4 != 0 {
				ctxt.Diag("pc invalid: %v size=%d", p, m)
			}

			// Only diagnosed here; stage 3 still calls asmout with out.
			if m/4 > len(out) {
				ctxt.Diag("instruction size too large: %d > %d", m/4, len(out))
			}
			if m == 0 && (p.As != obj.AFUNCDATA && p.As != obj.APCDATA && p.As != obj.ANOP) {
				if p.As == obj.ATEXT {
					c.autosize = p.To.Offset + 4
					continue
				}

				ctxt.Diag("zero-width instruction\n%v", p)
				continue
			}
		}

		c.cursym.Size = int64(pc)
		if bflag == 0 {
			break
		}
	}

	if pc%4 != 0 {
		ctxt.Diag("sym->size=%d, invalid", pc)
	}

	/*
	 * lay out the code.  all the pc-relative code references,
	 * even cross-function, are resolved now;
	 * only data references need to be relocated.
	 * with more work we could leave cross-function
	 * code references to be relocated too, and then
	 * perhaps we'd be able to parallelize the span loop above.
	 */

	// Stage 3.
	p = c.cursym.Func().Text
	c.autosize = p.To.Offset + 4
	c.cursym.Grow(c.cursym.Size)

	bp := c.cursym.P // write cursor into the symbol's data
	pc = int32(p.Pc) // even p->link might need extra padding
	var v int
	for p = p.Link; p != nil; p = p.Link {
		c.pc = p.Pc
		o = c.oplook(p)
		opc = int32(p.Pc)
		c.asmout(p, o, out[:])
		m = int(o.size)

		if m%4 != 0 || p.Pc%4 != 0 {
			ctxt.Diag("final stage: pc invalid: %v size=%d", p, m)
		}

		if int64(pc) > p.Pc {
			ctxt.Diag("PC padding invalid: want %#d, has %#d: %v", p.Pc, pc, p)
		}
		// Fill the gap up to p.Pc one word at a time, low byte first.
		for int64(pc) != p.Pc {
			// emit 0xe1a00000 (MOVW R0, R0)
			bp[0] = 0x00
			bp = bp[1:]

			bp[0] = 0x00
			bp = bp[1:]
			bp[0] = 0xa0
			bp = bp[1:]
			bp[0] = 0xe1
			bp = bp[1:]
			pc += 4
		}

		// Copy the m/4 words produced by asmout, least-significant byte first.
		for i := 0; i < m/4; i++ {
			v = int(out[i])
			bp[0] = byte(v)
			bp = bp[1:]
			bp[0] = byte(v >> 8)
			bp = bp[1:]
			bp[0] = byte(v >> 16)
			bp = bp[1:]
			bp[0] = byte(v >> 24)
			bp = bp[1:]
		}

		pc += int32(m)
	}
}
+// +// nextpc is the tentative next PC at which the pool could be emitted. +// checkpool should be called *before* emitting the instruction that +// would cause the PC to reach nextpc. +// If nextpc is too far from the first pool reference, checkpool will +// flush the pool immediately after p. +// The caller should resume processing a p.Link. +func (c *ctxt5) checkpool(p *obj.Prog, nextpc int32) bool { + poolLast := nextpc + poolLast += 4 // the AB instruction to jump around the pool + poolLast += int32(c.pool.size) - 4 // the offset of the last pool entry + + refPC := int32(c.pool.start) // PC of the first pool reference + + v := poolLast - refPC - 8 // 12-bit PC-relative offset (see omvl) + + if c.pool.size >= 0xff0 || immaddr(v) == 0 { + return c.flushpool(p, 1, 0) + } else if p.Link == nil { + return c.flushpool(p, 2, 0) + } + return false +} + +func (c *ctxt5) flushpool(p *obj.Prog, skip int, force int) bool { + if c.blitrl != nil { + if skip != 0 { + if false && skip == 1 { + fmt.Printf("note: flush literal pool at %x: len=%d ref=%x\n", uint64(p.Pc+4), c.pool.size, c.pool.start) + } + q := c.newprog() + q.As = AB + q.To.Type = obj.TYPE_BRANCH + q.To.SetTarget(p.Link) + q.Link = c.blitrl + q.Pos = p.Pos + c.blitrl = q + } else if force == 0 && (p.Pc+int64(c.pool.size)-int64(c.pool.start) < 2048) { + return false + } + + // The line number for constant pool entries doesn't really matter. + // We set it to the line number of the preceding instruction so that + // there are no deltas to encode in the pc-line tables. 
+ for q := c.blitrl; q != nil; q = q.Link { + q.Pos = p.Pos + } + + c.elitrl.Link = p.Link + p.Link = c.blitrl + + c.blitrl = nil /* BUG: should refer back to values until out-of-range */ + c.elitrl = nil + c.pool.size = 0 + c.pool.start = 0 + c.pool.extra = 0 + return true + } + + return false +} + +func (c *ctxt5) addpool(p *obj.Prog, a *obj.Addr) { + t := c.newprog() + t.As = AWORD + + switch c.aclass(a) { + default: + t.To.Offset = a.Offset + t.To.Sym = a.Sym + t.To.Type = a.Type + t.To.Name = a.Name + + if c.ctxt.Flag_shared && t.To.Sym != nil { + t.Rel = p + } + + case C_SROREG, + C_LOREG, + C_ROREG, + C_FOREG, + C_SOREG, + C_HOREG, + C_FAUTO, + C_SAUTO, + C_LAUTO, + C_LACON: + t.To.Type = obj.TYPE_CONST + t.To.Offset = c.instoffset + } + + if t.Rel == nil { + for q := c.blitrl; q != nil; q = q.Link { /* could hash on t.t0.offset */ + if q.Rel == nil && q.To == t.To { + p.Pool = q + return + } + } + } + + q := c.newprog() + *q = *t + q.Pc = int64(c.pool.size) + + if c.blitrl == nil { + c.blitrl = q + c.pool.start = uint32(p.Pc) + } else { + c.elitrl.Link = q + } + c.elitrl = q + c.pool.size += 4 + + // Store the link to the pool entry in Pool. + p.Pool = q +} + +func (c *ctxt5) regoff(a *obj.Addr) int32 { + c.instoffset = 0 + c.aclass(a) + return int32(c.instoffset) +} + +func immrot(v uint32) int32 { + for i := 0; i < 16; i++ { + if v&^0xff == 0 { + return int32(uint32(int32(i)<<8) | v | 1<<25) + } + v = v<<2 | v>>30 + } + + return 0 +} + +// immrot2a returns bits encoding the immediate constant fields of two instructions, +// such that the encoded constants x, y satisfy x|y==v, x&y==0. +// Returns 0,0 if no such decomposition of v exists. 
// NOTE(review): in this copy of the file the text between the start of
// immrot2a and the first condition of immaddr (including all of immrot2s)
// was destroyed by markup stripping. Those two functions are not
// reproduced here and must be restored from a pristine copy.

// immaddr encodes v as the 12-bit immediate offset of a load/store.
// For 0 <= v <= 0xfff it returns the offset with bits 24 and 23 set;
// for -0xfff <= v < 0 it returns the magnitude with only bit 24 set.
// Any other value is not encodable and yields 0.
//
// The function header and the first comparison were restored from the
// intact remainder of the body and from the call sites, which pass an
// int32 and feed the result to immfloat(int32).
func immaddr(v int32) int32 {
	if v >= 0 && v <= 0xfff {
		return v&0xfff | 1<<24 | 1<<23 /* pre indexing */ /* pre indexing, up */
	}
	if v >= -0xfff && v < 0 {
		return -v&0xfff | 1<<24 /* pre indexing */
	}
	return 0
}

// immfloat reports whether v has none of the bits in 0xC03 set.
func immfloat(v int32) bool {
	return v&0xC03 == 0 /* offset will fit in floating-point load/store */
}

// immhalf reports whether v lies in [-0xff, 0xff].
//
// The previous body returned `v|1<<24|1<<23 != 0` and `-v&0xff|1<<24 != 0`
// from its two range branches. Because `|` binds tighter than `!=`, both
// expressions always have bit 24 set and are therefore always true, so the
// function was a plain range test written as if it were an encoder. The
// range test is now spelled out directly; results are unchanged.
func immhalf(v int32) bool {
	return -0xff <= v && v <= 0xff
}
+ a.Reg = obj.REG_NONE + } + c.instoffset = c.autosize + a.Offset + if t := immaddr(int32(c.instoffset)); t != 0 { + if immhalf(int32(c.instoffset)) { + if immfloat(t) { + return C_HFAUTO + } + return C_HAUTO + } + + if immfloat(t) { + return C_FAUTO + } + return C_SAUTO + } + + return C_LAUTO + + case obj.NAME_PARAM: + if a.Reg == REGSP { + // unset base register for better printing, since + // a.Offset is still relative to pseudo-FP. + a.Reg = obj.REG_NONE + } + c.instoffset = c.autosize + a.Offset + 4 + if t := immaddr(int32(c.instoffset)); t != 0 { + if immhalf(int32(c.instoffset)) { + if immfloat(t) { + return C_HFAUTO + } + return C_HAUTO + } + + if immfloat(t) { + return C_FAUTO + } + return C_SAUTO + } + + return C_LAUTO + + case obj.NAME_NONE: + c.instoffset = a.Offset + if t := immaddr(int32(c.instoffset)); t != 0 { + if immhalf(int32(c.instoffset)) { /* n.b. that it will also satisfy immrot */ + if immfloat(t) { + return C_HFOREG + } + return C_HOREG + } + + if immfloat(t) { + return C_FOREG /* n.b. 
that it will also satisfy immrot */ + } + if immrot(uint32(c.instoffset)) != 0 { + return C_SROREG + } + if immhalf(int32(c.instoffset)) { + return C_HOREG + } + return C_SOREG + } + + if immrot(uint32(c.instoffset)) != 0 { + return C_ROREG + } + return C_LOREG + } + + return C_GOK + + case obj.TYPE_FCONST: + if c.chipzero5(a.Val.(float64)) >= 0 { + return C_ZFCON + } + if c.chipfloat5(a.Val.(float64)) >= 0 { + return C_SFCON + } + return C_LFCON + + case obj.TYPE_TEXTSIZE: + return C_TEXTSIZE + + case obj.TYPE_CONST, + obj.TYPE_ADDR: + switch a.Name { + case obj.NAME_NONE: + c.instoffset = a.Offset + if a.Reg != 0 { + return c.aconsize() + } + + if immrot(uint32(c.instoffset)) != 0 { + return C_RCON + } + if immrot(^uint32(c.instoffset)) != 0 { + return C_NCON + } + if uint32(c.instoffset) <= 0xffff && buildcfg.GOARM == 7 { + return C_SCON + } + if x, y := immrot2a(uint32(c.instoffset)); x != 0 && y != 0 { + return C_RCON2A + } + if y, x := immrot2s(uint32(c.instoffset)); x != 0 && y != 0 { + return C_RCON2S + } + return C_LCON + + case obj.NAME_EXTERN, + obj.NAME_GOTREF, + obj.NAME_STATIC: + s := a.Sym + if s == nil { + break + } + c.instoffset = 0 // s.b. unused but just in case + return C_LCONADDR + + case obj.NAME_AUTO: + if a.Reg == REGSP { + // unset base register for better printing, since + // a.Offset is still relative to pseudo-SP. + a.Reg = obj.REG_NONE + } + c.instoffset = c.autosize + a.Offset + return c.aconsize() + + case obj.NAME_PARAM: + if a.Reg == REGSP { + // unset base register for better printing, since + // a.Offset is still relative to pseudo-FP. 
+ a.Reg = obj.REG_NONE + } + c.instoffset = c.autosize + a.Offset + 4 + return c.aconsize() + } + + return C_GOK + + case obj.TYPE_BRANCH: + return C_SBRA + } + + return C_GOK +} + +func (c *ctxt5) aconsize() int { + if immrot(uint32(c.instoffset)) != 0 { + return C_RACON + } + if immrot(uint32(-c.instoffset)) != 0 { + return C_RACON + } + return C_LACON +} + +func (c *ctxt5) oplook(p *obj.Prog) *Optab { + a1 := int(p.Optab) + if a1 != 0 { + return &optab[a1-1] + } + a1 = int(p.From.Class) + if a1 == 0 { + a1 = c.aclass(&p.From) + 1 + p.From.Class = int8(a1) + } + + a1-- + a3 := int(p.To.Class) + if a3 == 0 { + a3 = c.aclass(&p.To) + 1 + p.To.Class = int8(a3) + } + + a3-- + a2 := C_NONE + if p.Reg != 0 { + switch { + case REG_F0 <= p.Reg && p.Reg <= REG_F15: + a2 = C_FREG + case REG_R0 <= p.Reg && p.Reg <= REG_R15: + a2 = C_REG + default: + c.ctxt.Diag("invalid register in %v", p) + } + } + + // check illegal base register + switch a1 { + case C_SOREG, C_LOREG, C_HOREG, C_FOREG, C_ROREG, C_HFOREG, C_SROREG, C_SHIFTADDR: + if p.From.Reg < REG_R0 || REG_R15 < p.From.Reg { + c.ctxt.Diag("illegal base register: %v", p) + } + default: + } + switch a3 { + case C_SOREG, C_LOREG, C_HOREG, C_FOREG, C_ROREG, C_HFOREG, C_SROREG, C_SHIFTADDR: + if p.To.Reg < REG_R0 || REG_R15 < p.To.Reg { + c.ctxt.Diag("illegal base register: %v", p) + } + default: + } + + // If current instruction has a .S suffix (flags update), + // we must use the constant pool instead of splitting it. 
+ if (a1 == C_RCON2A || a1 == C_RCON2S) && p.Scond&C_SBIT != 0 { + a1 = C_LCON + } + if (a3 == C_RCON2A || a3 == C_RCON2S) && p.Scond&C_SBIT != 0 { + a3 = C_LCON + } + + if false { /*debug['O']*/ + fmt.Printf("oplook %v %v %v %v\n", p.As, DRconv(a1), DRconv(a2), DRconv(a3)) + fmt.Printf("\t\t%d %d\n", p.From.Type, p.To.Type) + } + + ops := oprange[p.As&obj.AMask] + c1 := &xcmp[a1] + c3 := &xcmp[a3] + for i := range ops { + op := &ops[i] + if int(op.a2) == a2 && c1[op.a1] && c3[op.a3] { + p.Optab = uint16(cap(optab) - cap(ops) + i + 1) + checkSuffix(c, p, op) + return op + } + } + + c.ctxt.Diag("illegal combination %v; %v %v %v; from %d %d; to %d %d", p, DRconv(a1), DRconv(a2), DRconv(a3), p.From.Type, p.From.Name, p.To.Type, p.To.Name) + if ops == nil { + ops = optab + } + return &ops[0] +} + +func cmp(a int, b int) bool { + if a == b { + return true + } + switch a { + case C_LCON: + if b == C_RCON || b == C_NCON || b == C_SCON || b == C_RCON2A || b == C_RCON2S { + return true + } + + case C_LACON: + if b == C_RACON { + return true + } + + case C_LFCON: + if b == C_ZFCON || b == C_SFCON { + return true + } + + case C_HFAUTO: + return b == C_HAUTO || b == C_FAUTO + + case C_FAUTO, C_HAUTO: + return b == C_HFAUTO + + case C_SAUTO: + return cmp(C_HFAUTO, b) + + case C_LAUTO: + return cmp(C_SAUTO, b) + + case C_HFOREG: + return b == C_HOREG || b == C_FOREG + + case C_FOREG, C_HOREG: + return b == C_HFOREG + + case C_SROREG: + return cmp(C_SOREG, b) || cmp(C_ROREG, b) + + case C_SOREG, C_ROREG: + return b == C_SROREG || cmp(C_HFOREG, b) + + case C_LOREG: + return cmp(C_SROREG, b) + + case C_LBRA: + if b == C_SBRA { + return true + } + + case C_HREG: + return cmp(C_SP, b) || cmp(C_PC, b) + } + + return false +} + +type ocmp []Optab + +func (x ocmp) Len() int { + return len(x) +} + +func (x ocmp) Swap(i, j int) { + x[i], x[j] = x[j], x[i] +} + +func (x ocmp) Less(i, j int) bool { + p1 := &x[i] + p2 := &x[j] + n := int(p1.as) - int(p2.as) + if n != 0 { + return n < 0 + } + 
n = int(p1.a1) - int(p2.a1) + if n != 0 { + return n < 0 + } + n = int(p1.a2) - int(p2.a2) + if n != 0 { + return n < 0 + } + n = int(p1.a3) - int(p2.a3) + if n != 0 { + return n < 0 + } + return false +} + +func opset(a, b0 obj.As) { + oprange[a&obj.AMask] = oprange[b0] +} + +func buildop(ctxt *obj.Link) { + if oprange[AAND&obj.AMask] != nil { + // Already initialized; stop now. + // This happens in the cmd/asm tests, + // each of which re-initializes the arch. + return + } + + symdiv = ctxt.Lookup("runtime._div") + symdivu = ctxt.Lookup("runtime._divu") + symmod = ctxt.Lookup("runtime._mod") + symmodu = ctxt.Lookup("runtime._modu") + + var n int + + for i := 0; i < C_GOK; i++ { + for n = 0; n < C_GOK; n++ { + if cmp(n, i) { + xcmp[i][n] = true + } + } + } + for n = 0; optab[n].as != obj.AXXX; n++ { + if optab[n].flag&LPCREL != 0 { + if ctxt.Flag_shared { + optab[n].size += int8(optab[n].pcrelsiz) + } else { + optab[n].flag &^= LPCREL + } + } + } + + sort.Sort(ocmp(optab[:n])) + for i := 0; i < n; i++ { + r := optab[i].as + r0 := r & obj.AMask + start := i + for optab[i].as == r { + i++ + } + oprange[r0] = optab[start:i] + i-- + + switch r { + default: + ctxt.Diag("unknown op in build: %v", r) + ctxt.DiagFlush() + log.Fatalf("bad code") + + case AADD: + opset(ASUB, r0) + opset(ARSB, r0) + opset(AADC, r0) + opset(ASBC, r0) + opset(ARSC, r0) + + case AORR: + opset(AEOR, r0) + opset(ABIC, r0) + + case ACMP: + opset(ATEQ, r0) + opset(ACMN, r0) + opset(ATST, r0) + + case AMVN: + break + + case ABEQ: + opset(ABNE, r0) + opset(ABCS, r0) + opset(ABHS, r0) + opset(ABCC, r0) + opset(ABLO, r0) + opset(ABMI, r0) + opset(ABPL, r0) + opset(ABVS, r0) + opset(ABVC, r0) + opset(ABHI, r0) + opset(ABLS, r0) + opset(ABGE, r0) + opset(ABLT, r0) + opset(ABGT, r0) + opset(ABLE, r0) + + case ASLL: + opset(ASRL, r0) + opset(ASRA, r0) + + case AMUL: + opset(AMULU, r0) + + case ADIV: + opset(AMOD, r0) + opset(AMODU, r0) + opset(ADIVU, r0) + + case ADIVHW: + opset(ADIVUHW, r0) + + case 
AMOVW, + AMOVB, + AMOVBS, + AMOVBU, + AMOVH, + AMOVHS, + AMOVHU: + break + + case ASWPW: + opset(ASWPBU, r0) + + case AB, + ABL, + ABX, + ABXRET, + obj.ADUFFZERO, + obj.ADUFFCOPY, + ASWI, + AWORD, + AMOVM, + ARFE, + obj.ATEXT: + break + + case AADDF: + opset(AADDD, r0) + opset(ASUBF, r0) + opset(ASUBD, r0) + opset(AMULF, r0) + opset(AMULD, r0) + opset(ANMULF, r0) + opset(ANMULD, r0) + opset(AMULAF, r0) + opset(AMULAD, r0) + opset(AMULSF, r0) + opset(AMULSD, r0) + opset(ANMULAF, r0) + opset(ANMULAD, r0) + opset(ANMULSF, r0) + opset(ANMULSD, r0) + opset(AFMULAF, r0) + opset(AFMULAD, r0) + opset(AFMULSF, r0) + opset(AFMULSD, r0) + opset(AFNMULAF, r0) + opset(AFNMULAD, r0) + opset(AFNMULSF, r0) + opset(AFNMULSD, r0) + opset(ADIVF, r0) + opset(ADIVD, r0) + + case ANEGF: + opset(ANEGD, r0) + opset(ASQRTF, r0) + opset(ASQRTD, r0) + opset(AMOVFD, r0) + opset(AMOVDF, r0) + opset(AABSF, r0) + opset(AABSD, r0) + + case ACMPF: + opset(ACMPD, r0) + + case AMOVF: + opset(AMOVD, r0) + + case AMOVFW: + opset(AMOVDW, r0) + + case AMOVWF: + opset(AMOVWD, r0) + + case AMULL: + opset(AMULAL, r0) + opset(AMULLU, r0) + opset(AMULALU, r0) + + case AMULWT: + opset(AMULWB, r0) + opset(AMULBB, r0) + opset(AMMUL, r0) + + case AMULAWT: + opset(AMULAWB, r0) + opset(AMULABB, r0) + opset(AMULS, r0) + opset(AMMULA, r0) + opset(AMMULS, r0) + + case ABFX: + opset(ABFXU, r0) + opset(ABFC, r0) + opset(ABFI, r0) + + case ACLZ: + opset(AREV, r0) + opset(AREV16, r0) + opset(AREVSH, r0) + opset(ARBIT, r0) + + case AXTAB: + opset(AXTAH, r0) + opset(AXTABU, r0) + opset(AXTAHU, r0) + + case ALDREX, + ASTREX, + ALDREXD, + ASTREXD, + ADMB, + APLD, + AAND, + AMULA, + obj.AUNDEF, + obj.AFUNCDATA, + obj.APCDATA, + obj.ANOP: + break + } + } +} + +func (c *ctxt5) asmout(p *obj.Prog, o *Optab, out []uint32) { + c.printp = p + o1 := uint32(0) + o2 := uint32(0) + o3 := uint32(0) + o4 := uint32(0) + o5 := uint32(0) + o6 := uint32(0) + if false { /*debug['P']*/ + fmt.Printf("%x: %v\ttype %d\n", uint32(p.Pc), p, 
o.type_) + } + switch o.type_ { + default: + c.ctxt.Diag("%v: unknown asm %d", p, o.type_) + + case 0: /* pseudo ops */ + if false { /*debug['G']*/ + fmt.Printf("%x: %s: arm\n", uint32(p.Pc), p.From.Sym.Name) + } + + case 1: /* op R,[R],R */ + o1 = c.oprrr(p, p.As, int(p.Scond)) + + rf := int(p.From.Reg) + rt := int(p.To.Reg) + r := int(p.Reg) + if p.To.Type == obj.TYPE_NONE { + rt = 0 + } + if p.As == AMOVB || p.As == AMOVH || p.As == AMOVW || p.As == AMVN { + r = 0 + } else if r == 0 { + r = rt + } + o1 |= (uint32(rf)&15)<<0 | (uint32(r)&15)<<16 | (uint32(rt)&15)<<12 + + case 2: /* movbu $I,[R],R */ + c.aclass(&p.From) + + o1 = c.oprrr(p, p.As, int(p.Scond)) + o1 |= uint32(immrot(uint32(c.instoffset))) + rt := int(p.To.Reg) + r := int(p.Reg) + if p.To.Type == obj.TYPE_NONE { + rt = 0 + } + if p.As == AMOVW || p.As == AMVN { + r = 0 + } else if r == 0 { + r = rt + } + o1 |= (uint32(r)&15)<<16 | (uint32(rt)&15)<<12 + + case 106: /* op $I,R,R where I can be decomposed into 2 immediates */ + c.aclass(&p.From) + r := int(p.Reg) + rt := int(p.To.Reg) + if r == 0 { + r = rt + } + x, y := immrot2a(uint32(c.instoffset)) + var as2 obj.As + switch p.As { + case AADD, ASUB, AORR, AEOR, ABIC: + as2 = p.As // ADD, SUB, ORR, EOR, BIC + case ARSB: + as2 = AADD // RSB -> RSB/ADD pair + case AADC: + as2 = AADD // ADC -> ADC/ADD pair + case ASBC: + as2 = ASUB // SBC -> SBC/SUB pair + case ARSC: + as2 = AADD // RSC -> RSC/ADD pair + default: + c.ctxt.Diag("unknown second op for %v", p) + } + o1 = c.oprrr(p, p.As, int(p.Scond)) + o2 = c.oprrr(p, as2, int(p.Scond)) + o1 |= (uint32(r)&15)<<16 | (uint32(rt)&15)<<12 + o2 |= (uint32(rt)&15)<<16 | (uint32(rt)&15)<<12 + o1 |= x + o2 |= y + + case 107: /* op $I,R,R where I can be decomposed into 2 immediates */ + c.aclass(&p.From) + r := int(p.Reg) + rt := int(p.To.Reg) + if r == 0 { + r = rt + } + y, x := immrot2s(uint32(c.instoffset)) + var as2 obj.As + switch p.As { + case AADD: + as2 = ASUB // ADD -> ADD/SUB pair + case ASUB: + as2 = 
AADD // SUB -> SUB/ADD pair + case ARSB: + as2 = ASUB // RSB -> RSB/SUB pair + case AADC: + as2 = ASUB // ADC -> ADC/SUB pair + case ASBC: + as2 = AADD // SBC -> SBC/ADD pair + case ARSC: + as2 = ASUB // RSC -> RSC/SUB pair + default: + c.ctxt.Diag("unknown second op for %v", p) + } + o1 = c.oprrr(p, p.As, int(p.Scond)) + o2 = c.oprrr(p, as2, int(p.Scond)) + o1 |= (uint32(r)&15)<<16 | (uint32(rt)&15)<<12 + o2 |= (uint32(rt)&15)<<16 | (uint32(rt)&15)<<12 + o1 |= y + o2 |= x + + case 3: /* add R<<[IR],[R],R */ + o1 = c.mov(p) + + case 4: /* MOVW $off(R), R -> add $off,[R],R */ + c.aclass(&p.From) + if c.instoffset < 0 { + o1 = c.oprrr(p, ASUB, int(p.Scond)) + o1 |= uint32(immrot(uint32(-c.instoffset))) + } else { + o1 = c.oprrr(p, AADD, int(p.Scond)) + o1 |= uint32(immrot(uint32(c.instoffset))) + } + r := int(p.From.Reg) + if r == 0 { + r = int(o.param) + } + o1 |= (uint32(r) & 15) << 16 + o1 |= (uint32(p.To.Reg) & 15) << 12 + + case 5: /* bra s */ + o1 = c.opbra(p, p.As, int(p.Scond)) + + v := int32(-8) + if p.To.Sym != nil { + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 4 + rel.Sym = p.To.Sym + v += int32(p.To.Offset) + rel.Add = int64(o1) | (int64(v)>>2)&0xffffff + rel.Type = objabi.R_CALLARM + break + } + + if p.To.Target() != nil { + v = int32((p.To.Target().Pc - c.pc) - 8) + } + o1 |= (uint32(v) >> 2) & 0xffffff + + case 6: /* b ,O(R) -> add $O,R,PC */ + c.aclass(&p.To) + + o1 = c.oprrr(p, AADD, int(p.Scond)) + o1 |= uint32(immrot(uint32(c.instoffset))) + o1 |= (uint32(p.To.Reg) & 15) << 16 + o1 |= (REGPC & 15) << 12 + + case 7: /* bl (R) -> blx R */ + c.aclass(&p.To) + + if c.instoffset != 0 { + c.ctxt.Diag("%v: doesn't support BL offset(REG) with non-zero offset %d", p, c.instoffset) + } + o1 = c.oprrr(p, ABL, int(p.Scond)) + o1 |= (uint32(p.To.Reg) & 15) << 0 + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 0 + rel.Type = objabi.R_CALLIND + + case 8: /* sll $c,[R],R -> mov (R<<$c),R */ + c.aclass(&p.From) + + o1 = 
c.oprrr(p, p.As, int(p.Scond)) + r := int(p.Reg) + if r == 0 { + r = int(p.To.Reg) + } + o1 |= (uint32(r) & 15) << 0 + o1 |= uint32((c.instoffset & 31) << 7) + o1 |= (uint32(p.To.Reg) & 15) << 12 + + case 9: /* sll R,[R],R -> mov (R< 31 || width <= 0 || (lsb+width) > 32 { + c.ctxt.Diag("%v: wrong width or LSB", p) + } + switch p.As { + case ABFX, ABFXU: // (width-1) is encoded + o1 |= (uint32(r)&15)<<0 | (uint32(rt)&15)<<12 | uint32(lsb)<<7 | uint32(width-1)<<16 + case ABFC, ABFI: // MSB is encoded + o1 |= (uint32(r)&15)<<0 | (uint32(rt)&15)<<12 | uint32(lsb)<<7 | uint32(lsb+width-1)<<16 + default: + c.ctxt.Diag("illegal combination: %v", p) + } + + case 20: /* mov/movb/movbu R,O(R) */ + c.aclass(&p.To) + + r := int(p.To.Reg) + if r == 0 { + r = int(o.param) + } + o1 = c.osr(p.As, int(p.From.Reg), int32(c.instoffset), r, int(p.Scond)) + + case 21: /* mov/movbu O(R),R -> lr */ + c.aclass(&p.From) + + r := int(p.From.Reg) + if r == 0 { + r = int(o.param) + } + o1 = c.olr(int32(c.instoffset), r, int(p.To.Reg), int(p.Scond)) + if p.As != AMOVW { + o1 |= 1 << 22 + } + + case 22: /* XTAB R@>i, [R], R */ + o1 = c.oprrr(p, p.As, int(p.Scond)) + switch p.From.Offset &^ 0xf { + // only 0/8/16/24 bits rotation is accepted + case SHIFT_RR, SHIFT_RR | 8<<7, SHIFT_RR | 16<<7, SHIFT_RR | 24<<7: + o1 |= uint32(p.From.Offset) & 0xc0f + default: + c.ctxt.Diag("illegal shift: %v", p) + } + rt := p.To.Reg + r := p.Reg + if r == 0 { + r = rt + } + o1 |= (uint32(rt)&15)<<12 | (uint32(r)&15)<<16 + + case 23: /* MOVW/MOVB/MOVH R@>i, R */ + switch p.As { + case AMOVW: + o1 = c.mov(p) + case AMOVBU, AMOVBS, AMOVB, AMOVHU, AMOVHS, AMOVH: + o1 = c.movxt(p) + default: + c.ctxt.Diag("illegal combination: %v", p) + } + + case 30: /* mov/movb/movbu R,L(R) */ + o1 = c.omvl(p, &p.To, REGTMP) + + if o1 == 0 { + break + } + r := int(p.To.Reg) + if r == 0 { + r = int(o.param) + } + o2 = c.osrr(int(p.From.Reg), REGTMP&15, r, int(p.Scond)) + if p.As != AMOVW { + o2 |= 1 << 22 + } + + case 31: /* 
mov/movbu L(R),R -> lr[b] */ + o1 = c.omvl(p, &p.From, REGTMP) + + if o1 == 0 { + break + } + r := int(p.From.Reg) + if r == 0 { + r = int(o.param) + } + o2 = c.olrr(REGTMP&15, r, int(p.To.Reg), int(p.Scond)) + if p.As == AMOVBU || p.As == AMOVBS || p.As == AMOVB { + o2 |= 1 << 22 + } + + case 34: /* mov $lacon,R */ + o1 = c.omvl(p, &p.From, REGTMP) + + if o1 == 0 { + break + } + + o2 = c.oprrr(p, AADD, int(p.Scond)) + o2 |= REGTMP & 15 + r := int(p.From.Reg) + if r == 0 { + r = int(o.param) + } + o2 |= (uint32(r) & 15) << 16 + if p.To.Type != obj.TYPE_NONE { + o2 |= (uint32(p.To.Reg) & 15) << 12 + } + + case 35: /* mov PSR,R */ + o1 = 2<<23 | 0xf<<16 | 0<<0 + + o1 |= ((uint32(p.Scond) & C_SCOND) ^ C_SCOND_XOR) << 28 + o1 |= (uint32(p.From.Reg) & 1) << 22 + o1 |= (uint32(p.To.Reg) & 15) << 12 + + case 36: /* mov R,PSR */ + o1 = 2<<23 | 0x2cf<<12 | 0<<4 + + if p.Scond&C_FBIT != 0 { + o1 ^= 0x010 << 12 + } + o1 |= ((uint32(p.Scond) & C_SCOND) ^ C_SCOND_XOR) << 28 + o1 |= (uint32(p.To.Reg) & 1) << 22 + o1 |= (uint32(p.From.Reg) & 15) << 0 + + case 37: /* mov $con,PSR */ + c.aclass(&p.From) + + o1 = 2<<23 | 0x2cf<<12 | 0<<4 + if p.Scond&C_FBIT != 0 { + o1 ^= 0x010 << 12 + } + o1 |= ((uint32(p.Scond) & C_SCOND) ^ C_SCOND_XOR) << 28 + o1 |= uint32(immrot(uint32(c.instoffset))) + o1 |= (uint32(p.To.Reg) & 1) << 22 + o1 |= (uint32(p.From.Reg) & 15) << 0 + + case 38, 39: + switch o.type_ { + case 38: /* movm $con,oreg -> stm */ + o1 = 0x4 << 25 + + o1 |= uint32(p.From.Offset & 0xffff) + o1 |= (uint32(p.To.Reg) & 15) << 16 + c.aclass(&p.To) + + case 39: /* movm oreg,$con -> ldm */ + o1 = 0x4<<25 | 1<<20 + + o1 |= uint32(p.To.Offset & 0xffff) + o1 |= (uint32(p.From.Reg) & 15) << 16 + c.aclass(&p.From) + } + + if c.instoffset != 0 { + c.ctxt.Diag("offset must be zero in MOVM; %v", p) + } + o1 |= ((uint32(p.Scond) & C_SCOND) ^ C_SCOND_XOR) << 28 + if p.Scond&C_PBIT != 0 { + o1 |= 1 << 24 + } + if p.Scond&C_UBIT != 0 { + o1 |= 1 << 23 + } + if p.Scond&C_WBIT != 0 { + o1 |= 1 << 
21 + } + + case 40: /* swp oreg,reg,reg */ + c.aclass(&p.From) + + if c.instoffset != 0 { + c.ctxt.Diag("offset must be zero in SWP") + } + o1 = 0x2<<23 | 0x9<<4 + if p.As != ASWPW { + o1 |= 1 << 22 + } + o1 |= (uint32(p.From.Reg) & 15) << 16 + o1 |= (uint32(p.Reg) & 15) << 0 + o1 |= (uint32(p.To.Reg) & 15) << 12 + o1 |= ((uint32(p.Scond) & C_SCOND) ^ C_SCOND_XOR) << 28 + + case 41: /* rfe -> movm.s.w.u 0(r13),[r15] */ + o1 = 0xe8fd8000 + + case 50: /* floating point store */ + v := c.regoff(&p.To) + + r := int(p.To.Reg) + if r == 0 { + r = int(o.param) + } + o1 = c.ofsr(p.As, int(p.From.Reg), v, r, int(p.Scond), p) + + case 51: /* floating point load */ + v := c.regoff(&p.From) + + r := int(p.From.Reg) + if r == 0 { + r = int(o.param) + } + o1 = c.ofsr(p.As, int(p.To.Reg), v, r, int(p.Scond), p) | 1<<20 + + case 52: /* floating point store, int32 offset UGLY */ + o1 = c.omvl(p, &p.To, REGTMP) + + if o1 == 0 { + break + } + r := int(p.To.Reg) + if r == 0 { + r = int(o.param) + } + o2 = c.oprrr(p, AADD, int(p.Scond)) | (REGTMP&15)<<12 | (REGTMP&15)<<16 | (uint32(r)&15)<<0 + o3 = c.ofsr(p.As, int(p.From.Reg), 0, REGTMP, int(p.Scond), p) + + case 53: /* floating point load, int32 offset UGLY */ + o1 = c.omvl(p, &p.From, REGTMP) + + if o1 == 0 { + break + } + r := int(p.From.Reg) + if r == 0 { + r = int(o.param) + } + o2 = c.oprrr(p, AADD, int(p.Scond)) | (REGTMP&15)<<12 | (REGTMP&15)<<16 | (uint32(r)&15)<<0 + o3 = c.ofsr(p.As, int(p.To.Reg), 0, (REGTMP&15), int(p.Scond), p) | 1<<20 + + case 54: /* floating point arith */ + o1 = c.oprrr(p, p.As, int(p.Scond)) + + rf := int(p.From.Reg) + rt := int(p.To.Reg) + r := int(p.Reg) + if r == 0 { + switch p.As { + case AMULAD, AMULAF, AMULSF, AMULSD, ANMULAF, ANMULAD, ANMULSF, ANMULSD, + AFMULAD, AFMULAF, AFMULSF, AFMULSD, AFNMULAF, AFNMULAD, AFNMULSF, AFNMULSD: + c.ctxt.Diag("illegal combination: %v", p) + default: + r = rt + } + } + + o1 |= (uint32(rf)&15)<<0 | (uint32(r)&15)<<16 | (uint32(rt)&15)<<12 + + case 55: /* negf 
freg, freg */ + o1 = c.oprrr(p, p.As, int(p.Scond)) + + rf := int(p.From.Reg) + rt := int(p.To.Reg) + + o1 |= (uint32(rf)&15)<<0 | (uint32(rt)&15)<<12 + + case 56: /* move to FP[CS]R */ + o1 = ((uint32(p.Scond)&C_SCOND)^C_SCOND_XOR)<<28 | 0xee1<<16 | 0xa1<<4 + + o1 |= (uint32(p.From.Reg) & 15) << 12 + + case 57: /* move from FP[CS]R */ + o1 = ((uint32(p.Scond)&C_SCOND)^C_SCOND_XOR)<<28 | 0xef1<<16 | 0xa1<<4 + + o1 |= (uint32(p.To.Reg) & 15) << 12 + + case 58: /* movbu R,R */ + o1 = c.oprrr(p, AAND, int(p.Scond)) + + o1 |= uint32(immrot(0xff)) + rt := int(p.To.Reg) + r := int(p.From.Reg) + if p.To.Type == obj.TYPE_NONE { + rt = 0 + } + if r == 0 { + r = rt + } + o1 |= (uint32(r)&15)<<16 | (uint32(rt)&15)<<12 + + case 59: /* movw/bu R< ldr indexed */ + if p.From.Reg == 0 { + c.ctxt.Diag("source operand is not a memory address: %v", p) + break + } + if p.From.Offset&(1<<4) != 0 { + c.ctxt.Diag("bad shift in LDR") + break + } + o1 = c.olrr(int(p.From.Offset), int(p.From.Reg), int(p.To.Reg), int(p.Scond)) + if p.As == AMOVBU { + o1 |= 1 << 22 + } + + case 60: /* movb R(R),R -> ldrsb indexed */ + if p.From.Reg == 0 { + c.ctxt.Diag("source operand is not a memory address: %v", p) + break + } + if p.From.Offset&(^0xf) != 0 { + c.ctxt.Diag("bad shift: %v", p) + break + } + o1 = c.olhrr(int(p.From.Offset), int(p.From.Reg), int(p.To.Reg), int(p.Scond)) + switch p.As { + case AMOVB, AMOVBS: + o1 ^= 1<<5 | 1<<6 + case AMOVH, AMOVHS: + o1 ^= 1 << 6 + default: + } + if p.Scond&C_UBIT != 0 { + o1 &^= 1 << 23 + } + + case 61: /* movw/b/bu R,R<<[IR](R) -> str indexed */ + if p.To.Reg == 0 { + c.ctxt.Diag("MOV to shifter operand") + } + o1 = c.osrr(int(p.From.Reg), int(p.To.Offset), int(p.To.Reg), int(p.Scond)) + if p.As == AMOVB || p.As == AMOVBS || p.As == AMOVBU { + o1 |= 1 << 22 + } + + case 62: /* MOVH/MOVHS/MOVHU Reg, Reg<<0(Reg) -> strh */ + if p.To.Reg == 0 { + c.ctxt.Diag("MOV to shifter operand") + } + if p.To.Offset&(^0xf) != 0 { + c.ctxt.Diag("bad shift: %v", p) + } + o1 
= c.olhrr(int(p.To.Offset), int(p.To.Reg), int(p.From.Reg), int(p.Scond)) + o1 ^= 1 << 20 + if p.Scond&C_UBIT != 0 { + o1 &^= 1 << 23 + } + + /* reloc ops */ + case 64: /* mov/movb/movbu R,addr */ + o1 = c.omvl(p, &p.To, REGTMP) + + if o1 == 0 { + break + } + o2 = c.osr(p.As, int(p.From.Reg), 0, REGTMP, int(p.Scond)) + if o.flag&LPCREL != 0 { + o3 = o2 + o2 = c.oprrr(p, AADD, int(p.Scond)) | REGTMP&15 | (REGPC&15)<<16 | (REGTMP&15)<<12 + } + + case 65: /* mov/movbu addr,R */ + o1 = c.omvl(p, &p.From, REGTMP) + + if o1 == 0 { + break + } + o2 = c.olr(0, REGTMP, int(p.To.Reg), int(p.Scond)) + if p.As == AMOVBU || p.As == AMOVBS || p.As == AMOVB { + o2 |= 1 << 22 + } + if o.flag&LPCREL != 0 { + o3 = o2 + o2 = c.oprrr(p, AADD, int(p.Scond)) | REGTMP&15 | (REGPC&15)<<16 | (REGTMP&15)<<12 + } + + case 101: /* movw tlsvar,R, local exec*/ + o1 = c.omvl(p, &p.From, int(p.To.Reg)) + + case 102: /* movw tlsvar,R, initial exec*/ + o1 = c.omvl(p, &p.From, int(p.To.Reg)) + o2 = c.olrr(int(p.To.Reg)&15, (REGPC & 15), int(p.To.Reg), int(p.Scond)) + + case 103: /* word tlsvar, local exec */ + if p.To.Sym == nil { + c.ctxt.Diag("nil sym in tls %v", p) + } + if p.To.Offset != 0 { + c.ctxt.Diag("offset against tls var in %v", p) + } + // This case happens with words generated in the PC stream as part of + // the literal c.pool. 
+ rel := obj.Addrel(c.cursym) + + rel.Off = int32(c.pc) + rel.Siz = 4 + rel.Sym = p.To.Sym + rel.Type = objabi.R_TLS_LE + o1 = 0 + + case 104: /* word tlsvar, initial exec */ + if p.To.Sym == nil { + c.ctxt.Diag("nil sym in tls %v", p) + } + if p.To.Offset != 0 { + c.ctxt.Diag("offset against tls var in %v", p) + } + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 4 + rel.Sym = p.To.Sym + rel.Type = objabi.R_TLS_IE + rel.Add = c.pc - p.Rel.Pc - 8 - int64(rel.Siz) + + case 68: /* floating point store -> ADDR */ + o1 = c.omvl(p, &p.To, REGTMP) + + if o1 == 0 { + break + } + o2 = c.ofsr(p.As, int(p.From.Reg), 0, REGTMP, int(p.Scond), p) + if o.flag&LPCREL != 0 { + o3 = o2 + o2 = c.oprrr(p, AADD, int(p.Scond)) | REGTMP&15 | (REGPC&15)<<16 | (REGTMP&15)<<12 + } + + case 69: /* floating point load <- ADDR */ + o1 = c.omvl(p, &p.From, REGTMP) + + if o1 == 0 { + break + } + o2 = c.ofsr(p.As, int(p.To.Reg), 0, (REGTMP&15), int(p.Scond), p) | 1<<20 + if o.flag&LPCREL != 0 { + o3 = o2 + o2 = c.oprrr(p, AADD, int(p.Scond)) | REGTMP&15 | (REGPC&15)<<16 | (REGTMP&15)<<12 + } + + /* ArmV4 ops: */ + case 70: /* movh/movhu R,O(R) -> strh */ + c.aclass(&p.To) + + r := int(p.To.Reg) + if r == 0 { + r = int(o.param) + } + o1 = c.oshr(int(p.From.Reg), int32(c.instoffset), r, int(p.Scond)) + + case 71: /* movb/movh/movhu O(R),R -> ldrsb/ldrsh/ldrh */ + c.aclass(&p.From) + + r := int(p.From.Reg) + if r == 0 { + r = int(o.param) + } + o1 = c.olhr(int32(c.instoffset), r, int(p.To.Reg), int(p.Scond)) + if p.As == AMOVB || p.As == AMOVBS { + o1 ^= 1<<5 | 1<<6 + } else if p.As == AMOVH || p.As == AMOVHS { + o1 ^= (1 << 6) + } + + case 72: /* movh/movhu R,L(R) -> strh */ + o1 = c.omvl(p, &p.To, REGTMP) + + if o1 == 0 { + break + } + r := int(p.To.Reg) + if r == 0 { + r = int(o.param) + } + o2 = c.oshrr(int(p.From.Reg), REGTMP&15, r, int(p.Scond)) + + case 73: /* movb/movh/movhu L(R),R -> ldrsb/ldrsh/ldrh */ + o1 = c.omvl(p, &p.From, REGTMP) + + if o1 == 0 { + break + } + r := 
int(p.From.Reg) + if r == 0 { + r = int(o.param) + } + o2 = c.olhrr(REGTMP&15, r, int(p.To.Reg), int(p.Scond)) + if p.As == AMOVB || p.As == AMOVBS { + o2 ^= 1<<5 | 1<<6 + } else if p.As == AMOVH || p.As == AMOVHS { + o2 ^= (1 << 6) + } + + case 74: /* bx $I */ + c.ctxt.Diag("ABX $I") + + case 75: /* bx O(R) */ + c.aclass(&p.To) + + if c.instoffset != 0 { + c.ctxt.Diag("non-zero offset in ABX") + } + + /* + o1 = c.oprrr(p, AADD, p->scond) | immrot(0) | ((REGPC&15)<<16) | ((REGLINK&15)<<12); // mov PC, LR + o2 = (((p->scond&C_SCOND) ^ C_SCOND_XOR)<<28) | (0x12fff<<8) | (1<<4) | ((p->to.reg&15) << 0); // BX R + */ + // p->to.reg may be REGLINK + o1 = c.oprrr(p, AADD, int(p.Scond)) + + o1 |= uint32(immrot(uint32(c.instoffset))) + o1 |= (uint32(p.To.Reg) & 15) << 16 + o1 |= (REGTMP & 15) << 12 + o2 = c.oprrr(p, AADD, int(p.Scond)) | uint32(immrot(0)) | (REGPC&15)<<16 | (REGLINK&15)<<12 // mov PC, LR + o3 = ((uint32(p.Scond)&C_SCOND)^C_SCOND_XOR)<<28 | 0x12fff<<8 | 1<<4 | REGTMP&15 // BX Rtmp + + case 76: /* bx O(R) when returning from fn*/ + c.ctxt.Diag("ABXRET") + + case 77: /* ldrex oreg,reg */ + c.aclass(&p.From) + + if c.instoffset != 0 { + c.ctxt.Diag("offset must be zero in LDREX") + } + o1 = 0x19<<20 | 0xf9f + o1 |= (uint32(p.From.Reg) & 15) << 16 + o1 |= (uint32(p.To.Reg) & 15) << 12 + o1 |= ((uint32(p.Scond) & C_SCOND) ^ C_SCOND_XOR) << 28 + + case 78: /* strex reg,oreg,reg */ + c.aclass(&p.From) + + if c.instoffset != 0 { + c.ctxt.Diag("offset must be zero in STREX") + } + if p.To.Reg == p.From.Reg || p.To.Reg == p.Reg { + c.ctxt.Diag("cannot use same register as both source and destination: %v", p) + } + o1 = 0x18<<20 | 0xf90 + o1 |= (uint32(p.From.Reg) & 15) << 16 + o1 |= (uint32(p.Reg) & 15) << 0 + o1 |= (uint32(p.To.Reg) & 15) << 12 + o1 |= ((uint32(p.Scond) & C_SCOND) ^ C_SCOND_XOR) << 28 + + case 80: /* fmov zfcon,freg */ + if p.As == AMOVD { + o1 = 0xeeb00b00 // VMOV imm 64 + o2 = c.oprrr(p, ASUBD, int(p.Scond)) + } else { + o1 = 0x0eb00a00 // VMOV imm 
32 + o2 = c.oprrr(p, ASUBF, int(p.Scond)) + } + + v := int32(0x70) // 1.0 + r := (int(p.To.Reg) & 15) << 0 + + // movf $1.0, r + o1 |= ((uint32(p.Scond) & C_SCOND) ^ C_SCOND_XOR) << 28 + + o1 |= (uint32(r) & 15) << 12 + o1 |= (uint32(v) & 0xf) << 0 + o1 |= (uint32(v) & 0xf0) << 12 + + // subf r,r,r + o2 |= (uint32(r)&15)<<0 | (uint32(r)&15)<<16 | (uint32(r)&15)<<12 + + case 81: /* fmov sfcon,freg */ + o1 = 0x0eb00a00 // VMOV imm 32 + if p.As == AMOVD { + o1 = 0xeeb00b00 // VMOV imm 64 + } + o1 |= ((uint32(p.Scond) & C_SCOND) ^ C_SCOND_XOR) << 28 + o1 |= (uint32(p.To.Reg) & 15) << 12 + v := int32(c.chipfloat5(p.From.Val.(float64))) + o1 |= (uint32(v) & 0xf) << 0 + o1 |= (uint32(v) & 0xf0) << 12 + + case 82: /* fcmp freg,freg, */ + o1 = c.oprrr(p, p.As, int(p.Scond)) + + o1 |= (uint32(p.Reg)&15)<<12 | (uint32(p.From.Reg)&15)<<0 + o2 = 0x0ef1fa10 // VMRS R15 + o2 |= ((uint32(p.Scond) & C_SCOND) ^ C_SCOND_XOR) << 28 + + case 83: /* fcmp freg,, */ + o1 = c.oprrr(p, p.As, int(p.Scond)) + + o1 |= (uint32(p.From.Reg)&15)<<12 | 1<<16 + o2 = 0x0ef1fa10 // VMRS R15 + o2 |= ((uint32(p.Scond) & C_SCOND) ^ C_SCOND_XOR) << 28 + + case 84: /* movfw freg,freg - truncate float-to-fix */ + o1 = c.oprrr(p, p.As, int(p.Scond)) + + o1 |= (uint32(p.From.Reg) & 15) << 0 + o1 |= (uint32(p.To.Reg) & 15) << 12 + + case 85: /* movwf freg,freg - fix-to-float */ + o1 = c.oprrr(p, p.As, int(p.Scond)) + + o1 |= (uint32(p.From.Reg) & 15) << 0 + o1 |= (uint32(p.To.Reg) & 15) << 12 + + // macro for movfw freg,FTMP; movw FTMP,reg + case 86: /* movfw freg,reg - truncate float-to-fix */ + o1 = c.oprrr(p, p.As, int(p.Scond)) + + o1 |= (uint32(p.From.Reg) & 15) << 0 + o1 |= (FREGTMP & 15) << 12 + o2 = c.oprrr(p, -AMOVFW, int(p.Scond)) + o2 |= (FREGTMP & 15) << 16 + o2 |= (uint32(p.To.Reg) & 15) << 12 + + // macro for movw reg,FTMP; movwf FTMP,freg + case 87: /* movwf reg,freg - fix-to-float */ + o1 = c.oprrr(p, -AMOVWF, int(p.Scond)) + + o1 |= (uint32(p.From.Reg) & 15) << 12 + o1 |= (FREGTMP & 15) << 16 
+ o2 = c.oprrr(p, p.As, int(p.Scond)) + o2 |= (FREGTMP & 15) << 0 + o2 |= (uint32(p.To.Reg) & 15) << 12 + + case 88: /* movw reg,freg */ + o1 = c.oprrr(p, -AMOVWF, int(p.Scond)) + + o1 |= (uint32(p.From.Reg) & 15) << 12 + o1 |= (uint32(p.To.Reg) & 15) << 16 + + case 89: /* movw freg,reg */ + o1 = c.oprrr(p, -AMOVFW, int(p.Scond)) + + o1 |= (uint32(p.From.Reg) & 15) << 16 + o1 |= (uint32(p.To.Reg) & 15) << 12 + + case 91: /* ldrexd oreg,reg */ + c.aclass(&p.From) + + if c.instoffset != 0 { + c.ctxt.Diag("offset must be zero in LDREX") + } + o1 = 0x1b<<20 | 0xf9f + o1 |= (uint32(p.From.Reg) & 15) << 16 + o1 |= (uint32(p.To.Reg) & 15) << 12 + o1 |= ((uint32(p.Scond) & C_SCOND) ^ C_SCOND_XOR) << 28 + + case 92: /* strexd reg,oreg,reg */ + c.aclass(&p.From) + + if c.instoffset != 0 { + c.ctxt.Diag("offset must be zero in STREX") + } + if p.Reg&1 != 0 { + c.ctxt.Diag("source register must be even in STREXD: %v", p) + } + if p.To.Reg == p.From.Reg || p.To.Reg == p.Reg || p.To.Reg == p.Reg+1 { + c.ctxt.Diag("cannot use same register as both source and destination: %v", p) + } + o1 = 0x1a<<20 | 0xf90 + o1 |= (uint32(p.From.Reg) & 15) << 16 + o1 |= (uint32(p.Reg) & 15) << 0 + o1 |= (uint32(p.To.Reg) & 15) << 12 + o1 |= ((uint32(p.Scond) & C_SCOND) ^ C_SCOND_XOR) << 28 + + case 93: /* movb/movh/movhu addr,R -> ldrsb/ldrsh/ldrh */ + o1 = c.omvl(p, &p.From, REGTMP) + + if o1 == 0 { + break + } + o2 = c.olhr(0, REGTMP, int(p.To.Reg), int(p.Scond)) + if p.As == AMOVB || p.As == AMOVBS { + o2 ^= 1<<5 | 1<<6 + } else if p.As == AMOVH || p.As == AMOVHS { + o2 ^= (1 << 6) + } + if o.flag&LPCREL != 0 { + o3 = o2 + o2 = c.oprrr(p, AADD, int(p.Scond)) | REGTMP&15 | (REGPC&15)<<16 | (REGTMP&15)<<12 + } + + case 94: /* movh/movhu R,addr -> strh */ + o1 = c.omvl(p, &p.To, REGTMP) + + if o1 == 0 { + break + } + o2 = c.oshr(int(p.From.Reg), 0, REGTMP, int(p.Scond)) + if o.flag&LPCREL != 0 { + o3 = o2 + o2 = c.oprrr(p, AADD, int(p.Scond)) | REGTMP&15 | (REGPC&15)<<16 | (REGTMP&15)<<12 + } + + 
case 95: /* PLD off(reg) */ + o1 = 0xf5d0f000 + + o1 |= (uint32(p.From.Reg) & 15) << 16 + if p.From.Offset < 0 { + o1 &^= (1 << 23) + o1 |= uint32((-p.From.Offset) & 0xfff) + } else { + o1 |= uint32(p.From.Offset & 0xfff) + } + + // This is supposed to be something that stops execution. + // It's not supposed to be reached, ever, but if it is, we'd + // like to be able to tell how we got there. Assemble as + // 0xf7fabcfd which is guaranteed to raise undefined instruction + // exception. + case 96: /* UNDEF */ + o1 = 0xf7fabcfd + + case 97: /* CLZ Rm, Rd */ + o1 = c.oprrr(p, p.As, int(p.Scond)) + + o1 |= (uint32(p.To.Reg) & 15) << 12 + o1 |= (uint32(p.From.Reg) & 15) << 0 + + case 98: /* MULW{T,B} Rs, Rm, Rd */ + o1 = c.oprrr(p, p.As, int(p.Scond)) + + o1 |= (uint32(p.To.Reg) & 15) << 16 + o1 |= (uint32(p.From.Reg) & 15) << 8 + o1 |= (uint32(p.Reg) & 15) << 0 + + case 99: /* MULAW{T,B} Rs, Rm, Rn, Rd */ + o1 = c.oprrr(p, p.As, int(p.Scond)) + + o1 |= (uint32(p.To.Reg) & 15) << 16 + o1 |= (uint32(p.From.Reg) & 15) << 8 + o1 |= (uint32(p.Reg) & 15) << 0 + o1 |= uint32((p.To.Offset & 15) << 12) + + case 105: /* divhw r,[r,]r */ + o1 = c.oprrr(p, p.As, int(p.Scond)) + rf := int(p.From.Reg) + rt := int(p.To.Reg) + r := int(p.Reg) + if r == 0 { + r = rt + } + o1 |= (uint32(rf)&15)<<8 | (uint32(r)&15)<<0 | (uint32(rt)&15)<<16 + + case 110: /* dmb [mbop | $con] */ + o1 = 0xf57ff050 + mbop := uint32(0) + + switch c.aclass(&p.From) { + case C_SPR: + for _, f := range mbOp { + if f.reg == p.From.Reg { + mbop = f.enc + break + } + } + case C_RCON: + for _, f := range mbOp { + enc := uint32(c.instoffset) + if f.enc == enc { + mbop = enc + break + } + } + case C_NONE: + mbop = 0xf + } + + if mbop == 0 { + c.ctxt.Diag("illegal mb option:\n%v", p) + } + o1 |= mbop + } + + out[0] = o1 + out[1] = o2 + out[2] = o3 + out[3] = o4 + out[4] = o5 + out[5] = o6 +} + +func (c *ctxt5) movxt(p *obj.Prog) uint32 { + o1 := ((uint32(p.Scond) & C_SCOND) ^ C_SCOND_XOR) << 28 + switch p.As { + case 
AMOVB, AMOVBS: + o1 |= 0x6af<<16 | 0x7<<4 + case AMOVH, AMOVHS: + o1 |= 0x6bf<<16 | 0x7<<4 + case AMOVBU: + o1 |= 0x6ef<<16 | 0x7<<4 + case AMOVHU: + o1 |= 0x6ff<<16 | 0x7<<4 + default: + c.ctxt.Diag("illegal combination: %v", p) + } + switch p.From.Offset &^ 0xf { + // only 0/8/16/24 bits rotation is accepted + case SHIFT_RR, SHIFT_RR | 8<<7, SHIFT_RR | 16<<7, SHIFT_RR | 24<<7: + o1 |= uint32(p.From.Offset) & 0xc0f + default: + c.ctxt.Diag("illegal shift: %v", p) + } + o1 |= (uint32(p.To.Reg) & 15) << 12 + return o1 +} + +func (c *ctxt5) mov(p *obj.Prog) uint32 { + c.aclass(&p.From) + o1 := c.oprrr(p, p.As, int(p.Scond)) + o1 |= uint32(p.From.Offset) + rt := int(p.To.Reg) + if p.To.Type == obj.TYPE_NONE { + rt = 0 + } + r := int(p.Reg) + if p.As == AMOVW || p.As == AMVN { + r = 0 + } else if r == 0 { + r = rt + } + o1 |= (uint32(r)&15)<<16 | (uint32(rt)&15)<<12 + return o1 +} + +func (c *ctxt5) oprrr(p *obj.Prog, a obj.As, sc int) uint32 { + o := ((uint32(sc) & C_SCOND) ^ C_SCOND_XOR) << 28 + if sc&C_SBIT != 0 { + o |= 1 << 20 + } + switch a { + case ADIVHW: + return o | 0x71<<20 | 0xf<<12 | 0x1<<4 + case ADIVUHW: + return o | 0x73<<20 | 0xf<<12 | 0x1<<4 + case AMMUL: + return o | 0x75<<20 | 0xf<<12 | 0x1<<4 + case AMULS: + return o | 0x6<<20 | 0x9<<4 + case AMMULA: + return o | 0x75<<20 | 0x1<<4 + case AMMULS: + return o | 0x75<<20 | 0xd<<4 + case AMULU, AMUL: + return o | 0x0<<21 | 0x9<<4 + case AMULA: + return o | 0x1<<21 | 0x9<<4 + case AMULLU: + return o | 0x4<<21 | 0x9<<4 + case AMULL: + return o | 0x6<<21 | 0x9<<4 + case AMULALU: + return o | 0x5<<21 | 0x9<<4 + case AMULAL: + return o | 0x7<<21 | 0x9<<4 + case AAND: + return o | 0x0<<21 + case AEOR: + return o | 0x1<<21 + case ASUB: + return o | 0x2<<21 + case ARSB: + return o | 0x3<<21 + case AADD: + return o | 0x4<<21 + case AADC: + return o | 0x5<<21 + case ASBC: + return o | 0x6<<21 + case ARSC: + return o | 0x7<<21 + case ATST: + return o | 0x8<<21 | 1<<20 + case ATEQ: + return o | 0x9<<21 | 1<<20 + 
case ACMP: + return o | 0xa<<21 | 1<<20 + case ACMN: + return o | 0xb<<21 | 1<<20 + case AORR: + return o | 0xc<<21 + + case AMOVB, AMOVH, AMOVW: + if sc&(C_PBIT|C_WBIT) != 0 { + c.ctxt.Diag("invalid .P/.W suffix: %v", p) + } + return o | 0xd<<21 + case ABIC: + return o | 0xe<<21 + case AMVN: + return o | 0xf<<21 + case ASLL: + return o | 0xd<<21 | 0<<5 + case ASRL: + return o | 0xd<<21 | 1<<5 + case ASRA: + return o | 0xd<<21 | 2<<5 + case ASWI: + return o | 0xf<<24 + + case AADDD: + return o | 0xe<<24 | 0x3<<20 | 0xb<<8 | 0<<4 + case AADDF: + return o | 0xe<<24 | 0x3<<20 | 0xa<<8 | 0<<4 + case ASUBD: + return o | 0xe<<24 | 0x3<<20 | 0xb<<8 | 4<<4 + case ASUBF: + return o | 0xe<<24 | 0x3<<20 | 0xa<<8 | 4<<4 + case AMULD: + return o | 0xe<<24 | 0x2<<20 | 0xb<<8 | 0<<4 + case AMULF: + return o | 0xe<<24 | 0x2<<20 | 0xa<<8 | 0<<4 + case ANMULD: + return o | 0xe<<24 | 0x2<<20 | 0xb<<8 | 0x4<<4 + case ANMULF: + return o | 0xe<<24 | 0x2<<20 | 0xa<<8 | 0x4<<4 + case AMULAD: + return o | 0xe<<24 | 0xb<<8 + case AMULAF: + return o | 0xe<<24 | 0xa<<8 + case AMULSD: + return o | 0xe<<24 | 0xb<<8 | 0x4<<4 + case AMULSF: + return o | 0xe<<24 | 0xa<<8 | 0x4<<4 + case ANMULAD: + return o | 0xe<<24 | 0x1<<20 | 0xb<<8 | 0x4<<4 + case ANMULAF: + return o | 0xe<<24 | 0x1<<20 | 0xa<<8 | 0x4<<4 + case ANMULSD: + return o | 0xe<<24 | 0x1<<20 | 0xb<<8 + case ANMULSF: + return o | 0xe<<24 | 0x1<<20 | 0xa<<8 + case AFMULAD: + return o | 0xe<<24 | 0xa<<20 | 0xb<<8 + case AFMULAF: + return o | 0xe<<24 | 0xa<<20 | 0xa<<8 + case AFMULSD: + return o | 0xe<<24 | 0xa<<20 | 0xb<<8 | 0x4<<4 + case AFMULSF: + return o | 0xe<<24 | 0xa<<20 | 0xa<<8 | 0x4<<4 + case AFNMULAD: + return o | 0xe<<24 | 0x9<<20 | 0xb<<8 | 0x4<<4 + case AFNMULAF: + return o | 0xe<<24 | 0x9<<20 | 0xa<<8 | 0x4<<4 + case AFNMULSD: + return o | 0xe<<24 | 0x9<<20 | 0xb<<8 + case AFNMULSF: + return o | 0xe<<24 | 0x9<<20 | 0xa<<8 + case ADIVD: + return o | 0xe<<24 | 0x8<<20 | 0xb<<8 | 0<<4 + case ADIVF: + return o | 0xe<<24 | 
0x8<<20 | 0xa<<8 | 0<<4 + case ASQRTD: + return o | 0xe<<24 | 0xb<<20 | 1<<16 | 0xb<<8 | 0xc<<4 + case ASQRTF: + return o | 0xe<<24 | 0xb<<20 | 1<<16 | 0xa<<8 | 0xc<<4 + case AABSD: + return o | 0xe<<24 | 0xb<<20 | 0<<16 | 0xb<<8 | 0xc<<4 + case AABSF: + return o | 0xe<<24 | 0xb<<20 | 0<<16 | 0xa<<8 | 0xc<<4 + case ANEGD: + return o | 0xe<<24 | 0xb<<20 | 1<<16 | 0xb<<8 | 0x4<<4 + case ANEGF: + return o | 0xe<<24 | 0xb<<20 | 1<<16 | 0xa<<8 | 0x4<<4 + case ACMPD: + return o | 0xe<<24 | 0xb<<20 | 4<<16 | 0xb<<8 | 0xc<<4 + case ACMPF: + return o | 0xe<<24 | 0xb<<20 | 4<<16 | 0xa<<8 | 0xc<<4 + + case AMOVF: + return o | 0xe<<24 | 0xb<<20 | 0<<16 | 0xa<<8 | 4<<4 + case AMOVD: + return o | 0xe<<24 | 0xb<<20 | 0<<16 | 0xb<<8 | 4<<4 + + case AMOVDF: + return o | 0xe<<24 | 0xb<<20 | 7<<16 | 0xa<<8 | 0xc<<4 | 1<<8 // dtof + case AMOVFD: + return o | 0xe<<24 | 0xb<<20 | 7<<16 | 0xa<<8 | 0xc<<4 | 0<<8 // dtof + + case AMOVWF: + if sc&C_UBIT == 0 { + o |= 1 << 7 /* signed */ + } + return o | 0xe<<24 | 0xb<<20 | 8<<16 | 0xa<<8 | 4<<4 | 0<<18 | 0<<8 // toint, double + + case AMOVWD: + if sc&C_UBIT == 0 { + o |= 1 << 7 /* signed */ + } + return o | 0xe<<24 | 0xb<<20 | 8<<16 | 0xa<<8 | 4<<4 | 0<<18 | 1<<8 // toint, double + + case AMOVFW: + if sc&C_UBIT == 0 { + o |= 1 << 16 /* signed */ + } + return o | 0xe<<24 | 0xb<<20 | 8<<16 | 0xa<<8 | 4<<4 | 1<<18 | 0<<8 | 1<<7 // toint, double, trunc + + case AMOVDW: + if sc&C_UBIT == 0 { + o |= 1 << 16 /* signed */ + } + return o | 0xe<<24 | 0xb<<20 | 8<<16 | 0xa<<8 | 4<<4 | 1<<18 | 1<<8 | 1<<7 // toint, double, trunc + + case -AMOVWF: // copy WtoF + return o | 0xe<<24 | 0x0<<20 | 0xb<<8 | 1<<4 + + case -AMOVFW: // copy FtoW + return o | 0xe<<24 | 0x1<<20 | 0xb<<8 | 1<<4 + + case -ACMP: // cmp imm + return o | 0x3<<24 | 0x5<<20 + + case ABFX: + return o | 0x3d<<21 | 0x5<<4 + + case ABFXU: + return o | 0x3f<<21 | 0x5<<4 + + case ABFC: + return o | 0x3e<<21 | 0x1f + + case ABFI: + return o | 0x3e<<21 | 0x1<<4 + + case AXTAB: + return o | 
0x6a<<20 | 0x7<<4 + + case AXTAH: + return o | 0x6b<<20 | 0x7<<4 + + case AXTABU: + return o | 0x6e<<20 | 0x7<<4 + + case AXTAHU: + return o | 0x6f<<20 | 0x7<<4 + + // CLZ doesn't support .nil + case ACLZ: + return o&(0xf<<28) | 0x16f<<16 | 0xf1<<4 + + case AREV: + return o&(0xf<<28) | 0x6bf<<16 | 0xf3<<4 + + case AREV16: + return o&(0xf<<28) | 0x6bf<<16 | 0xfb<<4 + + case AREVSH: + return o&(0xf<<28) | 0x6ff<<16 | 0xfb<<4 + + case ARBIT: + return o&(0xf<<28) | 0x6ff<<16 | 0xf3<<4 + + case AMULWT: + return o&(0xf<<28) | 0x12<<20 | 0xe<<4 + + case AMULWB: + return o&(0xf<<28) | 0x12<<20 | 0xa<<4 + + case AMULBB: + return o&(0xf<<28) | 0x16<<20 | 0x8<<4 + + case AMULAWT: + return o&(0xf<<28) | 0x12<<20 | 0xc<<4 + + case AMULAWB: + return o&(0xf<<28) | 0x12<<20 | 0x8<<4 + + case AMULABB: + return o&(0xf<<28) | 0x10<<20 | 0x8<<4 + + case ABL: // BLX REG + return o&(0xf<<28) | 0x12fff3<<4 + } + + c.ctxt.Diag("%v: bad rrr %d", p, a) + return 0 +} + +func (c *ctxt5) opbra(p *obj.Prog, a obj.As, sc int) uint32 { + sc &= C_SCOND + sc ^= C_SCOND_XOR + if a == ABL || a == obj.ADUFFZERO || a == obj.ADUFFCOPY { + return uint32(sc)<<28 | 0x5<<25 | 0x1<<24 + } + if sc != 0xe { + c.ctxt.Diag("%v: .COND on bcond instruction", p) + } + switch a { + case ABEQ: + return 0x0<<28 | 0x5<<25 + case ABNE: + return 0x1<<28 | 0x5<<25 + case ABCS: + return 0x2<<28 | 0x5<<25 + case ABHS: + return 0x2<<28 | 0x5<<25 + case ABCC: + return 0x3<<28 | 0x5<<25 + case ABLO: + return 0x3<<28 | 0x5<<25 + case ABMI: + return 0x4<<28 | 0x5<<25 + case ABPL: + return 0x5<<28 | 0x5<<25 + case ABVS: + return 0x6<<28 | 0x5<<25 + case ABVC: + return 0x7<<28 | 0x5<<25 + case ABHI: + return 0x8<<28 | 0x5<<25 + case ABLS: + return 0x9<<28 | 0x5<<25 + case ABGE: + return 0xa<<28 | 0x5<<25 + case ABLT: + return 0xb<<28 | 0x5<<25 + case ABGT: + return 0xc<<28 | 0x5<<25 + case ABLE: + return 0xd<<28 | 0x5<<25 + case AB: + return 0xe<<28 | 0x5<<25 + } + + c.ctxt.Diag("%v: bad bra %v", p, a) + return 0 +} + +func (c 
*ctxt5) olr(v int32, b int, r int, sc int) uint32 { + o := ((uint32(sc) & C_SCOND) ^ C_SCOND_XOR) << 28 + if sc&C_PBIT == 0 { + o |= 1 << 24 + } + if sc&C_UBIT == 0 { + o |= 1 << 23 + } + if sc&C_WBIT != 0 { + o |= 1 << 21 + } + o |= 1<<26 | 1<<20 + if v < 0 { + if sc&C_UBIT != 0 { + c.ctxt.Diag(".U on neg offset") + } + v = -v + o ^= 1 << 23 + } + + if v >= 1<<12 || v < 0 { + c.ctxt.Diag("literal span too large: %d (R%d)\n%v", v, b, c.printp) + } + o |= uint32(v) + o |= (uint32(b) & 15) << 16 + o |= (uint32(r) & 15) << 12 + return o +} + +func (c *ctxt5) olhr(v int32, b int, r int, sc int) uint32 { + o := ((uint32(sc) & C_SCOND) ^ C_SCOND_XOR) << 28 + if sc&C_PBIT == 0 { + o |= 1 << 24 + } + if sc&C_WBIT != 0 { + o |= 1 << 21 + } + o |= 1<<23 | 1<<20 | 0xb<<4 + if v < 0 { + v = -v + o ^= 1 << 23 + } + + if v >= 1<<8 || v < 0 { + c.ctxt.Diag("literal span too large: %d (R%d)\n%v", v, b, c.printp) + } + o |= uint32(v)&0xf | (uint32(v)>>4)<<8 | 1<<22 + o |= (uint32(b) & 15) << 16 + o |= (uint32(r) & 15) << 12 + return o +} + +func (c *ctxt5) osr(a obj.As, r int, v int32, b int, sc int) uint32 { + o := c.olr(v, b, r, sc) ^ (1 << 20) + if a != AMOVW { + o |= 1 << 22 + } + return o +} + +func (c *ctxt5) oshr(r int, v int32, b int, sc int) uint32 { + o := c.olhr(v, b, r, sc) ^ (1 << 20) + return o +} + +func (c *ctxt5) osrr(r int, i int, b int, sc int) uint32 { + return c.olr(int32(i), b, r, sc) ^ (1<<25 | 1<<20) +} + +func (c *ctxt5) oshrr(r int, i int, b int, sc int) uint32 { + return c.olhr(int32(i), b, r, sc) ^ (1<<22 | 1<<20) +} + +func (c *ctxt5) olrr(i int, b int, r int, sc int) uint32 { + return c.olr(int32(i), b, r, sc) ^ (1 << 25) +} + +func (c *ctxt5) olhrr(i int, b int, r int, sc int) uint32 { + return c.olhr(int32(i), b, r, sc) ^ (1 << 22) +} + +func (c *ctxt5) ofsr(a obj.As, r int, v int32, b int, sc int, p *obj.Prog) uint32 { + o := ((uint32(sc) & C_SCOND) ^ C_SCOND_XOR) << 28 + if sc&C_PBIT == 0 { + o |= 1 << 24 + } + if sc&C_WBIT != 0 { + o |= 1 << 21 + 
} + o |= 6<<25 | 1<<24 | 1<<23 | 10<<8 + if v < 0 { + v = -v + o ^= 1 << 23 + } + + if v&3 != 0 { + c.ctxt.Diag("odd offset for floating point op: %d\n%v", v, p) + } else if v >= 1<<10 || v < 0 { + c.ctxt.Diag("literal span too large: %d\n%v", v, p) + } + o |= (uint32(v) >> 2) & 0xFF + o |= (uint32(b) & 15) << 16 + o |= (uint32(r) & 15) << 12 + + switch a { + default: + c.ctxt.Diag("bad fst %v", a) + fallthrough + + case AMOVD: + o |= 1 << 8 + fallthrough + + case AMOVF: + break + } + + return o +} + +// MOVW $"lower 16-bit", Reg +func (c *ctxt5) omvs(p *obj.Prog, a *obj.Addr, dr int) uint32 { + o1 := ((uint32(p.Scond) & C_SCOND) ^ C_SCOND_XOR) << 28 + o1 |= 0x30 << 20 + o1 |= (uint32(dr) & 15) << 12 + o1 |= uint32(a.Offset) & 0x0fff + o1 |= (uint32(a.Offset) & 0xf000) << 4 + return o1 +} + +// MVN $C_NCON, Reg -> MOVW $C_RCON, Reg +func (c *ctxt5) omvr(p *obj.Prog, a *obj.Addr, dr int) uint32 { + o1 := c.oprrr(p, AMOVW, int(p.Scond)) + o1 |= (uint32(dr) & 15) << 12 + v := immrot(^uint32(a.Offset)) + if v == 0 { + c.ctxt.Diag("%v: missing literal", p) + return 0 + } + o1 |= uint32(v) + return o1 +} + +func (c *ctxt5) omvl(p *obj.Prog, a *obj.Addr, dr int) uint32 { + var o1 uint32 + if p.Pool == nil { + c.aclass(a) + v := immrot(^uint32(c.instoffset)) + if v == 0 { + c.ctxt.Diag("%v: missing literal", p) + return 0 + } + + o1 = c.oprrr(p, AMVN, int(p.Scond)&C_SCOND) + o1 |= uint32(v) + o1 |= (uint32(dr) & 15) << 12 + } else { + v := int32(p.Pool.Pc - p.Pc - 8) + o1 = c.olr(v, REGPC, dr, int(p.Scond)&C_SCOND) + } + + return o1 +} + +func (c *ctxt5) chipzero5(e float64) int { + // We use GOARM=7 to gate the use of VFPv3 vmov (imm) instructions. + if buildcfg.GOARM < 7 || math.Float64bits(e) != 0 { + return -1 + } + return 0 +} + +func (c *ctxt5) chipfloat5(e float64) int { + // We use GOARM=7 to gate the use of VFPv3 vmov (imm) instructions. 
+ if buildcfg.GOARM < 7 { + return -1 + } + + ei := math.Float64bits(e) + l := uint32(ei) + h := uint32(ei >> 32) + + if l != 0 || h&0xffff != 0 { + return -1 + } + h1 := h & 0x7fc00000 + if h1 != 0x40000000 && h1 != 0x3fc00000 { + return -1 + } + n := 0 + + // sign bit (a) + if h&0x80000000 != 0 { + n |= 1 << 7 + } + + // exp sign bit (b) + if h1 == 0x3fc00000 { + n |= 1 << 6 + } + + // rest of exp and mantissa (cd-efgh) + n |= int((h >> 16) & 0x3f) + + //print("match %.8lux %.8lux %d\n", l, h, n); + return n +} + +func nocache(p *obj.Prog) { + p.Optab = 0 + p.From.Class = 0 + if p.GetFrom3() != nil { + p.GetFrom3().Class = 0 + } + p.To.Class = 0 +} diff --git a/src/cmd/internal/obj/arm/list5.go b/src/cmd/internal/obj/arm/list5.go new file mode 100644 index 0000000..6d630f6 --- /dev/null +++ b/src/cmd/internal/obj/arm/list5.go @@ -0,0 +1,124 @@ +// Inferno utils/5c/list.c +// https://bitbucket.org/inferno-os/inferno-os/src/master/utils/5c/list.c +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package arm + +import ( + "cmd/internal/obj" + "fmt" +) + +func init() { + obj.RegisterRegister(obj.RBaseARM, MAXREG, rconv) + obj.RegisterOpcode(obj.ABaseARM, Anames) + obj.RegisterRegisterList(obj.RegListARMLo, obj.RegListARMHi, rlconv) + obj.RegisterOpSuffix("arm", obj.CConvARM) +} + +func rconv(r int) string { + if r == 0 { + return "NONE" + } + if r == REGG { + // Special case. 
+ return "g" + } + if REG_R0 <= r && r <= REG_R15 { + return fmt.Sprintf("R%d", r-REG_R0) + } + if REG_F0 <= r && r <= REG_F15 { + return fmt.Sprintf("F%d", r-REG_F0) + } + + switch r { + case REG_FPSR: + return "FPSR" + + case REG_FPCR: + return "FPCR" + + case REG_CPSR: + return "CPSR" + + case REG_SPSR: + return "SPSR" + + case REG_MB_SY: + return "MB_SY" + case REG_MB_ST: + return "MB_ST" + case REG_MB_ISH: + return "MB_ISH" + case REG_MB_ISHST: + return "MB_ISHST" + case REG_MB_NSH: + return "MB_NSH" + case REG_MB_NSHST: + return "MB_NSHST" + case REG_MB_OSH: + return "MB_OSH" + case REG_MB_OSHST: + return "MB_OSHST" + } + + return fmt.Sprintf("Rgok(%d)", r-obj.RBaseARM) +} + +func DRconv(a int) string { + s := "C_??" + if a >= C_NONE && a <= C_NCLASS { + s = cnames5[a] + } + var fp string + fp += s + return fp +} + +func rlconv(list int64) string { + str := "" + for i := 0; i < 16; i++ { + if list&(1<, C13, C0, 3 specially. + case AMRC: + if p.To.Offset&0xffff0fff == 0xee1d0f70 { + // Because the instruction might be rewritten to a BL which returns in R0 + // the register must be zero. + if p.To.Offset&0xf000 != 0 { + ctxt.Diag("%v: TLS MRC instruction must write to R0 as it might get translated into a BL instruction", p.Line()) + } + + if buildcfg.GOARM < 7 { + // Replace it with BL runtime.read_tls_fallback(SB) for ARM CPUs that lack the tls extension. 
+ if progedit_tlsfallback == nil { + progedit_tlsfallback = ctxt.Lookup("runtime.read_tls_fallback") + } + + // MOVW LR, R11 + p.As = AMOVW + + p.From.Type = obj.TYPE_REG + p.From.Reg = REGLINK + p.To.Type = obj.TYPE_REG + p.To.Reg = REGTMP + + // BL runtime.read_tls_fallback(SB) + p = obj.Appendp(p, newprog) + + p.As = ABL + p.To.Type = obj.TYPE_BRANCH + p.To.Sym = progedit_tlsfallback + p.To.Offset = 0 + + // MOVW R11, LR + p = obj.Appendp(p, newprog) + + p.As = AMOVW + p.From.Type = obj.TYPE_REG + p.From.Reg = REGTMP + p.To.Type = obj.TYPE_REG + p.To.Reg = REGLINK + break + } + } + + // Otherwise, MRC/MCR instructions need no further treatment. + p.As = AWORD + } + + // Rewrite float constants to values stored in memory. + switch p.As { + case AMOVF: + if p.From.Type == obj.TYPE_FCONST && c.chipfloat5(p.From.Val.(float64)) < 0 && (c.chipzero5(p.From.Val.(float64)) < 0 || p.Scond&C_SCOND != C_SCOND_NONE) { + f32 := float32(p.From.Val.(float64)) + p.From.Type = obj.TYPE_MEM + p.From.Sym = ctxt.Float32Sym(f32) + p.From.Name = obj.NAME_EXTERN + p.From.Offset = 0 + } + + case AMOVD: + if p.From.Type == obj.TYPE_FCONST && c.chipfloat5(p.From.Val.(float64)) < 0 && (c.chipzero5(p.From.Val.(float64)) < 0 || p.Scond&C_SCOND != C_SCOND_NONE) { + p.From.Type = obj.TYPE_MEM + p.From.Sym = ctxt.Float64Sym(p.From.Val.(float64)) + p.From.Name = obj.NAME_EXTERN + p.From.Offset = 0 + } + } + + if ctxt.Flag_dynlink { + c.rewriteToUseGot(p) + } +} + +// Rewrite p, if necessary, to access global data via the global offset table. 
+func (c *ctxt5) rewriteToUseGot(p *obj.Prog) { + if p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO { + // ADUFFxxx $offset + // becomes + // MOVW runtime.duffxxx@GOT, R9 + // ADD $offset, R9 + // CALL (R9) + var sym *obj.LSym + if p.As == obj.ADUFFZERO { + sym = c.ctxt.Lookup("runtime.duffzero") + } else { + sym = c.ctxt.Lookup("runtime.duffcopy") + } + // Remember the offset now: p.To is overwritten just below. + offset := p.To.Offset + p.As = AMOVW + p.From.Type = obj.TYPE_MEM + p.From.Name = obj.NAME_GOTREF + p.From.Sym = sym + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_R9 + p.To.Name = obj.NAME_NONE + p.To.Offset = 0 + p.To.Sym = nil + p1 := obj.Appendp(p, c.newprog) + p1.As = AADD + p1.From.Type = obj.TYPE_CONST + p1.From.Offset = offset + p1.To.Type = obj.TYPE_REG + p1.To.Reg = REG_R9 + p2 := obj.Appendp(p1, c.newprog) + p2.As = obj.ACALL + p2.To.Type = obj.TYPE_MEM + p2.To.Reg = REG_R9 + return + } + + // We only care about global data: NAME_EXTERN means a global + // symbol in the Go sense, and p.Sym.Local is true for a few + // internally defined symbols. 
+ if p.From.Type == obj.TYPE_ADDR && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() { + // MOVW $sym, Rx becomes MOVW sym@GOT, Rx + // MOVW $sym+<off>, Rx becomes MOVW sym@GOT, Rx; ADD <off>, Rx + if p.As != AMOVW { + c.ctxt.Diag("do not know how to handle TYPE_ADDR in %v with -dynlink", p) + } + if p.To.Type != obj.TYPE_REG { + c.ctxt.Diag("do not know how to handle LEAQ-type insn to non-register in %v with -dynlink", p) + } + p.From.Type = obj.TYPE_MEM + p.From.Name = obj.NAME_GOTREF + if p.From.Offset != 0 { + // The GOT load cannot carry the offset, so add it in a separate instruction. + q := obj.Appendp(p, c.newprog) + q.As = AADD + q.From.Type = obj.TYPE_CONST + q.From.Offset = p.From.Offset + q.To = p.To + p.From.Offset = 0 + } + } + if p.GetFrom3() != nil && p.GetFrom3().Name == obj.NAME_EXTERN { + c.ctxt.Diag("don't know how to handle %v with -dynlink", p) + } + var source *obj.Addr + // MOVx sym, Ry becomes MOVW sym@GOT, R9; MOVx (R9), Ry + // MOVx Ry, sym becomes MOVW sym@GOT, R9; MOVx Ry, (R9) + // An addition may be inserted between the two MOVs if there is an offset. 
+ if p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() { + if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() { + c.ctxt.Diag("cannot handle NAME_EXTERN on both sides in %v with -dynlink", p) + } + source = &p.From + } else if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() { + source = &p.To + } else { + return + } + if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ACALL || p.As == obj.ARET || p.As == obj.AJMP { + return + } + if source.Sym.Type == objabi.STLSBSS { + return + } + if source.Type != obj.TYPE_MEM { + c.ctxt.Diag("don't know how to handle %v with -dynlink", p) + } + p1 := obj.Appendp(p, c.newprog) + p2 := obj.Appendp(p1, c.newprog) + + p1.As = AMOVW + p1.From.Type = obj.TYPE_MEM + p1.From.Sym = source.Sym + p1.From.Name = obj.NAME_GOTREF + p1.To.Type = obj.TYPE_REG + p1.To.Reg = REG_R9 + + p2.As = p.As + p2.From = p.From + p2.To = p.To + if p.From.Name == obj.NAME_EXTERN { + p2.From.Reg = REG_R9 + p2.From.Name = obj.NAME_NONE + p2.From.Sym = nil + } else if p.To.Name == obj.NAME_EXTERN { + p2.To.Reg = REG_R9 + p2.To.Name = obj.NAME_NONE + p2.To.Sym = nil + } else { + return + } + obj.Nopout(p) +} + +// Prog.mark +const ( + FOLL = 1 << 0 + LABEL = 1 << 1 + LEAF = 1 << 2 +) + +// preprocess rewrites the instructions of cursym in place: it expands the +// ATEXT prologue, rewrites RET and the software-divide pseudo-ops, and +// records SP adjustments (Spadj) on instructions that move SP. +func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { + autosize := int32(0) + + if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { + return + } + + c := ctxt5{ctxt: ctxt, cursym: cursym, newprog: newprog} + + p := c.cursym.Func().Text + autoffset := int32(p.To.Offset) + if autoffset == -4 { + // Historical way to mark NOFRAME. 
+ p.From.Sym.Set(obj.AttrNoFrame, true) + autoffset = 0 + } + if autoffset < 0 || autoffset%4 != 0 { + c.ctxt.Diag("frame size %d not 0 or a positive multiple of 4", autoffset) + } + if p.From.Sym.NoFrame() { + if autoffset != 0 { + c.ctxt.Diag("NOFRAME functions must have a frame size of 0, not %d", autoffset) + } + } + + cursym.Func().Locals = autoffset + cursym.Func().Args = p.To.Val.(int32) + + /* + * find leaf subroutines + */ + for p := cursym.Func().Text; p != nil; p = p.Link { + switch p.As { + case obj.ATEXT: + p.Mark |= LEAF + + case ADIV, ADIVU, AMOD, AMODU: + cursym.Func().Text.Mark &^= LEAF + + case ABL, + ABX, + obj.ADUFFZERO, + obj.ADUFFCOPY: + cursym.Func().Text.Mark &^= LEAF + } + } + + var q2 *obj.Prog + for p := cursym.Func().Text; p != nil; p = p.Link { + o := p.As + switch o { + case obj.ATEXT: + autosize = autoffset + + if p.Mark&LEAF != 0 && autosize == 0 { + // A leaf function with no locals has no frame. + p.From.Sym.Set(obj.AttrNoFrame, true) + } + + if !p.From.Sym.NoFrame() { + // If there is a stack frame at all, it includes + // space to save the LR. + autosize += 4 + } + + if autosize == 0 && cursym.Func().Text.Mark&LEAF == 0 { + // A very few functions that do not return to their caller + // are not identified as leaves but still have no frame. + if ctxt.Debugvlog { + ctxt.Logf("save suppressed in: %s\n", cursym.Name) + } + + cursym.Func().Text.Mark |= LEAF + } + + // FP offsets need an updated p.To.Offset. 
+ p.To.Offset = int64(autosize) - 4 + + if cursym.Func().Text.Mark&LEAF != 0 { + cursym.Set(obj.AttrLeaf, true) + if p.From.Sym.NoFrame() { + break + } + } + + if !p.From.Sym.NoSplit() { + p = c.stacksplit(p, autosize) // emit split check + } + + // MOVW.W R14,$-autosize(SP) + p = obj.Appendp(p, c.newprog) + + p.As = AMOVW + p.Scond |= C_WBIT + p.From.Type = obj.TYPE_REG + p.From.Reg = REGLINK + p.To.Type = obj.TYPE_MEM + p.To.Offset = int64(-autosize) + p.To.Reg = REGSP + p.Spadj = autosize + + if cursym.Func().Text.From.Sym.Wrapper() { + // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame + // + // MOVW g_panic(g), R1 + // CMP $0, R1 + // B.NE checkargp + // end: + // NOP + // ... function ... + // checkargp: + // MOVW panic_argp(R1), R2 + // ADD $(autosize+4), R13, R3 + // CMP R2, R3 + // B.NE end + // ADD $4, R13, R4 + // MOVW R4, panic_argp(R1) + // B end + // + // The NOP is needed to give the jumps somewhere to land. + // It is a liblink NOP, not an ARM NOP: it encodes to 0 instruction bytes. 
+ + p = obj.Appendp(p, newprog) + p.As = AMOVW + p.From.Type = obj.TYPE_MEM + p.From.Reg = REGG + p.From.Offset = 4 * int64(ctxt.Arch.PtrSize) // G.panic + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_R1 + + p = obj.Appendp(p, newprog) + p.As = ACMP + p.From.Type = obj.TYPE_CONST + p.From.Offset = 0 + p.Reg = REG_R1 + + // B.NE checkargp + bne := obj.Appendp(p, newprog) + bne.As = ABNE + bne.To.Type = obj.TYPE_BRANCH + + // end: NOP + end := obj.Appendp(bne, newprog) + end.As = obj.ANOP + + // find end of function + var last *obj.Prog + for last = end; last.Link != nil; last = last.Link { + } + + // MOVW panic_argp(R1), R2 + mov := obj.Appendp(last, newprog) + mov.As = AMOVW + mov.From.Type = obj.TYPE_MEM + mov.From.Reg = REG_R1 + mov.From.Offset = 0 // Panic.argp + mov.To.Type = obj.TYPE_REG + mov.To.Reg = REG_R2 + + // B.NE branch target is MOVW above + bne.To.SetTarget(mov) + + // ADD $(autosize+4), R13, R3 + p = obj.Appendp(mov, newprog) + p.As = AADD + p.From.Type = obj.TYPE_CONST + p.From.Offset = int64(autosize) + 4 + p.Reg = REG_R13 + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_R3 + + // CMP R2, R3 + p = obj.Appendp(p, newprog) + p.As = ACMP + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_R2 + p.Reg = REG_R3 + + // B.NE end + p = obj.Appendp(p, newprog) + p.As = ABNE + p.To.Type = obj.TYPE_BRANCH + p.To.SetTarget(end) + + // ADD $4, R13, R4 + p = obj.Appendp(p, newprog) + p.As = AADD + p.From.Type = obj.TYPE_CONST + p.From.Offset = 4 + p.Reg = REG_R13 + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_R4 + + // MOVW R4, panic_argp(R1) + p = obj.Appendp(p, newprog) + p.As = AMOVW + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_R4 + p.To.Type = obj.TYPE_MEM + p.To.Reg = REG_R1 + p.To.Offset = 0 // Panic.argp + + // B end + p = obj.Appendp(p, newprog) + p.As = AB + p.To.Type = obj.TYPE_BRANCH + p.To.SetTarget(end) + + // reset for subsequent passes + p = end + } + + case obj.ARET: + nocache(p) + if cursym.Func().Text.Mark&LEAF != 0 { + if autosize == 0 { + p.As = AB + 
p.From = obj.Addr{} + if p.To.Sym != nil { // retjmp + p.To.Type = obj.TYPE_BRANCH + } else { + p.To.Type = obj.TYPE_MEM + p.To.Offset = 0 + p.To.Reg = REGLINK + } + + break + } + } + + p.As = AMOVW + p.Scond |= C_PBIT + p.From.Type = obj.TYPE_MEM + p.From.Offset = int64(autosize) + p.From.Reg = REGSP + p.To.Type = obj.TYPE_REG + p.To.Reg = REGPC + + // If there are instructions following + // this ARET, they come from a branch + // with the same stackframe, so no spadj. + + if p.To.Sym != nil { // retjmp + p.To.Reg = REGLINK + q2 = obj.Appendp(p, newprog) + q2.As = AB + q2.To.Type = obj.TYPE_BRANCH + q2.To.Sym = p.To.Sym + p.To.Sym = nil + p.To.Name = obj.NAME_NONE + p = q2 + } + + case AADD: + if p.From.Type == obj.TYPE_CONST && p.From.Reg == 0 && p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP { + p.Spadj = int32(-p.From.Offset) + } + + case ASUB: + if p.From.Type == obj.TYPE_CONST && p.From.Reg == 0 && p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP { + p.Spadj = int32(p.From.Offset) + } + + case ADIV, ADIVU, AMOD, AMODU: + if cursym.Func().Text.From.Sym.NoSplit() { + ctxt.Diag("cannot divide in NOSPLIT function") + } + const debugdivmod = false + if debugdivmod { + break + } + if p.From.Type != obj.TYPE_REG { + break + } + if p.To.Type != obj.TYPE_REG { + break + } + + // Make copy because we overwrite p below. 
+ q1 := *p + if q1.Reg == REGTMP || q1.Reg == 0 && q1.To.Reg == REGTMP { + ctxt.Diag("div already using REGTMP: %v", p) + } + + /* MOV m(g),REGTMP */ + p.As = AMOVW + p.Pos = q1.Pos + p.From.Type = obj.TYPE_MEM + p.From.Reg = REGG + p.From.Offset = 6 * 4 // offset of g.m + p.Reg = 0 + p.To.Type = obj.TYPE_REG + p.To.Reg = REGTMP + + /* MOV a,m_divmod(REGTMP) */ + p = obj.Appendp(p, newprog) + p.As = AMOVW + p.Pos = q1.Pos + p.From.Type = obj.TYPE_REG + p.From.Reg = q1.From.Reg + p.To.Type = obj.TYPE_MEM + p.To.Reg = REGTMP + p.To.Offset = 8 * 4 // offset of m.divmod + + /* MOV b, R8 */ + p = obj.Appendp(p, newprog) + p.As = AMOVW + p.Pos = q1.Pos + p.From.Type = obj.TYPE_REG + p.From.Reg = q1.Reg + if q1.Reg == 0 { + p.From.Reg = q1.To.Reg + } + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_R8 + p.To.Offset = 0 + + /* CALL appropriate */ + p = obj.Appendp(p, newprog) + p.As = ABL + p.Pos = q1.Pos + p.To.Type = obj.TYPE_BRANCH + switch o { + case ADIV: + p.To.Sym = symdiv + case ADIVU: + p.To.Sym = symdivu + case AMOD: + p.To.Sym = symmod + case AMODU: + p.To.Sym = symmodu + } + + /* MOV REGTMP, b */ + p = obj.Appendp(p, newprog) + p.As = AMOVW + p.Pos = q1.Pos + p.From.Type = obj.TYPE_REG + p.From.Reg = REGTMP + p.From.Offset = 0 + p.To.Type = obj.TYPE_REG + p.To.Reg = q1.To.Reg + + case AMOVW: + if (p.Scond&C_WBIT != 0) && p.To.Type == obj.TYPE_MEM && p.To.Reg == REGSP { + p.Spadj = int32(-p.To.Offset) + } + if (p.Scond&C_PBIT != 0) && p.From.Type == obj.TYPE_MEM && p.From.Reg == REGSP && p.To.Reg != REGPC { + p.Spadj = int32(-p.From.Offset) + } + if p.From.Type == obj.TYPE_ADDR && p.From.Reg == REGSP && p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP { + p.Spadj = int32(-p.From.Offset) + } + + case obj.AGETCALLERPC: + if cursym.Leaf() { + /* MOVW LR, Rd */ + p.As = AMOVW + p.From.Type = obj.TYPE_REG + p.From.Reg = REGLINK + } else { + /* MOVW (RSP), Rd */ + p.As = AMOVW + p.From.Type = obj.TYPE_MEM + p.From.Reg = REGSP + } + } + + if p.To.Type == obj.TYPE_REG && 
p.To.Reg == REGSP && p.Spadj == 0 { + f := c.cursym.Func() + if f.FuncFlag&objabi.FuncFlag_SPWRITE == 0 { + c.cursym.Func().FuncFlag |= objabi.FuncFlag_SPWRITE + if ctxt.Debugvlog || !ctxt.IsAsm { + ctxt.Logf("auto-SPWRITE: %s %v\n", c.cursym.Name, p) + if !ctxt.IsAsm { + ctxt.Diag("invalid auto-SPWRITE in non-assembly") + ctxt.DiagFlush() + log.Fatalf("bad SPWRITE") + } + } + } + } + } +} + +// stacksplit appends the stack-split check after p and returns the last +// instruction of the inline check. framesize is the frame size the check +// compares against the stack guard. The call to morestack is appended at +// the end of the function and branches back to the start of the check. +// When Flag_maymorestack is set, a call to that function is emitted first, +// with LR and REGCTXT saved on the stack around it. +func (c *ctxt5) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { + if c.ctxt.Flag_maymorestack != "" { + // Save LR and make room for REGCTXT. + const frameSize = 8 + // MOVW.W R14,$-8(SP) + p = obj.Appendp(p, c.newprog) + p.As = AMOVW + p.Scond |= C_WBIT + p.From.Type = obj.TYPE_REG + p.From.Reg = REGLINK + p.To.Type = obj.TYPE_MEM + p.To.Offset = -frameSize + p.To.Reg = REGSP + p.Spadj = frameSize + + // MOVW REGCTXT, 4(SP) + p = obj.Appendp(p, c.newprog) + p.As = AMOVW + p.From.Type = obj.TYPE_REG + p.From.Reg = REGCTXT + p.To.Type = obj.TYPE_MEM + p.To.Offset = 4 + p.To.Reg = REGSP + + // CALL maymorestack + p = obj.Appendp(p, c.newprog) + p.As = obj.ACALL + p.To.Type = obj.TYPE_BRANCH + // See ../x86/obj6.go + p.To.Sym = c.ctxt.LookupABI(c.ctxt.Flag_maymorestack, c.cursym.ABI()) + + // Restore REGCTXT and LR. + + // MOVW 4(SP), REGCTXT + p = obj.Appendp(p, c.newprog) + p.As = AMOVW + p.From.Type = obj.TYPE_MEM + p.From.Offset = 4 + p.From.Reg = REGSP + p.To.Type = obj.TYPE_REG + p.To.Reg = REGCTXT + + // MOVW.P 8(SP), R14 + // This must be its own instruction; without a fresh Prog it would + // overwrite the REGCTXT restore above. 
+ p = obj.Appendp(p, c.newprog) + p.As = AMOVW + p.Scond |= C_PBIT + p.From.Type = obj.TYPE_MEM + p.From.Offset = frameSize + p.From.Reg = REGSP + p.To.Type = obj.TYPE_REG + p.To.Reg = REGLINK + p.Spadj = -frameSize + } + + // Jump back to here after morestack returns. 
+ startPred := p + + // MOVW g_stackguard(g), R1 + p = obj.Appendp(p, c.newprog) + + p.As = AMOVW + p.From.Type = obj.TYPE_MEM + p.From.Reg = REGG + p.From.Offset = 2 * int64(c.ctxt.Arch.PtrSize) // G.stackguard0 + if c.cursym.CFunc() { + p.From.Offset = 3 * int64(c.ctxt.Arch.PtrSize) // G.stackguard1 + } + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_R1 + + // Mark the stack bound check and morestack call async nonpreemptible. + // If we get preempted here, when resumed the preemption request is + // cleared, but we'll still call morestack, which will double the stack + // unnecessarily. See issue #35470. + p = c.ctxt.StartUnsafePoint(p, c.newprog) + + if framesize <= objabi.StackSmall { + // small stack: SP < stackguard + // CMP stackguard, SP + p = obj.Appendp(p, c.newprog) + + p.As = ACMP + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_R1 + p.Reg = REGSP + } else if framesize <= objabi.StackBig { + // large stack: SP-framesize < stackguard-StackSmall + // MOVW $-(framesize-StackSmall)(SP), R2 + // CMP stackguard, R2 + p = obj.Appendp(p, c.newprog) + + p.As = AMOVW + p.From.Type = obj.TYPE_ADDR + p.From.Reg = REGSP + p.From.Offset = -(int64(framesize) - objabi.StackSmall) + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_R2 + + p = obj.Appendp(p, c.newprog) + p.As = ACMP + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_R1 + p.Reg = REG_R2 + } else { + // Such a large stack we need to protect against underflow. + // The runtime guarantees SP > objabi.StackBig, but + // framesize is large enough that SP-framesize may + // underflow, causing a direct comparison with the + // stack guard to incorrectly succeed. We explicitly + // guard against underflow. + // + // // Try subtracting from SP and check for underflow. + // // If this underflows, it sets C to 0. + // SUB.S $(framesize-StackSmall), SP, R2 + // // If C is 1 (unsigned >=), compare with guard. 
+ // CMP.HS stackguard, R2 + + p = obj.Appendp(p, c.newprog) + p.As = ASUB + p.Scond = C_SBIT + p.From.Type = obj.TYPE_CONST + p.From.Offset = int64(framesize) - objabi.StackSmall + p.Reg = REGSP + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_R2 + + p = obj.Appendp(p, c.newprog) + p.As = ACMP + p.Scond = C_SCOND_HS + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_R1 + p.Reg = REG_R2 + } + + // BLS call-to-morestack (C is 0 or Z is 1) + bls := obj.Appendp(p, c.newprog) + bls.As = ABLS + bls.To.Type = obj.TYPE_BRANCH + + end := c.ctxt.EndUnsafePoint(bls, c.newprog, -1) + + var last *obj.Prog + for last = c.cursym.Func().Text; last.Link != nil; last = last.Link { + } + + // Now we are at the end of the function, but logically + // we are still in function prologue. We need to fix the + // SP data and PCDATA. + spfix := obj.Appendp(last, c.newprog) + spfix.As = obj.ANOP + spfix.Spadj = -framesize + + pcdata := c.ctxt.EmitEntryStackMap(c.cursym, spfix, c.newprog) + pcdata = c.ctxt.StartUnsafePoint(pcdata, c.newprog) + + // MOVW LR, R3 + movw := obj.Appendp(pcdata, c.newprog) + movw.As = AMOVW + movw.From.Type = obj.TYPE_REG + movw.From.Reg = REGLINK + movw.To.Type = obj.TYPE_REG + movw.To.Reg = REG_R3 + + bls.To.SetTarget(movw) + + // BL runtime.morestack + call := obj.Appendp(movw, c.newprog) + call.As = obj.ACALL + call.To.Type = obj.TYPE_BRANCH + morestack := "runtime.morestack" + switch { + case c.cursym.CFunc(): + morestack = "runtime.morestackc" + case !c.cursym.Func().Text.From.Sym.NeedCtxt(): + morestack = "runtime.morestack_noctxt" + } + call.To.Sym = c.ctxt.Lookup(morestack) + + pcdata = c.ctxt.EndUnsafePoint(call, c.newprog, -1) + + // B start + b := obj.Appendp(pcdata, c.newprog) + b.As = obj.AJMP + b.To.Type = obj.TYPE_BRANCH + b.To.SetTarget(startPred.Link) + b.Spadj = +framesize + + return end +} + +var unaryDst = map[obj.As]bool{ + ASWI: true, + AWORD: true, +} + +var Linkarm = obj.LinkArch{ + Arch: sys.ArchARM, + Init: buildop, + Preprocess: preprocess, + 
Assemble: span5, + Progedit: progedit, + UnaryDst: unaryDst, + DWARFRegisters: ARMDWARFRegisters, +} diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go new file mode 100644 index 0000000..3bfc675 --- /dev/null +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -0,0 +1,1213 @@ +// cmd/7c/7.out.h from Vita Nuova. +// https://code.google.com/p/ken-cc/source/browse/src/cmd/7c/7.out.h +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package arm64 + +import "cmd/internal/obj" + +const ( + NSNAME = 8 + NSYM = 50 + NREG = 32 /* number of general registers */ + NFREG = 32 /* number of floating point registers */ +) + +// General purpose registers, kept in the low bits of Prog.Reg. +const ( + // integer + REG_R0 = obj.RBaseARM64 + iota + REG_R1 + REG_R2 + REG_R3 + REG_R4 + REG_R5 + REG_R6 + REG_R7 + REG_R8 + REG_R9 + REG_R10 + REG_R11 + REG_R12 + REG_R13 + REG_R14 + REG_R15 + REG_R16 + REG_R17 + REG_R18 + REG_R19 + REG_R20 + REG_R21 + REG_R22 + REG_R23 + REG_R24 + REG_R25 + REG_R26 + REG_R27 + REG_R28 + REG_R29 + REG_R30 + REG_R31 + + // scalar floating point + REG_F0 + REG_F1 + REG_F2 + REG_F3 + REG_F4 + REG_F5 + REG_F6 + REG_F7 + REG_F8 + REG_F9 + REG_F10 + REG_F11 + REG_F12 + REG_F13 + REG_F14 + REG_F15 + REG_F16 + REG_F17 + REG_F18 + REG_F19 + REG_F20 + REG_F21 + REG_F22 + REG_F23 + REG_F24 + REG_F25 + REG_F26 + REG_F27 + REG_F28 + REG_F29 + REG_F30 + REG_F31 + + // SIMD + REG_V0 + REG_V1 + REG_V2 + REG_V3 + REG_V4 + REG_V5 + REG_V6 + REG_V7 + REG_V8 + REG_V9 + REG_V10 + REG_V11 + REG_V12 + REG_V13 + REG_V14 + REG_V15 + REG_V16 + REG_V17 + REG_V18 + REG_V19 + REG_V20 + REG_V21 + REG_V22 + REG_V23 + REG_V24 + REG_V25 + REG_V26 + REG_V27 + REG_V28 + REG_V29 + REG_V30 + REG_V31 + + REG_RSP = REG_V31 + 32 // to differentiate ZR/SP, REG_RSP&0x1f = 31 +) + +// bits 0-4 indicates register: Vn +// bits 5-8 indicates arrangement: <T> +const ( + REG_ARNG = obj.RBaseARM64 + 1<<10 + iota<<9 // Vn.<T> + REG_ELEM // Vn.<T>[index] + REG_ELEM_END +) + +// Not registers, but flags that can be combined with regular register +// constants to indicate extended register conversion. When checking, +// you should subtract obj.RBaseARM64 first. 
From this difference, bit 11 +// indicates extended register, bits 8-10 select the conversion mode. +// REG_LSL is the index shift specifier, bit 9 indicates shifted offset register. +const REG_LSL = obj.RBaseARM64 + 1<<9 +const REG_EXT = obj.RBaseARM64 + 1<<11 + +const ( + REG_UXTB = REG_EXT + iota<<8 + REG_UXTH + REG_UXTW + REG_UXTX + REG_SXTB + REG_SXTH + REG_SXTW + REG_SXTX +) + +// Special registers, after subtracting obj.RBaseARM64, bit 12 indicates +// a special register and the low bits select the register. +// SYSREG_END is the last item in the automatically generated system register +// declaration, and it is defined in the sysRegEnc.go file. +// Define special registers after REG_SPECIAL; the first one should be +// REG_{name} = SYSREG_END + iota. +const ( + REG_SPECIAL = obj.RBaseARM64 + 1<<12 +) + +// Register assignments: +// +// compiler allocates R0 up as temps +// compiler allocates register variables R7-R25 +// compiler allocates external registers R26 down +// +// compiler allocates register variables F7-F26 +// compiler allocates external registers F26 down +const ( + REGMIN = REG_R7 // register variables allocated from here to REGMAX + REGRT1 = REG_R16 // ARM64 IP0, external linker may use as a scratch register in trampoline + REGRT2 = REG_R17 // ARM64 IP1, external linker may use as a scratch register in trampoline + REGPR = REG_R18 // ARM64 platform register, unused in the Go toolchain + REGMAX = REG_R25 + + REGCTXT = REG_R26 // environment for closures + REGTMP = REG_R27 // reserved for liblink + REGG = REG_R28 // G + REGFP = REG_R29 // frame pointer + REGLINK = REG_R30 + + // ARM64 uses R31 as both stack pointer and zero register, + // depending on the instruction. To differentiate RSP from ZR, + // we use a different numeric value for REGZERO and REGSP. 
+ REGZERO = REG_R31 + REGSP = REG_RSP + + FREGRET = REG_F0 + FREGMIN = REG_F7 // first register variable + FREGMAX = REG_F26 // last register variable for 7g only + FREGEXT = REG_F26 // first external register +) + +// http://infocenter.arm.com/help/topic/com.arm.doc.ecm0665627/abi_sve_aadwarf_100985_0000_00_en.pdf +var ARM64DWARFRegisters = map[int16]int16{ + REG_R0: 0, + REG_R1: 1, + REG_R2: 2, + REG_R3: 3, + REG_R4: 4, + REG_R5: 5, + REG_R6: 6, + REG_R7: 7, + REG_R8: 8, + REG_R9: 9, + REG_R10: 10, + REG_R11: 11, + REG_R12: 12, + REG_R13: 13, + REG_R14: 14, + REG_R15: 15, + REG_R16: 16, + REG_R17: 17, + REG_R18: 18, + REG_R19: 19, + REG_R20: 20, + REG_R21: 21, + REG_R22: 22, + REG_R23: 23, + REG_R24: 24, + REG_R25: 25, + REG_R26: 26, + REG_R27: 27, + REG_R28: 28, + REG_R29: 29, + REG_R30: 30, + + // floating point + REG_F0: 64, + REG_F1: 65, + REG_F2: 66, + REG_F3: 67, + REG_F4: 68, + REG_F5: 69, + REG_F6: 70, + REG_F7: 71, + REG_F8: 72, + REG_F9: 73, + REG_F10: 74, + REG_F11: 75, + REG_F12: 76, + REG_F13: 77, + REG_F14: 78, + REG_F15: 79, + REG_F16: 80, + REG_F17: 81, + REG_F18: 82, + REG_F19: 83, + REG_F20: 84, + REG_F21: 85, + REG_F22: 86, + REG_F23: 87, + REG_F24: 88, + REG_F25: 89, + REG_F26: 90, + REG_F27: 91, + REG_F28: 92, + REG_F29: 93, + REG_F30: 94, + REG_F31: 95, + + // SIMD + REG_V0: 64, + REG_V1: 65, + REG_V2: 66, + REG_V3: 67, + REG_V4: 68, + REG_V5: 69, + REG_V6: 70, + REG_V7: 71, + REG_V8: 72, + REG_V9: 73, + REG_V10: 74, + REG_V11: 75, + REG_V12: 76, + REG_V13: 77, + REG_V14: 78, + REG_V15: 79, + REG_V16: 80, + REG_V17: 81, + REG_V18: 82, + REG_V19: 83, + REG_V20: 84, + REG_V21: 85, + REG_V22: 86, + REG_V23: 87, + REG_V24: 88, + REG_V25: 89, + REG_V26: 90, + REG_V27: 91, + REG_V28: 92, + REG_V29: 93, + REG_V30: 94, + REG_V31: 95, +} + +const ( + BIG = 2048 - 8 +) + +const ( + /* mark flags */ + LABEL = 1 << iota + LEAF + FLOAT + BRANCH + LOAD + FCMP + SYNC + LIST + FOLL + NOSCHED +) + +const ( + // optab is sorted based on the order of these 
constants + // and the first match is chosen. + // The more specific class needs to come earlier. + C_NONE = iota + C_REG // R0..R30 + C_ZREG // R0..R30, ZR + C_RSP // R0..R30, RSP + C_FREG // F0..F31 + C_VREG // V0..V31 + C_PAIR // (Rn, Rm) + C_SHIFT // Rn<<2 + C_EXTREG // Rn.UXTB[<<3] + C_SPR // REG_NZCV + C_COND // condition code, EQ, NE, etc. + C_SPOP // special operand, PLDL1KEEP, VMALLE1IS, etc. + C_ARNG // Vn.<T> + C_ELEM // Vn.<T>[index] + C_LIST // [V1, V2, V3] + + C_ZCON // $0 + C_ABCON0 // could be C_ADDCON0 or C_BITCON + C_ADDCON0 // 12-bit unsigned, unshifted + C_ABCON // could be C_ADDCON or C_BITCON + C_AMCON // could be C_ADDCON or C_MOVCON + C_ADDCON // 12-bit unsigned, shifted left by 0 or 12 + C_MBCON // could be C_MOVCON or C_BITCON + C_MOVCON // generated by a 16-bit constant, optionally inverted and/or shifted by multiple of 16 + C_BITCON // bitfield and logical immediate masks + C_ADDCON2 // 24-bit constant + C_LCON // 32-bit constant + C_MOVCON2 // a constant that can be loaded with one MOVZ/MOVN and one MOVK + C_MOVCON3 // a constant that can be loaded with one MOVZ/MOVN and two MOVKs + C_VCON // 64-bit constant + C_FCON // floating-point constant + C_VCONADDR // 64-bit memory address + + C_AACON // ADDCON offset in auto constant $a(FP) + C_AACON2 // 24-bit offset in auto constant $a(FP) + C_LACON // 32-bit offset in auto constant $a(FP) + C_AECON // ADDCON offset in extern constant $e(SB) + + // TODO(aram): only one branch class should be enough + C_SBRA // for TYPE_BRANCH + C_LBRA + + C_ZAUTO // 0(RSP) + C_NSAUTO_16 // -256 <= x < 0, 0 mod 16 + C_NSAUTO_8 // -256 <= x < 0, 0 mod 8 + C_NSAUTO_4 // -256 <= x < 0, 0 mod 4 + C_NSAUTO // -256 <= x < 0 + C_NPAUTO_16 // -512 <= x < 0, 0 mod 16 + C_NPAUTO // -512 <= x < 0, 0 mod 8 + C_NQAUTO_16 // -1024 <= x < 0, 0 mod 16 + C_NAUTO4K // -4095 <= x < 0 + C_PSAUTO_16 // 0 to 255, 0 mod 16 + C_PSAUTO_8 // 0 to 255, 0 mod 8 + C_PSAUTO_4 // 0 to 255, 0 mod 4 + C_PSAUTO // 0 to 255 + C_PPAUTO_16 // 0 to 504, 
0 mod 16 + C_PPAUTO // 0 to 504, 0 mod 8 + C_PQAUTO_16 // 0 to 1008, 0 mod 16 + C_UAUTO4K_16 // 0 to 4095, 0 mod 16 + C_UAUTO4K_8 // 0 to 4095, 0 mod 8 + C_UAUTO4K_4 // 0 to 4095, 0 mod 4 + C_UAUTO4K_2 // 0 to 4095, 0 mod 2 + C_UAUTO4K // 0 to 4095 + C_UAUTO8K_16 // 0 to 8190, 0 mod 16 + C_UAUTO8K_8 // 0 to 8190, 0 mod 8 + C_UAUTO8K_4 // 0 to 8190, 0 mod 4 + C_UAUTO8K // 0 to 8190, 0 mod 2 + C_PSAUTO + C_UAUTO16K_16 // 0 to 16380, 0 mod 16 + C_UAUTO16K_8 // 0 to 16380, 0 mod 8 + C_UAUTO16K // 0 to 16380, 0 mod 4 + C_PSAUTO + C_UAUTO32K_16 // 0 to 32760, 0 mod 16 + C_PSAUTO + C_UAUTO32K // 0 to 32760, 0 mod 8 + C_PSAUTO + C_UAUTO64K // 0 to 65520, 0 mod 16 + C_PSAUTO + C_LAUTO // any other 32-bit constant + + C_SEXT1 // 0 to 4095, direct + C_SEXT2 // 0 to 8190 + C_SEXT4 // 0 to 16380 + C_SEXT8 // 0 to 32760 + C_SEXT16 // 0 to 65520 + C_LEXT + + C_ZOREG // 0(R) + C_NSOREG_16 // must mirror C_NSAUTO_16, etc + C_NSOREG_8 + C_NSOREG_4 + C_NSOREG + C_NPOREG_16 + C_NPOREG + C_NQOREG_16 + C_NOREG4K + C_PSOREG_16 + C_PSOREG_8 + C_PSOREG_4 + C_PSOREG + C_PPOREG_16 + C_PPOREG + C_PQOREG_16 + C_UOREG4K_16 + C_UOREG4K_8 + C_UOREG4K_4 + C_UOREG4K_2 + C_UOREG4K + C_UOREG8K_16 + C_UOREG8K_8 + C_UOREG8K_4 + C_UOREG8K + C_UOREG16K_16 + C_UOREG16K_8 + C_UOREG16K + C_UOREG32K_16 + C_UOREG32K + C_UOREG64K + C_LOREG + + C_ADDR // TODO(aram): explain difference from C_VCONADDR + + // The GOT slot for a symbol in -dynlink mode. + C_GOTADDR + + // TLS "var" in local exec mode: will become a constant offset from + // thread local base that is ultimately chosen by the program linker. + C_TLS_LE + + // TLS "var" in initial exec mode: will become a memory address (chosen + // by the program linker) that the dynamic linker will fill with the + // offset from the thread local base. 
// C_XPRE and C_XPOST are single-bit flags. Their values are chosen to coincide
// with the 32-bit ARM back end's C_WBIT and C_PBIT so that Prog.String knows
// how to print them.
const (
	C_XPRE  = 0x40 // bit 6; matches arm.C_WBIT
	C_XPOST = 0x20 // bit 5; matches arm.C_PBIT
)
ALDXRW + ALDXP + ALDXPW + ALSL + ALSLW + ALSR + ALSRW + AMADD + AMADDW + AMNEG + AMNEGW + AMOVK + AMOVKW + AMOVN + AMOVNW + AMOVZ + AMOVZW + AMRS + AMSR + AMSUB + AMSUBW + AMUL + AMULW + AMVN + AMVNW + ANEG + ANEGS + ANEGSW + ANEGW + ANGC + ANGCS + ANGCSW + ANGCW + ANOOP + AORN + AORNW + AORR + AORRW + APRFM + APRFUM + ARBIT + ARBITW + AREM + AREMW + AREV + AREV16 + AREV16W + AREV32 + AREVW + AROR + ARORW + ASBC + ASBCS + ASBCSW + ASBCW + ASBFIZ + ASBFIZW + ASBFM + ASBFMW + ASBFX + ASBFXW + ASDIV + ASDIVW + ASEV + ASEVL + ASMADDL + ASMC + ASMNEGL + ASMSUBL + ASMULH + ASMULL + ASTXR + ASTXRB + ASTXRH + ASTXP + ASTXPW + ASTXRW + ASTLP + ASTLPW + ASTLR + ASTLRB + ASTLRH + ASTLRW + ASTLXP + ASTLXPW + ASTLXR + ASTLXRB + ASTLXRH + ASTLXRW + ASTP + ASTPW + ASUB + ASUBS + ASUBSW + ASUBW + ASVC + ASXTB + ASXTBW + ASXTH + ASXTHW + ASXTW + ASYS + ASYSL + ATBNZ + ATBZ + ATLBI + ATST + ATSTW + AUBFIZ + AUBFIZW + AUBFM + AUBFMW + AUBFX + AUBFXW + AUDIV + AUDIVW + AUMADDL + AUMNEGL + AUMSUBL + AUMULH + AUMULL + AUREM + AUREMW + AUXTB + AUXTH + AUXTW + AUXTBW + AUXTHW + AWFE + AWFI + AYIELD + AMOVB + AMOVBU + AMOVH + AMOVHU + AMOVW + AMOVWU + AMOVD + AMOVNP + AMOVNPW + AMOVP + AMOVPD + AMOVPQ + AMOVPS + AMOVPSW + AMOVPW + ASWPAD + ASWPAW + ASWPAH + ASWPAB + ASWPALD + ASWPALW + ASWPALH + ASWPALB + ASWPD + ASWPW + ASWPH + ASWPB + ASWPLD + ASWPLW + ASWPLH + ASWPLB + ACASD + ACASW + ACASH + ACASB + ACASAD + ACASAW + ACASLD + ACASLW + ACASALD + ACASALW + ACASALH + ACASALB + ACASPD + ACASPW + ABEQ + ABNE + ABCS + ABHS + ABCC + ABLO + ABMI + ABPL + ABVS + ABVC + ABHI + ABLS + ABGE + ABLT + ABGT + ABLE + AFABSD + AFABSS + AFADDD + AFADDS + AFCCMPD + AFCCMPED + AFCCMPS + AFCCMPES + AFCMPD + AFCMPED + AFCMPES + AFCMPS + AFCVTSD + AFCVTDS + AFCVTZSD + AFCVTZSDW + AFCVTZSS + AFCVTZSSW + AFCVTZUD + AFCVTZUDW + AFCVTZUS + AFCVTZUSW + AFDIVD + AFDIVS + AFLDPD + AFLDPQ + AFLDPS + AFMOVQ + AFMOVD + AFMOVS + AVMOVQ + AVMOVD + AVMOVS + AFMULD + AFMULS + AFNEGD + AFNEGS + AFSQRTD + AFSQRTS + AFSTPD + 
//go:generate stringer -type SpecialOperand -trimprefix SPOP_

// SpecialOperand identifies a named special operand. The constants below are
// grouped by the section comments (PRFM, TLBI, DC, PSTATE fields, condition
// codes). The String method is generated by stringer with the SPOP_ prefix
// trimmed, so the declaration order here must stay in sync with the generated
// file.
type SpecialOperand int

const (
	// PRFM
	//
	// SPOP_PLDL1KEEP is declared with iota (value 0). SPOP_BEGIN is declared
	// on the next line with iota - 1, which is also 0, so it aliases the first
	// operand. Every following constant implicitly repeats "iota - 1", which
	// keeps the numbering contiguous (1, 2, 3, ...) despite the extra line.
	SPOP_PLDL1KEEP SpecialOperand = iota     // must be the first one
	SPOP_BEGIN     SpecialOperand = iota - 1 // set as the lower bound
	SPOP_PLDL1STRM
	SPOP_PLDL2KEEP
	SPOP_PLDL2STRM
	SPOP_PLDL3KEEP
	SPOP_PLDL3STRM
	SPOP_PLIL1KEEP
	SPOP_PLIL1STRM
	SPOP_PLIL2KEEP
	SPOP_PLIL2STRM
	SPOP_PLIL3KEEP
	SPOP_PLIL3STRM
	SPOP_PSTL1KEEP
	SPOP_PSTL1STRM
	SPOP_PSTL2KEEP
	SPOP_PSTL2STRM
	SPOP_PSTL3KEEP
	SPOP_PSTL3STRM

	// TLBI
	SPOP_VMALLE1IS
	SPOP_VAE1IS
	SPOP_ASIDE1IS
	SPOP_VAAE1IS
	SPOP_VALE1IS
	SPOP_VAALE1IS
	SPOP_VMALLE1
	SPOP_VAE1
	SPOP_ASIDE1
	SPOP_VAAE1
	SPOP_VALE1
	SPOP_VAALE1
	SPOP_IPAS2E1IS
	SPOP_IPAS2LE1IS
	SPOP_ALLE2IS
	SPOP_VAE2IS
	SPOP_ALLE1IS
	SPOP_VALE2IS
	SPOP_VMALLS12E1IS
	SPOP_IPAS2E1
	SPOP_IPAS2LE1
	SPOP_ALLE2
	SPOP_VAE2
	SPOP_ALLE1
	SPOP_VALE2
	SPOP_VMALLS12E1
	SPOP_ALLE3IS
	SPOP_VAE3IS
	SPOP_VALE3IS
	SPOP_ALLE3
	SPOP_VAE3
	SPOP_VALE3
	SPOP_VMALLE1OS
	SPOP_VAE1OS
	SPOP_ASIDE1OS
	SPOP_VAAE1OS
	SPOP_VALE1OS
	SPOP_VAALE1OS
	SPOP_RVAE1IS
	SPOP_RVAAE1IS
	SPOP_RVALE1IS
	SPOP_RVAALE1IS
	SPOP_RVAE1OS
	SPOP_RVAAE1OS
	SPOP_RVALE1OS
	SPOP_RVAALE1OS
	SPOP_RVAE1
	SPOP_RVAAE1
	SPOP_RVALE1
	SPOP_RVAALE1
	SPOP_RIPAS2E1IS
	SPOP_RIPAS2LE1IS
	SPOP_ALLE2OS
	SPOP_VAE2OS
	SPOP_ALLE1OS
	SPOP_VALE2OS
	SPOP_VMALLS12E1OS
	SPOP_RVAE2IS
	SPOP_RVALE2IS
	SPOP_IPAS2E1OS
	SPOP_RIPAS2E1
	SPOP_RIPAS2E1OS
	SPOP_IPAS2LE1OS
	SPOP_RIPAS2LE1
	SPOP_RIPAS2LE1OS
	SPOP_RVAE2OS
	SPOP_RVALE2OS
	SPOP_RVAE2
	SPOP_RVALE2
	SPOP_ALLE3OS
	SPOP_VAE3OS
	SPOP_VALE3OS
	SPOP_RVAE3IS
	SPOP_RVALE3IS
	SPOP_RVAE3OS
	SPOP_RVALE3OS
	SPOP_RVAE3
	SPOP_RVALE3

	// DC
	SPOP_IVAC
	SPOP_ISW
	SPOP_CSW
	SPOP_CISW
	SPOP_ZVA
	SPOP_CVAC
	SPOP_CVAU
	SPOP_CIVAC
	SPOP_IGVAC
	SPOP_IGSW
	SPOP_IGDVAC
	SPOP_IGDSW
	SPOP_CGSW
	SPOP_CGDSW
	SPOP_CIGSW
	SPOP_CIGDSW
	SPOP_GVA
	SPOP_GZVA
	SPOP_CGVAC
	SPOP_CGDVAC
	SPOP_CGVAP
	SPOP_CGDVAP
	SPOP_CGVADP
	SPOP_CGDVADP
	SPOP_CIGVAC
	SPOP_CIGDVAC
	SPOP_CVAP
	SPOP_CVADP

	// PSTATE fields
	SPOP_DAIFSet
	SPOP_DAIFClr

	// Condition codes, EQ, NE, etc. Their order relative to SPOP_EQ matters,
	// so do not reorder or insert entries within this group.
	SPOP_EQ
	SPOP_NE
	SPOP_HS
	SPOP_LO
	SPOP_MI
	SPOP_PL
	SPOP_VS
	SPOP_VC
	SPOP_HI
	SPOP_LS
	SPOP_GE
	SPOP_LT
	SPOP_GT
	SPOP_LE
	SPOP_AL
	SPOP_NV
	// Condition code end.

	// SPOP_END is the last value in the enumeration; presumably it serves as
	// the upper bound that pairs with SPOP_BEGIN (confirm against callers).
	SPOP_END
)
"LDCLRLD", + "LDCLRLH", + "LDCLRLW", + "LDEORAB", + "LDEORAD", + "LDEORAH", + "LDEORAW", + "LDEORALB", + "LDEORALD", + "LDEORALH", + "LDEORALW", + "LDEORB", + "LDEORD", + "LDEORH", + "LDEORW", + "LDEORLB", + "LDEORLD", + "LDEORLH", + "LDEORLW", + "LDORAB", + "LDORAD", + "LDORAH", + "LDORAW", + "LDORALB", + "LDORALD", + "LDORALH", + "LDORALW", + "LDORB", + "LDORD", + "LDORH", + "LDORW", + "LDORLB", + "LDORLD", + "LDORLH", + "LDORLW", + "LDP", + "LDPW", + "LDPSW", + "LDXR", + "LDXRB", + "LDXRH", + "LDXRW", + "LDXP", + "LDXPW", + "LSL", + "LSLW", + "LSR", + "LSRW", + "MADD", + "MADDW", + "MNEG", + "MNEGW", + "MOVK", + "MOVKW", + "MOVN", + "MOVNW", + "MOVZ", + "MOVZW", + "MRS", + "MSR", + "MSUB", + "MSUBW", + "MUL", + "MULW", + "MVN", + "MVNW", + "NEG", + "NEGS", + "NEGSW", + "NEGW", + "NGC", + "NGCS", + "NGCSW", + "NGCW", + "NOOP", + "ORN", + "ORNW", + "ORR", + "ORRW", + "PRFM", + "PRFUM", + "RBIT", + "RBITW", + "REM", + "REMW", + "REV", + "REV16", + "REV16W", + "REV32", + "REVW", + "ROR", + "RORW", + "SBC", + "SBCS", + "SBCSW", + "SBCW", + "SBFIZ", + "SBFIZW", + "SBFM", + "SBFMW", + "SBFX", + "SBFXW", + "SDIV", + "SDIVW", + "SEV", + "SEVL", + "SMADDL", + "SMC", + "SMNEGL", + "SMSUBL", + "SMULH", + "SMULL", + "STXR", + "STXRB", + "STXRH", + "STXP", + "STXPW", + "STXRW", + "STLP", + "STLPW", + "STLR", + "STLRB", + "STLRH", + "STLRW", + "STLXP", + "STLXPW", + "STLXR", + "STLXRB", + "STLXRH", + "STLXRW", + "STP", + "STPW", + "SUB", + "SUBS", + "SUBSW", + "SUBW", + "SVC", + "SXTB", + "SXTBW", + "SXTH", + "SXTHW", + "SXTW", + "SYS", + "SYSL", + "TBNZ", + "TBZ", + "TLBI", + "TST", + "TSTW", + "UBFIZ", + "UBFIZW", + "UBFM", + "UBFMW", + "UBFX", + "UBFXW", + "UDIV", + "UDIVW", + "UMADDL", + "UMNEGL", + "UMSUBL", + "UMULH", + "UMULL", + "UREM", + "UREMW", + "UXTB", + "UXTH", + "UXTW", + "UXTBW", + "UXTHW", + "WFE", + "WFI", + "YIELD", + "MOVB", + "MOVBU", + "MOVH", + "MOVHU", + "MOVW", + "MOVWU", + "MOVD", + "MOVNP", + "MOVNPW", + "MOVP", + "MOVPD", + "MOVPQ", + "MOVPS", + 
"MOVPSW", + "MOVPW", + "SWPAD", + "SWPAW", + "SWPAH", + "SWPAB", + "SWPALD", + "SWPALW", + "SWPALH", + "SWPALB", + "SWPD", + "SWPW", + "SWPH", + "SWPB", + "SWPLD", + "SWPLW", + "SWPLH", + "SWPLB", + "CASD", + "CASW", + "CASH", + "CASB", + "CASAD", + "CASAW", + "CASLD", + "CASLW", + "CASALD", + "CASALW", + "CASALH", + "CASALB", + "CASPD", + "CASPW", + "BEQ", + "BNE", + "BCS", + "BHS", + "BCC", + "BLO", + "BMI", + "BPL", + "BVS", + "BVC", + "BHI", + "BLS", + "BGE", + "BLT", + "BGT", + "BLE", + "FABSD", + "FABSS", + "FADDD", + "FADDS", + "FCCMPD", + "FCCMPED", + "FCCMPS", + "FCCMPES", + "FCMPD", + "FCMPED", + "FCMPES", + "FCMPS", + "FCVTSD", + "FCVTDS", + "FCVTZSD", + "FCVTZSDW", + "FCVTZSS", + "FCVTZSSW", + "FCVTZUD", + "FCVTZUDW", + "FCVTZUS", + "FCVTZUSW", + "FDIVD", + "FDIVS", + "FLDPD", + "FLDPQ", + "FLDPS", + "FMOVQ", + "FMOVD", + "FMOVS", + "VMOVQ", + "VMOVD", + "VMOVS", + "FMULD", + "FMULS", + "FNEGD", + "FNEGS", + "FSQRTD", + "FSQRTS", + "FSTPD", + "FSTPQ", + "FSTPS", + "FSUBD", + "FSUBS", + "SCVTFD", + "SCVTFS", + "SCVTFWD", + "SCVTFWS", + "UCVTFD", + "UCVTFS", + "UCVTFWD", + "UCVTFWS", + "WORD", + "DWORD", + "FCSELS", + "FCSELD", + "FMAXS", + "FMINS", + "FMAXD", + "FMIND", + "FMAXNMS", + "FMAXNMD", + "FNMULS", + "FNMULD", + "FRINTNS", + "FRINTND", + "FRINTPS", + "FRINTPD", + "FRINTMS", + "FRINTMD", + "FRINTZS", + "FRINTZD", + "FRINTAS", + "FRINTAD", + "FRINTXS", + "FRINTXD", + "FRINTIS", + "FRINTID", + "FMADDS", + "FMADDD", + "FMSUBS", + "FMSUBD", + "FNMADDS", + "FNMADDD", + "FNMSUBS", + "FNMSUBD", + "FMINNMS", + "FMINNMD", + "FCVTDH", + "FCVTHS", + "FCVTHD", + "FCVTSH", + "AESD", + "AESE", + "AESIMC", + "AESMC", + "SHA1C", + "SHA1H", + "SHA1M", + "SHA1P", + "SHA1SU0", + "SHA1SU1", + "SHA256H", + "SHA256H2", + "SHA256SU0", + "SHA256SU1", + "SHA512H", + "SHA512H2", + "SHA512SU0", + "SHA512SU1", + "VADD", + "VADDP", + "VAND", + "VBIF", + "VBCAX", + "VCMEQ", + "VCNT", + "VEOR", + "VEOR3", + "VMOV", + "VLD1", + "VLD2", + "VLD3", + "VLD4", + "VLD1R", + "VLD2R", 
// cnames7 holds the printable name of every operand class, indexed by the
// numeric value of the corresponding C_* constant (C_NONE through C_NCLASS).
//
// The order must be strictly consistent with the C_* declarations in
// a.out.go: entry i is the name of the constant whose value is i, with the
// "C_" prefix removed. When a class is added, removed or reordered there,
// update this table in the same position.
var cnames7 = []string{
	"NONE",
	"REG",
	"ZREG",
	"RSP",
	"FREG",
	"VREG",
	"PAIR",
	"SHIFT",
	"EXTREG",
	"SPR",
	"COND", // C_COND is declared before C_SPOP
	"SPOP",
	"ARNG",
	"ELEM",
	"LIST",
	"ZCON",
	"ABCON0",
	"ADDCON0",
	"ABCON",
	"AMCON",
	"ADDCON",
	"MBCON",
	"MOVCON",
	"BITCON",
	"ADDCON2",
	"LCON",
	"MOVCON2",
	"MOVCON3",
	"VCON",
	"FCON",
	"VCONADDR",
	"AACON",
	"AACON2",
	"LACON",
	"AECON",
	"SBRA",
	"LBRA",
	"ZAUTO",
	"NSAUTO_16",
	"NSAUTO_8",
	"NSAUTO_4",
	"NSAUTO",
	"NPAUTO_16",
	"NPAUTO",
	"NQAUTO_16",
	"NAUTO4K",
	"PSAUTO_16",
	"PSAUTO_8",
	"PSAUTO_4",
	"PSAUTO",
	"PPAUTO_16",
	"PPAUTO",
	"PQAUTO_16",
	"UAUTO4K_16",
	"UAUTO4K_8",
	"UAUTO4K_4",
	"UAUTO4K_2",
	"UAUTO4K",
	"UAUTO8K_16",
	"UAUTO8K_8",
	"UAUTO8K_4",
	"UAUTO8K",
	"UAUTO16K_16",
	"UAUTO16K_8",
	"UAUTO16K",
	"UAUTO32K_16", // names C_UAUTO32K_16, mirroring "UOREG32K_16" below
	"UAUTO32K",
	"UAUTO64K",
	"LAUTO",
	"SEXT1",
	"SEXT2",
	"SEXT4",
	"SEXT8",
	"SEXT16",
	"LEXT",
	"ZOREG",
	"NSOREG_16",
	"NSOREG_8",
	"NSOREG_4",
	"NSOREG",
	"NPOREG_16",
	"NPOREG",
	"NQOREG_16",
	"NOREG4K",
	"PSOREG_16",
	"PSOREG_8",
	"PSOREG_4",
	"PSOREG",
	"PPOREG_16",
	"PPOREG",
	"PQOREG_16",
	"UOREG4K_16",
	"UOREG4K_8",
	"UOREG4K_4",
	"UOREG4K_2",
	"UOREG4K",
	"UOREG8K_16",
	"UOREG8K_8",
	"UOREG8K_4",
	"UOREG8K",
	"UOREG16K_16",
	"UOREG16K_8",
	"UOREG16K",
	"UOREG32K_16",
	"UOREG32K",
	"UOREG64K",
	"LOREG",
	"ADDR",
	"GOTADDR",
	"TLS_LE",
	"TLS_IE",
	"ROFF",
	"GOK",
	"TEXTSIZE",
	"NCLASS",
}
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package arm64 + +import ( + "cmd/internal/obj" + "cmd/internal/objabi" + "fmt" + "log" + "math" + "sort" + "strings" +) + +// ctxt7 holds state while assembling a single function. +// Each function gets a fresh ctxt7. +// This allows for multiple functions to be safely concurrently assembled. +type ctxt7 struct { + ctxt *obj.Link + newprog obj.ProgAlloc + cursym *obj.LSym + blitrl *obj.Prog + elitrl *obj.Prog + autosize int32 + extrasize int32 + instoffset int64 + pc int64 + pool struct { + start uint32 + size uint32 + } +} + +const ( + funcAlign = 16 +) + +const ( + REGFROM = 1 +) + +type Optab struct { + as obj.As + a1 uint8 + a2 uint8 + a3 uint8 + a4 uint8 + type_ int8 + size_ int8 // the value of this field is not static, use the size() method to return the value + param int16 + flag int8 + scond uint16 +} + +func IsAtomicInstruction(as obj.As) bool { + if _, ok := atomicLDADD[as]; ok { + return true + } + if _, ok := atomicSWP[as]; ok { + return true + } + return false +} + +// known field values of an instruction. 
// atomicLDADD maps each LDADD*, LDCLR*, LDEOR* and LDOR* opcode to the known
// field values of its instruction word. Every key is also reported as atomic
// by IsAtomicInstruction.
//
// Each value is the OR of three fields, as can be read off the table:
//
//   - bits 30 and up: 3, 2, 1 or 0 for mnemonics ending in D, W, H or B
//     (presumably the access size; confirm against the Arm ARM).
//   - bits 21 and up: 0x1c1 for the plain form, 0x1c5 for the "A" form,
//     0x1c3 for the "L" form and 0x1c7 for the "AL" form (presumably the
//     acquire/release ordering bits; confirm against the Arm ARM).
//   - bits 10 and up: 0x00 for LDADD, 0x04 for LDCLR, 0x08 for LDEOR and
//     0x0c for LDOR.
var atomicLDADD = map[obj.As]uint32{
	ALDADDAD:  3<<30 | 0x1c5<<21 | 0x00<<10,
	ALDADDAW:  2<<30 | 0x1c5<<21 | 0x00<<10,
	ALDADDAH:  1<<30 | 0x1c5<<21 | 0x00<<10,
	ALDADDAB:  0<<30 | 0x1c5<<21 | 0x00<<10,
	ALDADDALD: 3<<30 | 0x1c7<<21 | 0x00<<10,
	ALDADDALW: 2<<30 | 0x1c7<<21 | 0x00<<10,
	ALDADDALH: 1<<30 | 0x1c7<<21 | 0x00<<10,
	ALDADDALB: 0<<30 | 0x1c7<<21 | 0x00<<10,
	ALDADDD:   3<<30 | 0x1c1<<21 | 0x00<<10,
	ALDADDW:   2<<30 | 0x1c1<<21 | 0x00<<10,
	ALDADDH:   1<<30 | 0x1c1<<21 | 0x00<<10,
	ALDADDB:   0<<30 | 0x1c1<<21 | 0x00<<10,
	ALDADDLD:  3<<30 | 0x1c3<<21 | 0x00<<10,
	ALDADDLW:  2<<30 | 0x1c3<<21 | 0x00<<10,
	ALDADDLH:  1<<30 | 0x1c3<<21 | 0x00<<10,
	ALDADDLB:  0<<30 | 0x1c3<<21 | 0x00<<10,
	ALDCLRAD:  3<<30 | 0x1c5<<21 | 0x04<<10,
	ALDCLRAW:  2<<30 | 0x1c5<<21 | 0x04<<10,
	ALDCLRAH:  1<<30 | 0x1c5<<21 | 0x04<<10,
	ALDCLRAB:  0<<30 | 0x1c5<<21 | 0x04<<10,
	ALDCLRALD: 3<<30 | 0x1c7<<21 | 0x04<<10,
	ALDCLRALW: 2<<30 | 0x1c7<<21 | 0x04<<10,
	ALDCLRALH: 1<<30 | 0x1c7<<21 | 0x04<<10,
	ALDCLRALB: 0<<30 | 0x1c7<<21 | 0x04<<10,
	ALDCLRD:   3<<30 | 0x1c1<<21 | 0x04<<10,
	ALDCLRW:   2<<30 | 0x1c1<<21 | 0x04<<10,
	ALDCLRH:   1<<30 | 0x1c1<<21 | 0x04<<10,
	ALDCLRB:   0<<30 | 0x1c1<<21 | 0x04<<10,
	ALDCLRLD:  3<<30 | 0x1c3<<21 | 0x04<<10,
	ALDCLRLW:  2<<30 | 0x1c3<<21 | 0x04<<10,
	ALDCLRLH:  1<<30 | 0x1c3<<21 | 0x04<<10,
	ALDCLRLB:  0<<30 | 0x1c3<<21 | 0x04<<10,
	ALDEORAD:  3<<30 | 0x1c5<<21 | 0x08<<10,
	ALDEORAW:  2<<30 | 0x1c5<<21 | 0x08<<10,
	ALDEORAH:  1<<30 | 0x1c5<<21 | 0x08<<10,
	ALDEORAB:  0<<30 | 0x1c5<<21 | 0x08<<10,
	ALDEORALD: 3<<30 | 0x1c7<<21 | 0x08<<10,
	ALDEORALW: 2<<30 | 0x1c7<<21 | 0x08<<10,
	ALDEORALH: 1<<30 | 0x1c7<<21 | 0x08<<10,
	ALDEORALB: 0<<30 | 0x1c7<<21 | 0x08<<10,
	ALDEORD:   3<<30 | 0x1c1<<21 | 0x08<<10,
	ALDEORW:   2<<30 | 0x1c1<<21 | 0x08<<10,
	ALDEORH:   1<<30 | 0x1c1<<21 | 0x08<<10,
	ALDEORB:   0<<30 | 0x1c1<<21 | 0x08<<10,
	ALDEORLD:  3<<30 | 0x1c3<<21 | 0x08<<10,
	ALDEORLW:  2<<30 | 0x1c3<<21 | 0x08<<10,
	ALDEORLH:  1<<30 | 0x1c3<<21 | 0x08<<10,
	ALDEORLB:  0<<30 | 0x1c3<<21 | 0x08<<10,
	ALDORAD:   3<<30 | 0x1c5<<21 | 0x0c<<10,
	ALDORAW:   2<<30 | 0x1c5<<21 | 0x0c<<10,
	ALDORAH:   1<<30 | 0x1c5<<21 | 0x0c<<10,
	ALDORAB:   0<<30 | 0x1c5<<21 | 0x0c<<10,
	ALDORALD:  3<<30 | 0x1c7<<21 | 0x0c<<10,
	ALDORALW:  2<<30 | 0x1c7<<21 | 0x0c<<10,
	ALDORALH:  1<<30 | 0x1c7<<21 | 0x0c<<10,
	ALDORALB:  0<<30 | 0x1c7<<21 | 0x0c<<10,
	ALDORD:    3<<30 | 0x1c1<<21 | 0x0c<<10,
	ALDORW:    2<<30 | 0x1c1<<21 | 0x0c<<10,
	ALDORH:    1<<30 | 0x1c1<<21 | 0x0c<<10,
	ALDORB:    0<<30 | 0x1c1<<21 | 0x0c<<10,
	ALDORLD:   3<<30 | 0x1c3<<21 | 0x0c<<10,
	ALDORLW:   2<<30 | 0x1c3<<21 | 0x0c<<10,
	ALDORLH:   1<<30 | 0x1c3<<21 | 0x0c<<10,
	ALDORLB:   0<<30 | 0x1c3<<21 | 0x0c<<10,
}
// OPDP2 returns the fixed pattern 0xd6<<21 with x placed at bit 10. The fixed
// part leaves bits 29 and 30 clear.
func OPDP2(x uint32) uint32 {
	const fixed = 0xd6 << 21
	return fixed | x<<10
}

// OPDP3 returns the fixed pattern 0x1B<<24 combined with sf at bit 31, op54 at
// bit 29, op31 at bit 21 and o0 at bit 15.
func OPDP3(sf, op54, op31, o0 uint32) uint32 {
	const fixed = 0x1B << 24
	return fixed | sf<<31 | op54<<29 | op31<<21 | o0<<15
}

// OPBcc returns the fixed pattern 0x2A<<25 with the low four bits of x in
// bits 0-3. Bits 4 and 24 of the fixed part are clear.
func OPBcc(x uint32) uint32 {
	const fixed = 0x2A << 25
	return fixed | (x & 15)
}

// OPBLR returns the fixed pattern 0x6B<<25 | 0x1F<<16 with x placed at bit 21.
//
// x=0, JMP; 1, CALL; 2, RET
func OPBLR(x uint32) uint32 {
	const fixed = 0x6B<<25 | 0x1F<<16
	return fixed | x<<21
}

// SYSOP returns the fixed pattern 0x354<<22 combined with l at bit 21, op0 at
// bit 19, op1 at bit 16, crn at bit 12, crm at bit 8, op2 at bit 5 and rt at
// bit 0. Only crn and crm are masked (to four bits each); the caller must keep
// the other fields in range.
func SYSOP(l, op0, op1, crn, crm, op2, rt uint32) uint32 {
	const fixed = 0x354 << 22
	crn &= 15
	crm &= 15
	return fixed | l<<21 | op0<<19 | op1<<16 | crn<<12 | crm<<8 | op2<<5 | rt
}

// SYSHINT returns SYSOP with l=0, op0=0, op1=3, crn=2, crm=0, rt=0x1F and x
// as the op2 field.
func SYSHINT(x uint32) uint32 {
	const (
		op1 = 3
		crn = 2
		rt  = 0x1F
	)
	return SYSOP(0, 0, op1, crn, 0, x, rt)
}

// LDSTR returns the fixed pattern 7<<27 combined with sz at bit 30, v at
// bit 26 and opc at bit 22.
func LDSTR(sz, v, opc uint32) uint32 {
	const fixed = 7 << 27
	return fixed | sz<<30 | v<<26 | opc<<22
}

// LD2STR returns o with bits 22 and 23 cleared (the position LDSTR uses for
// its opc argument).
func LD2STR(o uint32) uint32 {
	const opcBits = 3 << 22
	return o &^ opcBits
}

// LDSTX returns the fixed pattern 0x8<<24 combined with sz at bit 30, o2 at
// bit 23, l at bit 22, o1 at bit 21 and o0 at bit 15.
func LDSTX(sz, o2, l, o1, o0 uint32) uint32 {
	const fixed = 0x8 << 24
	return fixed | sz<<30 | o2<<23 | l<<22 | o1<<21 | o0<<15
}

// FPCMP returns the fixed pattern 0x1E<<24 | 1<<21 | 8<<10 combined with m at
// bit 31, s at bit 29, type_ at bit 22, op at bit 14 and op2 at bit 0.
func FPCMP(m, s, type_, op, op2 uint32) uint32 {
	const fixed = 0x1E<<24 | 1<<21 | 8<<10
	return fixed | m<<31 | s<<29 | type_<<22 | op<<14 | op2
}

// FPCCMP returns the fixed pattern 0x1E<<24 | 1<<21 | 1<<10 combined with m at
// bit 31, s at bit 29, type_ at bit 22 and op at bit 4.
func FPCCMP(m, s, type_, op uint32) uint32 {
	const fixed = 0x1E<<24 | 1<<21 | 1<<10
	return fixed | m<<31 | s<<29 | type_<<22 | op<<4
}

// FPOP1S returns the fixed pattern 0x1E<<24 | 1<<21 | 0x10<<10 combined with
// m at bit 31, s at bit 29, type_ at bit 22 and op at bit 15.
func FPOP1S(m, s, type_, op uint32) uint32 {
	const fixed = 0x1E<<24 | 1<<21 | 0x10<<10
	return fixed | m<<31 | s<<29 | type_<<22 | op<<15
}

// FPOP2S returns the fixed pattern 0x1E<<24 | 1<<21 | 2<<10 combined with m at
// bit 31, s at bit 29, type_ at bit 22 and op at bit 12.
func FPOP2S(m, s, type_, op uint32) uint32 {
	const fixed = 0x1E<<24 | 1<<21 | 2<<10
	return fixed | m<<31 | s<<29 | type_<<22 | op<<12
}

// FPOP3S returns the fixed pattern 0x1F<<24 combined with m at bit 31, s at
// bit 29, type_ at bit 22, op at bit 21 and op2 at bit 15.
func FPOP3S(m, s, type_, op, op2 uint32) uint32 {
	const fixed = 0x1F << 24
	return fixed | m<<31 | s<<29 | type_<<22 | op<<21 | op2<<15
}

// FPCVTI returns the fixed pattern 0x1E<<24 | 1<<21 combined with sf at
// bit 31, s at bit 29, type_ at bit 22, rmode at bit 19 and op at bit 16.
// Bits 10-15 of the fixed part are clear.
func FPCVTI(sf, s, type_, rmode, op uint32) uint32 {
	const fixed = 0x1E<<24 | 1<<21
	return fixed | sf<<31 | s<<29 | type_<<22 | rmode<<19 | op<<16
}

// ADR returns the fixed pattern 0x10<<24 combined with p at bit 31 and the low
// five bits of rt at bit 0. The offset o is split: its low two bits go to
// bit 29 and the next 19 bits go to bit 5.
func ADR(p, o, rt uint32) uint32 {
	const fixed = 0x10 << 24
	low2 := o & 3
	next19 := (o >> 2) & 0x7FFFF
	return fixed | p<<31 | low2<<29 | next19<<5 | rt&31
}

// OPBIT returns the fixed pattern 1<<30 | 0xD6<<21 with x placed at bit 10.
// Bit 29 and bits 16-20 of the fixed part are clear.
func OPBIT(x uint32) uint32 {
	const fixed = 1<<30 | 0xD6<<21
	return fixed | x<<10
}

// MOVCONST packs one 16-bit slice of d into an instruction-word fragment.
// s selects the slice (d is shifted right by s*16 and masked to 16 bits); the
// slice lands at bit 5, the low two bits of s at bit 21 and the low five bits
// of rt at bit 0.
func MOVCONST(d int64, s int, rt int) uint32 {
	slice16 := uint32((d >> uint(s*16)) & 0xFFFF)
	sel := uint32(s) & 3
	reg := uint32(rt & 31)
	return slice16<<5 | sel<<21 | reg
}
C_RSP, 48, 8, 0, NOTUSETMP, 0}, + {AADD, C_ADDCON2, C_NONE, C_NONE, C_RSP, 48, 8, 0, NOTUSETMP, 0}, + {AADD, C_MOVCON2, C_RSP, C_NONE, C_RSP, 13, 12, 0, 0, 0}, + {AADD, C_MOVCON2, C_NONE, C_NONE, C_RSP, 13, 12, 0, 0, 0}, + {AADD, C_MOVCON3, C_RSP, C_NONE, C_RSP, 13, 16, 0, 0, 0}, + {AADD, C_MOVCON3, C_NONE, C_NONE, C_RSP, 13, 16, 0, 0, 0}, + {AADD, C_VCON, C_RSP, C_NONE, C_RSP, 13, 20, 0, 0, 0}, + {AADD, C_VCON, C_NONE, C_NONE, C_RSP, 13, 20, 0, 0, 0}, + {ACMP, C_MOVCON2, C_ZREG, C_NONE, C_NONE, 13, 12, 0, 0, 0}, + {ACMP, C_MOVCON3, C_ZREG, C_NONE, C_NONE, 13, 16, 0, 0, 0}, + {ACMP, C_VCON, C_ZREG, C_NONE, C_NONE, 13, 20, 0, 0, 0}, + {AADD, C_SHIFT, C_ZREG, C_NONE, C_ZREG, 3, 4, 0, 0, 0}, + {AADD, C_SHIFT, C_NONE, C_NONE, C_ZREG, 3, 4, 0, 0, 0}, + {AMVN, C_SHIFT, C_NONE, C_NONE, C_ZREG, 3, 4, 0, 0, 0}, + {ACMP, C_SHIFT, C_ZREG, C_NONE, C_NONE, 3, 4, 0, 0, 0}, + {ANEG, C_SHIFT, C_NONE, C_NONE, C_ZREG, 3, 4, 0, 0, 0}, + {AADD, C_ZREG, C_RSP, C_NONE, C_RSP, 27, 4, 0, 0, 0}, + {AADD, C_ZREG, C_NONE, C_NONE, C_RSP, 27, 4, 0, 0, 0}, + {ACMP, C_ZREG, C_RSP, C_NONE, C_NONE, 27, 4, 0, 0, 0}, + {AADD, C_EXTREG, C_RSP, C_NONE, C_RSP, 27, 4, 0, 0, 0}, + {AADD, C_EXTREG, C_NONE, C_NONE, C_RSP, 27, 4, 0, 0, 0}, + {ACMP, C_EXTREG, C_RSP, C_NONE, C_NONE, 27, 4, 0, 0, 0}, + {AADD, C_ZREG, C_ZREG, C_NONE, C_ZREG, 1, 4, 0, 0, 0}, + {AADD, C_ZREG, C_NONE, C_NONE, C_ZREG, 1, 4, 0, 0, 0}, + {AMUL, C_ZREG, C_ZREG, C_NONE, C_ZREG, 15, 4, 0, 0, 0}, + {AMUL, C_ZREG, C_NONE, C_NONE, C_ZREG, 15, 4, 0, 0, 0}, + {AMADD, C_ZREG, C_ZREG, C_ZREG, C_ZREG, 15, 4, 0, 0, 0}, + {AREM, C_ZREG, C_ZREG, C_NONE, C_ZREG, 16, 8, 0, 0, 0}, + {AREM, C_ZREG, C_NONE, C_NONE, C_ZREG, 16, 8, 0, 0, 0}, + {ASDIV, C_ZREG, C_NONE, C_NONE, C_ZREG, 1, 4, 0, 0, 0}, + {ASDIV, C_ZREG, C_ZREG, C_NONE, C_ZREG, 1, 4, 0, 0, 0}, + + {AFADDS, C_FREG, C_NONE, C_NONE, C_FREG, 54, 4, 0, 0, 0}, + {AFADDS, C_FREG, C_FREG, C_NONE, C_FREG, 54, 4, 0, 0, 0}, + {AFMSUBD, C_FREG, C_FREG, C_FREG, C_FREG, 15, 4, 0, 0, 0}, + {AFCMPS, C_FREG, 
C_FREG, C_NONE, C_NONE, 56, 4, 0, 0, 0}, + {AFCMPS, C_FCON, C_FREG, C_NONE, C_NONE, 56, 4, 0, 0, 0}, + {AVADDP, C_ARNG, C_ARNG, C_NONE, C_ARNG, 72, 4, 0, 0, 0}, + {AVADD, C_ARNG, C_ARNG, C_NONE, C_ARNG, 72, 4, 0, 0, 0}, + {AVADD, C_VREG, C_VREG, C_NONE, C_VREG, 89, 4, 0, 0, 0}, + {AVADD, C_VREG, C_NONE, C_NONE, C_VREG, 89, 4, 0, 0, 0}, + {AVADDV, C_ARNG, C_NONE, C_NONE, C_VREG, 85, 4, 0, 0, 0}, + + /* logical operations */ + {AAND, C_ZREG, C_ZREG, C_NONE, C_ZREG, 1, 4, 0, 0, 0}, + {AAND, C_ZREG, C_NONE, C_NONE, C_ZREG, 1, 4, 0, 0, 0}, + {AANDS, C_ZREG, C_ZREG, C_NONE, C_ZREG, 1, 4, 0, 0, 0}, + {AANDS, C_ZREG, C_NONE, C_NONE, C_ZREG, 1, 4, 0, 0, 0}, + {ATST, C_ZREG, C_ZREG, C_NONE, C_NONE, 1, 4, 0, 0, 0}, + {AAND, C_MBCON, C_ZREG, C_NONE, C_RSP, 53, 4, 0, 0, 0}, + {AAND, C_MBCON, C_NONE, C_NONE, C_RSP, 53, 4, 0, 0, 0}, + {AANDS, C_MBCON, C_ZREG, C_NONE, C_ZREG, 53, 4, 0, 0, 0}, + {AANDS, C_MBCON, C_NONE, C_NONE, C_ZREG, 53, 4, 0, 0, 0}, + {ATST, C_MBCON, C_ZREG, C_NONE, C_NONE, 53, 4, 0, 0, 0}, + {AAND, C_BITCON, C_ZREG, C_NONE, C_RSP, 53, 4, 0, 0, 0}, + {AAND, C_BITCON, C_NONE, C_NONE, C_RSP, 53, 4, 0, 0, 0}, + {AANDS, C_BITCON, C_ZREG, C_NONE, C_ZREG, 53, 4, 0, 0, 0}, + {AANDS, C_BITCON, C_NONE, C_NONE, C_ZREG, 53, 4, 0, 0, 0}, + {ATST, C_BITCON, C_ZREG, C_NONE, C_NONE, 53, 4, 0, 0, 0}, + {AAND, C_MOVCON, C_ZREG, C_NONE, C_ZREG, 62, 8, 0, 0, 0}, + {AAND, C_MOVCON, C_NONE, C_NONE, C_ZREG, 62, 8, 0, 0, 0}, + {AANDS, C_MOVCON, C_ZREG, C_NONE, C_ZREG, 62, 8, 0, 0, 0}, + {AANDS, C_MOVCON, C_NONE, C_NONE, C_ZREG, 62, 8, 0, 0, 0}, + {ATST, C_MOVCON, C_ZREG, C_NONE, C_NONE, 62, 8, 0, 0, 0}, + {AAND, C_MOVCON2, C_ZREG, C_NONE, C_ZREG, 28, 12, 0, 0, 0}, + {AAND, C_MOVCON2, C_NONE, C_NONE, C_ZREG, 28, 12, 0, 0, 0}, + {AAND, C_MOVCON3, C_ZREG, C_NONE, C_ZREG, 28, 16, 0, 0, 0}, + {AAND, C_MOVCON3, C_NONE, C_NONE, C_ZREG, 28, 16, 0, 0, 0}, + {AAND, C_VCON, C_ZREG, C_NONE, C_ZREG, 28, 20, 0, 0, 0}, + {AAND, C_VCON, C_NONE, C_NONE, C_ZREG, 28, 20, 0, 0, 0}, + {AANDS, C_MOVCON2, 
C_ZREG, C_NONE, C_ZREG, 28, 12, 0, 0, 0}, + {AANDS, C_MOVCON2, C_NONE, C_NONE, C_ZREG, 28, 12, 0, 0, 0}, + {AANDS, C_MOVCON3, C_ZREG, C_NONE, C_ZREG, 28, 16, 0, 0, 0}, + {AANDS, C_MOVCON3, C_NONE, C_NONE, C_ZREG, 28, 16, 0, 0, 0}, + {AANDS, C_VCON, C_ZREG, C_NONE, C_ZREG, 28, 20, 0, 0, 0}, + {AANDS, C_VCON, C_NONE, C_NONE, C_ZREG, 28, 20, 0, 0, 0}, + {ATST, C_MOVCON2, C_ZREG, C_NONE, C_NONE, 28, 12, 0, 0, 0}, + {ATST, C_MOVCON3, C_ZREG, C_NONE, C_NONE, 28, 16, 0, 0, 0}, + {ATST, C_VCON, C_ZREG, C_NONE, C_NONE, 28, 20, 0, 0, 0}, + {AAND, C_SHIFT, C_ZREG, C_NONE, C_ZREG, 3, 4, 0, 0, 0}, + {AAND, C_SHIFT, C_NONE, C_NONE, C_ZREG, 3, 4, 0, 0, 0}, + {AANDS, C_SHIFT, C_ZREG, C_NONE, C_ZREG, 3, 4, 0, 0, 0}, + {AANDS, C_SHIFT, C_NONE, C_NONE, C_ZREG, 3, 4, 0, 0, 0}, + {ATST, C_SHIFT, C_ZREG, C_NONE, C_NONE, 3, 4, 0, 0, 0}, + {AMOVD, C_RSP, C_NONE, C_NONE, C_RSP, 24, 4, 0, 0, 0}, + {AMOVD, C_ZREG, C_NONE, C_NONE, C_ZREG, 24, 4, 0, 0, 0}, + {AMVN, C_ZREG, C_NONE, C_NONE, C_ZREG, 24, 4, 0, 0, 0}, + {AMOVB, C_ZREG, C_NONE, C_NONE, C_ZREG, 45, 4, 0, 0, 0}, /* also MOVBU */ + {AMOVH, C_ZREG, C_NONE, C_NONE, C_ZREG, 45, 4, 0, 0, 0}, /* also MOVHU */ + {AMOVW, C_ZREG, C_NONE, C_NONE, C_ZREG, 45, 4, 0, 0, 0}, /* also MOVWU */ + /* TODO: MVN C_SHIFT */ + + /* MOVs that become MOVK/MOVN/MOVZ/ADD/SUB/OR */ + {AMOVW, C_MBCON, C_NONE, C_NONE, C_ZREG, 32, 4, 0, 0, 0}, + {AMOVD, C_MBCON, C_NONE, C_NONE, C_ZREG, 32, 4, 0, 0, 0}, + {AMOVW, C_MOVCON, C_NONE, C_NONE, C_ZREG, 32, 4, 0, 0, 0}, + {AMOVD, C_MOVCON, C_NONE, C_NONE, C_ZREG, 32, 4, 0, 0, 0}, + {AMOVW, C_BITCON, C_NONE, C_NONE, C_RSP, 32, 4, 0, 0, 0}, + {AMOVD, C_BITCON, C_NONE, C_NONE, C_RSP, 32, 4, 0, 0, 0}, + {AMOVW, C_MOVCON2, C_NONE, C_NONE, C_ZREG, 12, 8, 0, NOTUSETMP, 0}, + {AMOVD, C_MOVCON2, C_NONE, C_NONE, C_ZREG, 12, 8, 0, NOTUSETMP, 0}, + {AMOVD, C_MOVCON3, C_NONE, C_NONE, C_ZREG, 12, 12, 0, NOTUSETMP, 0}, + {AMOVD, C_VCON, C_NONE, C_NONE, C_ZREG, 12, 16, 0, NOTUSETMP, 0}, + + {AMOVK, C_VCON, C_NONE, C_NONE, C_ZREG, 33, 4, 
0, 0, 0}, + {AMOVD, C_AACON, C_NONE, C_NONE, C_RSP, 4, 4, REGFROM, 0, 0}, + {AMOVD, C_AACON2, C_NONE, C_NONE, C_RSP, 4, 8, REGFROM, NOTUSETMP, 0}, + + /* load long effective stack address (load int32 offset and add) */ + {AMOVD, C_LACON, C_NONE, C_NONE, C_RSP, 34, 8, REGSP, LFROM, 0}, + + // Move a large constant to a vector register. + {AVMOVQ, C_VCON, C_NONE, C_VCON, C_VREG, 101, 4, 0, LFROM128, 0}, + {AVMOVD, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, + {AVMOVS, C_LCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, + + /* jump operations */ + {AB, C_NONE, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0}, + {ABL, C_NONE, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0}, + {AB, C_NONE, C_NONE, C_NONE, C_ZOREG, 6, 4, 0, 0, 0}, + {ABL, C_NONE, C_NONE, C_NONE, C_ZREG, 6, 4, 0, 0, 0}, + {ABL, C_NONE, C_NONE, C_NONE, C_ZOREG, 6, 4, 0, 0, 0}, + {obj.ARET, C_NONE, C_NONE, C_NONE, C_ZREG, 6, 4, 0, 0, 0}, + {obj.ARET, C_NONE, C_NONE, C_NONE, C_ZOREG, 6, 4, 0, 0, 0}, + {ABEQ, C_NONE, C_NONE, C_NONE, C_SBRA, 7, 4, 0, BRANCH19BITS, 0}, + {ACBZ, C_ZREG, C_NONE, C_NONE, C_SBRA, 39, 4, 0, BRANCH19BITS, 0}, + {ATBZ, C_VCON, C_ZREG, C_NONE, C_SBRA, 40, 4, 0, BRANCH14BITS, 0}, + {AERET, C_NONE, C_NONE, C_NONE, C_NONE, 41, 4, 0, 0, 0}, + + // get a PC-relative address + {AADRP, C_SBRA, C_NONE, C_NONE, C_ZREG, 60, 4, 0, 0, 0}, + {AADR, C_SBRA, C_NONE, C_NONE, C_ZREG, 61, 4, 0, 0, 0}, + + {ACLREX, C_NONE, C_NONE, C_NONE, C_VCON, 38, 4, 0, 0, 0}, + {ACLREX, C_NONE, C_NONE, C_NONE, C_NONE, 38, 4, 0, 0, 0}, + {ABFM, C_VCON, C_ZREG, C_VCON, C_ZREG, 42, 4, 0, 0, 0}, + {ABFI, C_VCON, C_ZREG, C_VCON, C_ZREG, 43, 4, 0, 0, 0}, + {AEXTR, C_VCON, C_ZREG, C_ZREG, C_ZREG, 44, 4, 0, 0, 0}, + {ASXTB, C_ZREG, C_NONE, C_NONE, C_ZREG, 45, 4, 0, 0, 0}, + {ACLS, C_ZREG, C_NONE, C_NONE, C_ZREG, 46, 4, 0, 0, 0}, + {ALSL, C_VCON, C_ZREG, C_NONE, C_ZREG, 8, 4, 0, 0, 0}, + {ALSL, C_VCON, C_NONE, C_NONE, C_ZREG, 8, 4, 0, 0, 0}, + {ALSL, C_ZREG, C_NONE, C_NONE, C_ZREG, 9, 4, 0, 0, 0}, + {ALSL, C_ZREG, C_ZREG, C_NONE, 
C_ZREG, 9, 4, 0, 0, 0}, + {ASVC, C_VCON, C_NONE, C_NONE, C_NONE, 10, 4, 0, 0, 0}, + {ASVC, C_NONE, C_NONE, C_NONE, C_NONE, 10, 4, 0, 0, 0}, + {ADWORD, C_NONE, C_NONE, C_NONE, C_VCON, 11, 8, 0, NOTUSETMP, 0}, + {ADWORD, C_NONE, C_NONE, C_NONE, C_LEXT, 11, 8, 0, NOTUSETMP, 0}, + {ADWORD, C_NONE, C_NONE, C_NONE, C_ADDR, 11, 8, 0, NOTUSETMP, 0}, + {ADWORD, C_NONE, C_NONE, C_NONE, C_LACON, 11, 8, 0, NOTUSETMP, 0}, + {AWORD, C_NONE, C_NONE, C_NONE, C_LCON, 14, 4, 0, 0, 0}, + {AWORD, C_NONE, C_NONE, C_NONE, C_LEXT, 14, 4, 0, 0, 0}, + {AWORD, C_NONE, C_NONE, C_NONE, C_ADDR, 14, 4, 0, 0, 0}, + {AMOVW, C_VCONADDR, C_NONE, C_NONE, C_ZREG, 68, 8, 0, NOTUSETMP, 0}, + {AMOVD, C_VCONADDR, C_NONE, C_NONE, C_ZREG, 68, 8, 0, NOTUSETMP, 0}, + {AMOVB, C_ZREG, C_NONE, C_NONE, C_ADDR, 64, 12, 0, 0, 0}, + {AMOVH, C_ZREG, C_NONE, C_NONE, C_ADDR, 64, 12, 0, 0, 0}, + {AMOVW, C_ZREG, C_NONE, C_NONE, C_ADDR, 64, 12, 0, 0, 0}, + {AMOVD, C_ZREG, C_NONE, C_NONE, C_ADDR, 64, 12, 0, 0, 0}, + {AMOVB, C_ADDR, C_NONE, C_NONE, C_ZREG, 65, 12, 0, 0, 0}, + {AMOVH, C_ADDR, C_NONE, C_NONE, C_ZREG, 65, 12, 0, 0, 0}, + {AMOVW, C_ADDR, C_NONE, C_NONE, C_ZREG, 65, 12, 0, 0, 0}, + {AMOVD, C_ADDR, C_NONE, C_NONE, C_ZREG, 65, 12, 0, 0, 0}, + {AMOVD, C_GOTADDR, C_NONE, C_NONE, C_ZREG, 71, 8, 0, 0, 0}, + {AMOVD, C_TLS_LE, C_NONE, C_NONE, C_ZREG, 69, 4, 0, 0, 0}, + {AMOVD, C_TLS_IE, C_NONE, C_NONE, C_ZREG, 70, 8, 0, 0, 0}, + + {AFMOVS, C_FREG, C_NONE, C_NONE, C_ADDR, 64, 12, 0, 0, 0}, + {AFMOVS, C_ADDR, C_NONE, C_NONE, C_FREG, 65, 12, 0, 0, 0}, + {AFMOVD, C_FREG, C_NONE, C_NONE, C_ADDR, 64, 12, 0, 0, 0}, + {AFMOVD, C_ADDR, C_NONE, C_NONE, C_FREG, 65, 12, 0, 0, 0}, + {AFMOVS, C_FCON, C_NONE, C_NONE, C_FREG, 55, 4, 0, 0, 0}, + {AFMOVS, C_FREG, C_NONE, C_NONE, C_FREG, 54, 4, 0, 0, 0}, + {AFMOVD, C_FCON, C_NONE, C_NONE, C_FREG, 55, 4, 0, 0, 0}, + {AFMOVD, C_FREG, C_NONE, C_NONE, C_FREG, 54, 4, 0, 0, 0}, + {AFMOVS, C_ZREG, C_NONE, C_NONE, C_FREG, 29, 4, 0, 0, 0}, + {AFMOVS, C_FREG, C_NONE, C_NONE, C_ZREG, 29, 4, 0, 0, 
0}, + {AFMOVD, C_ZREG, C_NONE, C_NONE, C_FREG, 29, 4, 0, 0, 0}, + {AFMOVD, C_FREG, C_NONE, C_NONE, C_ZREG, 29, 4, 0, 0, 0}, + {AFCVTZSD, C_FREG, C_NONE, C_NONE, C_ZREG, 29, 4, 0, 0, 0}, + {ASCVTFD, C_ZREG, C_NONE, C_NONE, C_FREG, 29, 4, 0, 0, 0}, + {AFCVTSD, C_FREG, C_NONE, C_NONE, C_FREG, 29, 4, 0, 0, 0}, + {AVMOV, C_ELEM, C_NONE, C_NONE, C_ZREG, 73, 4, 0, 0, 0}, + {AVMOV, C_ELEM, C_NONE, C_NONE, C_ELEM, 92, 4, 0, 0, 0}, + {AVMOV, C_ELEM, C_NONE, C_NONE, C_VREG, 80, 4, 0, 0, 0}, + {AVMOV, C_ZREG, C_NONE, C_NONE, C_ARNG, 82, 4, 0, 0, 0}, + {AVMOV, C_ZREG, C_NONE, C_NONE, C_ELEM, 78, 4, 0, 0, 0}, + {AVMOV, C_ARNG, C_NONE, C_NONE, C_ARNG, 83, 4, 0, 0, 0}, + {AVDUP, C_ELEM, C_NONE, C_NONE, C_ARNG, 79, 4, 0, 0, 0}, + {AVDUP, C_ELEM, C_NONE, C_NONE, C_VREG, 80, 4, 0, 0, 0}, + {AVDUP, C_ZREG, C_NONE, C_NONE, C_ARNG, 82, 4, 0, 0, 0}, + {AVMOVI, C_ADDCON, C_NONE, C_NONE, C_ARNG, 86, 4, 0, 0, 0}, + {AVFMLA, C_ARNG, C_ARNG, C_NONE, C_ARNG, 72, 4, 0, 0, 0}, + {AVEXT, C_VCON, C_ARNG, C_ARNG, C_ARNG, 94, 4, 0, 0, 0}, + {AVTBL, C_ARNG, C_NONE, C_LIST, C_ARNG, 100, 4, 0, 0, 0}, + {AVUSHR, C_VCON, C_ARNG, C_NONE, C_ARNG, 95, 4, 0, 0, 0}, + {AVZIP1, C_ARNG, C_ARNG, C_NONE, C_ARNG, 72, 4, 0, 0, 0}, + {AVUSHLL, C_VCON, C_ARNG, C_NONE, C_ARNG, 102, 4, 0, 0, 0}, + {AVUXTL, C_ARNG, C_NONE, C_NONE, C_ARNG, 102, 4, 0, 0, 0}, + {AVUADDW, C_ARNG, C_ARNG, C_NONE, C_ARNG, 105, 4, 0, 0, 0}, + + /* conditional operations */ + {ACSEL, C_COND, C_ZREG, C_ZREG, C_ZREG, 18, 4, 0, 0, 0}, + {ACINC, C_COND, C_ZREG, C_NONE, C_ZREG, 18, 4, 0, 0, 0}, + {ACSET, C_COND, C_NONE, C_NONE, C_ZREG, 18, 4, 0, 0, 0}, + {AFCSELD, C_COND, C_FREG, C_FREG, C_FREG, 18, 4, 0, 0, 0}, + {ACCMN, C_COND, C_ZREG, C_ZREG, C_VCON, 19, 4, 0, 0, 0}, + {ACCMN, C_COND, C_ZREG, C_VCON, C_VCON, 19, 4, 0, 0, 0}, + {AFCCMPS, C_COND, C_FREG, C_FREG, C_VCON, 57, 4, 0, 0, 0}, + + /* scaled 12-bit unsigned displacement store */ + {AMOVB, C_ZREG, C_NONE, C_NONE, C_UAUTO4K, 20, 4, REGSP, 0, 0}, + {AMOVB, C_ZREG, C_NONE, C_NONE, C_UOREG4K, 
20, 4, 0, 0, 0}, + {AMOVH, C_ZREG, C_NONE, C_NONE, C_UAUTO8K, 20, 4, REGSP, 0, 0}, + {AMOVH, C_ZREG, C_NONE, C_NONE, C_UOREG8K, 20, 4, 0, 0, 0}, + {AMOVW, C_ZREG, C_NONE, C_NONE, C_UAUTO16K, 20, 4, REGSP, 0, 0}, + {AMOVW, C_ZREG, C_NONE, C_NONE, C_UOREG16K, 20, 4, 0, 0, 0}, + {AMOVD, C_ZREG, C_NONE, C_NONE, C_UAUTO32K, 20, 4, REGSP, 0, 0}, + {AMOVD, C_ZREG, C_NONE, C_NONE, C_UOREG32K, 20, 4, 0, 0, 0}, + + {AFMOVS, C_FREG, C_NONE, C_NONE, C_UAUTO16K, 20, 4, REGSP, 0, 0}, + {AFMOVS, C_FREG, C_NONE, C_NONE, C_UOREG16K, 20, 4, 0, 0, 0}, + {AFMOVD, C_FREG, C_NONE, C_NONE, C_UAUTO32K, 20, 4, REGSP, 0, 0}, + {AFMOVD, C_FREG, C_NONE, C_NONE, C_UOREG32K, 20, 4, 0, 0, 0}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_UAUTO64K, 20, 4, REGSP, 0, 0}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_UOREG64K, 20, 4, 0, 0, 0}, + + /* unscaled 9-bit signed displacement store */ + {AMOVB, C_ZREG, C_NONE, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0}, + {AMOVB, C_ZREG, C_NONE, C_NONE, C_NSOREG, 20, 4, 0, 0, 0}, + {AMOVH, C_ZREG, C_NONE, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0}, + {AMOVH, C_ZREG, C_NONE, C_NONE, C_NSOREG, 20, 4, 0, 0, 0}, + {AMOVW, C_ZREG, C_NONE, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0}, + {AMOVW, C_ZREG, C_NONE, C_NONE, C_NSOREG, 20, 4, 0, 0, 0}, + {AMOVD, C_ZREG, C_NONE, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0}, + {AMOVD, C_ZREG, C_NONE, C_NONE, C_NSOREG, 20, 4, 0, 0, 0}, + + {AFMOVS, C_FREG, C_NONE, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0}, + {AFMOVS, C_FREG, C_NONE, C_NONE, C_NSOREG, 20, 4, 0, 0, 0}, + {AFMOVD, C_FREG, C_NONE, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0}, + {AFMOVD, C_FREG, C_NONE, C_NONE, C_NSOREG, 20, 4, 0, 0, 0}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_NSOREG, 20, 4, 0, 0, 0}, + + /* scaled 12-bit unsigned displacement load */ + {AMOVB, C_UAUTO4K, C_NONE, C_NONE, C_ZREG, 21, 4, REGSP, 0, 0}, + {AMOVB, C_UOREG4K, C_NONE, C_NONE, C_ZREG, 21, 4, 0, 0, 0}, + {AMOVH, C_UAUTO8K, C_NONE, C_NONE, C_ZREG, 21, 4, REGSP, 0, 0}, + {AMOVH, 
C_UOREG8K, C_NONE, C_NONE, C_ZREG, 21, 4, 0, 0, 0}, + {AMOVW, C_UAUTO16K, C_NONE, C_NONE, C_ZREG, 21, 4, REGSP, 0, 0}, + {AMOVW, C_UOREG16K, C_NONE, C_NONE, C_ZREG, 21, 4, 0, 0, 0}, + {AMOVD, C_UAUTO32K, C_NONE, C_NONE, C_ZREG, 21, 4, REGSP, 0, 0}, + {AMOVD, C_UOREG32K, C_NONE, C_NONE, C_ZREG, 21, 4, 0, 0, 0}, + + {AFMOVS, C_UAUTO16K, C_NONE, C_NONE, C_FREG, 21, 4, REGSP, 0, 0}, + {AFMOVS, C_UOREG16K, C_NONE, C_NONE, C_FREG, 21, 4, 0, 0, 0}, + {AFMOVD, C_UAUTO32K, C_NONE, C_NONE, C_FREG, 21, 4, REGSP, 0, 0}, + {AFMOVD, C_UOREG32K, C_NONE, C_NONE, C_FREG, 21, 4, 0, 0, 0}, + {AFMOVQ, C_UAUTO64K, C_NONE, C_NONE, C_FREG, 21, 4, REGSP, 0, 0}, + {AFMOVQ, C_UOREG64K, C_NONE, C_NONE, C_FREG, 21, 4, 0, 0, 0}, + + /* unscaled 9-bit signed displacement load */ + {AMOVB, C_NSAUTO, C_NONE, C_NONE, C_ZREG, 21, 4, REGSP, 0, 0}, + {AMOVB, C_NSOREG, C_NONE, C_NONE, C_ZREG, 21, 4, 0, 0, 0}, + {AMOVH, C_NSAUTO, C_NONE, C_NONE, C_ZREG, 21, 4, REGSP, 0, 0}, + {AMOVH, C_NSOREG, C_NONE, C_NONE, C_ZREG, 21, 4, 0, 0, 0}, + {AMOVW, C_NSAUTO, C_NONE, C_NONE, C_ZREG, 21, 4, REGSP, 0, 0}, + {AMOVW, C_NSOREG, C_NONE, C_NONE, C_ZREG, 21, 4, 0, 0, 0}, + {AMOVD, C_NSAUTO, C_NONE, C_NONE, C_ZREG, 21, 4, REGSP, 0, 0}, + {AMOVD, C_NSOREG, C_NONE, C_NONE, C_ZREG, 21, 4, 0, 0, 0}, + + {AFMOVS, C_NSAUTO, C_NONE, C_NONE, C_FREG, 21, 4, REGSP, 0, 0}, + {AFMOVS, C_NSOREG, C_NONE, C_NONE, C_FREG, 21, 4, 0, 0, 0}, + {AFMOVD, C_NSAUTO, C_NONE, C_NONE, C_FREG, 21, 4, REGSP, 0, 0}, + {AFMOVD, C_NSOREG, C_NONE, C_NONE, C_FREG, 21, 4, 0, 0, 0}, + {AFMOVQ, C_NSAUTO, C_NONE, C_NONE, C_FREG, 21, 4, REGSP, 0, 0}, + {AFMOVQ, C_NSOREG, C_NONE, C_NONE, C_FREG, 21, 4, 0, 0, 0}, + + /* long displacement store */ + {AMOVB, C_ZREG, C_NONE, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0}, + {AMOVB, C_ZREG, C_NONE, C_NONE, C_LOREG, 30, 8, 0, LTO, 0}, + {AMOVH, C_ZREG, C_NONE, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0}, + {AMOVH, C_ZREG, C_NONE, C_NONE, C_LOREG, 30, 8, 0, LTO, 0}, + {AMOVW, C_ZREG, C_NONE, C_NONE, C_LAUTO, 30, 8, REGSP, 
LTO, 0}, + {AMOVW, C_ZREG, C_NONE, C_NONE, C_LOREG, 30, 8, 0, LTO, 0}, + {AMOVD, C_ZREG, C_NONE, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0}, + {AMOVD, C_ZREG, C_NONE, C_NONE, C_LOREG, 30, 8, 0, LTO, 0}, + + {AFMOVS, C_FREG, C_NONE, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0}, + {AFMOVS, C_FREG, C_NONE, C_NONE, C_LOREG, 30, 8, 0, LTO, 0}, + {AFMOVD, C_FREG, C_NONE, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0}, + {AFMOVD, C_FREG, C_NONE, C_NONE, C_LOREG, 30, 8, 0, LTO, 0}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_LOREG, 30, 8, 0, LTO, 0}, + + /* long displacement load */ + {AMOVB, C_LAUTO, C_NONE, C_NONE, C_ZREG, 31, 8, REGSP, LFROM, 0}, + {AMOVB, C_LOREG, C_NONE, C_NONE, C_ZREG, 31, 8, 0, LFROM, 0}, + {AMOVH, C_LAUTO, C_NONE, C_NONE, C_ZREG, 31, 8, REGSP, LFROM, 0}, + {AMOVH, C_LOREG, C_NONE, C_NONE, C_ZREG, 31, 8, 0, LFROM, 0}, + {AMOVW, C_LAUTO, C_NONE, C_NONE, C_ZREG, 31, 8, REGSP, LFROM, 0}, + {AMOVW, C_LOREG, C_NONE, C_NONE, C_ZREG, 31, 8, 0, LFROM, 0}, + {AMOVD, C_LAUTO, C_NONE, C_NONE, C_ZREG, 31, 8, REGSP, LFROM, 0}, + {AMOVD, C_LOREG, C_NONE, C_NONE, C_ZREG, 31, 8, 0, LFROM, 0}, + + {AFMOVS, C_LAUTO, C_NONE, C_NONE, C_FREG, 31, 8, REGSP, LFROM, 0}, + {AFMOVS, C_LOREG, C_NONE, C_NONE, C_FREG, 31, 8, 0, LFROM, 0}, + {AFMOVD, C_LAUTO, C_NONE, C_NONE, C_FREG, 31, 8, REGSP, LFROM, 0}, + {AFMOVD, C_LOREG, C_NONE, C_NONE, C_FREG, 31, 8, 0, LFROM, 0}, + {AFMOVQ, C_LAUTO, C_NONE, C_NONE, C_FREG, 31, 8, REGSP, LFROM, 0}, + {AFMOVQ, C_LOREG, C_NONE, C_NONE, C_FREG, 31, 8, 0, LFROM, 0}, + + /* pre/post-indexed load (unscaled, signed 9-bit offset) */ + {AMOVD, C_LOREG, C_NONE, C_NONE, C_ZREG, 22, 4, 0, 0, C_XPOST}, + {AMOVW, C_LOREG, C_NONE, C_NONE, C_ZREG, 22, 4, 0, 0, C_XPOST}, + {AMOVH, C_LOREG, C_NONE, C_NONE, C_ZREG, 22, 4, 0, 0, C_XPOST}, + {AMOVB, C_LOREG, C_NONE, C_NONE, C_ZREG, 22, 4, 0, 0, C_XPOST}, + {AFMOVS, C_LOREG, C_NONE, C_NONE, C_FREG, 22, 4, 0, 0, C_XPOST}, + {AFMOVD, C_LOREG, C_NONE, C_NONE, C_FREG, 22, 
4, 0, 0, C_XPOST}, + {AFMOVQ, C_LOREG, C_NONE, C_NONE, C_FREG, 22, 4, 0, 0, C_XPOST}, + + {AMOVD, C_LOREG, C_NONE, C_NONE, C_ZREG, 22, 4, 0, 0, C_XPRE}, + {AMOVW, C_LOREG, C_NONE, C_NONE, C_ZREG, 22, 4, 0, 0, C_XPRE}, + {AMOVH, C_LOREG, C_NONE, C_NONE, C_ZREG, 22, 4, 0, 0, C_XPRE}, + {AMOVB, C_LOREG, C_NONE, C_NONE, C_ZREG, 22, 4, 0, 0, C_XPRE}, + {AFMOVS, C_LOREG, C_NONE, C_NONE, C_FREG, 22, 4, 0, 0, C_XPRE}, + {AFMOVD, C_LOREG, C_NONE, C_NONE, C_FREG, 22, 4, 0, 0, C_XPRE}, + {AFMOVQ, C_LOREG, C_NONE, C_NONE, C_FREG, 22, 4, 0, 0, C_XPRE}, + + /* pre/post-indexed store (unscaled, signed 9-bit offset) */ + {AMOVD, C_ZREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, + {AMOVW, C_ZREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, + {AMOVH, C_ZREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, + {AMOVB, C_ZREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, + {AFMOVS, C_FREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, + {AFMOVD, C_FREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, + + {AMOVD, C_ZREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, + {AMOVW, C_ZREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, + {AMOVH, C_ZREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, + {AMOVB, C_ZREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, + {AFMOVS, C_FREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, + {AFMOVD, C_FREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, + + /* load with shifted or extended register offset */ + {AMOVD, C_ROFF, C_NONE, C_NONE, C_ZREG, 98, 4, 0, 0, 0}, + {AMOVW, C_ROFF, C_NONE, C_NONE, C_ZREG, 98, 4, 0, 0, 0}, + {AMOVH, C_ROFF, C_NONE, C_NONE, C_ZREG, 98, 4, 0, 0, 0}, + {AMOVB, C_ROFF, C_NONE, C_NONE, C_ZREG, 98, 4, 0, 0, 0}, + {AFMOVS, C_ROFF, C_NONE, C_NONE, C_FREG, 98, 4, 0, 0, 0}, + {AFMOVD, C_ROFF, C_NONE, C_NONE, C_FREG, 98, 4, 0, 0, 0}, + + /* store with extended register offset */ + 
{AMOVD, C_ZREG, C_NONE, C_NONE, C_ROFF, 99, 4, 0, 0, 0}, + {AMOVW, C_ZREG, C_NONE, C_NONE, C_ROFF, 99, 4, 0, 0, 0}, + {AMOVH, C_ZREG, C_NONE, C_NONE, C_ROFF, 99, 4, 0, 0, 0}, + {AMOVB, C_ZREG, C_NONE, C_NONE, C_ROFF, 99, 4, 0, 0, 0}, + {AFMOVS, C_FREG, C_NONE, C_NONE, C_ROFF, 99, 4, 0, 0, 0}, + {AFMOVD, C_FREG, C_NONE, C_NONE, C_ROFF, 99, 4, 0, 0, 0}, + + /* pre/post-indexed/signed-offset load/store register pair + (unscaled, signed 10-bit quad-aligned and long offset). + The pre/post-indexed format only supports OREG cases because + the RSP and pseudo registers are not allowed to be modified + in this way. */ + {AFLDPQ, C_NQAUTO_16, C_NONE, C_NONE, C_PAIR, 66, 4, REGSP, 0, 0}, + {AFLDPQ, C_PQAUTO_16, C_NONE, C_NONE, C_PAIR, 66, 4, REGSP, 0, 0}, + {AFLDPQ, C_UAUTO4K, C_NONE, C_NONE, C_PAIR, 74, 8, REGSP, 0, 0}, + {AFLDPQ, C_NAUTO4K, C_NONE, C_NONE, C_PAIR, 74, 8, REGSP, 0, 0}, + {AFLDPQ, C_LAUTO, C_NONE, C_NONE, C_PAIR, 75, 12, REGSP, LFROM, 0}, + {AFLDPQ, C_NQOREG_16, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, 0}, + {AFLDPQ, C_NQOREG_16, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPRE}, + {AFLDPQ, C_NQOREG_16, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPOST}, + {AFLDPQ, C_PQOREG_16, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, 0}, + {AFLDPQ, C_PQOREG_16, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPRE}, + {AFLDPQ, C_PQOREG_16, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPOST}, + {AFLDPQ, C_UOREG4K, C_NONE, C_NONE, C_PAIR, 74, 8, 0, 0, 0}, + {AFLDPQ, C_NOREG4K, C_NONE, C_NONE, C_PAIR, 74, 8, 0, 0, 0}, + {AFLDPQ, C_LOREG, C_NONE, C_NONE, C_PAIR, 75, 12, 0, LFROM, 0}, + {AFLDPQ, C_ADDR, C_NONE, C_NONE, C_PAIR, 88, 12, 0, 0, 0}, + + {AFSTPQ, C_PAIR, C_NONE, C_NONE, C_NQAUTO_16, 67, 4, REGSP, 0, 0}, + {AFSTPQ, C_PAIR, C_NONE, C_NONE, C_PQAUTO_16, 67, 4, REGSP, 0, 0}, + {AFSTPQ, C_PAIR, C_NONE, C_NONE, C_UAUTO4K, 76, 8, REGSP, 0, 0}, + {AFSTPQ, C_PAIR, C_NONE, C_NONE, C_NAUTO4K, 76, 8, REGSP, 0, 0}, + {AFSTPQ, C_PAIR, C_NONE, C_NONE, C_LAUTO, 77, 12, REGSP, LTO, 0}, + {AFSTPQ, C_PAIR, C_NONE, 
C_NONE, C_NQOREG_16, 67, 4, 0, 0, 0}, + {AFSTPQ, C_PAIR, C_NONE, C_NONE, C_NQOREG_16, 67, 4, 0, 0, C_XPRE}, + {AFSTPQ, C_PAIR, C_NONE, C_NONE, C_NQOREG_16, 67, 4, 0, 0, C_XPOST}, + {AFSTPQ, C_PAIR, C_NONE, C_NONE, C_PQOREG_16, 67, 4, 0, 0, 0}, + {AFSTPQ, C_PAIR, C_NONE, C_NONE, C_PQOREG_16, 67, 4, 0, 0, C_XPRE}, + {AFSTPQ, C_PAIR, C_NONE, C_NONE, C_PQOREG_16, 67, 4, 0, 0, C_XPOST}, + {AFSTPQ, C_PAIR, C_NONE, C_NONE, C_UOREG4K, 76, 8, 0, 0, 0}, + {AFSTPQ, C_PAIR, C_NONE, C_NONE, C_NOREG4K, 76, 8, 0, 0, 0}, + {AFSTPQ, C_PAIR, C_NONE, C_NONE, C_LOREG, 77, 12, 0, LTO, 0}, + {AFSTPQ, C_PAIR, C_NONE, C_NONE, C_ADDR, 87, 12, 0, 0, 0}, + + {ALDP, C_NPAUTO, C_NONE, C_NONE, C_PAIR, 66, 4, REGSP, 0, 0}, + {ALDP, C_PPAUTO, C_NONE, C_NONE, C_PAIR, 66, 4, REGSP, 0, 0}, + {ALDP, C_UAUTO4K, C_NONE, C_NONE, C_PAIR, 74, 8, REGSP, 0, 0}, + {ALDP, C_NAUTO4K, C_NONE, C_NONE, C_PAIR, 74, 8, REGSP, 0, 0}, + {ALDP, C_LAUTO, C_NONE, C_NONE, C_PAIR, 75, 12, REGSP, LFROM, 0}, + {ALDP, C_NPOREG, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, 0}, + {ALDP, C_NPOREG, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPRE}, + {ALDP, C_NPOREG, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPOST}, + {ALDP, C_PPOREG, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, 0}, + {ALDP, C_PPOREG, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPRE}, + {ALDP, C_PPOREG, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPOST}, + {ALDP, C_UOREG4K, C_NONE, C_NONE, C_PAIR, 74, 8, 0, 0, 0}, + {ALDP, C_NOREG4K, C_NONE, C_NONE, C_PAIR, 74, 8, 0, 0, 0}, + {ALDP, C_LOREG, C_NONE, C_NONE, C_PAIR, 75, 12, 0, LFROM, 0}, + {ALDP, C_ADDR, C_NONE, C_NONE, C_PAIR, 88, 12, 0, 0, 0}, + + {ASTP, C_PAIR, C_NONE, C_NONE, C_NPAUTO, 67, 4, REGSP, 0, 0}, + {ASTP, C_PAIR, C_NONE, C_NONE, C_PPAUTO, 67, 4, REGSP, 0, 0}, + {ASTP, C_PAIR, C_NONE, C_NONE, C_UAUTO4K, 76, 8, REGSP, 0, 0}, + {ASTP, C_PAIR, C_NONE, C_NONE, C_NAUTO4K, 76, 8, REGSP, 0, 0}, + {ASTP, C_PAIR, C_NONE, C_NONE, C_LAUTO, 77, 12, REGSP, LTO, 0}, + {ASTP, C_PAIR, C_NONE, C_NONE, C_NPOREG, 67, 4, 0, 0, 0}, + {ASTP, C_PAIR, 
C_NONE, C_NONE, C_NPOREG, 67, 4, 0, 0, C_XPRE}, + {ASTP, C_PAIR, C_NONE, C_NONE, C_NPOREG, 67, 4, 0, 0, C_XPOST}, + {ASTP, C_PAIR, C_NONE, C_NONE, C_PPOREG, 67, 4, 0, 0, 0}, + {ASTP, C_PAIR, C_NONE, C_NONE, C_PPOREG, 67, 4, 0, 0, C_XPRE}, + {ASTP, C_PAIR, C_NONE, C_NONE, C_PPOREG, 67, 4, 0, 0, C_XPOST}, + {ASTP, C_PAIR, C_NONE, C_NONE, C_UOREG4K, 76, 8, 0, 0, 0}, + {ASTP, C_PAIR, C_NONE, C_NONE, C_NOREG4K, 76, 8, 0, 0, 0}, + {ASTP, C_PAIR, C_NONE, C_NONE, C_LOREG, 77, 12, 0, LTO, 0}, + {ASTP, C_PAIR, C_NONE, C_NONE, C_ADDR, 87, 12, 0, 0, 0}, + + // differ from LDP/STP for C_NSAUTO_4/C_PSAUTO_4/C_NSOREG_4/C_PSOREG_4 + {ALDPW, C_NSAUTO_4, C_NONE, C_NONE, C_PAIR, 66, 4, REGSP, 0, 0}, + {ALDPW, C_PSAUTO_4, C_NONE, C_NONE, C_PAIR, 66, 4, REGSP, 0, 0}, + {ALDPW, C_UAUTO4K, C_NONE, C_NONE, C_PAIR, 74, 8, REGSP, 0, 0}, + {ALDPW, C_NAUTO4K, C_NONE, C_NONE, C_PAIR, 74, 8, REGSP, 0, 0}, + {ALDPW, C_LAUTO, C_NONE, C_NONE, C_PAIR, 75, 12, REGSP, LFROM, 0}, + {ALDPW, C_NSOREG_4, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, 0}, + {ALDPW, C_NSOREG_4, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPRE}, + {ALDPW, C_NSOREG_4, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPOST}, + {ALDPW, C_PSOREG_4, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, 0}, + {ALDPW, C_PSOREG_4, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPRE}, + {ALDPW, C_PSOREG_4, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPOST}, + {ALDPW, C_UOREG4K, C_NONE, C_NONE, C_PAIR, 74, 8, 0, 0, 0}, + {ALDPW, C_NOREG4K, C_NONE, C_NONE, C_PAIR, 74, 8, 0, 0, 0}, + {ALDPW, C_LOREG, C_NONE, C_NONE, C_PAIR, 75, 12, 0, LFROM, 0}, + {ALDPW, C_ADDR, C_NONE, C_NONE, C_PAIR, 88, 12, 0, 0, 0}, + + {ASTPW, C_PAIR, C_NONE, C_NONE, C_NSAUTO_4, 67, 4, REGSP, 0, 0}, + {ASTPW, C_PAIR, C_NONE, C_NONE, C_PSAUTO_4, 67, 4, REGSP, 0, 0}, + {ASTPW, C_PAIR, C_NONE, C_NONE, C_UAUTO4K, 76, 8, REGSP, 0, 0}, + {ASTPW, C_PAIR, C_NONE, C_NONE, C_NAUTO4K, 76, 8, REGSP, 0, 0}, + {ASTPW, C_PAIR, C_NONE, C_NONE, C_LAUTO, 77, 12, REGSP, LTO, 0}, + {ASTPW, C_PAIR, C_NONE, C_NONE, C_NSOREG_4, 67, 4, 0, 
0, 0}, + {ASTPW, C_PAIR, C_NONE, C_NONE, C_NSOREG_4, 67, 4, 0, 0, C_XPRE}, + {ASTPW, C_PAIR, C_NONE, C_NONE, C_NSOREG_4, 67, 4, 0, 0, C_XPOST}, + {ASTPW, C_PAIR, C_NONE, C_NONE, C_PSOREG_4, 67, 4, 0, 0, 0}, + {ASTPW, C_PAIR, C_NONE, C_NONE, C_PSOREG_4, 67, 4, 0, 0, C_XPRE}, + {ASTPW, C_PAIR, C_NONE, C_NONE, C_PSOREG_4, 67, 4, 0, 0, C_XPOST}, + {ASTPW, C_PAIR, C_NONE, C_NONE, C_UOREG4K, 76, 8, 0, 0, 0}, + {ASTPW, C_PAIR, C_NONE, C_NONE, C_NOREG4K, 76, 8, 0, 0, 0}, + {ASTPW, C_PAIR, C_NONE, C_NONE, C_LOREG, 77, 12, 0, LTO, 0}, + {ASTPW, C_PAIR, C_NONE, C_NONE, C_ADDR, 87, 12, 0, 0, 0}, + + {ASWPD, C_ZREG, C_NONE, C_NONE, C_ZOREG, 47, 4, 0, 0, 0}, // RegTo2=C_REG + {ASWPD, C_ZREG, C_NONE, C_NONE, C_ZAUTO, 47, 4, REGSP, 0, 0}, // RegTo2=C_REG + {ACASPD, C_PAIR, C_NONE, C_NONE, C_ZOREG, 106, 4, 0, 0, 0}, // RegTo2=C_REGREG + {ACASPD, C_PAIR, C_NONE, C_NONE, C_ZAUTO, 106, 4, REGSP, 0, 0}, // RegTo2=C_REGREG + {ALDAR, C_ZOREG, C_NONE, C_NONE, C_ZREG, 58, 4, 0, 0, 0}, + {ALDXR, C_ZOREG, C_NONE, C_NONE, C_ZREG, 58, 4, 0, 0, 0}, + {ALDAXR, C_ZOREG, C_NONE, C_NONE, C_ZREG, 58, 4, 0, 0, 0}, + {ALDXP, C_ZOREG, C_NONE, C_NONE, C_PAIR, 58, 4, 0, 0, 0}, + {ASTLR, C_ZREG, C_NONE, C_NONE, C_ZOREG, 59, 4, 0, 0, 0}, // RegTo2=C_NONE + {ASTXR, C_ZREG, C_NONE, C_NONE, C_ZOREG, 59, 4, 0, 0, 0}, // RegTo2=C_REG + {ASTLXR, C_ZREG, C_NONE, C_NONE, C_ZOREG, 59, 4, 0, 0, 0}, // RegTo2=C_REG + {ASTXP, C_PAIR, C_NONE, C_NONE, C_ZOREG, 59, 4, 0, 0, 0}, + + /* VLD[1-4]/VST[1-4] */ + {AVLD1, C_ZOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, 0}, + {AVLD1, C_LOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST}, + {AVLD1, C_ROFF, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST}, + {AVLD1R, C_ZOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, 0}, + {AVLD1R, C_LOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST}, + {AVLD1R, C_ROFF, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST}, + {AVLD1, C_LOREG, C_NONE, C_NONE, C_ELEM, 97, 4, 0, 0, C_XPOST}, + {AVLD1, C_ROFF, C_NONE, C_NONE, C_ELEM, 97, 4, 0, 0, C_XPOST}, + {AVLD1, 
C_LOREG, C_NONE, C_NONE, C_ELEM, 97, 4, 0, 0, 0}, + {AVST1, C_LIST, C_NONE, C_NONE, C_ZOREG, 84, 4, 0, 0, 0}, + {AVST1, C_LIST, C_NONE, C_NONE, C_LOREG, 84, 4, 0, 0, C_XPOST}, + {AVST1, C_LIST, C_NONE, C_NONE, C_ROFF, 84, 4, 0, 0, C_XPOST}, + {AVST2, C_LIST, C_NONE, C_NONE, C_ZOREG, 84, 4, 0, 0, 0}, + {AVST2, C_LIST, C_NONE, C_NONE, C_LOREG, 84, 4, 0, 0, C_XPOST}, + {AVST2, C_LIST, C_NONE, C_NONE, C_ROFF, 84, 4, 0, 0, C_XPOST}, + {AVST3, C_LIST, C_NONE, C_NONE, C_ZOREG, 84, 4, 0, 0, 0}, + {AVST3, C_LIST, C_NONE, C_NONE, C_LOREG, 84, 4, 0, 0, C_XPOST}, + {AVST3, C_LIST, C_NONE, C_NONE, C_ROFF, 84, 4, 0, 0, C_XPOST}, + {AVST4, C_LIST, C_NONE, C_NONE, C_ZOREG, 84, 4, 0, 0, 0}, + {AVST4, C_LIST, C_NONE, C_NONE, C_LOREG, 84, 4, 0, 0, C_XPOST}, + {AVST4, C_LIST, C_NONE, C_NONE, C_ROFF, 84, 4, 0, 0, C_XPOST}, + {AVST1, C_ELEM, C_NONE, C_NONE, C_LOREG, 96, 4, 0, 0, C_XPOST}, + {AVST1, C_ELEM, C_NONE, C_NONE, C_ROFF, 96, 4, 0, 0, C_XPOST}, + {AVST1, C_ELEM, C_NONE, C_NONE, C_LOREG, 96, 4, 0, 0, 0}, + + /* special */ + {AMOVD, C_SPR, C_NONE, C_NONE, C_ZREG, 35, 4, 0, 0, 0}, + {AMRS, C_SPR, C_NONE, C_NONE, C_ZREG, 35, 4, 0, 0, 0}, + {AMOVD, C_ZREG, C_NONE, C_NONE, C_SPR, 36, 4, 0, 0, 0}, + {AMSR, C_ZREG, C_NONE, C_NONE, C_SPR, 36, 4, 0, 0, 0}, + {AMOVD, C_VCON, C_NONE, C_NONE, C_SPR, 37, 4, 0, 0, 0}, + {AMSR, C_VCON, C_NONE, C_NONE, C_SPR, 37, 4, 0, 0, 0}, + {AMSR, C_VCON, C_NONE, C_NONE, C_SPOP, 37, 4, 0, 0, 0}, + {APRFM, C_UOREG32K, C_NONE, C_NONE, C_SPOP, 91, 4, 0, 0, 0}, + {APRFM, C_UOREG32K, C_NONE, C_NONE, C_LCON, 91, 4, 0, 0, 0}, + {ADMB, C_VCON, C_NONE, C_NONE, C_NONE, 51, 4, 0, 0, 0}, + {AHINT, C_VCON, C_NONE, C_NONE, C_NONE, 52, 4, 0, 0, 0}, + {ASYS, C_VCON, C_NONE, C_NONE, C_NONE, 50, 4, 0, 0, 0}, + {ASYS, C_VCON, C_NONE, C_NONE, C_ZREG, 50, 4, 0, 0, 0}, + {ASYSL, C_VCON, C_NONE, C_NONE, C_ZREG, 50, 4, 0, 0, 0}, + {ATLBI, C_SPOP, C_NONE, C_NONE, C_NONE, 107, 4, 0, 0, 0}, + {ATLBI, C_SPOP, C_NONE, C_NONE, C_ZREG, 107, 4, 0, 0, 0}, + + /* encryption instructions */ + 
{AAESD, C_VREG, C_NONE, C_NONE, C_VREG, 29, 4, 0, 0, 0}, // for compatibility with old code + {AAESD, C_ARNG, C_NONE, C_NONE, C_ARNG, 29, 4, 0, 0, 0}, // recommend using the new one for better readability + {ASHA1C, C_VREG, C_ZREG, C_NONE, C_VREG, 1, 4, 0, 0, 0}, + {ASHA1C, C_ARNG, C_VREG, C_NONE, C_VREG, 1, 4, 0, 0, 0}, + {ASHA1H, C_VREG, C_NONE, C_NONE, C_VREG, 29, 4, 0, 0, 0}, + {ASHA1SU0, C_ARNG, C_ARNG, C_NONE, C_ARNG, 1, 4, 0, 0, 0}, + {ASHA256H, C_ARNG, C_VREG, C_NONE, C_VREG, 1, 4, 0, 0, 0}, + {AVREV32, C_ARNG, C_NONE, C_NONE, C_ARNG, 83, 4, 0, 0, 0}, + {AVPMULL, C_ARNG, C_ARNG, C_NONE, C_ARNG, 93, 4, 0, 0, 0}, + {AVEOR3, C_ARNG, C_ARNG, C_ARNG, C_ARNG, 103, 4, 0, 0, 0}, + {AVXAR, C_VCON, C_ARNG, C_ARNG, C_ARNG, 104, 4, 0, 0, 0}, + + {obj.AUNDEF, C_NONE, C_NONE, C_NONE, C_NONE, 90, 4, 0, 0, 0}, + {obj.APCDATA, C_VCON, C_NONE, C_NONE, C_VCON, 0, 0, 0, 0, 0}, + {obj.AFUNCDATA, C_VCON, C_NONE, C_NONE, C_ADDR, 0, 0, 0, 0, 0}, + {obj.ANOP, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0}, + {obj.ANOP, C_LCON, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0}, // nop variants, see #40689 + {obj.ANOP, C_ZREG, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0}, + {obj.ANOP, C_VREG, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0}, + {obj.ADUFFZERO, C_NONE, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0}, // same as AB/ABL + {obj.ADUFFCOPY, C_NONE, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0}, // same as AB/ABL + {obj.APCALIGN, C_LCON, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0}, // align code + + {obj.AXXX, C_NONE, C_NONE, C_NONE, C_NONE, 0, 4, 0, 0, 0}, +} + +// Valid pstate field values, and value to use in instruction. +// Doesn't include special registers. 
+var pstatefield = []struct { + opd SpecialOperand + enc uint32 +}{ + {SPOP_DAIFSet, 3<<16 | 4<<12 | 6<<5}, + {SPOP_DAIFClr, 3<<16 | 4<<12 | 7<<5}, +} + +var prfopfield = map[SpecialOperand]uint32{ + SPOP_PLDL1KEEP: 0, + SPOP_PLDL1STRM: 1, + SPOP_PLDL2KEEP: 2, + SPOP_PLDL2STRM: 3, + SPOP_PLDL3KEEP: 4, + SPOP_PLDL3STRM: 5, + SPOP_PLIL1KEEP: 8, + SPOP_PLIL1STRM: 9, + SPOP_PLIL2KEEP: 10, + SPOP_PLIL2STRM: 11, + SPOP_PLIL3KEEP: 12, + SPOP_PLIL3STRM: 13, + SPOP_PSTL1KEEP: 16, + SPOP_PSTL1STRM: 17, + SPOP_PSTL2KEEP: 18, + SPOP_PSTL2STRM: 19, + SPOP_PSTL3KEEP: 20, + SPOP_PSTL3STRM: 21, +} + +// sysInstFields helps convert SYS alias instructions to SYS instructions. +// For example, the format of TLBI is: TLBI {, }. +// It's equivalent to: SYS #, C8, , #{, }. +// The field hasOperand2 indicates whether Xt is required. It helps to check +// some combinations that may be undefined, such as TLBI VMALLE1IS, R0. +var sysInstFields = map[SpecialOperand]struct { + op1 uint8 + cn uint8 + cm uint8 + op2 uint8 + hasOperand2 bool +}{ + // TLBI + SPOP_VMALLE1IS: {0, 8, 3, 0, false}, + SPOP_VAE1IS: {0, 8, 3, 1, true}, + SPOP_ASIDE1IS: {0, 8, 3, 2, true}, + SPOP_VAAE1IS: {0, 8, 3, 3, true}, + SPOP_VALE1IS: {0, 8, 3, 5, true}, + SPOP_VAALE1IS: {0, 8, 3, 7, true}, + SPOP_VMALLE1: {0, 8, 7, 0, false}, + SPOP_VAE1: {0, 8, 7, 1, true}, + SPOP_ASIDE1: {0, 8, 7, 2, true}, + SPOP_VAAE1: {0, 8, 7, 3, true}, + SPOP_VALE1: {0, 8, 7, 5, true}, + SPOP_VAALE1: {0, 8, 7, 7, true}, + SPOP_IPAS2E1IS: {4, 8, 0, 1, true}, + SPOP_IPAS2LE1IS: {4, 8, 0, 5, true}, + SPOP_ALLE2IS: {4, 8, 3, 0, false}, + SPOP_VAE2IS: {4, 8, 3, 1, true}, + SPOP_ALLE1IS: {4, 8, 3, 4, false}, + SPOP_VALE2IS: {4, 8, 3, 5, true}, + SPOP_VMALLS12E1IS: {4, 8, 3, 6, false}, + SPOP_IPAS2E1: {4, 8, 4, 1, true}, + SPOP_IPAS2LE1: {4, 8, 4, 5, true}, + SPOP_ALLE2: {4, 8, 7, 0, false}, + SPOP_VAE2: {4, 8, 7, 1, true}, + SPOP_ALLE1: {4, 8, 7, 4, false}, + SPOP_VALE2: {4, 8, 7, 5, true}, + SPOP_VMALLS12E1: {4, 8, 7, 6, false}, + SPOP_ALLE3IS: 
{6, 8, 3, 0, false}, + SPOP_VAE3IS: {6, 8, 3, 1, true}, + SPOP_VALE3IS: {6, 8, 3, 5, true}, + SPOP_ALLE3: {6, 8, 7, 0, false}, + SPOP_VAE3: {6, 8, 7, 1, true}, + SPOP_VALE3: {6, 8, 7, 5, true}, + SPOP_VMALLE1OS: {0, 8, 1, 0, false}, + SPOP_VAE1OS: {0, 8, 1, 1, true}, + SPOP_ASIDE1OS: {0, 8, 1, 2, true}, + SPOP_VAAE1OS: {0, 8, 1, 3, true}, + SPOP_VALE1OS: {0, 8, 1, 5, true}, + SPOP_VAALE1OS: {0, 8, 1, 7, true}, + SPOP_RVAE1IS: {0, 8, 2, 1, true}, + SPOP_RVAAE1IS: {0, 8, 2, 3, true}, + SPOP_RVALE1IS: {0, 8, 2, 5, true}, + SPOP_RVAALE1IS: {0, 8, 2, 7, true}, + SPOP_RVAE1OS: {0, 8, 5, 1, true}, + SPOP_RVAAE1OS: {0, 8, 5, 3, true}, + SPOP_RVALE1OS: {0, 8, 5, 5, true}, + SPOP_RVAALE1OS: {0, 8, 5, 7, true}, + SPOP_RVAE1: {0, 8, 6, 1, true}, + SPOP_RVAAE1: {0, 8, 6, 3, true}, + SPOP_RVALE1: {0, 8, 6, 5, true}, + SPOP_RVAALE1: {0, 8, 6, 7, true}, + SPOP_RIPAS2E1IS: {4, 8, 0, 2, true}, + SPOP_RIPAS2LE1IS: {4, 8, 0, 6, true}, + SPOP_ALLE2OS: {4, 8, 1, 0, false}, + SPOP_VAE2OS: {4, 8, 1, 1, true}, + SPOP_ALLE1OS: {4, 8, 1, 4, false}, + SPOP_VALE2OS: {4, 8, 1, 5, true}, + SPOP_VMALLS12E1OS: {4, 8, 1, 6, false}, + SPOP_RVAE2IS: {4, 8, 2, 1, true}, + SPOP_RVALE2IS: {4, 8, 2, 5, true}, + SPOP_IPAS2E1OS: {4, 8, 4, 0, true}, + SPOP_RIPAS2E1: {4, 8, 4, 2, true}, + SPOP_RIPAS2E1OS: {4, 8, 4, 3, true}, + SPOP_IPAS2LE1OS: {4, 8, 4, 4, true}, + SPOP_RIPAS2LE1: {4, 8, 4, 6, true}, + SPOP_RIPAS2LE1OS: {4, 8, 4, 7, true}, + SPOP_RVAE2OS: {4, 8, 5, 1, true}, + SPOP_RVALE2OS: {4, 8, 5, 5, true}, + SPOP_RVAE2: {4, 8, 6, 1, true}, + SPOP_RVALE2: {4, 8, 6, 5, true}, + SPOP_ALLE3OS: {6, 8, 1, 0, false}, + SPOP_VAE3OS: {6, 8, 1, 1, true}, + SPOP_VALE3OS: {6, 8, 1, 5, true}, + SPOP_RVAE3IS: {6, 8, 2, 1, true}, + SPOP_RVALE3IS: {6, 8, 2, 5, true}, + SPOP_RVAE3OS: {6, 8, 5, 1, true}, + SPOP_RVALE3OS: {6, 8, 5, 5, true}, + SPOP_RVAE3: {6, 8, 6, 1, true}, + SPOP_RVALE3: {6, 8, 6, 5, true}, + // DC + SPOP_IVAC: {0, 7, 6, 1, true}, + SPOP_ISW: {0, 7, 6, 2, true}, + SPOP_CSW: {0, 7, 10, 2, true}, + 
SPOP_CISW: {0, 7, 14, 2, true}, + SPOP_ZVA: {3, 7, 4, 1, true}, + SPOP_CVAC: {3, 7, 10, 1, true}, + SPOP_CVAU: {3, 7, 11, 1, true}, + SPOP_CIVAC: {3, 7, 14, 1, true}, + SPOP_IGVAC: {0, 7, 6, 3, true}, + SPOP_IGSW: {0, 7, 6, 4, true}, + SPOP_IGDVAC: {0, 7, 6, 5, true}, + SPOP_IGDSW: {0, 7, 6, 6, true}, + SPOP_CGSW: {0, 7, 10, 4, true}, + SPOP_CGDSW: {0, 7, 10, 6, true}, + SPOP_CIGSW: {0, 7, 14, 4, true}, + SPOP_CIGDSW: {0, 7, 14, 6, true}, + SPOP_GVA: {3, 7, 4, 3, true}, + SPOP_GZVA: {3, 7, 4, 4, true}, + SPOP_CGVAC: {3, 7, 10, 3, true}, + SPOP_CGDVAC: {3, 7, 10, 5, true}, + SPOP_CGVAP: {3, 7, 12, 3, true}, + SPOP_CGDVAP: {3, 7, 12, 5, true}, + SPOP_CGVADP: {3, 7, 13, 3, true}, + SPOP_CGDVADP: {3, 7, 13, 5, true}, + SPOP_CIGVAC: {3, 7, 14, 3, true}, + SPOP_CIGDVAC: {3, 7, 14, 5, true}, + SPOP_CVAP: {3, 7, 12, 1, true}, + SPOP_CVADP: {3, 7, 13, 1, true}, +} + +// Used for padinng NOOP instruction +const OP_NOOP = 0xd503201f + +// align code to a certain length by padding bytes. +func pcAlignPadLength(pc int64, alignedValue int64, ctxt *obj.Link) int { + if !((alignedValue&(alignedValue-1) == 0) && 8 <= alignedValue && alignedValue <= 2048) { + ctxt.Diag("alignment value of an instruction must be a power of two and in the range [8, 2048], got %d\n", alignedValue) + } + return int(-pc & (alignedValue - 1)) +} + +// size returns the size of the sequence of machine instructions when p is encoded with o. +// Usually it just returns o.size directly, in some cases it checks whether the optimization +// conditions are met, and if so returns the size of the optimized instruction sequence. +// These optimizations need to be synchronized with the asmout function. +func (o *Optab) size(ctxt *obj.Link, p *obj.Prog) int { + // Optimize adrp+add+ld/st to adrp+ld/st(offset). 
+ sz := movesize(p.As) + if sz != -1 { + // Relocations R_AARCH64_LDST{64,32,16,8}_ABS_LO12_NC can only generate 8-byte, 4-byte, + // 2-byte and 1-byte aligned addresses, so the address of load/store must be aligned. + // Also symbols with prefix of "go:string." are Go strings, which will go into + // the symbol table, their addresses are not necessary aligned, rule this out. + align := int64(1 << sz) + if o.a1 == C_ADDR && p.From.Offset%align == 0 && !strings.HasPrefix(p.From.Sym.Name, "go:string.") || + o.a4 == C_ADDR && p.To.Offset%align == 0 && !strings.HasPrefix(p.To.Sym.Name, "go:string.") { + return 8 + } + } + return int(o.size_) +} + +func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { + if ctxt.Retpoline { + ctxt.Diag("-spectre=ret not supported on arm64") + ctxt.Retpoline = false // don't keep printing + } + + p := cursym.Func().Text + if p == nil || p.Link == nil { // handle external functions and ELF section symbols + return + } + + if oprange[AAND&obj.AMask] == nil { + ctxt.Diag("arm64 ops not initialized, call arm64.buildop first") + } + + c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset & 0xffffffff), extrasize: int32(p.To.Offset >> 32)} + p.To.Offset &= 0xffffffff // extrasize is no longer needed + + bflag := 1 + pc := int64(0) + p.Pc = pc + var m int + var o *Optab + for p = p.Link; p != nil; p = p.Link { + p.Pc = pc + o = c.oplook(p) + m = o.size(c.ctxt, p) + if m == 0 { + switch p.As { + case obj.APCALIGN: + alignedValue := p.From.Offset + m = pcAlignPadLength(pc, alignedValue, ctxt) + // Update the current text symbol alignment value. 
+ if int32(alignedValue) > cursym.Func().Align { + cursym.Func().Align = int32(alignedValue) + } + break + case obj.ANOP, obj.AFUNCDATA, obj.APCDATA: + continue + default: + c.ctxt.Diag("zero-width instruction\n%v", p) + } + } + pc += int64(m) + + if o.flag&LFROM != 0 { + c.addpool(p, &p.From) + } + if o.flag&LFROM128 != 0 { + c.addpool128(p, &p.From, p.GetFrom3()) + } + if o.flag<O != 0 { + c.addpool(p, &p.To) + } + if c.blitrl != nil { + c.checkpool(p) + } + } + + c.cursym.Size = pc + + /* + * if any procedure is large enough to + * generate a large SBRA branch, then + * generate extra passes putting branches + * around jmps to fix. this is rare. + */ + for bflag != 0 { + bflag = 0 + pc = 0 + for p = c.cursym.Func().Text.Link; p != nil; p = p.Link { + p.Pc = pc + o = c.oplook(p) + + /* very large branches */ + if (o.flag&BRANCH14BITS != 0 || o.flag&BRANCH19BITS != 0) && p.To.Target() != nil { + otxt := p.To.Target().Pc - pc + var toofar bool + if o.flag&BRANCH14BITS != 0 { // branch instruction encodes 14 bits + toofar = otxt <= -(1<<15)+10 || otxt >= (1<<15)-10 + } else if o.flag&BRANCH19BITS != 0 { // branch instruction encodes 19 bits + toofar = otxt <= -(1<<20)+10 || otxt >= (1<<20)-10 + } + if toofar { + q := c.newprog() + q.Link = p.Link + p.Link = q + q.As = AB + q.To.Type = obj.TYPE_BRANCH + q.To.SetTarget(p.To.Target()) + p.To.SetTarget(q) + q = c.newprog() + q.Link = p.Link + p.Link = q + q.As = AB + q.To.Type = obj.TYPE_BRANCH + q.To.SetTarget(q.Link.Link) + bflag = 1 + } + } + m = o.size(c.ctxt, p) + + if m == 0 { + switch p.As { + case obj.APCALIGN: + alignedValue := p.From.Offset + m = pcAlignPadLength(pc, alignedValue, ctxt) + break + case obj.ANOP, obj.AFUNCDATA, obj.APCDATA: + continue + default: + c.ctxt.Diag("zero-width instruction\n%v", p) + } + } + + pc += int64(m) + } + } + + pc += -pc & (funcAlign - 1) + c.cursym.Size = pc + + /* + * lay out the code, emitting code and data relocations. 
+ */ + c.cursym.Grow(c.cursym.Size) + bp := c.cursym.P + psz := int32(0) + var i int + var out [6]uint32 + for p := c.cursym.Func().Text.Link; p != nil; p = p.Link { + c.pc = p.Pc + o = c.oplook(p) + sz := o.size(c.ctxt, p) + if sz > 4*len(out) { + log.Fatalf("out array in span7 is too small, need at least %d for %v", sz/4, p) + } + if p.As == obj.APCALIGN { + alignedValue := p.From.Offset + v := pcAlignPadLength(p.Pc, alignedValue, c.ctxt) + for i = 0; i < int(v/4); i++ { + // emit ANOOP instruction by the padding size + c.ctxt.Arch.ByteOrder.PutUint32(bp, OP_NOOP) + bp = bp[4:] + psz += 4 + } + } else { + c.asmout(p, o, out[:]) + for i = 0; i < sz/4; i++ { + c.ctxt.Arch.ByteOrder.PutUint32(bp, out[i]) + bp = bp[4:] + psz += 4 + } + } + } + + // Mark nonpreemptible instruction sequences. + // We use REGTMP as a scratch register during call injection, + // so instruction sequences that use REGTMP are unsafe to + // preempt asynchronously. + obj.MarkUnsafePoints(c.ctxt, c.cursym.Func().Text, c.newprog, c.isUnsafePoint, c.isRestartable) + + // Now that we know byte offsets, we can generate jump table entries. + for _, jt := range cursym.Func().JumpTables { + for i, p := range jt.Targets { + // The ith jumptable entry points to the p.Pc'th + // byte in the function symbol s. + // TODO: try using relative PCs. + jt.Sym.WriteAddr(ctxt, int64(i)*8, 8, cursym, p.Pc) + } + } +} + +// isUnsafePoint returns whether p is an unsafe point. +func (c *ctxt7) isUnsafePoint(p *obj.Prog) bool { + // If p explicitly uses REGTMP, it's unsafe to preempt, because the + // preemption sequence clobbers REGTMP. + return p.From.Reg == REGTMP || p.To.Reg == REGTMP || p.Reg == REGTMP || + p.From.Type == obj.TYPE_REGREG && p.From.Offset == REGTMP || + p.To.Type == obj.TYPE_REGREG && p.To.Offset == REGTMP +} + +// isRestartable returns whether p is a multi-instruction sequence that, +// if preempted, can be restarted. 
+func (c *ctxt7) isRestartable(p *obj.Prog) bool { + if c.isUnsafePoint(p) { + return false + } + // If p is a multi-instruction sequence with uses REGTMP inserted by + // the assembler in order to materialize a large constant/offset, we + // can restart p (at the start of the instruction sequence), recompute + // the content of REGTMP, upon async preemption. Currently, all cases + // of assembler-inserted REGTMP fall into this category. + // If p doesn't use REGTMP, it can be simply preempted, so we don't + // mark it. + o := c.oplook(p) + return o.size(c.ctxt, p) > 4 && o.flag&NOTUSETMP == 0 +} + +/* + * when the first reference to the literal pool threatens + * to go out of range of a 1Mb PC-relative offset + * drop the pool now. + */ +func (c *ctxt7) checkpool(p *obj.Prog) { + // If the pool is going to go out of range or p is the last instruction of the function, + // flush the pool. + if c.pool.size >= 0xffff0 || !ispcdisp(int32(p.Pc+4+int64(c.pool.size)-int64(c.pool.start)+8)) || p.Link == nil { + c.flushpool(p) + } +} + +func (c *ctxt7) flushpool(p *obj.Prog) { + // Needs to insert a branch before flushing the pool. + // We don't need the jump if following an unconditional branch. + // TODO: other unconditional operations. + if !(p.As == AB || p.As == obj.ARET || p.As == AERET) { + if c.ctxt.Debugvlog { + fmt.Printf("note: flush literal pool at %#x: len=%d ref=%x\n", uint64(p.Pc+4), c.pool.size, c.pool.start) + } + q := c.newprog() + if p.Link == nil { + // If p is the last instruction of the function, insert an UNDEF instruction in case the + // exection fall through to the pool. + q.As = obj.AUNDEF + } else { + // Else insert a branch to the next instruction of p. + q.As = AB + q.To.Type = obj.TYPE_BRANCH + q.To.SetTarget(p.Link) + } + q.Link = c.blitrl + q.Pos = p.Pos + c.blitrl = q + } + + // The line number for constant pool entries doesn't really matter. 
+ // We set it to the line number of the preceding instruction so that + // there are no deltas to encode in the pc-line tables. + for q := c.blitrl; q != nil; q = q.Link { + q.Pos = p.Pos + } + + c.elitrl.Link = p.Link + p.Link = c.blitrl + + c.blitrl = nil /* BUG: should refer back to values until out-of-range */ + c.elitrl = nil + c.pool.size = 0 + c.pool.start = 0 +} + +// addpool128 adds a 128-bit constant to literal pool by two consecutive DWORD +// instructions, the 128-bit constant is formed by ah.Offset<<64+al.Offset. +func (c *ctxt7) addpool128(p *obj.Prog, al, ah *obj.Addr) { + q := c.newprog() + q.As = ADWORD + q.To.Type = obj.TYPE_CONST + q.To.Offset = al.Offset // q.Pc is lower than t.Pc, so al.Offset is stored in q. + + t := c.newprog() + t.As = ADWORD + t.To.Type = obj.TYPE_CONST + t.To.Offset = ah.Offset + + q.Link = t + + if c.blitrl == nil { + c.blitrl = q + c.pool.start = uint32(p.Pc) + } else { + c.elitrl.Link = q + } + + c.elitrl = t + c.pool.size = roundUp(c.pool.size, 16) + c.pool.size += 16 + p.Pool = q +} + +/* + * MOVD foo(SB), R is actually + * MOVD addr, REGTMP + * MOVD REGTMP, R + * where addr is the address of the DWORD containing the address of foo. + * + * TODO: hash + */ +func (c *ctxt7) addpool(p *obj.Prog, a *obj.Addr) { + cls := c.aclass(a) + lit := c.instoffset + t := c.newprog() + t.As = AWORD + sz := 4 + + if a.Type == obj.TYPE_CONST { + if (lit != int64(int32(lit)) && uint64(lit) != uint64(uint32(lit))) || p.As == AVMOVQ || p.As == AVMOVD { + // out of range -0x80000000 ~ 0xffffffff or VMOVQ or VMOVD operand, must store 64-bit. + t.As = ADWORD + sz = 8 + } // else store 32-bit + } else if p.As == AMOVD && a.Type != obj.TYPE_MEM || cls == C_ADDR || cls == C_VCON || lit != int64(int32(lit)) || uint64(lit) != uint64(uint32(lit)) { + // conservative: don't know if we want signed or unsigned extension. 
+ // in case of ambiguity, store 64-bit + t.As = ADWORD + sz = 8 + } + + t.To.Type = obj.TYPE_CONST + t.To.Offset = lit + + for q := c.blitrl; q != nil; q = q.Link { /* could hash on t.t0.offset */ + if q.To == t.To { + p.Pool = q + return + } + } + + if c.blitrl == nil { + c.blitrl = t + c.pool.start = uint32(p.Pc) + } else { + c.elitrl.Link = t + } + c.elitrl = t + if t.As == ADWORD { + // make DWORD 8-byte aligned, this is not required by ISA, + // just to avoid performance penalties when loading from + // the constant pool across a cache line. + c.pool.size = roundUp(c.pool.size, 8) + } + c.pool.size += uint32(sz) + p.Pool = t +} + +// roundUp rounds up x to "to". +func roundUp(x, to uint32) uint32 { + if to == 0 || to&(to-1) != 0 { + log.Fatalf("rounded up to a value that is not a power of 2: %d\n", to) + } + return (x + to - 1) &^ (to - 1) +} + +func (c *ctxt7) regoff(a *obj.Addr) uint32 { + c.instoffset = 0 + c.aclass(a) + return uint32(c.instoffset) +} + +func isSTLXRop(op obj.As) bool { + switch op { + case ASTLXR, ASTLXRW, ASTLXRB, ASTLXRH, + ASTXR, ASTXRW, ASTXRB, ASTXRH: + return true + } + return false +} + +func isSTXPop(op obj.As) bool { + switch op { + case ASTXP, ASTLXP, ASTXPW, ASTLXPW: + return true + } + return false +} + +func isANDop(op obj.As) bool { + switch op { + case AAND, AORR, AEOR, AANDS, ATST, + ABIC, AEON, AORN, ABICS: + return true + } + return false +} + +func isANDWop(op obj.As) bool { + switch op { + case AANDW, AORRW, AEORW, AANDSW, ATSTW, + ABICW, AEONW, AORNW, ABICSW: + return true + } + return false +} + +func isADDop(op obj.As) bool { + switch op { + case AADD, AADDS, ASUB, ASUBS, ACMN, ACMP: + return true + } + return false +} + +func isADDWop(op obj.As) bool { + switch op { + case AADDW, AADDSW, ASUBW, ASUBSW, ACMNW, ACMPW: + return true + } + return false +} + +func isADDSop(op obj.As) bool { + switch op { + case AADDS, AADDSW, ASUBS, ASUBSW: + return true + } + return false +} + +func isNEGop(op obj.As) bool { + switch 
op { + case ANEG, ANEGW, ANEGS, ANEGSW: + return true + } + return false +} + +func isRegShiftOrExt(a *obj.Addr) bool { + return (a.Index-obj.RBaseARM64)®_EXT != 0 || (a.Index-obj.RBaseARM64)®_LSL != 0 +} + +// Maximum PC-relative displacement. +// The actual limit is ±2²⁰, but we are conservative +// to avoid needing to recompute the literal pool flush points +// as span-dependent jumps are enlarged. +const maxPCDisp = 512 * 1024 + +// ispcdisp reports whether v is a valid PC-relative displacement. +func ispcdisp(v int32) bool { + return -maxPCDisp < v && v < maxPCDisp && v&3 == 0 +} + +func isaddcon(v int64) bool { + /* uimm12 or uimm24? */ + if v < 0 { + return false + } + if (v & 0xFFF) == 0 { + v >>= 12 + } + return v <= 0xFFF +} + +func isaddcon2(v int64) bool { + return 0 <= v && v <= 0xFFFFFF +} + +// isbitcon reports whether a constant can be encoded into a logical instruction. +// bitcon has a binary form of repetition of a bit sequence of length 2, 4, 8, 16, 32, or 64, +// which itself is a rotate (w.r.t. the length of the unit) of a sequence of ones. +// special cases: 0 and -1 are not bitcon. +// this function needs to run against virtually all the constants, so it needs to be fast. +// for this reason, bitcon testing and bitcon encoding are separate functions. 
+func isbitcon(x uint64) bool { + if x == 1<<64-1 || x == 0 { + return false + } + // determine the period and sign-extend a unit to 64 bits + switch { + case x != x>>32|x<<32: + // period is 64 + // nothing to do + case x != x>>16|x<<48: + // period is 32 + x = uint64(int64(int32(x))) + case x != x>>8|x<<56: + // period is 16 + x = uint64(int64(int16(x))) + case x != x>>4|x<<60: + // period is 8 + x = uint64(int64(int8(x))) + default: + // period is 4 or 2, always true + // 0001, 0010, 0100, 1000 -- 0001 rotate + // 0011, 0110, 1100, 1001 -- 0011 rotate + // 0111, 1011, 1101, 1110 -- 0111 rotate + // 0101, 1010 -- 01 rotate, repeat + return true + } + return sequenceOfOnes(x) || sequenceOfOnes(^x) +} + +// sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros. +func sequenceOfOnes(x uint64) bool { + y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2 + y += x + return (y-1)&y == 0 +} + +// bitconEncode returns the encoding of a bitcon used in logical instructions +// x is known to be a bitcon +// a bitcon is a sequence of n ones at low bits (i.e. 1<>32|x<<32: + period = 64 + case x != x>>16|x<<48: + period = 32 + x = uint64(int64(int32(x))) + case x != x>>8|x<<56: + period = 16 + x = uint64(int64(int16(x))) + case x != x>>4|x<<60: + period = 8 + x = uint64(int64(int8(x))) + case x != x>>2|x<<62: + period = 4 + x = uint64(int64(x<<60) >> 60) + default: + period = 2 + x = uint64(int64(x<<62) >> 62) + } + neg := false + if int64(x) < 0 { + x = ^x + neg = true + } + y := x & -x // lowest set bit of x. 
+ s := log2(y) + n := log2(x+y) - s // x (or ^x) is a sequence of n ones left shifted by s bits + if neg { + // ^x is a sequence of n ones left shifted by s bits + // adjust n, s for x + s = n + s + n = period - n + } + + N := uint32(0) + if mode == 64 && period == 64 { + N = 1 + } + R := (period - s) & (period - 1) & uint32(mode-1) // shift amount of right rotate + S := (n - 1) | 63&^(period<<1-1) // low bits = #ones - 1, high bits encodes period + return N<<22 | R<<16 | S<<10 +} + +func log2(x uint64) uint32 { + if x == 0 { + panic("log2 of 0") + } + n := uint32(0) + if x >= 1<<32 { + x >>= 32 + n += 32 + } + if x >= 1<<16 { + x >>= 16 + n += 16 + } + if x >= 1<<8 { + x >>= 8 + n += 8 + } + if x >= 1<<4 { + x >>= 4 + n += 4 + } + if x >= 1<<2 { + x >>= 2 + n += 2 + } + if x >= 1<<1 { + x >>= 1 + n += 1 + } + return n +} + +func autoclass(l int64) int { + if l == 0 { + return C_ZAUTO + } + + if l < 0 { + if l >= -256 && (l&15) == 0 { + return C_NSAUTO_16 + } + if l >= -256 && (l&7) == 0 { + return C_NSAUTO_8 + } + if l >= -256 && (l&3) == 0 { + return C_NSAUTO_4 + } + if l >= -256 { + return C_NSAUTO + } + if l >= -512 && (l&15) == 0 { + return C_NPAUTO_16 + } + if l >= -512 && (l&7) == 0 { + return C_NPAUTO + } + if l >= -1024 && (l&15) == 0 { + return C_NQAUTO_16 + } + if l >= -4095 { + return C_NAUTO4K + } + return C_LAUTO + } + + if l <= 255 { + if (l & 15) == 0 { + return C_PSAUTO_16 + } + if (l & 7) == 0 { + return C_PSAUTO_8 + } + if (l & 3) == 0 { + return C_PSAUTO_4 + } + return C_PSAUTO + } + if l <= 504 { + if l&15 == 0 { + return C_PPAUTO_16 + } + if l&7 == 0 { + return C_PPAUTO + } + } + if l <= 1008 { + if l&15 == 0 { + return C_PQAUTO_16 + } + } + if l <= 4095 { + if l&15 == 0 { + return C_UAUTO4K_16 + } + if l&7 == 0 { + return C_UAUTO4K_8 + } + if l&3 == 0 { + return C_UAUTO4K_4 + } + if l&1 == 0 { + return C_UAUTO4K_2 + } + return C_UAUTO4K + } + if l <= 8190 { + if l&15 == 0 { + return C_UAUTO8K_16 + } + if l&7 == 0 { + return C_UAUTO8K_8 + } + 
if l&3 == 0 { + return C_UAUTO8K_4 + } + if l&1 == 0 { + return C_UAUTO8K + } + } + if l <= 16380 { + if l&15 == 0 { + return C_UAUTO16K_16 + } + if l&7 == 0 { + return C_UAUTO16K_8 + } + if l&3 == 0 { + return C_UAUTO16K + } + } + if l <= 32760 { + if l&15 == 0 { + return C_UAUTO32K_16 + } + if l&7 == 0 { + return C_UAUTO32K + } + } + if l <= 65520 && (l&15) == 0 { + return C_UAUTO64K + } + return C_LAUTO +} + +func oregclass(l int64) int { + return autoclass(l) - C_ZAUTO + C_ZOREG +} + +/* + * given an offset v and a class c (see above) + * return the offset value to use in the instruction, + * scaled if necessary + */ +func (c *ctxt7) offsetshift(p *obj.Prog, v int64, cls int) int64 { + s := 0 + if cls >= C_SEXT1 && cls <= C_SEXT16 { + s = cls - C_SEXT1 + } else { + switch cls { + case C_UAUTO4K, C_UOREG4K, C_ZOREG: + s = 0 + case C_UAUTO8K, C_UOREG8K: + s = 1 + case C_UAUTO16K, C_UOREG16K: + s = 2 + case C_UAUTO32K, C_UOREG32K: + s = 3 + case C_UAUTO64K, C_UOREG64K: + s = 4 + default: + c.ctxt.Diag("bad class: %v\n%v", DRconv(cls), p) + } + } + vs := v >> uint(s) + if vs<= REG_ARNG && r < REG_ELEM: + return C_ARNG + case r >= REG_ELEM && r < REG_ELEM_END: + return C_ELEM + case r >= REG_UXTB && r < REG_SPECIAL, + r >= REG_LSL && r < REG_ARNG: + return C_EXTREG + case r >= REG_SPECIAL: + return C_SPR + } + return C_GOK +} + +// con32class reclassifies the constant of 32-bit instruction. Because the constant type is 32-bit, +// but saved in Offset which type is int64, con32class treats it as uint32 type and reclassifies it. +func (c *ctxt7) con32class(a *obj.Addr) int { + v := uint32(a.Offset) + // For 32-bit instruction with constant, rewrite + // the high 32-bit to be a repetition of the low + // 32-bit, so that the BITCON test can be shared + // for both 32-bit and 64-bit. 32-bit ops will + // zero the high 32-bit of the destination register + // anyway. 
+ vbitcon := uint64(v)<<32 | uint64(v) + if v == 0 { + return C_ZCON + } + if isaddcon(int64(v)) { + if v <= 0xFFF { + if isbitcon(vbitcon) { + return C_ABCON0 + } + return C_ADDCON0 + } + if isbitcon(vbitcon) { + return C_ABCON + } + if movcon(int64(v)) >= 0 { + return C_AMCON + } + if movcon(int64(^v)) >= 0 { + return C_AMCON + } + return C_ADDCON + } + + t := movcon(int64(v)) + if t >= 0 { + if isbitcon(vbitcon) { + return C_MBCON + } + return C_MOVCON + } + + t = movcon(int64(^v)) + if t >= 0 { + if isbitcon(vbitcon) { + return C_MBCON + } + return C_MOVCON + } + + if isbitcon(vbitcon) { + return C_BITCON + } + + if 0 <= v && v <= 0xffffff { + return C_ADDCON2 + } + return C_LCON +} + +// con64class reclassifies the constant of C_VCON and C_LCON class. +func (c *ctxt7) con64class(a *obj.Addr) int { + zeroCount := 0 + negCount := 0 + for i := uint(0); i < 4; i++ { + immh := uint32(a.Offset >> (i * 16) & 0xffff) + if immh == 0 { + zeroCount++ + } else if immh == 0xffff { + negCount++ + } + } + if zeroCount >= 3 || negCount >= 3 { + return C_MOVCON + } else if zeroCount == 2 || negCount == 2 { + return C_MOVCON2 + } else if zeroCount == 1 || negCount == 1 { + return C_MOVCON3 + } else { + return C_VCON + } +} + +func (c *ctxt7) aclass(a *obj.Addr) int { + switch a.Type { + case obj.TYPE_NONE: + return C_NONE + + case obj.TYPE_REG: + return rclass(a.Reg) + + case obj.TYPE_REGREG: + return C_PAIR + + case obj.TYPE_SHIFT: + return C_SHIFT + + case obj.TYPE_REGLIST: + return C_LIST + + case obj.TYPE_MEM: + // The base register should be an integer register. 
+ if int16(REG_F0) <= a.Reg && a.Reg <= int16(REG_V31) { + break + } + switch a.Name { + case obj.NAME_EXTERN, obj.NAME_STATIC: + if a.Sym == nil { + break + } + c.instoffset = a.Offset + if a.Sym != nil { // use relocation + if a.Sym.Type == objabi.STLSBSS { + if c.ctxt.Flag_shared { + return C_TLS_IE + } else { + return C_TLS_LE + } + } + return C_ADDR + } + return C_LEXT + + case obj.NAME_GOTREF: + return C_GOTADDR + + case obj.NAME_AUTO: + if a.Reg == REGSP { + // unset base register for better printing, since + // a.Offset is still relative to pseudo-SP. + a.Reg = obj.REG_NONE + } + // The frame top 8 or 16 bytes are for FP + c.instoffset = int64(c.autosize) + a.Offset - int64(c.extrasize) + return autoclass(c.instoffset) + + case obj.NAME_PARAM: + if a.Reg == REGSP { + // unset base register for better printing, since + // a.Offset is still relative to pseudo-FP. + a.Reg = obj.REG_NONE + } + c.instoffset = int64(c.autosize) + a.Offset + 8 + return autoclass(c.instoffset) + + case obj.NAME_NONE: + if a.Index != 0 { + if a.Offset != 0 { + if isRegShiftOrExt(a) { + // extended or shifted register offset, (Rn)(Rm.UXTW<<2) or (Rn)(Rm<<2). 
+ return C_ROFF + } + return C_GOK + } + // register offset, (Rn)(Rm) + return C_ROFF + } + c.instoffset = a.Offset + return oregclass(c.instoffset) + } + return C_GOK + + case obj.TYPE_FCONST: + return C_FCON + + case obj.TYPE_TEXTSIZE: + return C_TEXTSIZE + + case obj.TYPE_CONST, obj.TYPE_ADDR: + switch a.Name { + case obj.NAME_NONE: + c.instoffset = a.Offset + if a.Reg != 0 && a.Reg != REGZERO { + break + } + v := c.instoffset + if v == 0 { + return C_ZCON + } + if isaddcon(v) { + if v <= 0xFFF { + if isbitcon(uint64(v)) { + return C_ABCON0 + } + return C_ADDCON0 + } + if isbitcon(uint64(v)) { + return C_ABCON + } + if movcon(v) >= 0 { + return C_AMCON + } + if movcon(^v) >= 0 { + return C_AMCON + } + return C_ADDCON + } + + t := movcon(v) + if t >= 0 { + if isbitcon(uint64(v)) { + return C_MBCON + } + return C_MOVCON + } + + t = movcon(^v) + if t >= 0 { + if isbitcon(uint64(v)) { + return C_MBCON + } + return C_MOVCON + } + + if isbitcon(uint64(v)) { + return C_BITCON + } + + if 0 <= v && v <= 0xffffff { + return C_ADDCON2 + } + + if uint64(v) == uint64(uint32(v)) || v == int64(int32(v)) { + return C_LCON + } + return C_VCON + + case obj.NAME_EXTERN, obj.NAME_STATIC: + if a.Sym == nil { + return C_GOK + } + if a.Sym.Type == objabi.STLSBSS { + c.ctxt.Diag("taking address of TLS variable is not supported") + } + c.instoffset = a.Offset + return C_VCONADDR + + case obj.NAME_AUTO: + if a.Reg == REGSP { + // unset base register for better printing, since + // a.Offset is still relative to pseudo-SP. + a.Reg = obj.REG_NONE + } + // The frame top 8 or 16 bytes are for FP + c.instoffset = int64(c.autosize) + a.Offset - int64(c.extrasize) + + case obj.NAME_PARAM: + if a.Reg == REGSP { + // unset base register for better printing, since + // a.Offset is still relative to pseudo-FP. 
+ a.Reg = obj.REG_NONE + } + c.instoffset = int64(c.autosize) + a.Offset + 8 + default: + return C_GOK + } + cf := c.instoffset + if isaddcon(cf) || isaddcon(-cf) { + return C_AACON + } + if isaddcon2(cf) { + return C_AACON2 + } + + return C_LACON + + case obj.TYPE_BRANCH: + return C_SBRA + + case obj.TYPE_SPECIAL: + opd := SpecialOperand(a.Offset) + if SPOP_EQ <= opd && opd <= SPOP_NV { + return C_COND + } + return C_SPOP + } + return C_GOK +} + +func oclass(a *obj.Addr) int { + return int(a.Class) - 1 +} + +func (c *ctxt7) oplook(p *obj.Prog) *Optab { + a1 := int(p.Optab) + if a1 != 0 { + return &optab[a1-1] + } + a1 = int(p.From.Class) + if a1 == 0 { + a0 := c.aclass(&p.From) + // do not break C_ADDCON2 when S bit is set + if (p.As == AADDS || p.As == AADDSW || p.As == ASUBS || p.As == ASUBSW) && a0 == C_ADDCON2 { + a0 = C_LCON + } + a1 = a0 + 1 + p.From.Class = int8(a1) + if p.From.Type == obj.TYPE_CONST && p.From.Name == obj.NAME_NONE { + if p.As == AMOVW || isADDWop(p.As) || isANDWop(p.As) { + // For 32-bit instruction with constant, we need to + // treat its offset value as 32 bits to classify it. 
+ ra0 := c.con32class(&p.From) + // do not break C_ADDCON2 when S bit is set + if (p.As == AADDSW || p.As == ASUBSW) && ra0 == C_ADDCON2 { + ra0 = C_LCON + } + a1 = ra0 + 1 + p.From.Class = int8(a1) + } + if ((p.As == AMOVD) || isANDop(p.As) || isADDop(p.As)) && (a0 == C_LCON || a0 == C_VCON) { + // more specific classification of 64-bit integers + a1 = c.con64class(&p.From) + 1 + p.From.Class = int8(a1) + } + } + } + + a1-- + a3 := C_NONE + 1 + if p.GetFrom3() != nil && p.RestArgs[0].Pos == 0 { + a3 = int(p.GetFrom3().Class) + if a3 == 0 { + a3 = c.aclass(p.GetFrom3()) + 1 + p.GetFrom3().Class = int8(a3) + } + } + + a3-- + a4 := int(p.To.Class) + if a4 == 0 { + a4 = c.aclass(&p.To) + 1 + p.To.Class = int8(a4) + } + + a4-- + a2 := C_NONE + if p.Reg != 0 { + a2 = rclass(p.Reg) + } + + if false { + fmt.Printf("oplook %v %d %d %d %d\n", p.As, a1, a2, a3, a4) + fmt.Printf("\t\t%d %d\n", p.From.Type, p.To.Type) + } + + ops := oprange[p.As&obj.AMask] + c1 := &xcmp[a1] + c2 := &xcmp[a2] + c3 := &xcmp[a3] + c4 := &xcmp[a4] + c5 := &xcmp[p.Scond>>5] + for i := range ops { + op := &ops[i] + if (int(op.a2) == a2 || c2[op.a2]) && c5[op.scond>>5] && c1[op.a1] && c3[op.a3] && c4[op.a4] { + p.Optab = uint16(cap(optab) - cap(ops) + i + 1) + return op + } + } + + c.ctxt.Diag("illegal combination: %v %v %v %v %v, %d %d", p, DRconv(a1), DRconv(a2), DRconv(a3), DRconv(a4), p.From.Type, p.To.Type) + // Turn illegal instruction into an UNDEF, avoid crashing in asmout + return &Optab{obj.AUNDEF, C_NONE, C_NONE, C_NONE, C_NONE, 90, 4, 0, 0, 0} +} + +func cmp(a int, b int) bool { + if a == b { + return true + } + switch a { + case C_RSP: + if b == C_REG { + return true + } + + case C_ZREG: + if b == C_REG { + return true + } + + case C_ADDCON0: + if b == C_ZCON || b == C_ABCON0 { + return true + } + + case C_ADDCON: + if b == C_ZCON || b == C_ABCON0 || b == C_ADDCON0 || b == C_ABCON || b == C_AMCON { + return true + } + + case C_MBCON: + if b == C_ABCON0 { + return true + } + + case 
C_BITCON: + if b == C_ABCON0 || b == C_ABCON || b == C_MBCON { + return true + } + + case C_MOVCON: + if b == C_MBCON || b == C_ZCON || b == C_ADDCON0 || b == C_ABCON0 || b == C_AMCON { + return true + } + + case C_ADDCON2: + if b == C_ZCON || b == C_ADDCON || b == C_ADDCON0 { + return true + } + + case C_LCON: + if b == C_ZCON || b == C_BITCON || b == C_ADDCON || b == C_ADDCON0 || b == C_ABCON || b == C_ABCON0 || b == C_MBCON || b == C_MOVCON || b == C_ADDCON2 || b == C_AMCON { + return true + } + + case C_MOVCON2: + return cmp(C_LCON, b) + + case C_VCON: + return cmp(C_LCON, b) + + case C_LACON: + if b == C_AACON || b == C_AACON2 { + return true + } + + case C_SEXT2: + if b == C_SEXT1 { + return true + } + + case C_SEXT4: + if b == C_SEXT1 || b == C_SEXT2 { + return true + } + + case C_SEXT8: + if b >= C_SEXT1 && b <= C_SEXT4 { + return true + } + + case C_SEXT16: + if b >= C_SEXT1 && b <= C_SEXT8 { + return true + } + + case C_LEXT: + if b >= C_SEXT1 && b <= C_SEXT16 { + return true + } + + case C_NSAUTO_8: + if b == C_NSAUTO_16 { + return true + } + + case C_NSAUTO_4: + if b == C_NSAUTO_16 || b == C_NSAUTO_8 { + return true + } + + case C_NSAUTO: + switch b { + case C_NSAUTO_4, C_NSAUTO_8, C_NSAUTO_16: + return true + } + + case C_NPAUTO_16: + switch b { + case C_NSAUTO_16: + return true + } + + case C_NPAUTO: + switch b { + case C_NSAUTO_16, C_NSAUTO_8, C_NPAUTO_16: + return true + } + + case C_NQAUTO_16: + switch b { + case C_NSAUTO_16, C_NPAUTO_16: + return true + } + + case C_NAUTO4K: + switch b { + case C_NSAUTO_16, C_NSAUTO_8, C_NSAUTO_4, C_NSAUTO, C_NPAUTO_16, + C_NPAUTO, C_NQAUTO_16: + return true + } + + case C_PSAUTO_16: + if b == C_ZAUTO { + return true + } + + case C_PSAUTO_8: + if b == C_ZAUTO || b == C_PSAUTO_16 { + return true + } + + case C_PSAUTO_4: + switch b { + case C_ZAUTO, C_PSAUTO_16, C_PSAUTO_8: + return true + } + + case C_PSAUTO: + switch b { + case C_ZAUTO, C_PSAUTO_16, C_PSAUTO_8, C_PSAUTO_4: + return true + } + + case C_PPAUTO_16: + 
switch b { + case C_ZAUTO, C_PSAUTO_16: + return true + } + + case C_PPAUTO: + switch b { + case C_ZAUTO, C_PSAUTO_16, C_PSAUTO_8, C_PPAUTO_16: + return true + } + + case C_PQAUTO_16: + switch b { + case C_ZAUTO, C_PSAUTO_16, C_PPAUTO_16: + return true + } + + case C_UAUTO4K: + switch b { + case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, C_PSAUTO_16, + C_PPAUTO, C_PPAUTO_16, C_PQAUTO_16, + C_UAUTO4K_2, C_UAUTO4K_4, C_UAUTO4K_8, C_UAUTO4K_16: + return true + } + + case C_UAUTO8K: + switch b { + case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, C_PSAUTO_16, + C_PPAUTO, C_PPAUTO_16, C_PQAUTO_16, + C_UAUTO4K_2, C_UAUTO4K_4, C_UAUTO4K_8, C_UAUTO4K_16, + C_UAUTO8K_4, C_UAUTO8K_8, C_UAUTO8K_16: + return true + } + + case C_UAUTO16K: + switch b { + case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, C_PSAUTO_16, + C_PPAUTO, C_PPAUTO_16, C_PQAUTO_16, + C_UAUTO4K_4, C_UAUTO4K_8, C_UAUTO4K_16, + C_UAUTO8K_4, C_UAUTO8K_8, C_UAUTO8K_16, + C_UAUTO16K_8, C_UAUTO16K_16: + return true + } + + case C_UAUTO32K: + switch b { + case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, C_PSAUTO_16, + C_PPAUTO, C_PPAUTO_16, C_PQAUTO_16, + C_UAUTO4K_8, C_UAUTO4K_16, + C_UAUTO8K_8, C_UAUTO8K_16, + C_UAUTO16K_8, C_UAUTO16K_16, + C_UAUTO32K_16: + return true + } + + case C_UAUTO64K: + switch b { + case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, C_PSAUTO_16, + C_PPAUTO_16, C_PQAUTO_16, C_UAUTO4K_16, C_UAUTO8K_16, C_UAUTO16K_16, + C_UAUTO32K_16: + return true + } + + case C_LAUTO: + switch b { + case C_ZAUTO, C_NSAUTO, C_NSAUTO_4, C_NSAUTO_8, C_NSAUTO_16, C_NPAUTO_16, C_NPAUTO, C_NQAUTO_16, C_NAUTO4K, + C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, C_PSAUTO_16, + C_PPAUTO, C_PPAUTO_16, C_PQAUTO_16, + C_UAUTO4K, C_UAUTO4K_2, C_UAUTO4K_4, C_UAUTO4K_8, C_UAUTO4K_16, + C_UAUTO8K, C_UAUTO8K_4, C_UAUTO8K_8, C_UAUTO8K_16, + C_UAUTO16K, C_UAUTO16K_8, C_UAUTO16K_16, + C_UAUTO32K, C_UAUTO32K_16, + C_UAUTO64K: + return true + } + + case C_NSOREG_8: + if b == C_NSOREG_16 { + return true + } + + case C_NSOREG_4: + if b == C_NSOREG_8 
|| b == C_NSOREG_16 { + return true + } + + case C_NSOREG: + switch b { + case C_NSOREG_4, C_NSOREG_8, C_NSOREG_16: + return true + } + + case C_NPOREG_16: + switch b { + case C_NSOREG_16: + return true + } + + case C_NPOREG: + switch b { + case C_NSOREG_16, C_NSOREG_8, C_NPOREG_16: + return true + } + + case C_NQOREG_16: + switch b { + case C_NSOREG_16, C_NPOREG_16: + return true + } + + case C_NOREG4K: + switch b { + case C_NSOREG_16, C_NSOREG_8, C_NSOREG_4, C_NSOREG, C_NPOREG_16, C_NPOREG, C_NQOREG_16: + return true + } + + case C_PSOREG_16: + if b == C_ZOREG { + return true + } + + case C_PSOREG_8: + if b == C_ZOREG || b == C_PSOREG_16 { + return true + } + + case C_PSOREG_4: + switch b { + case C_ZOREG, C_PSOREG_16, C_PSOREG_8: + return true + } + + case C_PSOREG: + switch b { + case C_ZOREG, C_PSOREG_16, C_PSOREG_8, C_PSOREG_4: + return true + } + + case C_PPOREG_16: + switch b { + case C_ZOREG, C_PSOREG_16: + return true + } + + case C_PPOREG: + switch b { + case C_ZOREG, C_PSOREG_16, C_PSOREG_8, C_PPOREG_16: + return true + } + + case C_PQOREG_16: + switch b { + case C_ZOREG, C_PSOREG_16, C_PPOREG_16: + return true + } + + case C_UOREG4K: + switch b { + case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8, C_PSOREG_16, + C_PPOREG, C_PPOREG_16, C_PQOREG_16, + C_UOREG4K_2, C_UOREG4K_4, C_UOREG4K_8, C_UOREG4K_16: + return true + } + + case C_UOREG8K: + switch b { + case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8, C_PSOREG_16, + C_PPOREG, C_PPOREG_16, C_PQOREG_16, + C_UOREG4K_2, C_UOREG4K_4, C_UOREG4K_8, C_UOREG4K_16, + C_UOREG8K_4, C_UOREG8K_8, C_UOREG8K_16: + return true + } + + case C_UOREG16K: + switch b { + case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8, C_PSOREG_16, + C_PPOREG, C_PPOREG_16, C_PQOREG_16, + C_UOREG4K_4, C_UOREG4K_8, C_UOREG4K_16, + C_UOREG8K_4, C_UOREG8K_8, C_UOREG8K_16, + C_UOREG16K_8, C_UOREG16K_16: + return true + } + + case C_UOREG32K: + switch b { + case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8, C_PSOREG_16, + C_PPOREG, C_PPOREG_16, 
C_PQOREG_16, + C_UOREG4K_8, C_UOREG4K_16, + C_UOREG8K_8, C_UOREG8K_16, + C_UOREG16K_8, C_UOREG16K_16, + C_UOREG32K_16: + return true + } + + case C_UOREG64K: + switch b { + case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8, C_PSOREG_16, + C_PPOREG_16, C_PQOREG_16, C_UOREG4K_16, C_UOREG8K_16, C_UOREG16K_16, + C_UOREG32K_16: + return true + } + + case C_LOREG: + switch b { + case C_ZOREG, C_NSOREG, C_NSOREG_4, C_NSOREG_8, C_NSOREG_16, C_NPOREG, C_NPOREG_16, C_NQOREG_16, C_NOREG4K, + C_PSOREG, C_PSOREG_4, C_PSOREG_8, C_PSOREG_16, + C_PPOREG, C_PPOREG_16, C_PQOREG_16, + C_UOREG4K, C_UOREG4K_2, C_UOREG4K_4, C_UOREG4K_8, C_UOREG4K_16, + C_UOREG8K, C_UOREG8K_4, C_UOREG8K_8, C_UOREG8K_16, + C_UOREG16K, C_UOREG16K_8, C_UOREG16K_16, + C_UOREG32K, C_UOREG32K_16, + C_UOREG64K: + return true + } + + case C_LBRA: + if b == C_SBRA { + return true + } + } + + return false +} + +type ocmp []Optab + +func (x ocmp) Len() int { + return len(x) +} + +func (x ocmp) Swap(i, j int) { + x[i], x[j] = x[j], x[i] +} + +func (x ocmp) Less(i, j int) bool { + p1 := &x[i] + p2 := &x[j] + if p1.as != p2.as { + return p1.as < p2.as + } + if p1.a1 != p2.a1 { + return p1.a1 < p2.a1 + } + if p1.a2 != p2.a2 { + return p1.a2 < p2.a2 + } + if p1.a3 != p2.a3 { + return p1.a3 < p2.a3 + } + if p1.a4 != p2.a4 { + return p1.a4 < p2.a4 + } + if p1.scond != p2.scond { + return p1.scond < p2.scond + } + return false +} + +func oprangeset(a obj.As, t []Optab) { + oprange[a&obj.AMask] = t +} + +func buildop(ctxt *obj.Link) { + if oprange[AAND&obj.AMask] != nil { + // Already initialized; stop now. + // This happens in the cmd/asm tests, + // each of which re-initializes the arch. 
+ return + } + + var n int + for i := 0; i < C_GOK; i++ { + for n = 0; n < C_GOK; n++ { + if cmp(n, i) { + xcmp[i][n] = true + } + } + } + for n = 0; optab[n].as != obj.AXXX; n++ { + } + sort.Sort(ocmp(optab[:n])) + for i := 0; i < n; i++ { + r := optab[i].as + start := i + for optab[i].as == r { + i++ + } + t := optab[start:i] + i-- + oprangeset(r, t) + switch r { + default: + ctxt.Diag("unknown op in build: %v", r) + ctxt.DiagFlush() + log.Fatalf("bad code") + + case AADD: + oprangeset(AADDS, t) + oprangeset(ASUB, t) + oprangeset(ASUBS, t) + oprangeset(AADDW, t) + oprangeset(AADDSW, t) + oprangeset(ASUBW, t) + oprangeset(ASUBSW, t) + + case AAND: /* logical immediate, logical shifted register */ + oprangeset(AANDW, t) + oprangeset(AEOR, t) + oprangeset(AEORW, t) + oprangeset(AORR, t) + oprangeset(AORRW, t) + oprangeset(ABIC, t) + oprangeset(ABICW, t) + oprangeset(AEON, t) + oprangeset(AEONW, t) + oprangeset(AORN, t) + oprangeset(AORNW, t) + + case AANDS: /* logical immediate, logical shifted register, set flags, cannot target RSP */ + oprangeset(AANDSW, t) + oprangeset(ABICS, t) + oprangeset(ABICSW, t) + + case ANEG: + oprangeset(ANEGS, t) + oprangeset(ANEGSW, t) + oprangeset(ANEGW, t) + + case AADC: /* rn=Rd */ + oprangeset(AADCW, t) + + oprangeset(AADCS, t) + oprangeset(AADCSW, t) + oprangeset(ASBC, t) + oprangeset(ASBCW, t) + oprangeset(ASBCS, t) + oprangeset(ASBCSW, t) + + case ANGC: /* rn=REGZERO */ + oprangeset(ANGCW, t) + + oprangeset(ANGCS, t) + oprangeset(ANGCSW, t) + + case ACMP: + oprangeset(ACMPW, t) + oprangeset(ACMN, t) + oprangeset(ACMNW, t) + + case ATST: + oprangeset(ATSTW, t) + + /* register/register, and shifted */ + case AMVN: + oprangeset(AMVNW, t) + + case AMOVK: + oprangeset(AMOVKW, t) + oprangeset(AMOVN, t) + oprangeset(AMOVNW, t) + oprangeset(AMOVZ, t) + oprangeset(AMOVZW, t) + + case ASWPD: + for i := range atomicLDADD { + oprangeset(i, t) + } + for i := range atomicSWP { + if i == ASWPD { + continue + } + oprangeset(i, t) + } + + case 
ACASPD: + oprangeset(ACASPW, t) + case ABEQ: + oprangeset(ABNE, t) + oprangeset(ABCS, t) + oprangeset(ABHS, t) + oprangeset(ABCC, t) + oprangeset(ABLO, t) + oprangeset(ABMI, t) + oprangeset(ABPL, t) + oprangeset(ABVS, t) + oprangeset(ABVC, t) + oprangeset(ABHI, t) + oprangeset(ABLS, t) + oprangeset(ABGE, t) + oprangeset(ABLT, t) + oprangeset(ABGT, t) + oprangeset(ABLE, t) + + case ALSL: + oprangeset(ALSLW, t) + oprangeset(ALSR, t) + oprangeset(ALSRW, t) + oprangeset(AASR, t) + oprangeset(AASRW, t) + oprangeset(AROR, t) + oprangeset(ARORW, t) + + case ACLS: + oprangeset(ACLSW, t) + oprangeset(ACLZ, t) + oprangeset(ACLZW, t) + oprangeset(ARBIT, t) + oprangeset(ARBITW, t) + oprangeset(AREV, t) + oprangeset(AREVW, t) + oprangeset(AREV16, t) + oprangeset(AREV16W, t) + oprangeset(AREV32, t) + + case ASDIV: + oprangeset(ASDIVW, t) + oprangeset(AUDIV, t) + oprangeset(AUDIVW, t) + oprangeset(ACRC32B, t) + oprangeset(ACRC32CB, t) + oprangeset(ACRC32CH, t) + oprangeset(ACRC32CW, t) + oprangeset(ACRC32CX, t) + oprangeset(ACRC32H, t) + oprangeset(ACRC32W, t) + oprangeset(ACRC32X, t) + + case AMADD: + oprangeset(AMADDW, t) + oprangeset(AMSUB, t) + oprangeset(AMSUBW, t) + oprangeset(ASMADDL, t) + oprangeset(ASMSUBL, t) + oprangeset(AUMADDL, t) + oprangeset(AUMSUBL, t) + + case AREM: + oprangeset(AREMW, t) + oprangeset(AUREM, t) + oprangeset(AUREMW, t) + + case AMUL: + oprangeset(AMULW, t) + oprangeset(AMNEG, t) + oprangeset(AMNEGW, t) + oprangeset(ASMNEGL, t) + oprangeset(ASMULL, t) + oprangeset(ASMULH, t) + oprangeset(AUMNEGL, t) + oprangeset(AUMULH, t) + oprangeset(AUMULL, t) + + case AMOVB: + oprangeset(AMOVBU, t) + + case AMOVH: + oprangeset(AMOVHU, t) + + case AMOVW: + oprangeset(AMOVWU, t) + + case ABFM: + oprangeset(ABFMW, t) + oprangeset(ASBFM, t) + oprangeset(ASBFMW, t) + oprangeset(AUBFM, t) + oprangeset(AUBFMW, t) + + case ABFI: + oprangeset(ABFIW, t) + oprangeset(ABFXIL, t) + oprangeset(ABFXILW, t) + oprangeset(ASBFIZ, t) + oprangeset(ASBFIZW, t) + oprangeset(ASBFX, 
t) + oprangeset(ASBFXW, t) + oprangeset(AUBFIZ, t) + oprangeset(AUBFIZW, t) + oprangeset(AUBFX, t) + oprangeset(AUBFXW, t) + + case AEXTR: + oprangeset(AEXTRW, t) + + case ASXTB: + oprangeset(ASXTBW, t) + oprangeset(ASXTH, t) + oprangeset(ASXTHW, t) + oprangeset(ASXTW, t) + oprangeset(AUXTB, t) + oprangeset(AUXTH, t) + oprangeset(AUXTW, t) + oprangeset(AUXTBW, t) + oprangeset(AUXTHW, t) + + case ACCMN: + oprangeset(ACCMNW, t) + oprangeset(ACCMP, t) + oprangeset(ACCMPW, t) + + case ACSEL: + oprangeset(ACSELW, t) + oprangeset(ACSINC, t) + oprangeset(ACSINCW, t) + oprangeset(ACSINV, t) + oprangeset(ACSINVW, t) + oprangeset(ACSNEG, t) + oprangeset(ACSNEGW, t) + + case ACINC: + // aliases Rm=Rn, !cond + oprangeset(ACINCW, t) + oprangeset(ACINV, t) + oprangeset(ACINVW, t) + oprangeset(ACNEG, t) + oprangeset(ACNEGW, t) + + // aliases, Rm=Rn=REGZERO, !cond + case ACSET: + oprangeset(ACSETW, t) + + oprangeset(ACSETM, t) + oprangeset(ACSETMW, t) + + case AMOVD, + AB, + ABL, + AWORD, + ADWORD, + obj.ARET, + obj.ATEXT: + break + + case AFLDPQ: + break + case AFSTPQ: + break + case ALDP: + oprangeset(AFLDPD, t) + + case ASTP: + oprangeset(AFSTPD, t) + + case ASTPW: + oprangeset(AFSTPS, t) + + case ALDPW: + oprangeset(ALDPSW, t) + oprangeset(AFLDPS, t) + + case AERET: + oprangeset(AWFE, t) + oprangeset(AWFI, t) + oprangeset(AYIELD, t) + oprangeset(ASEV, t) + oprangeset(ASEVL, t) + oprangeset(ANOOP, t) + oprangeset(ADRPS, t) + + case ACBZ: + oprangeset(ACBZW, t) + oprangeset(ACBNZ, t) + oprangeset(ACBNZW, t) + + case ATBZ: + oprangeset(ATBNZ, t) + + case AADR, AADRP: + break + + case ACLREX: + break + + case ASVC: + oprangeset(AHVC, t) + oprangeset(AHLT, t) + oprangeset(ASMC, t) + oprangeset(ABRK, t) + oprangeset(ADCPS1, t) + oprangeset(ADCPS2, t) + oprangeset(ADCPS3, t) + + case AFADDS: + oprangeset(AFADDD, t) + oprangeset(AFSUBS, t) + oprangeset(AFSUBD, t) + oprangeset(AFMULS, t) + oprangeset(AFMULD, t) + oprangeset(AFNMULS, t) + oprangeset(AFNMULD, t) + oprangeset(AFDIVS, t) + 
oprangeset(AFMAXD, t) + oprangeset(AFMAXS, t) + oprangeset(AFMIND, t) + oprangeset(AFMINS, t) + oprangeset(AFMAXNMD, t) + oprangeset(AFMAXNMS, t) + oprangeset(AFMINNMD, t) + oprangeset(AFMINNMS, t) + oprangeset(AFDIVD, t) + + case AFMSUBD: + oprangeset(AFMSUBS, t) + oprangeset(AFMADDS, t) + oprangeset(AFMADDD, t) + oprangeset(AFNMSUBS, t) + oprangeset(AFNMSUBD, t) + oprangeset(AFNMADDS, t) + oprangeset(AFNMADDD, t) + + case AFCVTSD: + oprangeset(AFCVTDS, t) + oprangeset(AFABSD, t) + oprangeset(AFABSS, t) + oprangeset(AFNEGD, t) + oprangeset(AFNEGS, t) + oprangeset(AFSQRTD, t) + oprangeset(AFSQRTS, t) + oprangeset(AFRINTNS, t) + oprangeset(AFRINTND, t) + oprangeset(AFRINTPS, t) + oprangeset(AFRINTPD, t) + oprangeset(AFRINTMS, t) + oprangeset(AFRINTMD, t) + oprangeset(AFRINTZS, t) + oprangeset(AFRINTZD, t) + oprangeset(AFRINTAS, t) + oprangeset(AFRINTAD, t) + oprangeset(AFRINTXS, t) + oprangeset(AFRINTXD, t) + oprangeset(AFRINTIS, t) + oprangeset(AFRINTID, t) + oprangeset(AFCVTDH, t) + oprangeset(AFCVTHS, t) + oprangeset(AFCVTHD, t) + oprangeset(AFCVTSH, t) + + case AFCMPS: + oprangeset(AFCMPD, t) + oprangeset(AFCMPES, t) + oprangeset(AFCMPED, t) + + case AFCCMPS: + oprangeset(AFCCMPD, t) + oprangeset(AFCCMPES, t) + oprangeset(AFCCMPED, t) + + case AFCSELD: + oprangeset(AFCSELS, t) + + case AFMOVQ, AFMOVD, AFMOVS, + AVMOVQ, AVMOVD, AVMOVS: + break + + case AFCVTZSD: + oprangeset(AFCVTZSDW, t) + oprangeset(AFCVTZSS, t) + oprangeset(AFCVTZSSW, t) + oprangeset(AFCVTZUD, t) + oprangeset(AFCVTZUDW, t) + oprangeset(AFCVTZUS, t) + oprangeset(AFCVTZUSW, t) + + case ASCVTFD: + oprangeset(ASCVTFS, t) + oprangeset(ASCVTFWD, t) + oprangeset(ASCVTFWS, t) + oprangeset(AUCVTFD, t) + oprangeset(AUCVTFS, t) + oprangeset(AUCVTFWD, t) + oprangeset(AUCVTFWS, t) + + case ASYS: + oprangeset(AAT, t) + oprangeset(AIC, t) + + case ATLBI: + oprangeset(ADC, t) + + case ASYSL, AHINT: + break + + case ADMB: + oprangeset(ADSB, t) + oprangeset(AISB, t) + + case AMRS, AMSR: + break + + case ALDAR: 
+ oprangeset(ALDARW, t) + oprangeset(ALDARB, t) + oprangeset(ALDARH, t) + fallthrough + + case ALDXR: + oprangeset(ALDXRB, t) + oprangeset(ALDXRH, t) + oprangeset(ALDXRW, t) + + case ALDAXR: + oprangeset(ALDAXRB, t) + oprangeset(ALDAXRH, t) + oprangeset(ALDAXRW, t) + + case ALDXP: + oprangeset(ALDXPW, t) + oprangeset(ALDAXP, t) + oprangeset(ALDAXPW, t) + + case ASTLR: + oprangeset(ASTLRB, t) + oprangeset(ASTLRH, t) + oprangeset(ASTLRW, t) + + case ASTXR: + oprangeset(ASTXRB, t) + oprangeset(ASTXRH, t) + oprangeset(ASTXRW, t) + + case ASTLXR: + oprangeset(ASTLXRB, t) + oprangeset(ASTLXRH, t) + oprangeset(ASTLXRW, t) + + case ASTXP: + oprangeset(ASTLXP, t) + oprangeset(ASTLXPW, t) + oprangeset(ASTXPW, t) + + case AVADDP: + oprangeset(AVAND, t) + oprangeset(AVCMEQ, t) + oprangeset(AVORR, t) + oprangeset(AVEOR, t) + oprangeset(AVBSL, t) + oprangeset(AVBIT, t) + oprangeset(AVCMTST, t) + oprangeset(AVUMAX, t) + oprangeset(AVUMIN, t) + oprangeset(AVUZP1, t) + oprangeset(AVUZP2, t) + oprangeset(AVBIF, t) + + case AVADD: + oprangeset(AVSUB, t) + oprangeset(AVRAX1, t) + + case AAESD: + oprangeset(AAESE, t) + oprangeset(AAESMC, t) + oprangeset(AAESIMC, t) + oprangeset(ASHA1SU1, t) + oprangeset(ASHA256SU0, t) + oprangeset(ASHA512SU0, t) + + case ASHA1C: + oprangeset(ASHA1P, t) + oprangeset(ASHA1M, t) + + case ASHA256H: + oprangeset(ASHA256H2, t) + oprangeset(ASHA512H, t) + oprangeset(ASHA512H2, t) + + case ASHA1SU0: + oprangeset(ASHA256SU1, t) + oprangeset(ASHA512SU1, t) + + case AVADDV: + oprangeset(AVUADDLV, t) + + case AVFMLA: + oprangeset(AVFMLS, t) + + case AVPMULL: + oprangeset(AVPMULL2, t) + + case AVUSHR: + oprangeset(AVSHL, t) + oprangeset(AVSRI, t) + oprangeset(AVSLI, t) + oprangeset(AVUSRA, t) + + case AVREV32: + oprangeset(AVCNT, t) + oprangeset(AVRBIT, t) + oprangeset(AVREV64, t) + oprangeset(AVREV16, t) + + case AVZIP1: + oprangeset(AVZIP2, t) + oprangeset(AVTRN1, t) + oprangeset(AVTRN2, t) + + case AVUXTL: + oprangeset(AVUXTL2, t) + + case AVUSHLL: + 
oprangeset(AVUSHLL2, t) + + case AVLD1R: + oprangeset(AVLD2, t) + oprangeset(AVLD2R, t) + oprangeset(AVLD3, t) + oprangeset(AVLD3R, t) + oprangeset(AVLD4, t) + oprangeset(AVLD4R, t) + + case AVEOR3: + oprangeset(AVBCAX, t) + + case AVUADDW: + oprangeset(AVUADDW2, t) + + case AVTBL: + oprangeset(AVTBX, t) + + case ASHA1H, + AVCNT, + AVMOV, + AVLD1, + AVST1, + AVST2, + AVST3, + AVST4, + AVDUP, + AVMOVI, + APRFM, + AVEXT, + AVXAR: + break + + case obj.ANOP, + obj.AUNDEF, + obj.AFUNCDATA, + obj.APCALIGN, + obj.APCDATA, + obj.ADUFFZERO, + obj.ADUFFCOPY: + break + } + } +} + +// chipfloat7() checks if the immediate constants available in FMOVS/FMOVD instructions. +// For details of the range of constants available, see +// http://infocenter.arm.com/help/topic/com.arm.doc.dui0473m/dom1359731199385.html. +func (c *ctxt7) chipfloat7(e float64) int { + ei := math.Float64bits(e) + l := uint32(int32(ei)) + h := uint32(int32(ei >> 32)) + + if l != 0 || h&0xffff != 0 { + return -1 + } + h1 := h & 0x7fc00000 + if h1 != 0x40000000 && h1 != 0x3fc00000 { + return -1 + } + n := 0 + + // sign bit (a) + if h&0x80000000 != 0 { + n |= 1 << 7 + } + + // exp sign bit (b) + if h1 == 0x3fc00000 { + n |= 1 << 6 + } + + // rest of exp and mantissa (cd-efgh) + n |= int((h >> 16) & 0x3f) + + //print("match %.8lux %.8lux %d\n", l, h, n); + return n +} + +/* form offset parameter to SYS; special register number */ +func SYSARG5(op0 int, op1 int, Cn int, Cm int, op2 int) int { + return op0<<19 | op1<<16 | Cn<<12 | Cm<<8 | op2<<5 +} + +func SYSARG4(op1 int, Cn int, Cm int, op2 int) int { + return SYSARG5(0, op1, Cn, Cm, op2) +} + +// checkUnpredictable checks if the source and transfer registers are the same register. +// ARM64 manual says it is "constrained unpredictable" if the src and dst registers of STP/LDP are same. 
+func (c *ctxt7) checkUnpredictable(p *obj.Prog, isload bool, wback bool, rn int16, rt1 int16, rt2 int16) { + if wback && rn != REGSP && (rn == rt1 || rn == rt2) { + c.ctxt.Diag("constrained unpredictable behavior: %v", p) + } + if isload && rt1 == rt2 { + c.ctxt.Diag("constrained unpredictable behavior: %v", p) + } +} + +/* checkindex checks if index >= 0 && index <= maxindex */ +func (c *ctxt7) checkindex(p *obj.Prog, index, maxindex int) { + if index < 0 || index > maxindex { + c.ctxt.Diag("register element index out of range 0 to %d: %v", maxindex, p) + } +} + +/* checkoffset checks whether the immediate offset is valid for VLD[1-4].P and VST[1-4].P */ +func (c *ctxt7) checkoffset(p *obj.Prog, as obj.As) { + var offset, list, n, expect int64 + switch as { + case AVLD1, AVLD2, AVLD3, AVLD4, AVLD1R, AVLD2R, AVLD3R, AVLD4R: + offset = p.From.Offset + list = p.To.Offset + case AVST1, AVST2, AVST3, AVST4: + offset = p.To.Offset + list = p.From.Offset + default: + c.ctxt.Diag("invalid operation on op %v", p.As) + } + opcode := (list >> 12) & 15 + q := (list >> 30) & 1 + size := (list >> 10) & 3 + if offset == 0 { + return + } + switch opcode { + case 0x7: + n = 1 // one register + case 0xa: + n = 2 // two registers + case 0x6: + n = 3 // three registers + case 0x2: + n = 4 // four registers + default: + c.ctxt.Diag("invalid register numbers in ARM64 register list: %v", p) + } + + switch as { + case AVLD1R, AVLD2R, AVLD3R, AVLD4R: + if offset != n*(1<> 5) & 7 + switch p.As { + case AMOVB, AMOVBU: + if amount != 0 { + c.ctxt.Diag("invalid index shift amount: %v", p) + } + case AMOVH, AMOVHU: + if amount != 1 && amount != 0 { + c.ctxt.Diag("invalid index shift amount: %v", p) + } + case AMOVW, AMOVWU, AFMOVS: + if amount != 2 && amount != 0 { + c.ctxt.Diag("invalid index shift amount: %v", p) + } + case AMOVD, AFMOVD: + if amount != 3 && amount != 0 { + c.ctxt.Diag("invalid index shift amount: %v", p) + } + default: + panic("invalid operation") + } +} + +func (c *ctxt7) 
asmout(p *obj.Prog, o *Optab, out []uint32) { + var os [5]uint32 + o1 := uint32(0) + o2 := uint32(0) + o3 := uint32(0) + o4 := uint32(0) + o5 := uint32(0) + if false { /*debug['P']*/ + fmt.Printf("%x: %v\ttype %d\n", uint32(p.Pc), p, o.type_) + } + switch o.type_ { + default: + c.ctxt.Diag("%v: unknown asm %d", p, o.type_) + + case 0: /* pseudo ops */ + break + + case 1: /* op Rm,[Rn],Rd; default Rn=Rd -> op Rm<<0,[Rn,]Rd (shifted register) */ + o1 = c.oprrr(p, p.As) + + rf := int(p.From.Reg) + rt := int(p.To.Reg) + r := int(p.Reg) + if p.To.Type == obj.TYPE_NONE { + rt = REGZERO + } + if r == 0 { + r = rt + } + o1 |= (uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31) + + case 2: /* add/sub $(uimm12|uimm24)[,R],R; cmp $(uimm12|uimm24),R */ + if p.To.Reg == REG_RSP && isADDSop(p.As) { + c.ctxt.Diag("illegal destination register: %v\n", p) + } + o1 = c.opirr(p, p.As) + + rt := int(p.To.Reg) + if p.To.Type == obj.TYPE_NONE { + if (o1 & Sbit) == 0 { + c.ctxt.Diag("ineffective ZR destination\n%v", p) + } + rt = REGZERO + } + + r := int(p.Reg) + if r == 0 { + r = rt + } + v := int32(c.regoff(&p.From)) + o1 = c.oaddi(p, int32(o1), v, r, rt) + + case 3: /* op R<> 10) & 63 + is64bit := o1 & (1 << 31) + if is64bit == 0 && amount >= 32 { + c.ctxt.Diag("shift amount out of range 0 to 31: %v", p) + } + shift := (p.From.Offset >> 22) & 3 + if (shift > 2 || shift < 0) && (isADDop(p.As) || isADDWop(p.As) || isNEGop(p.As)) { + c.ctxt.Diag("unsupported shift operator: %v", p) + } + o1 |= uint32(p.From.Offset) /* includes reg, op, etc */ + rt := int(p.To.Reg) + if p.To.Type == obj.TYPE_NONE { + rt = REGZERO + } + r := int(p.Reg) + if p.As == AMVN || p.As == AMVNW || isNEGop(p.As) { + r = REGZERO + } else if r == 0 { + r = rt + } + o1 |= (uint32(r&31) << 5) | uint32(rt&31) + + case 4: /* mov $addcon, R; mov $recon, R; mov $racon, R; mov $addcon2, R */ + rt := int(p.To.Reg) + r := int(o.param) + + if r == 0 { + r = REGZERO + } else if r == REGFROM { + r = int(p.From.Reg) + } + 
if r == 0 { + r = REGSP + } + + v := int32(c.regoff(&p.From)) + var op int32 + if v < 0 { + v = -v + op = int32(c.opirr(p, ASUB)) + } else { + op = int32(c.opirr(p, AADD)) + } + + if int(o.size(c.ctxt, p)) == 8 { + // NOTE: this case does not use REGTMP. If it ever does, + // remove the NOTUSETMP flag in optab. + o1 = c.oaddi(p, op, v&0xfff000, r, rt) + o2 = c.oaddi(p, op, v&0x000fff, rt, rt) + break + } + + o1 = c.oaddi(p, op, v, r, rt) + + case 5: /* b s; bl s */ + o1 = c.opbra(p, p.As) + + if p.To.Sym == nil { + o1 |= uint32(c.brdist(p, 0, 26, 2)) + break + } + + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 4 + rel.Sym = p.To.Sym + rel.Add = p.To.Offset + rel.Type = objabi.R_CALLARM64 + + case 6: /* b ,O(R); bl ,O(R) */ + o1 = c.opbrr(p, p.As) + o1 |= uint32(p.To.Reg&31) << 5 + if p.As == obj.ACALL { + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 0 + rel.Type = objabi.R_CALLIND + } + + case 7: /* beq s */ + o1 = c.opbra(p, p.As) + + o1 |= uint32(c.brdist(p, 0, 19, 2) << 5) + + case 8: /* lsl $c,[R],R -> ubfm $(W-1)-c,$(-c MOD (W-1)),Rn,Rd */ + rt := int(p.To.Reg) + + rf := int(p.Reg) + if rf == 0 { + rf = rt + } + v := int32(p.From.Offset) + switch p.As { + case AASR: + o1 = c.opbfm(p, ASBFM, int(v), 63, rf, rt) + + case AASRW: + o1 = c.opbfm(p, ASBFMW, int(v), 31, rf, rt) + + case ALSL: + o1 = c.opbfm(p, AUBFM, int((64-v)&63), int(63-v), rf, rt) + + case ALSLW: + o1 = c.opbfm(p, AUBFMW, int((32-v)&31), int(31-v), rf, rt) + + case ALSR: + o1 = c.opbfm(p, AUBFM, int(v), 63, rf, rt) + + case ALSRW: + o1 = c.opbfm(p, AUBFMW, int(v), 31, rf, rt) + + case AROR: + o1 = c.opextr(p, AEXTR, v, rf, rf, rt) + + case ARORW: + o1 = c.opextr(p, AEXTRW, v, rf, rf, rt) + + default: + c.ctxt.Diag("bad shift $con\n%v", p) + break + } + + case 9: /* lsl Rm,[Rn],Rd -> lslv Rm, Rn, Rd */ + o1 = c.oprrr(p, p.As) + + r := int(p.Reg) + if r == 0 { + r = int(p.To.Reg) + } + o1 |= (uint32(p.From.Reg&31) << 16) | (uint32(r&31) << 5) | 
uint32(p.To.Reg&31) + + case 10: /* brk/hvc/.../svc [$con] */ + o1 = c.opimm(p, p.As) + + if p.From.Type != obj.TYPE_NONE { + o1 |= uint32((p.From.Offset & 0xffff) << 5) + } + + case 11: /* dword */ + c.aclass(&p.To) + + o1 = uint32(c.instoffset) + o2 = uint32(c.instoffset >> 32) + if p.To.Sym != nil { + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 8 + rel.Sym = p.To.Sym + rel.Add = p.To.Offset + rel.Type = objabi.R_ADDR + o2 = 0 + o1 = o2 + } + + case 12: /* movT $vcon, reg */ + // NOTE: this case does not use REGTMP. If it ever does, + // remove the NOTUSETMP flag in optab. + num := c.omovlconst(p.As, p, &p.From, int(p.To.Reg), os[:]) + if num == 0 { + c.ctxt.Diag("invalid constant: %v", p) + } + o1 = os[0] + o2 = os[1] + o3 = os[2] + o4 = os[3] + + case 13: /* addop $vcon, [R], R (64 bit literal); cmp $lcon,R -> addop $lcon,R, ZR */ + if p.Reg == REGTMP { + c.ctxt.Diag("cannot use REGTMP as source: %v\n", p) + } + if p.To.Reg == REG_RSP && isADDSop(p.As) { + c.ctxt.Diag("illegal destination register: %v\n", p) + } + o := uint32(0) + num := uint8(0) + cls := oclass(&p.From) + if isADDWop(p.As) { + if !cmp(C_LCON, cls) { + c.ctxt.Diag("illegal combination: %v", p) + } + num = c.omovlconst(AMOVW, p, &p.From, REGTMP, os[:]) + } else { + num = c.omovlconst(AMOVD, p, &p.From, REGTMP, os[:]) + } + if num == 0 { + c.ctxt.Diag("invalid constant: %v", p) + } + rt := int(p.To.Reg) + if p.To.Type == obj.TYPE_NONE { + rt = REGZERO + } + r := int(p.Reg) + if r == 0 { + r = rt + } + if p.To.Type != obj.TYPE_NONE && (p.To.Reg == REGSP || r == REGSP) { + o = c.opxrrr(p, p.As, false) + o |= REGTMP & 31 << 16 + o |= LSL0_64 + } else { + o = c.oprrr(p, p.As) + o |= REGTMP & 31 << 16 /* shift is 0 */ + } + + o |= uint32(r&31) << 5 + o |= uint32(rt & 31) + + os[num] = o + o1 = os[0] + o2 = os[1] + o3 = os[2] + o4 = os[3] + o5 = os[4] + + case 14: /* word */ + if c.aclass(&p.To) == C_ADDR { + c.ctxt.Diag("address constant needs DWORD\n%v", p) + } + o1 = 
uint32(c.instoffset) + if p.To.Sym != nil { + // This case happens with words generated + // in the PC stream as part of the literal pool. + rel := obj.Addrel(c.cursym) + + rel.Off = int32(c.pc) + rel.Siz = 4 + rel.Sym = p.To.Sym + rel.Add = p.To.Offset + rel.Type = objabi.R_ADDR + o1 = 0 + } + + case 15: /* mul/mneg/umulh/umull r,[r,]r; madd/msub/fmadd/fmsub/fnmadd/fnmsub Rm,Ra,Rn,Rd */ + o1 = c.oprrr(p, p.As) + + rf := int(p.From.Reg) + rt := int(p.To.Reg) + var r int + var ra int + if p.From3Type() == obj.TYPE_REG { + r = int(p.GetFrom3().Reg) + ra = int(p.Reg) + if ra == 0 { + ra = REGZERO + } + } else { + r = int(p.Reg) + if r == 0 { + r = rt + } + ra = REGZERO + } + + o1 |= (uint32(rf&31) << 16) | (uint32(ra&31) << 10) | (uint32(r&31) << 5) | uint32(rt&31) + + case 16: /* XremY R[,R],R -> XdivY; XmsubY */ + o1 = c.oprrr(p, p.As) + + rf := int(p.From.Reg) + rt := int(p.To.Reg) + r := int(p.Reg) + if r == 0 { + r = rt + } + o1 |= (uint32(rf&31) << 16) | (uint32(r&31) << 5) | REGTMP&31 + o2 = c.oprrr(p, AMSUBW) + o2 |= o1 & (1 << 31) /* same size */ + o2 |= (uint32(rf&31) << 16) | (uint32(r&31) << 10) | (REGTMP & 31 << 5) | uint32(rt&31) + + case 17: /* op Rm,[Rn],Rd; default Rn=ZR */ + o1 = c.oprrr(p, p.As) + + rf := int(p.From.Reg) + rt := int(p.To.Reg) + r := int(p.Reg) + if p.To.Type == obj.TYPE_NONE { + rt = REGZERO + } + if r == 0 { + r = REGZERO + } + o1 |= (uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31) + + case 18: /* csel cond,Rn,Rm,Rd; cinc/cinv/cneg cond,Rn,Rd; cset cond,Rd */ + o1 = c.oprrr(p, p.As) + + cond := SpecialOperand(p.From.Offset) + if cond < SPOP_EQ || cond > SPOP_NV || (cond == SPOP_AL || cond == SPOP_NV) && p.From3Type() == obj.TYPE_NONE { + c.ctxt.Diag("invalid condition: %v", p) + } else { + cond -= SPOP_EQ + } + + r := int(p.Reg) + var rf int = r + if p.From3Type() == obj.TYPE_NONE { + /* CINC/CINV/CNEG or CSET/CSETM*/ + if r == 0 { + /* CSET/CSETM */ + rf = REGZERO + r = rf + } + cond ^= 1 + } else { + rf = 
int(p.GetFrom3().Reg) /* CSEL */ + } + + rt := int(p.To.Reg) + o1 |= (uint32(rf&31) << 16) | (uint32(cond&15) << 12) | (uint32(r&31) << 5) | uint32(rt&31) + + case 19: /* CCMN cond, (Rm|uimm5),Rn, uimm4 -> ccmn Rn,Rm,uimm4,cond */ + nzcv := int(p.To.Offset) + + cond := SpecialOperand(p.From.Offset) + if cond < SPOP_EQ || cond > SPOP_NV { + c.ctxt.Diag("invalid condition\n%v", p) + } else { + cond -= SPOP_EQ + } + var rf int + if p.GetFrom3().Type == obj.TYPE_REG { + o1 = c.oprrr(p, p.As) + rf = int(p.GetFrom3().Reg) /* Rm */ + } else { + o1 = c.opirr(p, p.As) + rf = int(p.GetFrom3().Offset & 0x1F) + } + + o1 |= (uint32(rf&31) << 16) | (uint32(cond&15) << 12) | (uint32(p.Reg&31) << 5) | uint32(nzcv) + + case 20: /* movT R,O(R) -> strT */ + v := int32(c.regoff(&p.To)) + sz := int32(1 << uint(movesize(p.As))) + + r := int(p.To.Reg) + if r == 0 { + r = int(o.param) + } + if v < 0 || v%sz != 0 { /* unscaled 9-bit signed */ + o1 = c.olsr9s(p, int32(c.opstr(p, p.As)), v, r, int(p.From.Reg)) + } else { + v = int32(c.offsetshift(p, int64(v), int(o.a4))) + o1 = c.olsr12u(p, int32(c.opstr(p, p.As)), v, r, int(p.From.Reg)) + } + + case 21: /* movT O(R),R -> ldrT */ + v := int32(c.regoff(&p.From)) + sz := int32(1 << uint(movesize(p.As))) + + r := int(p.From.Reg) + if r == 0 { + r = int(o.param) + } + if v < 0 || v%sz != 0 { /* unscaled 9-bit signed */ + o1 = c.olsr9s(p, int32(c.opldr(p, p.As)), v, r, int(p.To.Reg)) + } else { + v = int32(c.offsetshift(p, int64(v), int(o.a1))) + //print("offset=%lld v=%ld a1=%d\n", instoffset, v, o->a1); + o1 = c.olsr12u(p, int32(c.opldr(p, p.As)), v, r, int(p.To.Reg)) + } + + case 22: /* movT (R)O!,R; movT O(R)!, R -> ldrT */ + if p.From.Reg != REGSP && p.From.Reg == p.To.Reg { + c.ctxt.Diag("constrained unpredictable behavior: %v", p) + } + + v := int32(p.From.Offset) + + if v < -256 || v > 255 { + c.ctxt.Diag("offset out of range [-256,255]: %v", p) + } + o1 = c.opldr(p, p.As) + if o.scond == C_XPOST { + o1 |= 1 << 10 + } else { + o1 |= 3 << 
10 + } + o1 |= ((uint32(v) & 0x1FF) << 12) | (uint32(p.From.Reg&31) << 5) | uint32(p.To.Reg&31) + + case 23: /* movT R,(R)O!; movT O(R)!, R -> strT */ + if p.To.Reg != REGSP && p.From.Reg == p.To.Reg { + c.ctxt.Diag("constrained unpredictable behavior: %v", p) + } + + v := int32(p.To.Offset) + + if v < -256 || v > 255 { + c.ctxt.Diag("offset out of range [-256,255]: %v", p) + } + o1 = c.opstr(p, p.As) + if o.scond == C_XPOST { + o1 |= 1 << 10 + } else { + o1 |= 3 << 10 + } + o1 |= ((uint32(v) & 0x1FF) << 12) | (uint32(p.To.Reg&31) << 5) | uint32(p.From.Reg&31) + + case 24: /* mov/mvn Rs,Rd -> add $0,Rs,Rd or orr Rs,ZR,Rd */ + rf := int(p.From.Reg) + rt := int(p.To.Reg) + s := rf == REGSP || rt == REGSP + if p.As == AMVN || p.As == AMVNW { + if s { + c.ctxt.Diag("illegal SP reference\n%v", p) + } + o1 = c.oprrr(p, p.As) + o1 |= (uint32(rf&31) << 16) | (REGZERO & 31 << 5) | uint32(rt&31) + } else if s { + o1 = c.opirr(p, p.As) + o1 |= (uint32(rf&31) << 5) | uint32(rt&31) + } else { + o1 = c.oprrr(p, p.As) + o1 |= (uint32(rf&31) << 16) | (REGZERO & 31 << 5) | uint32(rt&31) + } + + case 25: /* negX Rs, Rd -> subX Rs<<0, ZR, Rd */ + o1 = c.oprrr(p, p.As) + + rf := int(p.From.Reg) + if rf == C_NONE { + rf = int(p.To.Reg) + } + rt := int(p.To.Reg) + o1 |= (uint32(rf&31) << 16) | (REGZERO & 31 << 5) | uint32(rt&31) + + case 27: /* op Rm<= REG_LSL && p.From.Reg < REG_ARNG) { + amount := (p.From.Reg >> 5) & 7 + if amount > 4 { + c.ctxt.Diag("shift amount out of range 0 to 4: %v", p) + } + o1 = c.opxrrr(p, p.As, true) + o1 |= c.encRegShiftOrExt(p, &p.From, p.From.Reg) /* includes reg, op, etc */ + } else { + o1 = c.opxrrr(p, p.As, false) + o1 |= uint32(p.From.Reg&31) << 16 + } + rt := int(p.To.Reg) + if p.To.Type == obj.TYPE_NONE { + rt = REGZERO + } + r := int(p.Reg) + if r == 0 { + r = rt + } + o1 |= (uint32(r&31) << 5) | uint32(rt&31) + + case 28: /* logop $vcon, [R], R (64 bit literal) */ + if p.Reg == REGTMP { + c.ctxt.Diag("cannot use REGTMP as source: %v\n", p) + } + o 
:= uint32(0) + num := uint8(0) + cls := oclass(&p.From) + if isANDWop(p.As) { + if !cmp(C_LCON, cls) { + c.ctxt.Diag("illegal combination: %v", p) + } + num = c.omovlconst(AMOVW, p, &p.From, REGTMP, os[:]) + } else { + num = c.omovlconst(AMOVD, p, &p.From, REGTMP, os[:]) + } + + if num == 0 { + c.ctxt.Diag("invalid constant: %v", p) + } + rt := int(p.To.Reg) + if p.To.Type == obj.TYPE_NONE { + rt = REGZERO + } + r := int(p.Reg) + if r == 0 { + r = rt + } + o = c.oprrr(p, p.As) + o |= REGTMP & 31 << 16 /* shift is 0 */ + o |= uint32(r&31) << 5 + o |= uint32(rt & 31) + + os[num] = o + o1 = os[0] + o2 = os[1] + o3 = os[2] + o4 = os[3] + o5 = os[4] + + case 29: /* op Rn, Rd */ + fc := c.aclass(&p.From) + tc := c.aclass(&p.To) + if (p.As == AFMOVD || p.As == AFMOVS) && (fc == C_REG || fc == C_ZREG || tc == C_REG) { + // FMOV Rx, Fy or FMOV Fy, Rx + o1 = FPCVTI(0, 0, 0, 0, 6) + if p.As == AFMOVD { + o1 |= 1<<31 | 1<<22 // 64-bit + } + if fc == C_REG || fc == C_ZREG { + o1 |= 1 << 16 // FMOV Rx, Fy + } + } else { + o1 = c.oprrr(p, p.As) + } + o1 |= uint32(p.From.Reg&31)<<5 | uint32(p.To.Reg&31) + + case 30: /* movT R,L(R) -> strT */ + // if offset L can be split into hi+lo, and both fit into instructions, do + // add $hi, R, Rtmp + // str R, lo(Rtmp) + // otherwise, use constant pool + // mov $L, Rtmp (from constant pool) + // str R, (R+Rtmp) + s := movesize(o.as) + if s < 0 { + c.ctxt.Diag("unexpected long move, op %v tab %v\n%v", p.As, o.as, p) + } + + r := int(p.To.Reg) + if r == 0 { + r = int(o.param) + } + + v := int32(c.regoff(&p.To)) + var hi int32 + if v < 0 || (v&((1<>uint(s))&0xFFF, REGTMP, int(p.From.Reg)) + break + + storeusepool: + if r == REGTMP || p.From.Reg == REGTMP { + c.ctxt.Diag("REGTMP used in large offset store: %v", p) + } + o1 = c.omovlit(AMOVD, p, &p.To, REGTMP) + o2 = c.olsxrr(p, int32(c.opstrr(p, p.As, false)), int(p.From.Reg), r, REGTMP) + + case 31: /* movT L(R), R -> ldrT */ + // if offset L can be split into hi+lo, and both fit into 
instructions, do + // add $hi, R, Rtmp + // ldr lo(Rtmp), R + // otherwise, use constant pool + // mov $L, Rtmp (from constant pool) + // ldr (R+Rtmp), R + s := movesize(o.as) + if s < 0 { + c.ctxt.Diag("unexpected long move, op %v tab %v\n%v", p.As, o.as, p) + } + + r := int(p.From.Reg) + if r == 0 { + r = int(o.param) + } + + v := int32(c.regoff(&p.From)) + var hi int32 + if v < 0 || (v&((1<>uint(s))&0xFFF, REGTMP, int(p.To.Reg)) + break + + loadusepool: + if r == REGTMP || p.From.Reg == REGTMP { + c.ctxt.Diag("REGTMP used in large offset load: %v", p) + } + o1 = c.omovlit(AMOVD, p, &p.From, REGTMP) + o2 = c.olsxrr(p, int32(c.opldrr(p, p.As, false)), int(p.To.Reg), r, REGTMP) + + case 32: /* mov $con, R -> movz/movn */ + o1 = c.omovconst(p.As, p, &p.From, int(p.To.Reg)) + + case 33: /* movk $uimm16 << pos */ + o1 = c.opirr(p, p.As) + + d := p.From.Offset + if d == 0 { + c.ctxt.Diag("zero shifts cannot be handled correctly: %v", p) + } + s := movcon(d) + if s < 0 || s >= 4 { + c.ctxt.Diag("bad constant for MOVK: %#x\n%v", uint64(d), p) + } + if (o1&S64) == 0 && s >= 2 { + c.ctxt.Diag("illegal bit position\n%v", p) + } + if ((uint64(d) >> uint(s*16)) >> 16) != 0 { + c.ctxt.Diag("requires uimm16\n%v", p) + } + rt := int(p.To.Reg) + + o1 |= uint32((((d >> uint(s*16)) & 0xFFFF) << 5) | int64((uint32(s)&3)<<21) | int64(rt&31)) + + case 34: /* mov $lacon,R */ + o1 = c.omovlit(AMOVD, p, &p.From, REGTMP) + + if o1 == 0 { + break + } + o2 = c.opxrrr(p, AADD, false) + o2 |= REGTMP & 31 << 16 + o2 |= LSL0_64 + r := int(p.From.Reg) + if r == 0 { + r = int(o.param) + } + o2 |= uint32(r&31) << 5 + o2 |= uint32(p.To.Reg & 31) + + case 35: /* mov SPR,R -> mrs */ + o1 = c.oprrr(p, AMRS) + + // SysRegEnc function returns the system register encoding and accessFlags. 
+ _, v, accessFlags := SysRegEnc(p.From.Reg) + if v == 0 { + c.ctxt.Diag("illegal system register:\n%v", p) + } + if (o1 & (v &^ (3 << 19))) != 0 { + c.ctxt.Diag("MRS register value overlap\n%v", p) + } + if accessFlags&SR_READ == 0 { + c.ctxt.Diag("system register is not readable: %v", p) + } + + o1 |= v + o1 |= uint32(p.To.Reg & 31) + + case 36: /* mov R,SPR */ + o1 = c.oprrr(p, AMSR) + + // SysRegEnc function returns the system register encoding and accessFlags. + _, v, accessFlags := SysRegEnc(p.To.Reg) + if v == 0 { + c.ctxt.Diag("illegal system register:\n%v", p) + } + if (o1 & (v &^ (3 << 19))) != 0 { + c.ctxt.Diag("MSR register value overlap\n%v", p) + } + if accessFlags&SR_WRITE == 0 { + c.ctxt.Diag("system register is not writable: %v", p) + } + + o1 |= v + o1 |= uint32(p.From.Reg & 31) + + case 37: /* mov $con,PSTATEfield -> MSR [immediate] */ + if (uint64(p.From.Offset) &^ uint64(0xF)) != 0 { + c.ctxt.Diag("illegal immediate for PSTATE field\n%v", p) + } + o1 = c.opirr(p, AMSR) + o1 |= uint32((p.From.Offset & 0xF) << 8) /* Crm */ + v := uint32(0) + // PSTATEfield can be special registers and special operands. 
+ if p.To.Type == obj.TYPE_REG && p.To.Reg == REG_SPSel { + v = 0<<16 | 4<<12 | 5<<5 + } else if p.To.Type == obj.TYPE_SPECIAL { + opd := SpecialOperand(p.To.Offset) + for _, pf := range pstatefield { + if pf.opd == opd { + v = pf.enc + break + } + } + } + + if v == 0 { + c.ctxt.Diag("illegal PSTATE field for immediate move\n%v", p) + } + o1 |= v + + case 38: /* clrex [$imm] */ + o1 = c.opimm(p, p.As) + + if p.To.Type == obj.TYPE_NONE { + o1 |= 0xF << 8 + } else { + o1 |= uint32((p.To.Offset & 0xF) << 8) + } + + case 39: /* cbz R, rel */ + o1 = c.opirr(p, p.As) + + o1 |= uint32(p.From.Reg & 31) + o1 |= uint32(c.brdist(p, 0, 19, 2) << 5) + + case 40: /* tbz */ + o1 = c.opirr(p, p.As) + + v := int32(p.From.Offset) + if v < 0 || v > 63 { + c.ctxt.Diag("illegal bit number\n%v", p) + } + o1 |= ((uint32(v) & 0x20) << (31 - 5)) | ((uint32(v) & 0x1F) << 19) + o1 |= uint32(c.brdist(p, 0, 14, 2) << 5) + o1 |= uint32(p.Reg & 31) + + case 41: /* eret, nop, others with no operands */ + o1 = c.op0(p, p.As) + + case 42: /* bfm R,r,s,R */ + o1 = c.opbfm(p, p.As, int(p.From.Offset), int(p.GetFrom3().Offset), int(p.Reg), int(p.To.Reg)) + + case 43: /* bfm aliases */ + r := int(p.From.Offset) + s := int(p.GetFrom3().Offset) + rf := int(p.Reg) + rt := int(p.To.Reg) + if rf == 0 { + rf = rt + } + switch p.As { + case ABFI: + if r != 0 { + r = 64 - r + } + o1 = c.opbfm(p, ABFM, r, s-1, rf, rt) + + case ABFIW: + if r != 0 { + r = 32 - r + } + o1 = c.opbfm(p, ABFMW, r, s-1, rf, rt) + + case ABFXIL: + o1 = c.opbfm(p, ABFM, r, r+s-1, rf, rt) + + case ABFXILW: + o1 = c.opbfm(p, ABFMW, r, r+s-1, rf, rt) + + case ASBFIZ: + if r != 0 { + r = 64 - r + } + o1 = c.opbfm(p, ASBFM, r, s-1, rf, rt) + + case ASBFIZW: + if r != 0 { + r = 32 - r + } + o1 = c.opbfm(p, ASBFMW, r, s-1, rf, rt) + + case ASBFX: + o1 = c.opbfm(p, ASBFM, r, r+s-1, rf, rt) + + case ASBFXW: + o1 = c.opbfm(p, ASBFMW, r, r+s-1, rf, rt) + + case AUBFIZ: + if r != 0 { + r = 64 - r + } + o1 = c.opbfm(p, AUBFM, r, s-1, rf, rt) + + 
case AUBFIZW: + if r != 0 { + r = 32 - r + } + o1 = c.opbfm(p, AUBFMW, r, s-1, rf, rt) + + case AUBFX: + o1 = c.opbfm(p, AUBFM, r, r+s-1, rf, rt) + + case AUBFXW: + o1 = c.opbfm(p, AUBFMW, r, r+s-1, rf, rt) + + default: + c.ctxt.Diag("bad bfm alias\n%v", p) + break + } + + case 44: /* extr $b, Rn, Rm, Rd */ + o1 = c.opextr(p, p.As, int32(p.From.Offset), int(p.GetFrom3().Reg), int(p.Reg), int(p.To.Reg)) + + case 45: /* sxt/uxt[bhw] R,R; movT R,R -> sxtT R,R */ + rf := int(p.From.Reg) + + rt := int(p.To.Reg) + as := p.As + if rf == REGZERO { + as = AMOVWU /* clearer in disassembly */ + } + switch as { + case AMOVB, ASXTB: + o1 = c.opbfm(p, ASBFM, 0, 7, rf, rt) + + case AMOVH, ASXTH: + o1 = c.opbfm(p, ASBFM, 0, 15, rf, rt) + + case AMOVW, ASXTW: + o1 = c.opbfm(p, ASBFM, 0, 31, rf, rt) + + case AMOVBU, AUXTB: + o1 = c.opbfm(p, AUBFM, 0, 7, rf, rt) + + case AMOVHU, AUXTH: + o1 = c.opbfm(p, AUBFM, 0, 15, rf, rt) + + case AMOVWU: + o1 = c.oprrr(p, as) | (uint32(rf&31) << 16) | (REGZERO & 31 << 5) | uint32(rt&31) + + case AUXTW: + o1 = c.opbfm(p, AUBFM, 0, 31, rf, rt) + + case ASXTBW: + o1 = c.opbfm(p, ASBFMW, 0, 7, rf, rt) + + case ASXTHW: + o1 = c.opbfm(p, ASBFMW, 0, 15, rf, rt) + + case AUXTBW: + o1 = c.opbfm(p, AUBFMW, 0, 7, rf, rt) + + case AUXTHW: + o1 = c.opbfm(p, AUBFMW, 0, 15, rf, rt) + + default: + c.ctxt.Diag("bad sxt %v", as) + break + } + + case 46: /* cls */ + o1 = c.opbit(p, p.As) + + o1 |= uint32(p.From.Reg&31) << 5 + o1 |= uint32(p.To.Reg & 31) + + case 47: // SWPx/LDADDx/LDCLRx/LDEORx/LDORx/CASx Rs, (Rb), Rt + rs := p.From.Reg + rt := p.RegTo2 + rb := p.To.Reg + + // rt can't be sp. + if rt == REG_RSP { + c.ctxt.Diag("illegal destination register: %v\n", p) + } + if enc, ok := atomicLDADD[p.As]; ok { + // for LDADDx-like instructions, rt can't be r31 when field.enc A is 0, A bit is the 23rd bit. 
+ if (rt == REGZERO) && (enc&(1<<23) == 0) { + c.ctxt.Diag("illegal destination register: %v\n", p) + } + o1 |= enc + } else if enc, ok := atomicSWP[p.As]; ok { + o1 |= enc + } else { + c.ctxt.Diag("invalid atomic instructions: %v\n", p) + } + o1 |= uint32(rs&31)<<16 | uint32(rb&31)<<5 | uint32(rt&31) + + case 48: /* ADD $C_ADDCON2, Rm, Rd */ + // NOTE: this case does not use REGTMP. If it ever does, + // remove the NOTUSETMP flag in optab. + op := c.opirr(p, p.As) + if op&Sbit != 0 { + c.ctxt.Diag("can not break addition/subtraction when S bit is set", p) + } + rt := int(p.To.Reg) + r := int(p.Reg) + if r == 0 { + r = rt + } + o1 = c.oaddi(p, int32(op), int32(c.regoff(&p.From))&0x000fff, r, rt) + o2 = c.oaddi(p, int32(op), int32(c.regoff(&p.From))&0xfff000, rt, rt) + + case 50: /* sys/sysl */ + o1 = c.opirr(p, p.As) + + if (p.From.Offset &^ int64(SYSARG4(0x7, 0xF, 0xF, 0x7))) != 0 { + c.ctxt.Diag("illegal SYS argument\n%v", p) + } + o1 |= uint32(p.From.Offset) + if p.To.Type == obj.TYPE_REG { + o1 |= uint32(p.To.Reg & 31) + } else { + o1 |= 0x1F + } + + case 51: /* dmb */ + o1 = c.opirr(p, p.As) + + if p.From.Type == obj.TYPE_CONST { + o1 |= uint32((p.From.Offset & 0xF) << 8) + } + + case 52: /* hint */ + o1 = c.opirr(p, p.As) + + o1 |= uint32((p.From.Offset & 0x7F) << 5) + + case 53: /* and/or/eor/bic/tst/... 
$bitcon, Rn, Rd */ + a := p.As + rt := int(p.To.Reg) + if p.To.Type == obj.TYPE_NONE { + rt = REGZERO + } + r := int(p.Reg) + if r == 0 { + r = rt + } + if r == REG_RSP { + c.ctxt.Diag("illegal source register: %v", p) + break + } + mode := 64 + v := uint64(p.From.Offset) + switch p.As { + case AANDW, AORRW, AEORW, AANDSW, ATSTW: + mode = 32 + case ABIC, AORN, AEON, ABICS: + v = ^v + case ABICW, AORNW, AEONW, ABICSW: + v = ^v + mode = 32 + } + o1 = c.opirr(p, a) + o1 |= bitconEncode(v, mode) | uint32(r&31)<<5 | uint32(rt&31) + + case 54: /* floating point arith */ + o1 = c.oprrr(p, p.As) + rf := int(p.From.Reg) + rt := int(p.To.Reg) + r := int(p.Reg) + if (o1&(0x1F<<24)) == (0x1E<<24) && (o1&(1<<11)) == 0 { /* monadic */ + r = rf + rf = 0 + } else if r == 0 { + r = rt + } + o1 |= (uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31) + + case 55: /* floating-point constant */ + var rf int + o1 = 0xf<<25 | 1<<21 | 1<<12 + rf = c.chipfloat7(p.From.Val.(float64)) + if rf < 0 { + c.ctxt.Diag("invalid floating-point immediate\n%v", p) + } + if p.As == AFMOVD { + o1 |= 1 << 22 + } + o1 |= (uint32(rf&0xff) << 13) | uint32(p.To.Reg&31) + + case 56: /* floating point compare */ + o1 = c.oprrr(p, p.As) + + var rf int + if p.From.Type == obj.TYPE_FCONST { + o1 |= 8 /* zero */ + rf = 0 + } else { + rf = int(p.From.Reg) + } + rt := int(p.Reg) + o1 |= uint32(rf&31)<<16 | uint32(rt&31)<<5 + + case 57: /* floating point conditional compare */ + o1 = c.oprrr(p, p.As) + + cond := SpecialOperand(p.From.Offset) + if cond < SPOP_EQ || cond > SPOP_NV { + c.ctxt.Diag("invalid condition\n%v", p) + } else { + cond -= SPOP_EQ + } + + nzcv := int(p.To.Offset) + if nzcv&^0xF != 0 { + c.ctxt.Diag("implausible condition\n%v", p) + } + rf := int(p.Reg) + if p.GetFrom3() == nil || p.GetFrom3().Reg < REG_F0 || p.GetFrom3().Reg > REG_F31 { + c.ctxt.Diag("illegal FCCMP\n%v", p) + break + } + rt := int(p.GetFrom3().Reg) + o1 |= uint32(rf&31)<<16 | uint32(cond&15)<<12 | uint32(rt&31)<<5 | 
uint32(nzcv) + + case 58: /* ldar/ldarb/ldarh/ldaxp/ldxp/ldaxr/ldxr */ + o1 = c.opload(p, p.As) + + o1 |= 0x1F << 16 + o1 |= uint32(p.From.Reg&31) << 5 + if p.As == ALDXP || p.As == ALDXPW || p.As == ALDAXP || p.As == ALDAXPW { + if int(p.To.Reg) == int(p.To.Offset) { + c.ctxt.Diag("constrained unpredictable behavior: %v", p) + } + o1 |= uint32(p.To.Offset&31) << 10 + } else { + o1 |= 0x1F << 10 + } + o1 |= uint32(p.To.Reg & 31) + + case 59: /* stxr/stlxr/stxp/stlxp */ + s := p.RegTo2 + n := p.To.Reg + t := p.From.Reg + if isSTLXRop(p.As) { + if s == t || (s == n && n != REGSP) { + c.ctxt.Diag("constrained unpredictable behavior: %v", p) + } + } else if isSTXPop(p.As) { + t2 := int16(p.From.Offset) + if (s == t || s == t2) || (s == n && n != REGSP) { + c.ctxt.Diag("constrained unpredictable behavior: %v", p) + } + } + if s == REG_RSP { + c.ctxt.Diag("illegal destination register: %v\n", p) + } + o1 = c.opstore(p, p.As) + + if p.RegTo2 != obj.REG_NONE { + o1 |= uint32(p.RegTo2&31) << 16 + } else { + o1 |= 0x1F << 16 + } + if isSTXPop(p.As) { + o1 |= uint32(p.From.Offset&31) << 10 + } + o1 |= uint32(p.To.Reg&31)<<5 | uint32(p.From.Reg&31) + + case 60: /* adrp label,r */ + d := c.brdist(p, 12, 21, 0) + + o1 = ADR(1, uint32(d), uint32(p.To.Reg)) + + case 61: /* adr label, r */ + d := c.brdist(p, 0, 21, 0) + + o1 = ADR(0, uint32(d), uint32(p.To.Reg)) + + case 62: /* op $movcon, [R], R -> mov $movcon, REGTMP + op REGTMP, [R], R */ + if p.Reg == REGTMP { + c.ctxt.Diag("cannot use REGTMP as source: %v\n", p) + } + if p.To.Reg == REG_RSP && isADDSop(p.As) { + c.ctxt.Diag("illegal destination register: %v\n", p) + } + lsl0 := LSL0_64 + if isADDWop(p.As) || isANDWop(p.As) { + o1 = c.omovconst(AMOVW, p, &p.From, REGTMP) + lsl0 = LSL0_32 + } else { + o1 = c.omovconst(AMOVD, p, &p.From, REGTMP) + } + + rt := int(p.To.Reg) + if p.To.Type == obj.TYPE_NONE { + rt = REGZERO + } + r := int(p.Reg) + if r == 0 { + r = rt + } + if p.To.Reg == REGSP || r == REGSP { + o2 = c.opxrrr(p, 
p.As, false) + o2 |= REGTMP & 31 << 16 + o2 |= uint32(lsl0) + } else { + o2 = c.oprrr(p, p.As) + o2 |= REGTMP & 31 << 16 /* shift is 0 */ + } + o2 |= uint32(r&31) << 5 + o2 |= uint32(rt & 31) + + /* reloc ops */ + case 64: /* movT R,addr -> adrp + movT R, (REGTMP) */ + if p.From.Reg == REGTMP { + c.ctxt.Diag("cannot use REGTMP as source: %v\n", p) + } + o1 = ADR(1, 0, REGTMP) + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 8 + rel.Sym = p.To.Sym + rel.Add = p.To.Offset + // For unaligned access, fall back to adrp + add + movT R, (REGTMP). + if o.size(c.ctxt, p) != 8 { + o2 = c.opirr(p, AADD) | REGTMP&31<<5 | REGTMP&31 + o3 = c.olsr12u(p, int32(c.opstr(p, p.As)), 0, REGTMP, int(p.From.Reg)) + rel.Type = objabi.R_ADDRARM64 + break + } + o2 = c.olsr12u(p, int32(c.opstr(p, p.As)), 0, REGTMP, int(p.From.Reg)) + rel.Type = c.addrRelocType(p) + + case 65: /* movT addr,R -> adrp + movT (REGTMP), R */ + o1 = ADR(1, 0, REGTMP) + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 8 + rel.Sym = p.From.Sym + rel.Add = p.From.Offset + // For unaligned access, fall back to adrp + add + movT (REGTMP), R. + if o.size(c.ctxt, p) != 8 { + o2 = c.opirr(p, AADD) | REGTMP&31<<5 | REGTMP&31 + o3 = c.olsr12u(p, int32(c.opldr(p, p.As)), 0, REGTMP, int(p.To.Reg)) + rel.Type = objabi.R_ADDRARM64 + break + } + o2 = c.olsr12u(p, int32(c.opldr(p, p.As)), 0, REGTMP, int(p.To.Reg)) + rel.Type = c.addrRelocType(p) + + case 66: /* ldp O(R)!, (r1, r2); ldp (R)O!, (r1, r2) */ + v := int32(c.regoff(&p.From)) + r := int(p.From.Reg) + if r == obj.REG_NONE { + r = int(o.param) + } + if r == obj.REG_NONE { + c.ctxt.Diag("invalid ldp source: %v\n", p) + } + o1 |= c.opldpstp(p, o, v, uint32(r), uint32(p.To.Reg), uint32(p.To.Offset), 1) + + case 67: /* stp (r1, r2), O(R)!; stp (r1, r2), (R)O! 
*/ + r := int(p.To.Reg) + if r == obj.REG_NONE { + r = int(o.param) + } + if r == obj.REG_NONE { + c.ctxt.Diag("invalid stp destination: %v\n", p) + } + v := int32(c.regoff(&p.To)) + o1 = c.opldpstp(p, o, v, uint32(r), uint32(p.From.Reg), uint32(p.From.Offset), 0) + + case 68: /* movT $vconaddr(SB), reg -> adrp + add + reloc */ + // NOTE: this case does not use REGTMP. If it ever does, + // remove the NOTUSETMP flag in optab. + if p.As == AMOVW { + c.ctxt.Diag("invalid load of 32-bit address: %v", p) + } + o1 = ADR(1, 0, uint32(p.To.Reg)) + o2 = c.opirr(p, AADD) | uint32(p.To.Reg&31)<<5 | uint32(p.To.Reg&31) + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 8 + rel.Sym = p.From.Sym + rel.Add = p.From.Offset + rel.Type = objabi.R_ADDRARM64 + + case 69: /* LE model movd $tlsvar, reg -> movz reg, 0 + reloc */ + o1 = c.opirr(p, AMOVZ) + o1 |= uint32(p.To.Reg & 31) + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 4 + rel.Sym = p.From.Sym + rel.Type = objabi.R_ARM64_TLS_LE + if p.From.Offset != 0 { + c.ctxt.Diag("invalid offset on MOVW $tlsvar") + } + + case 70: /* IE model movd $tlsvar, reg -> adrp REGTMP, 0; ldr reg, [REGTMP, #0] + relocs */ + o1 = ADR(1, 0, REGTMP) + o2 = c.olsr12u(p, int32(c.opldr(p, AMOVD)), 0, REGTMP, int(p.To.Reg)) + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 8 + rel.Sym = p.From.Sym + rel.Add = 0 + rel.Type = objabi.R_ARM64_TLS_IE + if p.From.Offset != 0 { + c.ctxt.Diag("invalid offset on MOVW $tlsvar") + } + + case 71: /* movd sym@GOT, reg -> adrp REGTMP, #0; ldr reg, [REGTMP, #0] + relocs */ + o1 = ADR(1, 0, REGTMP) + o2 = c.olsr12u(p, int32(c.opldr(p, AMOVD)), 0, REGTMP, int(p.To.Reg)) + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 8 + rel.Sym = p.From.Sym + rel.Add = 0 + rel.Type = objabi.R_ARM64_GOTPCREL + + case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls/vbit/vbsl/vcmtst/vsub/vbif/vuzip1/vuzip2/vrax1 Vm., Vn., Vd. 
*/ + af := int((p.From.Reg >> 5) & 15) + af3 := int((p.Reg >> 5) & 15) + at := int((p.To.Reg >> 5) & 15) + if af != af3 || af != at { + c.ctxt.Diag("operand mismatch: %v", p) + break + } + o1 = c.oprrr(p, p.As) + rf := int((p.From.Reg) & 31) + rt := int((p.To.Reg) & 31) + r := int((p.Reg) & 31) + + Q := 0 + size := 0 + switch af { + case ARNG_16B: + Q = 1 + size = 0 + case ARNG_2D: + Q = 1 + size = 3 + case ARNG_2S: + Q = 0 + size = 2 + case ARNG_4H: + Q = 0 + size = 1 + case ARNG_4S: + Q = 1 + size = 2 + case ARNG_8B: + Q = 0 + size = 0 + case ARNG_8H: + Q = 1 + size = 1 + default: + c.ctxt.Diag("invalid arrangement: %v", p) + } + + switch p.As { + case AVORR, AVAND, AVEOR, AVBIT, AVBSL, AVBIF: + if af != ARNG_16B && af != ARNG_8B { + c.ctxt.Diag("invalid arrangement: %v", p) + } + case AVFMLA, AVFMLS: + if af != ARNG_2D && af != ARNG_2S && af != ARNG_4S { + c.ctxt.Diag("invalid arrangement: %v", p) + } + case AVUMAX, AVUMIN: + if af == ARNG_2D { + c.ctxt.Diag("invalid arrangement: %v", p) + } + } + switch p.As { + case AVAND, AVEOR: + size = 0 + case AVBSL: + size = 1 + case AVORR, AVBIT, AVBIF: + size = 2 + case AVFMLA, AVFMLS: + if af == ARNG_2D { + size = 1 + } else { + size = 0 + } + case AVRAX1: + if af != ARNG_2D { + c.ctxt.Diag("invalid arrangement: %v", p) + } + size = 0 + Q = 0 + } + + o1 |= (uint32(Q&1) << 30) | (uint32(size&3) << 22) | (uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31) + + case 73: /* vmov V.[index], R */ + rf := int(p.From.Reg) + rt := int(p.To.Reg) + imm5 := 0 + o1 = 7<<25 | 0xf<<10 + index := int(p.From.Index) + switch (p.From.Reg >> 5) & 15 { + case ARNG_B: + c.checkindex(p, index, 15) + imm5 |= 1 + imm5 |= index << 1 + case ARNG_H: + c.checkindex(p, index, 7) + imm5 |= 2 + imm5 |= index << 2 + case ARNG_S: + c.checkindex(p, index, 3) + imm5 |= 4 + imm5 |= index << 3 + case ARNG_D: + c.checkindex(p, index, 1) + imm5 |= 8 + imm5 |= index << 4 + o1 |= 1 << 30 + default: + c.ctxt.Diag("invalid arrangement: %v", p) + } + o1 |= 
(uint32(imm5&0x1f) << 16) | (uint32(rf&31) << 5) | uint32(rt&31) + + case 74: + // add $O, R, Rtmp or sub $O, R, Rtmp + // ldp (Rtmp), (R1, R2) + r := int(p.From.Reg) + if r == obj.REG_NONE { + r = int(o.param) + } + if r == obj.REG_NONE { + c.ctxt.Diag("invalid ldp source: %v", p) + } + v := int32(c.regoff(&p.From)) + + if v > 0 { + if v > 4095 { + c.ctxt.Diag("offset out of range: %v", p) + } + o1 = c.oaddi(p, int32(c.opirr(p, AADD)), v, r, REGTMP) + } + if v < 0 { + if v < -4095 { + c.ctxt.Diag("offset out of range: %v", p) + } + o1 = c.oaddi(p, int32(c.opirr(p, ASUB)), -v, r, REGTMP) + } + o2 |= c.opldpstp(p, o, 0, uint32(REGTMP), uint32(p.To.Reg), uint32(p.To.Offset), 1) + + case 75: + // mov $L, Rtmp (from constant pool) + // add Rtmp, R, Rtmp + // ldp (Rtmp), (R1, R2) + r := int(p.From.Reg) + if r == REGTMP { + c.ctxt.Diag("REGTMP used in large offset load: %v", p) + } + if r == obj.REG_NONE { + r = int(o.param) + } + if r == obj.REG_NONE { + c.ctxt.Diag("invalid ldp source: %v", p) + } + o1 = c.omovlit(AMOVD, p, &p.From, REGTMP) + o2 = c.opxrrr(p, AADD, false) + o2 |= (REGTMP & 31) << 16 + o2 |= uint32(r&31) << 5 + o2 |= uint32(REGTMP & 31) + o3 |= c.opldpstp(p, o, 0, uint32(REGTMP), uint32(p.To.Reg), uint32(p.To.Offset), 1) + + case 76: + // add $O, R, Rtmp or sub $O, R, Rtmp + // stp (R1, R2), (Rtmp) + if p.From.Reg == REGTMP || p.From.Offset == REGTMP { + c.ctxt.Diag("cannot use REGTMP as source: %v", p) + } + r := int(p.To.Reg) + if r == obj.REG_NONE { + r = int(o.param) + } + if r == obj.REG_NONE { + c.ctxt.Diag("invalid stp destination: %v", p) + } + v := int32(c.regoff(&p.To)) + if v > 0 { + if v > 4095 { + c.ctxt.Diag("offset out of range: %v", p) + } + o1 = c.oaddi(p, int32(c.opirr(p, AADD)), v, r, REGTMP) + } + if v < 0 { + if v < -4095 { + c.ctxt.Diag("offset out of range: %v", p) + } + o1 = c.oaddi(p, int32(c.opirr(p, ASUB)), -v, r, REGTMP) + } + o2 |= c.opldpstp(p, o, 0, uint32(REGTMP), uint32(p.From.Reg), uint32(p.From.Offset), 0) + + case 77: 
+ // mov $L, Rtmp (from constant pool) + // add Rtmp, R, Rtmp + // stp (R1, R2), (Rtmp) + r := int(p.To.Reg) + if r == REGTMP || p.From.Reg == REGTMP || p.From.Offset == REGTMP { + c.ctxt.Diag("REGTMP used in large offset store: %v", p) + } + if r == obj.REG_NONE { + r = int(o.param) + } + if r == obj.REG_NONE { + c.ctxt.Diag("invalid stp destination: %v", p) + } + o1 = c.omovlit(AMOVD, p, &p.To, REGTMP) + o2 = c.opxrrr(p, AADD, false) + o2 |= REGTMP & 31 << 16 + o2 |= uint32(r&31) << 5 + o2 |= uint32(REGTMP & 31) + o3 |= c.opldpstp(p, o, 0, uint32(REGTMP), uint32(p.From.Reg), uint32(p.From.Offset), 0) + + case 78: /* vmov R, V.[index] */ + rf := int(p.From.Reg) + rt := int(p.To.Reg) + imm5 := 0 + o1 = 1<<30 | 7<<25 | 7<<10 + index := int(p.To.Index) + switch (p.To.Reg >> 5) & 15 { + case ARNG_B: + c.checkindex(p, index, 15) + imm5 |= 1 + imm5 |= index << 1 + case ARNG_H: + c.checkindex(p, index, 7) + imm5 |= 2 + imm5 |= index << 2 + case ARNG_S: + c.checkindex(p, index, 3) + imm5 |= 4 + imm5 |= index << 3 + case ARNG_D: + c.checkindex(p, index, 1) + imm5 |= 8 + imm5 |= index << 4 + default: + c.ctxt.Diag("invalid arrangement: %v", p) + } + o1 |= (uint32(imm5&0x1f) << 16) | (uint32(rf&31) << 5) | uint32(rt&31) + + case 79: /* vdup Vn.[index], Vd. 
*/ + rf := int(p.From.Reg) + rt := int(p.To.Reg) + o1 = 7<<25 | 1<<10 + var imm5, Q int + index := int(p.From.Index) + switch (p.To.Reg >> 5) & 15 { + case ARNG_16B: + c.checkindex(p, index, 15) + Q = 1 + imm5 = 1 + imm5 |= index << 1 + case ARNG_2D: + c.checkindex(p, index, 1) + Q = 1 + imm5 = 8 + imm5 |= index << 4 + case ARNG_2S: + c.checkindex(p, index, 3) + Q = 0 + imm5 = 4 + imm5 |= index << 3 + case ARNG_4H: + c.checkindex(p, index, 7) + Q = 0 + imm5 = 2 + imm5 |= index << 2 + case ARNG_4S: + c.checkindex(p, index, 3) + Q = 1 + imm5 = 4 + imm5 |= index << 3 + case ARNG_8B: + c.checkindex(p, index, 15) + Q = 0 + imm5 = 1 + imm5 |= index << 1 + case ARNG_8H: + c.checkindex(p, index, 7) + Q = 1 + imm5 = 2 + imm5 |= index << 2 + default: + c.ctxt.Diag("invalid arrangement: %v", p) + } + o1 |= (uint32(Q&1) << 30) | (uint32(imm5&0x1f) << 16) + o1 |= (uint32(rf&31) << 5) | uint32(rt&31) + + case 80: /* vmov/vdup V.[index], Vn */ + rf := int(p.From.Reg) + rt := int(p.To.Reg) + imm5 := 0 + index := int(p.From.Index) + switch p.As { + case AVMOV, AVDUP: + o1 = 1<<30 | 15<<25 | 1<<10 + switch (p.From.Reg >> 5) & 15 { + case ARNG_B: + c.checkindex(p, index, 15) + imm5 |= 1 + imm5 |= index << 1 + case ARNG_H: + c.checkindex(p, index, 7) + imm5 |= 2 + imm5 |= index << 2 + case ARNG_S: + c.checkindex(p, index, 3) + imm5 |= 4 + imm5 |= index << 3 + case ARNG_D: + c.checkindex(p, index, 1) + imm5 |= 8 + imm5 |= index << 4 + default: + c.ctxt.Diag("invalid arrangement: %v", p) + } + default: + c.ctxt.Diag("unsupported op %v", p.As) + } + o1 |= (uint32(imm5&0x1f) << 16) | (uint32(rf&31) << 5) | uint32(rt&31) + + case 81: /* vld[1-4]|vld[1-4]r (Rn), [Vt1., Vt2., ...] 
*/ + c.checkoffset(p, p.As) + r := int(p.From.Reg) + o1 = c.oprrr(p, p.As) + if o.scond == C_XPOST { + o1 |= 1 << 23 + if p.From.Index == 0 { + // immediate offset variant + o1 |= 0x1f << 16 + } else { + // register offset variant + if isRegShiftOrExt(&p.From) { + c.ctxt.Diag("invalid extended register op: %v\n", p) + } + o1 |= uint32(p.From.Index&0x1f) << 16 + } + } + o1 |= uint32(p.To.Offset) + // cmd/asm/internal/arch/arm64.go:ARM64RegisterListOffset + // add opcode(bit 12-15) for vld1, mask it off if it's not vld1 + o1 = c.maskOpvldvst(p, o1) + o1 |= uint32(r&31) << 5 + + case 82: /* vmov/vdup Rn, Vd. */ + rf := int(p.From.Reg) + rt := int(p.To.Reg) + o1 = 7<<25 | 3<<10 + var imm5, Q uint32 + switch (p.To.Reg >> 5) & 15 { + case ARNG_16B: + Q = 1 + imm5 = 1 + case ARNG_2D: + Q = 1 + imm5 = 8 + case ARNG_2S: + Q = 0 + imm5 = 4 + case ARNG_4H: + Q = 0 + imm5 = 2 + case ARNG_4S: + Q = 1 + imm5 = 4 + case ARNG_8B: + Q = 0 + imm5 = 1 + case ARNG_8H: + Q = 1 + imm5 = 2 + default: + c.ctxt.Diag("invalid arrangement: %v\n", p) + } + o1 |= (Q & 1 << 30) | (imm5 & 0x1f << 16) + o1 |= (uint32(rf&31) << 5) | uint32(rt&31) + + case 83: /* vmov Vn., Vd. 
*/ + af := int((p.From.Reg >> 5) & 15) + at := int((p.To.Reg >> 5) & 15) + if af != at { + c.ctxt.Diag("invalid arrangement: %v\n", p) + } + o1 = c.oprrr(p, p.As) + rf := int((p.From.Reg) & 31) + rt := int((p.To.Reg) & 31) + + var Q, size uint32 + switch af { + case ARNG_8B: + Q = 0 + size = 0 + case ARNG_16B: + Q = 1 + size = 0 + case ARNG_4H: + Q = 0 + size = 1 + case ARNG_8H: + Q = 1 + size = 1 + case ARNG_2S: + Q = 0 + size = 2 + case ARNG_4S: + Q = 1 + size = 2 + default: + c.ctxt.Diag("invalid arrangement: %v\n", p) + } + + if (p.As == AVMOV || p.As == AVRBIT || p.As == AVCNT) && (af != ARNG_16B && af != ARNG_8B) { + c.ctxt.Diag("invalid arrangement: %v", p) + } + + if p.As == AVREV32 && (af == ARNG_2S || af == ARNG_4S) { + c.ctxt.Diag("invalid arrangement: %v", p) + } + + if p.As == AVREV16 && af != ARNG_8B && af != ARNG_16B { + c.ctxt.Diag("invalid arrangement: %v", p) + } + + if p.As == AVMOV { + o1 |= uint32(rf&31) << 16 + } + + if p.As == AVRBIT { + size = 1 + } + + o1 |= (Q&1)<<30 | (size&3)<<22 | uint32(rf&31)<<5 | uint32(rt&31) + + case 84: /* vst[1-4] [Vt1., Vt2., ...], (Rn) */ + c.checkoffset(p, p.As) + r := int(p.To.Reg) + o1 = 3 << 26 + if o.scond == C_XPOST { + o1 |= 1 << 23 + if p.To.Index == 0 { + // immediate offset variant + o1 |= 0x1f << 16 + } else { + // register offset variant + if isRegShiftOrExt(&p.To) { + c.ctxt.Diag("invalid extended register: %v\n", p) + } + o1 |= uint32(p.To.Index&31) << 16 + } + } + o1 |= uint32(p.From.Offset) + // cmd/asm/internal/arch/arm64.go:ARM64RegisterListOffset + // add opcode(bit 12-15) for vst1, mask it off if it's not vst1 + o1 = c.maskOpvldvst(p, o1) + o1 |= uint32(r&31) << 5 + + case 85: /* vaddv/vuaddlv Vn., Vd*/ + af := int((p.From.Reg >> 5) & 15) + o1 = c.oprrr(p, p.As) + rf := int((p.From.Reg) & 31) + rt := int((p.To.Reg) & 31) + Q := 0 + size := 0 + switch af { + case ARNG_8B: + Q = 0 + size = 0 + case ARNG_16B: + Q = 1 + size = 0 + case ARNG_4H: + Q = 0 + size = 1 + case ARNG_8H: + Q = 1 + size = 
1 + case ARNG_4S: + Q = 1 + size = 2 + default: + c.ctxt.Diag("invalid arrangement: %v\n", p) + } + o1 |= (uint32(Q&1) << 30) | (uint32(size&3) << 22) | (uint32(rf&31) << 5) | uint32(rt&31) + + case 86: /* vmovi $imm8, Vd.*/ + at := int((p.To.Reg >> 5) & 15) + r := int(p.From.Offset) + if r > 255 || r < 0 { + c.ctxt.Diag("immediate constant out of range: %v\n", p) + } + rt := int((p.To.Reg) & 31) + Q := 0 + switch at { + case ARNG_8B: + Q = 0 + case ARNG_16B: + Q = 1 + default: + c.ctxt.Diag("invalid arrangement: %v\n", p) + } + o1 = 0xf<<24 | 0xe<<12 | 1<<10 + o1 |= (uint32(Q&1) << 30) | (uint32((r>>5)&7) << 16) | (uint32(r&0x1f) << 5) | uint32(rt&31) + + case 87: /* stp (r,r), addr(SB) -> adrp + add + stp */ + if p.From.Reg == REGTMP || p.From.Offset == REGTMP { + c.ctxt.Diag("cannot use REGTMP as source: %v", p) + } + o1 = ADR(1, 0, REGTMP) + o2 = c.opirr(p, AADD) | REGTMP&31<<5 | REGTMP&31 + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 8 + rel.Sym = p.To.Sym + rel.Add = p.To.Offset + rel.Type = objabi.R_ADDRARM64 + o3 |= c.opldpstp(p, o, 0, uint32(REGTMP), uint32(p.From.Reg), uint32(p.From.Offset), 0) + + case 88: /* ldp addr(SB), (r,r) -> adrp + add + ldp */ + o1 = ADR(1, 0, REGTMP) + o2 = c.opirr(p, AADD) | REGTMP&31<<5 | REGTMP&31 + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 8 + rel.Sym = p.From.Sym + rel.Add = p.From.Offset + rel.Type = objabi.R_ADDRARM64 + o3 |= c.opldpstp(p, o, 0, uint32(REGTMP), uint32(p.To.Reg), uint32(p.To.Offset), 1) + + case 89: /* vadd/vsub Vm, Vn, Vd */ + switch p.As { + case AVADD: + o1 = 5<<28 | 7<<25 | 7<<21 | 1<<15 | 1<<10 + + case AVSUB: + o1 = 7<<28 | 7<<25 | 7<<21 | 1<<15 | 1<<10 + + default: + c.ctxt.Diag("bad opcode: %v\n", p) + break + } + + rf := int(p.From.Reg) + rt := int(p.To.Reg) + r := int(p.Reg) + if r == 0 { + r = rt + } + o1 |= (uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31) + + // This is supposed to be something that stops execution. 
+ // It's not supposed to be reached, ever, but if it is, we'd + // like to be able to tell how we got there. Assemble as + // 0xbea71700 which is guaranteed to raise undefined instruction + // exception. + case 90: + o1 = 0xbea71700 + + case 91: /* prfm imm(Rn), */ + imm := uint32(p.From.Offset) + r := p.From.Reg + var v uint32 + var ok bool + if p.To.Type == obj.TYPE_CONST { + v = uint32(p.To.Offset) + ok = v <= 31 + } else { + v, ok = prfopfield[SpecialOperand(p.To.Offset)] + } + if !ok { + c.ctxt.Diag("illegal prefetch operation:\n%v", p) + } + + o1 = c.opirr(p, p.As) + o1 |= (uint32(r&31) << 5) | (uint32((imm>>3)&0xfff) << 10) | (uint32(v & 31)) + + case 92: /* vmov Vn.[index], Vd.[index] */ + rf := int(p.From.Reg) + rt := int(p.To.Reg) + imm4 := 0 + imm5 := 0 + o1 = 3<<29 | 7<<25 | 1<<10 + index1 := int(p.To.Index) + index2 := int(p.From.Index) + if ((p.To.Reg >> 5) & 15) != ((p.From.Reg >> 5) & 15) { + c.ctxt.Diag("operand mismatch: %v", p) + } + switch (p.To.Reg >> 5) & 15 { + case ARNG_B: + c.checkindex(p, index1, 15) + c.checkindex(p, index2, 15) + imm5 |= 1 + imm5 |= index1 << 1 + imm4 |= index2 + case ARNG_H: + c.checkindex(p, index1, 7) + c.checkindex(p, index2, 7) + imm5 |= 2 + imm5 |= index1 << 2 + imm4 |= index2 << 1 + case ARNG_S: + c.checkindex(p, index1, 3) + c.checkindex(p, index2, 3) + imm5 |= 4 + imm5 |= index1 << 3 + imm4 |= index2 << 2 + case ARNG_D: + c.checkindex(p, index1, 1) + c.checkindex(p, index2, 1) + imm5 |= 8 + imm5 |= index1 << 4 + imm4 |= index2 << 3 + default: + c.ctxt.Diag("invalid arrangement: %v", p) + } + o1 |= (uint32(imm5&0x1f) << 16) | (uint32(imm4&0xf) << 11) | (uint32(rf&31) << 5) | uint32(rt&31) + + case 93: /* vpmull{2} Vm., Vn., Vd. 
*/ + af := uint8((p.From.Reg >> 5) & 15) + at := uint8((p.To.Reg >> 5) & 15) + a := uint8((p.Reg >> 5) & 15) + if af != a { + c.ctxt.Diag("invalid arrangement: %v", p) + } + + var Q, size uint32 + if p.As == AVPMULL2 { + Q = 1 + } + switch pack(Q, at, af) { + case pack(0, ARNG_8H, ARNG_8B), pack(1, ARNG_8H, ARNG_16B): + size = 0 + case pack(0, ARNG_1Q, ARNG_1D), pack(1, ARNG_1Q, ARNG_2D): + size = 3 + default: + c.ctxt.Diag("operand mismatch: %v\n", p) + } + + o1 = c.oprrr(p, p.As) + rf := int((p.From.Reg) & 31) + rt := int((p.To.Reg) & 31) + r := int((p.Reg) & 31) + o1 |= ((Q & 1) << 30) | ((size & 3) << 22) | (uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31) + + case 94: /* vext $imm4, Vm., Vn., Vd. */ + af := int(((p.GetFrom3().Reg) >> 5) & 15) + at := int((p.To.Reg >> 5) & 15) + a := int((p.Reg >> 5) & 15) + index := int(p.From.Offset) + + if af != a || af != at { + c.ctxt.Diag("invalid arrangement: %v", p) + break + } + + var Q uint32 + var b int + if af == ARNG_8B { + Q = 0 + b = 7 + } else if af == ARNG_16B { + Q = 1 + b = 15 + } else { + c.ctxt.Diag("invalid arrangement, should be B8 or B16: %v", p) + break + } + + if index < 0 || index > b { + c.ctxt.Diag("illegal offset: %v", p) + } + + o1 = c.opirr(p, p.As) + rf := int((p.GetFrom3().Reg) & 31) + rt := int((p.To.Reg) & 31) + r := int((p.Reg) & 31) + + o1 |= ((Q & 1) << 30) | (uint32(r&31) << 16) | (uint32(index&15) << 11) | (uint32(rf&31) << 5) | uint32(rt&31) + + case 95: /* vushr/vshl/vsri/vsli/vusra $shift, Vn., Vd. 
*/ + at := int((p.To.Reg >> 5) & 15) + af := int((p.Reg >> 5) & 15) + shift := int(p.From.Offset) + + if af != at { + c.ctxt.Diag("invalid arrangement on op Vn., Vd.: %v", p) + } + + var Q uint32 + var imax, esize int + + switch af { + case ARNG_8B, ARNG_4H, ARNG_2S: + Q = 0 + case ARNG_16B, ARNG_8H, ARNG_4S, ARNG_2D: + Q = 1 + default: + c.ctxt.Diag("invalid arrangement on op Vn., Vd.: %v", p) + } + + switch af { + case ARNG_8B, ARNG_16B: + imax = 15 + esize = 8 + case ARNG_4H, ARNG_8H: + imax = 31 + esize = 16 + case ARNG_2S, ARNG_4S: + imax = 63 + esize = 32 + case ARNG_2D: + imax = 127 + esize = 64 + } + + imm := 0 + switch p.As { + case AVUSHR, AVSRI, AVUSRA: + imm = esize*2 - shift + if imm < esize || imm > imax { + c.ctxt.Diag("shift out of range: %v", p) + } + case AVSHL, AVSLI: + imm = esize + shift + if imm > imax { + c.ctxt.Diag("shift out of range: %v", p) + } + default: + c.ctxt.Diag("invalid instruction %v\n", p) + } + + o1 = c.opirr(p, p.As) + rt := int((p.To.Reg) & 31) + rf := int((p.Reg) & 31) + + o1 |= ((Q & 1) << 30) | (uint32(imm&0x7f) << 16) | (uint32(rf&31) << 5) | uint32(rt&31) + + case 96: /* vst1 Vt1.[index], offset(Rn) */ + af := int((p.From.Reg >> 5) & 15) + rt := int((p.From.Reg) & 31) + rf := int((p.To.Reg) & 31) + r := int(p.To.Index & 31) + index := int(p.From.Index) + offset := int32(c.regoff(&p.To)) + + if o.scond == C_XPOST { + if (p.To.Index != 0) && (offset != 0) { + c.ctxt.Diag("invalid offset: %v", p) + } + if p.To.Index == 0 && offset == 0 { + c.ctxt.Diag("invalid offset: %v", p) + } + } + + if offset != 0 { + r = 31 + } + + var Q, S, size int + var opcode uint32 + switch af { + case ARNG_B: + c.checkindex(p, index, 15) + if o.scond == C_XPOST && offset != 0 && offset != 1 { + c.ctxt.Diag("invalid offset: %v", p) + } + Q = index >> 3 + S = (index >> 2) & 1 + size = index & 3 + opcode = 0 + case ARNG_H: + c.checkindex(p, index, 7) + if o.scond == C_XPOST && offset != 0 && offset != 2 { + c.ctxt.Diag("invalid offset: %v", p) + } 
+ Q = index >> 2 + S = (index >> 1) & 1 + size = (index & 1) << 1 + opcode = 2 + case ARNG_S: + c.checkindex(p, index, 3) + if o.scond == C_XPOST && offset != 0 && offset != 4 { + c.ctxt.Diag("invalid offset: %v", p) + } + Q = index >> 1 + S = index & 1 + size = 0 + opcode = 4 + case ARNG_D: + c.checkindex(p, index, 1) + if o.scond == C_XPOST && offset != 0 && offset != 8 { + c.ctxt.Diag("invalid offset: %v", p) + } + Q = index + S = 0 + size = 1 + opcode = 4 + default: + c.ctxt.Diag("invalid arrangement: %v", p) + } + + if o.scond == C_XPOST { + o1 |= 27 << 23 + } else { + o1 |= 26 << 23 + } + + o1 |= (uint32(Q&1) << 30) | (uint32(r&31) << 16) | ((opcode & 7) << 13) | (uint32(S&1) << 12) | (uint32(size&3) << 10) | (uint32(rf&31) << 5) | uint32(rt&31) + + case 97: /* vld1 offset(Rn), vt.[index] */ + at := int((p.To.Reg >> 5) & 15) + rt := int((p.To.Reg) & 31) + rf := int((p.From.Reg) & 31) + r := int(p.From.Index & 31) + index := int(p.To.Index) + offset := int32(c.regoff(&p.From)) + + if o.scond == C_XPOST { + if (p.From.Index != 0) && (offset != 0) { + c.ctxt.Diag("invalid offset: %v", p) + } + if p.From.Index == 0 && offset == 0 { + c.ctxt.Diag("invalid offset: %v", p) + } + } + + if offset != 0 { + r = 31 + } + + Q := 0 + S := 0 + size := 0 + var opcode uint32 + switch at { + case ARNG_B: + c.checkindex(p, index, 15) + if o.scond == C_XPOST && offset != 0 && offset != 1 { + c.ctxt.Diag("invalid offset: %v", p) + } + Q = index >> 3 + S = (index >> 2) & 1 + size = index & 3 + opcode = 0 + case ARNG_H: + c.checkindex(p, index, 7) + if o.scond == C_XPOST && offset != 0 && offset != 2 { + c.ctxt.Diag("invalid offset: %v", p) + } + Q = index >> 2 + S = (index >> 1) & 1 + size = (index & 1) << 1 + opcode = 2 + case ARNG_S: + c.checkindex(p, index, 3) + if o.scond == C_XPOST && offset != 0 && offset != 4 { + c.ctxt.Diag("invalid offset: %v", p) + } + Q = index >> 1 + S = index & 1 + size = 0 + opcode = 4 + case ARNG_D: + c.checkindex(p, index, 1) + if o.scond == 
C_XPOST && offset != 0 && offset != 8 { + c.ctxt.Diag("invalid offset: %v", p) + } + Q = index + S = 0 + size = 1 + opcode = 4 + default: + c.ctxt.Diag("invalid arrangement: %v", p) + } + + if o.scond == C_XPOST { + o1 |= 110 << 21 + } else { + o1 |= 106 << 21 + } + + o1 |= (uint32(Q&1) << 30) | (uint32(r&31) << 16) | ((opcode & 7) << 13) | (uint32(S&1) << 12) | (uint32(size&3) << 10) | (uint32(rf&31) << 5) | uint32(rt&31) + + case 98: /* MOVD (Rn)(Rm.SXTW[<, [Vt1., Vt2., ...], Vd. */ + af := int((p.From.Reg >> 5) & 15) + at := int((p.To.Reg >> 5) & 15) + if af != at { + c.ctxt.Diag("invalid arrangement: %v\n", p) + } + var q, len uint32 + switch af { + case ARNG_8B: + q = 0 + case ARNG_16B: + q = 1 + default: + c.ctxt.Diag("invalid arrangement: %v", p) + } + rf := int(p.From.Reg) + rt := int(p.To.Reg) + offset := int(p.GetFrom3().Offset) + opcode := (offset >> 12) & 15 + switch opcode { + case 0x7: + len = 0 // one register + case 0xa: + len = 1 // two register + case 0x6: + len = 2 // three registers + case 0x2: + len = 3 // four registers + default: + c.ctxt.Diag("invalid register numbers in ARM64 register list: %v", p) + } + var op uint32 + switch p.As { + case AVTBL: + op = 0 + case AVTBX: + op = 1 + } + o1 = q<<30 | 0xe<<24 | len<<13 | op<<12 + o1 |= (uint32(rf&31) << 16) | uint32(offset&31)<<5 | uint32(rt&31) + + case 101: // VMOVQ $vcon1, $vcon2, Vd or VMOVD|VMOVS $vcon, Vd -> FMOVQ/FMOVD/FMOVS pool(PC), Vd: load from constant pool. 
+ o1 = c.omovlit(p.As, p, &p.From, int(p.To.Reg)) + + case 102: /* vushll, vushll2, vuxtl, vuxtl2 */ + o1 = c.opirr(p, p.As) + rf := p.Reg + af := uint8((p.Reg >> 5) & 15) + at := uint8((p.To.Reg >> 5) & 15) + shift := int(p.From.Offset) + if p.As == AVUXTL || p.As == AVUXTL2 { + rf = p.From.Reg + af = uint8((p.From.Reg >> 5) & 15) + shift = 0 + } + + Q := (o1 >> 30) & 1 + var immh, width uint8 + switch pack(Q, af, at) { + case pack(0, ARNG_8B, ARNG_8H): + immh, width = 1, 8 + case pack(1, ARNG_16B, ARNG_8H): + immh, width = 1, 8 + case pack(0, ARNG_4H, ARNG_4S): + immh, width = 2, 16 + case pack(1, ARNG_8H, ARNG_4S): + immh, width = 2, 16 + case pack(0, ARNG_2S, ARNG_2D): + immh, width = 4, 32 + case pack(1, ARNG_4S, ARNG_2D): + immh, width = 4, 32 + default: + c.ctxt.Diag("operand mismatch: %v\n", p) + } + if !(0 <= shift && shift <= int(width-1)) { + c.ctxt.Diag("shift amount out of range: %v\n", p) + } + o1 |= uint32(immh)<<19 | uint32(shift)<<16 | uint32(rf&31)<<5 | uint32(p.To.Reg&31) + + case 103: /* VEOR3/VBCAX Va.B16, Vm.B16, Vn.B16, Vd.B16 */ + ta := (p.From.Reg >> 5) & 15 + tm := (p.Reg >> 5) & 15 + td := (p.To.Reg >> 5) & 15 + tn := ((p.GetFrom3().Reg) >> 5) & 15 + + if ta != tm || ta != tn || ta != td || ta != ARNG_16B { + c.ctxt.Diag("invalid arrangement: %v", p) + break + } + + o1 = c.oprrr(p, p.As) + ra := int(p.From.Reg) + rm := int(p.Reg) + rn := int(p.GetFrom3().Reg) + rd := int(p.To.Reg) + o1 |= uint32(rm&31)<<16 | uint32(ra&31)<<10 | uint32(rn&31)<<5 | uint32(rd)&31 + + case 104: /* vxar $imm4, Vm., Vn., Vd. 
*/ + af := ((p.GetFrom3().Reg) >> 5) & 15 + at := (p.To.Reg >> 5) & 15 + a := (p.Reg >> 5) & 15 + index := int(p.From.Offset) + + if af != a || af != at { + c.ctxt.Diag("invalid arrangement: %v", p) + break + } + + if af != ARNG_2D { + c.ctxt.Diag("invalid arrangement, should be D2: %v", p) + break + } + + if index < 0 || index > 63 { + c.ctxt.Diag("illegal offset: %v", p) + } + + o1 = c.opirr(p, p.As) + rf := (p.GetFrom3().Reg) & 31 + rt := (p.To.Reg) & 31 + r := (p.Reg) & 31 + + o1 |= (uint32(r&31) << 16) | (uint32(index&63) << 10) | (uint32(rf&31) << 5) | uint32(rt&31) + + case 105: /* vuaddw{2} Vm., Vn., Vd. */ + af := uint8((p.From.Reg >> 5) & 15) + at := uint8((p.To.Reg >> 5) & 15) + a := uint8((p.Reg >> 5) & 15) + if at != a { + c.ctxt.Diag("invalid arrangement: %v", p) + break + } + + var Q, size uint32 + if p.As == AVUADDW2 { + Q = 1 + } + switch pack(Q, at, af) { + case pack(0, ARNG_8H, ARNG_8B), pack(1, ARNG_8H, ARNG_16B): + size = 0 + case pack(0, ARNG_4S, ARNG_4H), pack(1, ARNG_4S, ARNG_8H): + size = 1 + case pack(0, ARNG_2D, ARNG_2S), pack(1, ARNG_2D, ARNG_4S): + size = 2 + default: + c.ctxt.Diag("operand mismatch: %v\n", p) + } + + o1 = c.oprrr(p, p.As) + rf := int((p.From.Reg) & 31) + rt := int((p.To.Reg) & 31) + r := int((p.Reg) & 31) + o1 |= ((Q & 1) << 30) | ((size & 3) << 22) | (uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31) + + case 106: // CASPx (Rs, Rs+1), (Rb), (Rt, Rt+1) + rs := p.From.Reg + rt := p.GetTo2().Reg + rb := p.To.Reg + rs1 := int16(p.From.Offset) + rt1 := int16(p.GetTo2().Offset) + + enc, ok := atomicCASP[p.As] + if !ok { + c.ctxt.Diag("invalid CASP-like atomic instructions: %v\n", p) + } + // for CASPx-like instructions, Rs<0> != 1 && Rt<0> != 1 + switch { + case rs&1 != 0: + c.ctxt.Diag("source register pair must start from even register: %v\n", p) + break + case rt&1 != 0: + c.ctxt.Diag("destination register pair must start from even register: %v\n", p) + break + case rs != rs1-1: + c.ctxt.Diag("source register 
pair must be contiguous: %v\n", p) + break + case rt != rt1-1: + c.ctxt.Diag("destination register pair must be contiguous: %v\n", p) + break + } + // rt can't be sp. + if rt == REG_RSP { + c.ctxt.Diag("illegal destination register: %v\n", p) + } + o1 |= enc | uint32(rs&31)<<16 | uint32(rb&31)<<5 | uint32(rt&31) + + case 107: /* tlbi, dc */ + op, ok := sysInstFields[SpecialOperand(p.From.Offset)] + if !ok || (p.As == ATLBI && op.cn != 8) || (p.As == ADC && op.cn != 7) { + c.ctxt.Diag("illegal argument: %v\n", p) + break + } + o1 = c.opirr(p, p.As) + if op.hasOperand2 { + if p.To.Reg == 0 { + c.ctxt.Diag("missing register at operand 2: %v\n", p) + } + o1 |= uint32(p.To.Reg & 0x1F) + } else { + if p.To.Reg != 0 || p.Reg != 0 { + c.ctxt.Diag("extraneous register at operand 2: %v\n", p) + } + o1 |= uint32(0x1F) + } + o1 |= uint32(SYSARG4(int(op.op1), int(op.cn), int(op.cm), int(op.op2))) + } + out[0] = o1 + out[1] = o2 + out[2] = o3 + out[3] = o4 + out[4] = o5 +} + +func (c *ctxt7) addrRelocType(p *obj.Prog) objabi.RelocType { + switch movesize(p.As) { + case 0: + return objabi.R_ARM64_PCREL_LDST8 + case 1: + return objabi.R_ARM64_PCREL_LDST16 + case 2: + return objabi.R_ARM64_PCREL_LDST32 + case 3: + return objabi.R_ARM64_PCREL_LDST64 + default: + c.ctxt.Diag("use R_ADDRARM64 relocation type for: %v\n", p) + } + return -1 +} + +/* + * basic Rm op Rn -> Rd (using shifted register with 0) + * also op Rn -> Rt + * also Rm*Rn op Ra -> Rd + * also Vm op Vn -> Vd + */ +func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 { + switch a { + case AADC: + return S64 | 0<<30 | 0<<29 | 0xd0<<21 | 0<<10 + + case AADCW: + return S32 | 0<<30 | 0<<29 | 0xd0<<21 | 0<<10 + + case AADCS: + return S64 | 0<<30 | 1<<29 | 0xd0<<21 | 0<<10 + + case AADCSW: + return S32 | 0<<30 | 1<<29 | 0xd0<<21 | 0<<10 + + case ANGC, ASBC: + return S64 | 1<<30 | 0<<29 | 0xd0<<21 | 0<<10 + + case ANGCS, ASBCS: + return S64 | 1<<30 | 1<<29 | 0xd0<<21 | 0<<10 + + case ANGCW, ASBCW: + return S32 | 1<<30 | 0<<29 | 
0xd0<<21 | 0<<10 + + case ANGCSW, ASBCSW: + return S32 | 1<<30 | 1<<29 | 0xd0<<21 | 0<<10 + + case AADD: + return S64 | 0<<30 | 0<<29 | 0x0b<<24 | 0<<22 | 0<<21 | 0<<10 + + case AADDW: + return S32 | 0<<30 | 0<<29 | 0x0b<<24 | 0<<22 | 0<<21 | 0<<10 + + case ACMN, AADDS: + return S64 | 0<<30 | 1<<29 | 0x0b<<24 | 0<<22 | 0<<21 | 0<<10 + + case ACMNW, AADDSW: + return S32 | 0<<30 | 1<<29 | 0x0b<<24 | 0<<22 | 0<<21 | 0<<10 + + case ASUB: + return S64 | 1<<30 | 0<<29 | 0x0b<<24 | 0<<22 | 0<<21 | 0<<10 + + case ASUBW: + return S32 | 1<<30 | 0<<29 | 0x0b<<24 | 0<<22 | 0<<21 | 0<<10 + + case ACMP, ASUBS: + return S64 | 1<<30 | 1<<29 | 0x0b<<24 | 0<<22 | 0<<21 | 0<<10 + + case ACMPW, ASUBSW: + return S32 | 1<<30 | 1<<29 | 0x0b<<24 | 0<<22 | 0<<21 | 0<<10 + + case AAND: + return S64 | 0<<29 | 0xA<<24 + + case AANDW: + return S32 | 0<<29 | 0xA<<24 + + case AMOVD, AORR: + return S64 | 1<<29 | 0xA<<24 + + // case AMOVW: + case AMOVWU, AORRW: + return S32 | 1<<29 | 0xA<<24 + + case AEOR: + return S64 | 2<<29 | 0xA<<24 + + case AEORW: + return S32 | 2<<29 | 0xA<<24 + + case AANDS, ATST: + return S64 | 3<<29 | 0xA<<24 + + case AANDSW, ATSTW: + return S32 | 3<<29 | 0xA<<24 + + case ABIC: + return S64 | 0<<29 | 0xA<<24 | 1<<21 + + case ABICW: + return S32 | 0<<29 | 0xA<<24 | 1<<21 + + case ABICS: + return S64 | 3<<29 | 0xA<<24 | 1<<21 + + case ABICSW: + return S32 | 3<<29 | 0xA<<24 | 1<<21 + + case AEON: + return S64 | 2<<29 | 0xA<<24 | 1<<21 + + case AEONW: + return S32 | 2<<29 | 0xA<<24 | 1<<21 + + case AMVN, AORN: + return S64 | 1<<29 | 0xA<<24 | 1<<21 + + case AMVNW, AORNW: + return S32 | 1<<29 | 0xA<<24 | 1<<21 + + case AASR: + return S64 | OPDP2(10) /* also ASRV */ + + case AASRW: + return S32 | OPDP2(10) + + case ALSL: + return S64 | OPDP2(8) + + case ALSLW: + return S32 | OPDP2(8) + + case ALSR: + return S64 | OPDP2(9) + + case ALSRW: + return S32 | OPDP2(9) + + case AROR: + return S64 | OPDP2(11) + + case ARORW: + return S32 | OPDP2(11) + + case ACCMN: + return S64 | 0<<30 
| 1<<29 | 0xD2<<21 | 0<<11 | 0<<10 | 0<<4 /* cond<<12 | nzcv<<0 */ + + case ACCMNW: + return S32 | 0<<30 | 1<<29 | 0xD2<<21 | 0<<11 | 0<<10 | 0<<4 + + case ACCMP: + return S64 | 1<<30 | 1<<29 | 0xD2<<21 | 0<<11 | 0<<10 | 0<<4 /* imm5<<16 | cond<<12 | nzcv<<0 */ + + case ACCMPW: + return S32 | 1<<30 | 1<<29 | 0xD2<<21 | 0<<11 | 0<<10 | 0<<4 + + case ACRC32B: + return S32 | OPDP2(16) + + case ACRC32H: + return S32 | OPDP2(17) + + case ACRC32W: + return S32 | OPDP2(18) + + case ACRC32X: + return S64 | OPDP2(19) + + case ACRC32CB: + return S32 | OPDP2(20) + + case ACRC32CH: + return S32 | OPDP2(21) + + case ACRC32CW: + return S32 | OPDP2(22) + + case ACRC32CX: + return S64 | OPDP2(23) + + case ACSEL: + return S64 | 0<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 0<<10 + + case ACSELW: + return S32 | 0<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 0<<10 + + case ACSET: + return S64 | 0<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 1<<10 + + case ACSETW: + return S32 | 0<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 1<<10 + + case ACSETM: + return S64 | 1<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 0<<10 + + case ACSETMW: + return S32 | 1<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 0<<10 + + case ACINC, ACSINC: + return S64 | 0<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 1<<10 + + case ACINCW, ACSINCW: + return S32 | 0<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 1<<10 + + case ACINV, ACSINV: + return S64 | 1<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 0<<10 + + case ACINVW, ACSINVW: + return S32 | 1<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 0<<10 + + case ACNEG, ACSNEG: + return S64 | 1<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 1<<10 + + case ACNEGW, ACSNEGW: + return S32 | 1<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 1<<10 + + case AMUL, AMADD: + return S64 | 0<<29 | 0x1B<<24 | 0<<21 | 0<<15 + + case AMULW, AMADDW: + return S32 | 0<<29 | 0x1B<<24 | 0<<21 | 0<<15 + + case AMNEG, AMSUB: + return S64 | 0<<29 | 0x1B<<24 | 0<<21 | 1<<15 + + case AMNEGW, AMSUBW: + return S32 | 0<<29 | 0x1B<<24 | 0<<21 | 1<<15 + + case AMRS: + return SYSOP(1, 2, 0, 0, 0, 0, 0) + + case AMSR: + return SYSOP(0, 2, 0, 0, 0, 0, 
0) + + case ANEG: + return S64 | 1<<30 | 0<<29 | 0xB<<24 | 0<<21 + + case ANEGW: + return S32 | 1<<30 | 0<<29 | 0xB<<24 | 0<<21 + + case ANEGS: + return S64 | 1<<30 | 1<<29 | 0xB<<24 | 0<<21 + + case ANEGSW: + return S32 | 1<<30 | 1<<29 | 0xB<<24 | 0<<21 + + case AREM, ASDIV: + return S64 | OPDP2(3) + + case AREMW, ASDIVW: + return S32 | OPDP2(3) + + case ASMULL, ASMADDL: + return OPDP3(1, 0, 1, 0) + + case ASMNEGL, ASMSUBL: + return OPDP3(1, 0, 1, 1) + + case ASMULH: + return OPDP3(1, 0, 2, 0) + + case AUMULL, AUMADDL: + return OPDP3(1, 0, 5, 0) + + case AUMNEGL, AUMSUBL: + return OPDP3(1, 0, 5, 1) + + case AUMULH: + return OPDP3(1, 0, 6, 0) + + case AUREM, AUDIV: + return S64 | OPDP2(2) + + case AUREMW, AUDIVW: + return S32 | OPDP2(2) + + case AAESE: + return 0x4E<<24 | 2<<20 | 8<<16 | 4<<12 | 2<<10 + + case AAESD: + return 0x4E<<24 | 2<<20 | 8<<16 | 5<<12 | 2<<10 + + case AAESMC: + return 0x4E<<24 | 2<<20 | 8<<16 | 6<<12 | 2<<10 + + case AAESIMC: + return 0x4E<<24 | 2<<20 | 8<<16 | 7<<12 | 2<<10 + + case ASHA1C: + return 0x5E<<24 | 0<<12 + + case ASHA1P: + return 0x5E<<24 | 1<<12 + + case ASHA1M: + return 0x5E<<24 | 2<<12 + + case ASHA1SU0: + return 0x5E<<24 | 3<<12 + + case ASHA256H: + return 0x5E<<24 | 4<<12 + + case ASHA256H2: + return 0x5E<<24 | 5<<12 + + case ASHA256SU1: + return 0x5E<<24 | 6<<12 + + case ASHA1H: + return 0x5E<<24 | 2<<20 | 8<<16 | 0<<12 | 2<<10 + + case ASHA1SU1: + return 0x5E<<24 | 2<<20 | 8<<16 | 1<<12 | 2<<10 + + case ASHA256SU0: + return 0x5E<<24 | 2<<20 | 8<<16 | 2<<12 | 2<<10 + + case ASHA512H: + return 0xCE<<24 | 3<<21 | 8<<12 + + case ASHA512H2: + return 0xCE<<24 | 3<<21 | 8<<12 | 4<<8 + + case ASHA512SU1: + return 0xCE<<24 | 3<<21 | 8<<12 | 8<<8 + + case ASHA512SU0: + return 0xCE<<24 | 3<<22 | 8<<12 + + case AFCVTZSD: + return FPCVTI(1, 0, 1, 3, 0) + + case AFCVTZSDW: + return FPCVTI(0, 0, 1, 3, 0) + + case AFCVTZSS: + return FPCVTI(1, 0, 0, 3, 0) + + case AFCVTZSSW: + return FPCVTI(0, 0, 0, 3, 0) + + case AFCVTZUD: + return 
FPCVTI(1, 0, 1, 3, 1) + + case AFCVTZUDW: + return FPCVTI(0, 0, 1, 3, 1) + + case AFCVTZUS: + return FPCVTI(1, 0, 0, 3, 1) + + case AFCVTZUSW: + return FPCVTI(0, 0, 0, 3, 1) + + case ASCVTFD: + return FPCVTI(1, 0, 1, 0, 2) + + case ASCVTFS: + return FPCVTI(1, 0, 0, 0, 2) + + case ASCVTFWD: + return FPCVTI(0, 0, 1, 0, 2) + + case ASCVTFWS: + return FPCVTI(0, 0, 0, 0, 2) + + case AUCVTFD: + return FPCVTI(1, 0, 1, 0, 3) + + case AUCVTFS: + return FPCVTI(1, 0, 0, 0, 3) + + case AUCVTFWD: + return FPCVTI(0, 0, 1, 0, 3) + + case AUCVTFWS: + return FPCVTI(0, 0, 0, 0, 3) + + case AFADDS: + return FPOP2S(0, 0, 0, 2) + + case AFADDD: + return FPOP2S(0, 0, 1, 2) + + case AFSUBS: + return FPOP2S(0, 0, 0, 3) + + case AFSUBD: + return FPOP2S(0, 0, 1, 3) + + case AFMADDD: + return FPOP3S(0, 0, 1, 0, 0) + + case AFMADDS: + return FPOP3S(0, 0, 0, 0, 0) + + case AFMSUBD: + return FPOP3S(0, 0, 1, 0, 1) + + case AFMSUBS: + return FPOP3S(0, 0, 0, 0, 1) + + case AFNMADDD: + return FPOP3S(0, 0, 1, 1, 0) + + case AFNMADDS: + return FPOP3S(0, 0, 0, 1, 0) + + case AFNMSUBD: + return FPOP3S(0, 0, 1, 1, 1) + + case AFNMSUBS: + return FPOP3S(0, 0, 0, 1, 1) + + case AFMULS: + return FPOP2S(0, 0, 0, 0) + + case AFMULD: + return FPOP2S(0, 0, 1, 0) + + case AFDIVS: + return FPOP2S(0, 0, 0, 1) + + case AFDIVD: + return FPOP2S(0, 0, 1, 1) + + case AFMAXS: + return FPOP2S(0, 0, 0, 4) + + case AFMINS: + return FPOP2S(0, 0, 0, 5) + + case AFMAXD: + return FPOP2S(0, 0, 1, 4) + + case AFMIND: + return FPOP2S(0, 0, 1, 5) + + case AFMAXNMS: + return FPOP2S(0, 0, 0, 6) + + case AFMAXNMD: + return FPOP2S(0, 0, 1, 6) + + case AFMINNMS: + return FPOP2S(0, 0, 0, 7) + + case AFMINNMD: + return FPOP2S(0, 0, 1, 7) + + case AFNMULS: + return FPOP2S(0, 0, 0, 8) + + case AFNMULD: + return FPOP2S(0, 0, 1, 8) + + case AFCMPS: + return FPCMP(0, 0, 0, 0, 0) + + case AFCMPD: + return FPCMP(0, 0, 1, 0, 0) + + case AFCMPES: + return FPCMP(0, 0, 0, 0, 16) + + case AFCMPED: + return FPCMP(0, 0, 1, 0, 16) + + case AFCCMPS: + 
return FPCCMP(0, 0, 0, 0) + + case AFCCMPD: + return FPCCMP(0, 0, 1, 0) + + case AFCCMPES: + return FPCCMP(0, 0, 0, 1) + + case AFCCMPED: + return FPCCMP(0, 0, 1, 1) + + case AFCSELS: + return 0x1E<<24 | 0<<22 | 1<<21 | 3<<10 + + case AFCSELD: + return 0x1E<<24 | 1<<22 | 1<<21 | 3<<10 + + case AFMOVS: + return FPOP1S(0, 0, 0, 0) + + case AFABSS: + return FPOP1S(0, 0, 0, 1) + + case AFNEGS: + return FPOP1S(0, 0, 0, 2) + + case AFSQRTS: + return FPOP1S(0, 0, 0, 3) + + case AFCVTSD: + return FPOP1S(0, 0, 0, 5) + + case AFCVTSH: + return FPOP1S(0, 0, 0, 7) + + case AFRINTNS: + return FPOP1S(0, 0, 0, 8) + + case AFRINTPS: + return FPOP1S(0, 0, 0, 9) + + case AFRINTMS: + return FPOP1S(0, 0, 0, 10) + + case AFRINTZS: + return FPOP1S(0, 0, 0, 11) + + case AFRINTAS: + return FPOP1S(0, 0, 0, 12) + + case AFRINTXS: + return FPOP1S(0, 0, 0, 14) + + case AFRINTIS: + return FPOP1S(0, 0, 0, 15) + + case AFMOVD: + return FPOP1S(0, 0, 1, 0) + + case AFABSD: + return FPOP1S(0, 0, 1, 1) + + case AFNEGD: + return FPOP1S(0, 0, 1, 2) + + case AFSQRTD: + return FPOP1S(0, 0, 1, 3) + + case AFCVTDS: + return FPOP1S(0, 0, 1, 4) + + case AFCVTDH: + return FPOP1S(0, 0, 1, 7) + + case AFRINTND: + return FPOP1S(0, 0, 1, 8) + + case AFRINTPD: + return FPOP1S(0, 0, 1, 9) + + case AFRINTMD: + return FPOP1S(0, 0, 1, 10) + + case AFRINTZD: + return FPOP1S(0, 0, 1, 11) + + case AFRINTAD: + return FPOP1S(0, 0, 1, 12) + + case AFRINTXD: + return FPOP1S(0, 0, 1, 14) + + case AFRINTID: + return FPOP1S(0, 0, 1, 15) + + case AFCVTHS: + return FPOP1S(0, 0, 3, 4) + + case AFCVTHD: + return FPOP1S(0, 0, 3, 5) + + case AVADD: + return 7<<25 | 1<<21 | 1<<15 | 1<<10 + + case AVSUB: + return 0x17<<25 | 1<<21 | 1<<15 | 1<<10 + + case AVADDP: + return 7<<25 | 1<<21 | 1<<15 | 15<<10 + + case AVAND: + return 7<<25 | 1<<21 | 7<<10 + + case AVBCAX: + return 0xCE<<24 | 1<<21 + + case AVCMEQ: + return 1<<29 | 0x71<<21 | 0x23<<10 + + case AVCNT: + return 0xE<<24 | 0x10<<17 | 5<<12 | 2<<10 + + case AVZIP1: + return 0xE<<24 
| 3<<12 | 2<<10 + + case AVZIP2: + return 0xE<<24 | 1<<14 | 3<<12 | 2<<10 + + case AVEOR: + return 1<<29 | 0x71<<21 | 7<<10 + + case AVEOR3: + return 0xCE << 24 + + case AVORR: + return 7<<25 | 5<<21 | 7<<10 + + case AVREV16: + return 3<<26 | 2<<24 | 1<<21 | 3<<11 + + case AVRAX1: + return 0xCE<<24 | 3<<21 | 1<<15 | 3<<10 + + case AVREV32: + return 11<<26 | 2<<24 | 1<<21 | 1<<11 + + case AVREV64: + return 3<<26 | 2<<24 | 1<<21 | 1<<11 + + case AVMOV: + return 7<<25 | 5<<21 | 7<<10 + + case AVADDV: + return 7<<25 | 3<<20 | 3<<15 | 7<<11 + + case AVUADDLV: + return 1<<29 | 7<<25 | 3<<20 | 7<<11 + + case AVFMLA: + return 7<<25 | 0<<23 | 1<<21 | 3<<14 | 3<<10 + + case AVFMLS: + return 7<<25 | 1<<23 | 1<<21 | 3<<14 | 3<<10 + + case AVPMULL, AVPMULL2: + return 0xE<<24 | 1<<21 | 0x38<<10 + + case AVRBIT: + return 0x2E<<24 | 1<<22 | 0x10<<17 | 5<<12 | 2<<10 + + case AVLD1, AVLD2, AVLD3, AVLD4: + return 3<<26 | 1<<22 + + case AVLD1R, AVLD3R: + return 0xD<<24 | 1<<22 + + case AVLD2R, AVLD4R: + return 0xD<<24 | 3<<21 + + case AVBIF: + return 1<<29 | 7<<25 | 7<<21 | 7<<10 + + case AVBIT: + return 1<<29 | 0x75<<21 | 7<<10 + + case AVBSL: + return 1<<29 | 0x73<<21 | 7<<10 + + case AVCMTST: + return 0xE<<24 | 1<<21 | 0x23<<10 + + case AVUMAX: + return 1<<29 | 7<<25 | 1<<21 | 0x19<<10 + + case AVUMIN: + return 1<<29 | 7<<25 | 1<<21 | 0x1b<<10 + + case AVUZP1: + return 7<<25 | 3<<11 + + case AVUZP2: + return 7<<25 | 1<<14 | 3<<11 + + case AVUADDW, AVUADDW2: + return 0x17<<25 | 1<<21 | 1<<12 + + case AVTRN1: + return 7<<25 | 5<<11 + + case AVTRN2: + return 7<<25 | 1<<14 | 5<<11 + } + + c.ctxt.Diag("%v: bad rrr %d %v", p, a, a) + return 0 +} + +/* + * imm -> Rd + * imm op Rn -> Rd + */ +func (c *ctxt7) opirr(p *obj.Prog, a obj.As) uint32 { + switch a { + /* op $addcon, Rn, Rd */ + case AMOVD, AADD: + return S64 | 0<<30 | 0<<29 | 0x11<<24 + + case ACMN, AADDS: + return S64 | 0<<30 | 1<<29 | 0x11<<24 + + case AMOVW, AADDW: + return S32 | 0<<30 | 0<<29 | 0x11<<24 + + case ACMNW, AADDSW: 
+ return S32 | 0<<30 | 1<<29 | 0x11<<24 + + case ASUB: + return S64 | 1<<30 | 0<<29 | 0x11<<24 + + case ACMP, ASUBS: + return S64 | 1<<30 | 1<<29 | 0x11<<24 + + case ASUBW: + return S32 | 1<<30 | 0<<29 | 0x11<<24 + + case ACMPW, ASUBSW: + return S32 | 1<<30 | 1<<29 | 0x11<<24 + + /* op $imm(SB), Rd; op label, Rd */ + case AADR: + return 0<<31 | 0x10<<24 + + case AADRP: + return 1<<31 | 0x10<<24 + + /* op $bimm, Rn, Rd */ + case AAND, ABIC: + return S64 | 0<<29 | 0x24<<23 + + case AANDW, ABICW: + return S32 | 0<<29 | 0x24<<23 | 0<<22 + + case AORR, AORN: + return S64 | 1<<29 | 0x24<<23 + + case AORRW, AORNW: + return S32 | 1<<29 | 0x24<<23 | 0<<22 + + case AEOR, AEON: + return S64 | 2<<29 | 0x24<<23 + + case AEORW, AEONW: + return S32 | 2<<29 | 0x24<<23 | 0<<22 + + case AANDS, ABICS, ATST: + return S64 | 3<<29 | 0x24<<23 + + case AANDSW, ABICSW, ATSTW: + return S32 | 3<<29 | 0x24<<23 | 0<<22 + + case AASR: + return S64 | 0<<29 | 0x26<<23 /* alias of SBFM */ + + case AASRW: + return S32 | 0<<29 | 0x26<<23 | 0<<22 + + /* op $width, $lsb, Rn, Rd */ + case ABFI: + return S64 | 2<<29 | 0x26<<23 | 1<<22 + /* alias of BFM */ + + case ABFIW: + return S32 | 2<<29 | 0x26<<23 | 0<<22 + + /* op $imms, $immr, Rn, Rd */ + case ABFM: + return S64 | 1<<29 | 0x26<<23 | 1<<22 + + case ABFMW: + return S32 | 1<<29 | 0x26<<23 | 0<<22 + + case ASBFM: + return S64 | 0<<29 | 0x26<<23 | 1<<22 + + case ASBFMW: + return S32 | 0<<29 | 0x26<<23 | 0<<22 + + case AUBFM: + return S64 | 2<<29 | 0x26<<23 | 1<<22 + + case AUBFMW: + return S32 | 2<<29 | 0x26<<23 | 0<<22 + + case ABFXIL: + return S64 | 1<<29 | 0x26<<23 | 1<<22 /* alias of BFM */ + + case ABFXILW: + return S32 | 1<<29 | 0x26<<23 | 0<<22 + + case AEXTR: + return S64 | 0<<29 | 0x27<<23 | 1<<22 | 0<<21 + + case AEXTRW: + return S32 | 0<<29 | 0x27<<23 | 0<<22 | 0<<21 + + case ACBNZ: + return S64 | 0x1A<<25 | 1<<24 + + case ACBNZW: + return S32 | 0x1A<<25 | 1<<24 + + case ACBZ: + return S64 | 0x1A<<25 | 0<<24 + + case ACBZW: + return S32 | 
0x1A<<25 | 0<<24 + + case ACCMN: + return S64 | 0<<30 | 1<<29 | 0xD2<<21 | 1<<11 | 0<<10 | 0<<4 /* imm5<<16 | cond<<12 | nzcv<<0 */ + + case ACCMNW: + return S32 | 0<<30 | 1<<29 | 0xD2<<21 | 1<<11 | 0<<10 | 0<<4 + + case ACCMP: + return S64 | 1<<30 | 1<<29 | 0xD2<<21 | 1<<11 | 0<<10 | 0<<4 /* imm5<<16 | cond<<12 | nzcv<<0 */ + + case ACCMPW: + return S32 | 1<<30 | 1<<29 | 0xD2<<21 | 1<<11 | 0<<10 | 0<<4 + + case AMOVK: + return S64 | 3<<29 | 0x25<<23 + + case AMOVKW: + return S32 | 3<<29 | 0x25<<23 + + case AMOVN: + return S64 | 0<<29 | 0x25<<23 + + case AMOVNW: + return S32 | 0<<29 | 0x25<<23 + + case AMOVZ: + return S64 | 2<<29 | 0x25<<23 + + case AMOVZW: + return S32 | 2<<29 | 0x25<<23 + + case AMSR: + return SYSOP(0, 0, 0, 4, 0, 0, 0x1F) /* MSR (immediate) */ + + case AAT, + ADC, + AIC, + ATLBI, + ASYS: + return SYSOP(0, 1, 0, 0, 0, 0, 0) + + case ASYSL: + return SYSOP(1, 1, 0, 0, 0, 0, 0) + + case ATBZ: + return 0x36 << 24 + + case ATBNZ: + return 0x37 << 24 + + case ADSB: + return SYSOP(0, 0, 3, 3, 0, 4, 0x1F) + + case ADMB: + return SYSOP(0, 0, 3, 3, 0, 5, 0x1F) + + case AISB: + return SYSOP(0, 0, 3, 3, 0, 6, 0x1F) + + case AHINT: + return SYSOP(0, 0, 3, 2, 0, 0, 0x1F) + + case AVEXT: + return 0x2E<<24 | 0<<23 | 0<<21 | 0<<15 + + case AVUSHR: + return 0x5E<<23 | 1<<10 + + case AVSHL: + return 0x1E<<23 | 21<<10 + + case AVSRI: + return 0x5E<<23 | 17<<10 + + case AVSLI: + return 0x5E<<23 | 21<<10 + + case AVUSHLL, AVUXTL: + return 1<<29 | 15<<24 | 0x29<<10 + + case AVUSHLL2, AVUXTL2: + return 3<<29 | 15<<24 | 0x29<<10 + + case AVXAR: + return 0xCE<<24 | 1<<23 + + case AVUSRA: + return 1<<29 | 15<<24 | 5<<10 + + case APRFM: + return 0xf9<<24 | 2<<22 + } + + c.ctxt.Diag("%v: bad irr %v", p, a) + return 0 +} + +func (c *ctxt7) opbit(p *obj.Prog, a obj.As) uint32 { + switch a { + case ACLS: + return S64 | OPBIT(5) + + case ACLSW: + return S32 | OPBIT(5) + + case ACLZ: + return S64 | OPBIT(4) + + case ACLZW: + return S32 | OPBIT(4) + + case ARBIT: + return S64 | 
OPBIT(0) + + case ARBITW: + return S32 | OPBIT(0) + + case AREV: + return S64 | OPBIT(3) + + case AREVW: + return S32 | OPBIT(2) + + case AREV16: + return S64 | OPBIT(1) + + case AREV16W: + return S32 | OPBIT(1) + + case AREV32: + return S64 | OPBIT(2) + + default: + c.ctxt.Diag("bad bit op\n%v", p) + return 0 + } +} + +/* + * add/subtract sign or zero-extended register + */ +func (c *ctxt7) opxrrr(p *obj.Prog, a obj.As, extend bool) uint32 { + extension := uint32(0) + if !extend { + if isADDop(a) { + extension = LSL0_64 + } + if isADDWop(a) { + extension = LSL0_32 + } + } + + switch a { + case AADD: + return S64 | 0<<30 | 0<<29 | 0x0b<<24 | 0<<22 | 1<<21 | extension + + case AADDW: + return S32 | 0<<30 | 0<<29 | 0x0b<<24 | 0<<22 | 1<<21 | extension + + case ACMN, AADDS: + return S64 | 0<<30 | 1<<29 | 0x0b<<24 | 0<<22 | 1<<21 | extension + + case ACMNW, AADDSW: + return S32 | 0<<30 | 1<<29 | 0x0b<<24 | 0<<22 | 1<<21 | extension + + case ASUB: + return S64 | 1<<30 | 0<<29 | 0x0b<<24 | 0<<22 | 1<<21 | extension + + case ASUBW: + return S32 | 1<<30 | 0<<29 | 0x0b<<24 | 0<<22 | 1<<21 | extension + + case ACMP, ASUBS: + return S64 | 1<<30 | 1<<29 | 0x0b<<24 | 0<<22 | 1<<21 | extension + + case ACMPW, ASUBSW: + return S32 | 1<<30 | 1<<29 | 0x0b<<24 | 0<<22 | 1<<21 | extension + } + + c.ctxt.Diag("bad opxrrr %v\n%v", a, p) + return 0 +} + +func (c *ctxt7) opimm(p *obj.Prog, a obj.As) uint32 { + switch a { + case ASVC: + return 0xD4<<24 | 0<<21 | 1 /* imm16<<5 */ + + case AHVC: + return 0xD4<<24 | 0<<21 | 2 + + case ASMC: + return 0xD4<<24 | 0<<21 | 3 + + case ABRK: + return 0xD4<<24 | 1<<21 | 0 + + case AHLT: + return 0xD4<<24 | 2<<21 | 0 + + case ADCPS1: + return 0xD4<<24 | 5<<21 | 1 + + case ADCPS2: + return 0xD4<<24 | 5<<21 | 2 + + case ADCPS3: + return 0xD4<<24 | 5<<21 | 3 + + case ACLREX: + return SYSOP(0, 0, 3, 3, 0, 2, 0x1F) + } + + c.ctxt.Diag("%v: bad imm %v", p, a) + return 0 +} + +func (c *ctxt7) brdist(p *obj.Prog, preshift int, flen int, shift int) int64 { + 
v := int64(0) + t := int64(0) + var q *obj.Prog + if p.To.Type == obj.TYPE_BRANCH { + q = p.To.Target() + } else if p.From.Type == obj.TYPE_BRANCH { // adr, adrp + q = p.From.Target() + } + if q == nil { + // TODO: don't use brdist for this case, as it isn't a branch. + // (Calls from omovlit, and maybe adr/adrp opcodes as well.) + q = p.Pool + } + if q != nil { + v = (q.Pc >> uint(preshift)) - (c.pc >> uint(preshift)) + if (v & ((1 << uint(shift)) - 1)) != 0 { + c.ctxt.Diag("misaligned label\n%v", p) + } + v >>= uint(shift) + t = int64(1) << uint(flen-1) + if v < -t || v >= t { + c.ctxt.Diag("branch too far %#x vs %#x [%p]\n%v\n%v", v, t, c.blitrl, p, q) + panic("branch too far") + } + } + + return v & ((t << 1) - 1) +} + +/* + * pc-relative branches + */ +func (c *ctxt7) opbra(p *obj.Prog, a obj.As) uint32 { + switch a { + case ABEQ: + return OPBcc(0x0) + + case ABNE: + return OPBcc(0x1) + + case ABCS: + return OPBcc(0x2) + + case ABHS: + return OPBcc(0x2) + + case ABCC: + return OPBcc(0x3) + + case ABLO: + return OPBcc(0x3) + + case ABMI: + return OPBcc(0x4) + + case ABPL: + return OPBcc(0x5) + + case ABVS: + return OPBcc(0x6) + + case ABVC: + return OPBcc(0x7) + + case ABHI: + return OPBcc(0x8) + + case ABLS: + return OPBcc(0x9) + + case ABGE: + return OPBcc(0xa) + + case ABLT: + return OPBcc(0xb) + + case ABGT: + return OPBcc(0xc) + + case ABLE: + return OPBcc(0xd) /* imm19<<5 | cond */ + + case AB: + return 0<<31 | 5<<26 /* imm26 */ + + case obj.ADUFFZERO, obj.ADUFFCOPY, ABL: + return 1<<31 | 5<<26 + } + + c.ctxt.Diag("%v: bad bra %v", p, a) + return 0 +} + +func (c *ctxt7) opbrr(p *obj.Prog, a obj.As) uint32 { + switch a { + case ABL: + return OPBLR(1) /* BLR */ + + case AB: + return OPBLR(0) /* BR */ + + case obj.ARET: + return OPBLR(2) /* RET */ + } + + c.ctxt.Diag("%v: bad brr %v", p, a) + return 0 +} + +func (c *ctxt7) op0(p *obj.Prog, a obj.As) uint32 { + switch a { + case ADRPS: + return 0x6B<<25 | 5<<21 | 0x1F<<16 | 0x1F<<5 + + case AERET: + return 
0x6B<<25 | 4<<21 | 0x1F<<16 | 0<<10 | 0x1F<<5 + + case ANOOP: + return SYSHINT(0) + + case AYIELD: + return SYSHINT(1) + + case AWFE: + return SYSHINT(2) + + case AWFI: + return SYSHINT(3) + + case ASEV: + return SYSHINT(4) + + case ASEVL: + return SYSHINT(5) + } + + c.ctxt.Diag("%v: bad op0 %v", p, a) + return 0 +} + +/* + * register offset + */ +func (c *ctxt7) opload(p *obj.Prog, a obj.As) uint32 { + switch a { + case ALDAR: + return LDSTX(3, 1, 1, 0, 1) | 0x1F<<10 + + case ALDARW: + return LDSTX(2, 1, 1, 0, 1) | 0x1F<<10 + + case ALDARB: + return LDSTX(0, 1, 1, 0, 1) | 0x1F<<10 + + case ALDARH: + return LDSTX(1, 1, 1, 0, 1) | 0x1F<<10 + + case ALDAXP: + return LDSTX(3, 0, 1, 1, 1) + + case ALDAXPW: + return LDSTX(2, 0, 1, 1, 1) + + case ALDAXR: + return LDSTX(3, 0, 1, 0, 1) | 0x1F<<10 + + case ALDAXRW: + return LDSTX(2, 0, 1, 0, 1) | 0x1F<<10 + + case ALDAXRB: + return LDSTX(0, 0, 1, 0, 1) | 0x1F<<10 + + case ALDAXRH: + return LDSTX(1, 0, 1, 0, 1) | 0x1F<<10 + + case ALDXR: + return LDSTX(3, 0, 1, 0, 0) | 0x1F<<10 + + case ALDXRB: + return LDSTX(0, 0, 1, 0, 0) | 0x1F<<10 + + case ALDXRH: + return LDSTX(1, 0, 1, 0, 0) | 0x1F<<10 + + case ALDXRW: + return LDSTX(2, 0, 1, 0, 0) | 0x1F<<10 + + case ALDXP: + return LDSTX(3, 0, 1, 1, 0) + + case ALDXPW: + return LDSTX(2, 0, 1, 1, 0) + + case AMOVNP: + return S64 | 0<<30 | 5<<27 | 0<<26 | 0<<23 | 1<<22 + + case AMOVNPW: + return S32 | 0<<30 | 5<<27 | 0<<26 | 0<<23 | 1<<22 + } + + c.ctxt.Diag("bad opload %v\n%v", a, p) + return 0 +} + +func (c *ctxt7) opstore(p *obj.Prog, a obj.As) uint32 { + switch a { + case ASTLR: + return LDSTX(3, 1, 0, 0, 1) | 0x1F<<10 + + case ASTLRB: + return LDSTX(0, 1, 0, 0, 1) | 0x1F<<10 + + case ASTLRH: + return LDSTX(1, 1, 0, 0, 1) | 0x1F<<10 + + case ASTLP: + return LDSTX(3, 0, 0, 1, 1) + + case ASTLPW: + return LDSTX(2, 0, 0, 1, 1) + + case ASTLRW: + return LDSTX(2, 1, 0, 0, 1) | 0x1F<<10 + + case ASTLXP: + return LDSTX(3, 0, 0, 1, 1) + + case ASTLXPW: + return LDSTX(2, 0, 0, 1, 1) + + 
case ASTLXR: + return LDSTX(3, 0, 0, 0, 1) | 0x1F<<10 + + case ASTLXRB: + return LDSTX(0, 0, 0, 0, 1) | 0x1F<<10 + + case ASTLXRH: + return LDSTX(1, 0, 0, 0, 1) | 0x1F<<10 + + case ASTLXRW: + return LDSTX(2, 0, 0, 0, 1) | 0x1F<<10 + + case ASTXR: + return LDSTX(3, 0, 0, 0, 0) | 0x1F<<10 + + case ASTXRB: + return LDSTX(0, 0, 0, 0, 0) | 0x1F<<10 + + case ASTXRH: + return LDSTX(1, 0, 0, 0, 0) | 0x1F<<10 + + case ASTXP: + return LDSTX(3, 0, 0, 1, 0) + + case ASTXPW: + return LDSTX(2, 0, 0, 1, 0) + + case ASTXRW: + return LDSTX(2, 0, 0, 0, 0) | 0x1F<<10 + + case AMOVNP: + return S64 | 0<<30 | 5<<27 | 0<<26 | 0<<23 | 1<<22 + + case AMOVNPW: + return S32 | 0<<30 | 5<<27 | 0<<26 | 0<<23 | 1<<22 + } + + c.ctxt.Diag("bad opstore %v\n%v", a, p) + return 0 +} + +/* + * load/store register (scaled 12-bit unsigned immediate) C3.3.13 + * these produce 64-bit values (when there's an option) + */ +func (c *ctxt7) olsr12u(p *obj.Prog, o int32, v int32, b int, r int) uint32 { + if v < 0 || v >= (1<<12) { + c.ctxt.Diag("offset out of range: %d\n%v", v, p) + } + o |= (v & 0xFFF) << 10 + o |= int32(b&31) << 5 + o |= int32(r & 31) + o |= 1 << 24 + return uint32(o) +} + +/* + * load/store register (unscaled 9-bit signed immediate) C3.3.12 + */ +func (c *ctxt7) olsr9s(p *obj.Prog, o int32, v int32, b int, r int) uint32 { + if v < -256 || v > 255 { + c.ctxt.Diag("offset out of range: %d\n%v", v, p) + } + o |= (v & 0x1FF) << 12 + o |= int32(b&31) << 5 + o |= int32(r & 31) + return uint32(o) +} + +// store(immediate) +// scaled 12-bit unsigned immediate offset. +// unscaled 9-bit signed immediate offset. +// pre/post-indexed store. +// and the 12-bit and 9-bit are distinguished in olsr12u and oslr9s. +func (c *ctxt7) opstr(p *obj.Prog, a obj.As) uint32 { + enc := c.opldr(p, a) + switch p.As { + case AFMOVQ: + enc = enc &^ (1 << 22) + default: + enc = LD2STR(enc) + } + return enc +} + +// load(immediate) +// scaled 12-bit unsigned immediate offset. +// unscaled 9-bit signed immediate offset. 
+// pre/post-indexed load. +// and the 12-bit and 9-bit are distinguished in olsr12u and oslr9s. +func (c *ctxt7) opldr(p *obj.Prog, a obj.As) uint32 { + switch a { + case AMOVD: + return LDSTR(3, 0, 1) /* simm9<<12 | Rn<<5 | Rt */ + + case AMOVW: + return LDSTR(2, 0, 2) + + case AMOVWU: + return LDSTR(2, 0, 1) + + case AMOVH: + return LDSTR(1, 0, 2) + + case AMOVHU: + return LDSTR(1, 0, 1) + + case AMOVB: + return LDSTR(0, 0, 2) + + case AMOVBU: + return LDSTR(0, 0, 1) + + case AFMOVS: + return LDSTR(2, 1, 1) + + case AFMOVD: + return LDSTR(3, 1, 1) + + case AFMOVQ: + return LDSTR(0, 1, 3) + } + + c.ctxt.Diag("bad opldr %v\n%v", a, p) + return 0 +} + +// olsxrr attaches register operands to a load/store opcode supplied in o. +// The result either encodes a load of r from (r1+r2) or a store of r to (r1+r2). +func (c *ctxt7) olsxrr(p *obj.Prog, o int32, r int, r1 int, r2 int) uint32 { + o |= int32(r1&31) << 5 + o |= int32(r2&31) << 16 + o |= int32(r & 31) + return uint32(o) +} + +// opldrr returns the ARM64 opcode encoding corresponding to the obj.As opcode +// for load instruction with register offset. +// The offset register can be (Rn)(Rm.UXTW<<2) or (Rn)(Rm<<2) or (Rn)(Rm). +func (c *ctxt7) opldrr(p *obj.Prog, a obj.As, extension bool) uint32 { + OptionS := uint32(0x1a) + if extension { + OptionS = uint32(0) // option value and S value have been encoded into p.From.Offset. 
+ } + switch a { + case AMOVD: + return OptionS<<10 | 0x3<<21 | 0x1f<<27 + case AMOVW: + return OptionS<<10 | 0x5<<21 | 0x17<<27 + case AMOVWU: + return OptionS<<10 | 0x3<<21 | 0x17<<27 + case AMOVH: + return OptionS<<10 | 0x5<<21 | 0x0f<<27 + case AMOVHU: + return OptionS<<10 | 0x3<<21 | 0x0f<<27 + case AMOVB: + return OptionS<<10 | 0x5<<21 | 0x07<<27 + case AMOVBU: + return OptionS<<10 | 0x3<<21 | 0x07<<27 + case AFMOVS: + return OptionS<<10 | 0x3<<21 | 0x17<<27 | 1<<26 + case AFMOVD: + return OptionS<<10 | 0x3<<21 | 0x1f<<27 | 1<<26 + } + c.ctxt.Diag("bad opldrr %v\n%v", a, p) + return 0 +} + +// opstrr returns the ARM64 opcode encoding corresponding to the obj.As opcode +// for store instruction with register offset. +// The offset register can be (Rn)(Rm.UXTW<<2) or (Rn)(Rm<<2) or (Rn)(Rm). +func (c *ctxt7) opstrr(p *obj.Prog, a obj.As, extension bool) uint32 { + OptionS := uint32(0x1a) + if extension { + OptionS = uint32(0) // option value and S value have been encoded into p.To.Offset. 
+ } + switch a { + case AMOVD: + return OptionS<<10 | 0x1<<21 | 0x1f<<27 + case AMOVW, AMOVWU: + return OptionS<<10 | 0x1<<21 | 0x17<<27 + case AMOVH, AMOVHU: + return OptionS<<10 | 0x1<<21 | 0x0f<<27 + case AMOVB, AMOVBU: + return OptionS<<10 | 0x1<<21 | 0x07<<27 + case AFMOVS: + return OptionS<<10 | 0x1<<21 | 0x17<<27 | 1<<26 + case AFMOVD: + return OptionS<<10 | 0x1<<21 | 0x1f<<27 | 1<<26 + } + c.ctxt.Diag("bad opstrr %v\n%v", a, p) + return 0 +} + +func (c *ctxt7) oaddi(p *obj.Prog, o1 int32, v int32, r int, rt int) uint32 { + if (v & 0xFFF000) != 0 { + if v&0xFFF != 0 { + c.ctxt.Diag("%v misuses oaddi", p) + } + v >>= 12 + o1 |= 1 << 22 + } + + o1 |= ((v & 0xFFF) << 10) | (int32(r&31) << 5) | int32(rt&31) + return uint32(o1) +} + +/* + * load a literal value into dr + */ +func (c *ctxt7) omovlit(as obj.As, p *obj.Prog, a *obj.Addr, dr int) uint32 { + var o1 int32 + if p.Pool == nil { /* not in literal pool */ + c.aclass(a) + c.ctxt.Logf("omovlit add %d (%#x)\n", c.instoffset, uint64(c.instoffset)) + + /* TODO: could be clever, and use general constant builder */ + o1 = int32(c.opirr(p, AADD)) + + v := int32(c.instoffset) + if v != 0 && (v&0xFFF) == 0 { + v >>= 12 + o1 |= 1 << 22 /* shift, by 12 */ + } + + o1 |= ((v & 0xFFF) << 10) | (REGZERO & 31 << 5) | int32(dr&31) + } else { + fp, w := 0, 0 + switch as { + case AFMOVS, AVMOVS: + fp = 1 + w = 0 /* 32-bit SIMD/FP */ + + case AFMOVD, AVMOVD: + fp = 1 + w = 1 /* 64-bit SIMD/FP */ + + case AVMOVQ: + fp = 1 + w = 2 /* 128-bit SIMD/FP */ + + case AMOVD: + if p.Pool.As == ADWORD { + w = 1 /* 64-bit */ + } else if p.Pool.To.Offset < 0 { + w = 2 /* 32-bit, sign-extended to 64-bit */ + } else if p.Pool.To.Offset >= 0 { + w = 0 /* 32-bit, zero-extended to 64-bit */ + } else { + c.ctxt.Diag("invalid operand %v in %v", a, p) + } + + case AMOVBU, AMOVHU, AMOVWU: + w = 0 /* 32-bit, zero-extended to 64-bit */ + + case AMOVB, AMOVH, AMOVW: + w = 2 /* 32-bit, sign-extended to 64-bit */ + + default: + c.ctxt.Diag("invalid 
operation %v in %v", as, p) + } + + v := int32(c.brdist(p, 0, 19, 2)) + o1 = (int32(w) << 30) | (int32(fp) << 26) | (3 << 27) + o1 |= (v & 0x7FFFF) << 5 + o1 |= int32(dr & 31) + } + + return uint32(o1) +} + +// load a constant (MOVCON or BITCON) in a into rt +func (c *ctxt7) omovconst(as obj.As, p *obj.Prog, a *obj.Addr, rt int) (o1 uint32) { + if cls := oclass(a); (cls == C_BITCON || cls == C_ABCON || cls == C_ABCON0) && rt != REGZERO { + // or $bitcon, REGZERO, rt. rt can't be ZR. + mode := 64 + var as1 obj.As + switch as { + case AMOVW: + as1 = AORRW + mode = 32 + case AMOVD: + as1 = AORR + } + o1 = c.opirr(p, as1) + o1 |= bitconEncode(uint64(a.Offset), mode) | uint32(REGZERO&31)<<5 | uint32(rt&31) + return o1 + } + + if as == AMOVW { + d := uint32(a.Offset) + s := movcon(int64(d)) + if s < 0 || 16*s >= 32 { + d = ^d + s = movcon(int64(d)) + if s < 0 || 16*s >= 32 { + c.ctxt.Diag("impossible 32-bit move wide: %#x\n%v", uint32(a.Offset), p) + } + o1 = c.opirr(p, AMOVNW) + } else { + o1 = c.opirr(p, AMOVZW) + } + o1 |= MOVCONST(int64(d), s, rt) + } + if as == AMOVD { + d := a.Offset + s := movcon(d) + if s < 0 || 16*s >= 64 { + d = ^d + s = movcon(d) + if s < 0 || 16*s >= 64 { + c.ctxt.Diag("impossible 64-bit move wide: %#x\n%v", uint64(a.Offset), p) + } + o1 = c.opirr(p, AMOVN) + } else { + o1 = c.opirr(p, AMOVZ) + } + o1 |= MOVCONST(d, s, rt) + } + return o1 +} + +// load a 32-bit/64-bit large constant (LCON or VCON) in a.Offset into rt +// put the instruction sequence in os and return the number of instructions. 
+func (c *ctxt7) omovlconst(as obj.As, p *obj.Prog, a *obj.Addr, rt int, os []uint32) (num uint8) { + switch as { + case AMOVW: + d := uint32(a.Offset) + // use MOVZW and MOVKW to load a constant to rt + os[0] = c.opirr(p, AMOVZW) + os[0] |= MOVCONST(int64(d), 0, rt) + os[1] = c.opirr(p, AMOVKW) + os[1] |= MOVCONST(int64(d), 1, rt) + return 2 + + case AMOVD: + d := a.Offset + dn := ^d + var immh [4]uint64 + var i int + zeroCount := int(0) + negCount := int(0) + for i = 0; i < 4; i++ { + immh[i] = uint64((d >> uint(i*16)) & 0xffff) + if immh[i] == 0 { + zeroCount++ + } else if immh[i] == 0xffff { + negCount++ + } + } + + if zeroCount == 4 || negCount == 4 { + c.ctxt.Diag("the immediate should be MOVCON: %v", p) + } + switch { + case zeroCount == 3: + // one MOVZ + for i = 0; i < 4; i++ { + if immh[i] != 0 { + os[0] = c.opirr(p, AMOVZ) + os[0] |= MOVCONST(d, i, rt) + break + } + } + return 1 + + case negCount == 3: + // one MOVN + for i = 0; i < 4; i++ { + if immh[i] != 0xffff { + os[0] = c.opirr(p, AMOVN) + os[0] |= MOVCONST(dn, i, rt) + break + } + } + return 1 + + case zeroCount == 2: + // one MOVZ and one MOVK + for i = 0; i < 4; i++ { + if immh[i] != 0 { + os[0] = c.opirr(p, AMOVZ) + os[0] |= MOVCONST(d, i, rt) + i++ + break + } + } + for ; i < 4; i++ { + if immh[i] != 0 { + os[1] = c.opirr(p, AMOVK) + os[1] |= MOVCONST(d, i, rt) + } + } + return 2 + + case negCount == 2: + // one MOVN and one MOVK + for i = 0; i < 4; i++ { + if immh[i] != 0xffff { + os[0] = c.opirr(p, AMOVN) + os[0] |= MOVCONST(dn, i, rt) + i++ + break + } + } + for ; i < 4; i++ { + if immh[i] != 0xffff { + os[1] = c.opirr(p, AMOVK) + os[1] |= MOVCONST(d, i, rt) + } + } + return 2 + + case zeroCount == 1: + // one MOVZ and two MOVKs + for i = 0; i < 4; i++ { + if immh[i] != 0 { + os[0] = c.opirr(p, AMOVZ) + os[0] |= MOVCONST(d, i, rt) + i++ + break + } + } + + for j := 1; i < 4; i++ { + if immh[i] != 0 { + os[j] = c.opirr(p, AMOVK) + os[j] |= MOVCONST(d, i, rt) + j++ + } + } + return 3 + + case 
negCount == 1: + // one MOVN and two MOVKs + for i = 0; i < 4; i++ { + if immh[i] != 0xffff { + os[0] = c.opirr(p, AMOVN) + os[0] |= MOVCONST(dn, i, rt) + i++ + break + } + } + + for j := 1; i < 4; i++ { + if immh[i] != 0xffff { + os[j] = c.opirr(p, AMOVK) + os[j] |= MOVCONST(d, i, rt) + j++ + } + } + return 3 + + default: + // one MOVZ and 3 MOVKs + os[0] = c.opirr(p, AMOVZ) + os[0] |= MOVCONST(d, 0, rt) + for i = 1; i < 4; i++ { + os[i] = c.opirr(p, AMOVK) + os[i] |= MOVCONST(d, i, rt) + } + return 4 + } + default: + return 0 + } +} + +func (c *ctxt7) opbfm(p *obj.Prog, a obj.As, r int, s int, rf int, rt int) uint32 { + var b uint32 + o := c.opirr(p, a) + if (o & (1 << 31)) == 0 { + b = 32 + } else { + b = 64 + } + if r < 0 || uint32(r) >= b { + c.ctxt.Diag("illegal bit number\n%v", p) + } + o |= (uint32(r) & 0x3F) << 16 + if s < 0 || uint32(s) >= b { + c.ctxt.Diag("illegal bit number\n%v", p) + } + o |= (uint32(s) & 0x3F) << 10 + o |= (uint32(rf&31) << 5) | uint32(rt&31) + return o +} + +func (c *ctxt7) opextr(p *obj.Prog, a obj.As, v int32, rn int, rm int, rt int) uint32 { + var b uint32 + o := c.opirr(p, a) + if (o & (1 << 31)) != 0 { + b = 63 + } else { + b = 31 + } + if v < 0 || uint32(v) > b { + c.ctxt.Diag("illegal bit number\n%v", p) + } + o |= uint32(v) << 10 + o |= uint32(rn&31) << 5 + o |= uint32(rm&31) << 16 + o |= uint32(rt & 31) + return o +} + +/* generate instruction encoding for ldp and stp series */ +func (c *ctxt7) opldpstp(p *obj.Prog, o *Optab, vo int32, rbase, rl, rh, ldp uint32) uint32 { + wback := false + if o.scond == C_XPOST || o.scond == C_XPRE { + wback = true + } + switch p.As { + case ALDP, ALDPW, ALDPSW: + c.checkUnpredictable(p, true, wback, p.From.Reg, p.To.Reg, int16(p.To.Offset)) + case ASTP, ASTPW: + if wback { + c.checkUnpredictable(p, false, true, p.To.Reg, p.From.Reg, int16(p.From.Offset)) + } + case AFLDPD, AFLDPQ, AFLDPS: + c.checkUnpredictable(p, true, false, p.From.Reg, p.To.Reg, int16(p.To.Offset)) + } + var ret uint32 
+ // check offset + switch p.As { + case AFLDPQ, AFSTPQ: + if vo < -1024 || vo > 1008 || vo%16 != 0 { + c.ctxt.Diag("invalid offset %v\n", p) + } + vo /= 16 + ret = 2<<30 | 1<<26 + case AFLDPD, AFSTPD: + if vo < -512 || vo > 504 || vo%8 != 0 { + c.ctxt.Diag("invalid offset %v\n", p) + } + vo /= 8 + ret = 1<<30 | 1<<26 + case AFLDPS, AFSTPS: + if vo < -256 || vo > 252 || vo%4 != 0 { + c.ctxt.Diag("invalid offset %v\n", p) + } + vo /= 4 + ret = 1 << 26 + case ALDP, ASTP: + if vo < -512 || vo > 504 || vo%8 != 0 { + c.ctxt.Diag("invalid offset %v\n", p) + } + vo /= 8 + ret = 2 << 30 + case ALDPW, ASTPW: + if vo < -256 || vo > 252 || vo%4 != 0 { + c.ctxt.Diag("invalid offset %v\n", p) + } + vo /= 4 + ret = 0 + case ALDPSW: + if vo < -256 || vo > 252 || vo%4 != 0 { + c.ctxt.Diag("invalid offset %v\n", p) + } + vo /= 4 + ret = 1 << 30 + default: + c.ctxt.Diag("invalid instruction %v\n", p) + } + // check register pair + switch p.As { + case AFLDPQ, AFLDPD, AFLDPS, AFSTPQ, AFSTPD, AFSTPS: + if rl < REG_F0 || REG_F31 < rl || rh < REG_F0 || REG_F31 < rh { + c.ctxt.Diag("invalid register pair %v\n", p) + } + case ALDP, ALDPW, ALDPSW: + if rl < REG_R0 || REG_R30 < rl || rh < REG_R0 || REG_R30 < rh { + c.ctxt.Diag("invalid register pair %v\n", p) + } + case ASTP, ASTPW: + if rl < REG_R0 || REG_R31 < rl || rh < REG_R0 || REG_R31 < rh { + c.ctxt.Diag("invalid register pair %v\n", p) + } + } + // other conditional flag bits + switch o.scond { + case C_XPOST: + ret |= 1 << 23 + case C_XPRE: + ret |= 3 << 23 + default: + ret |= 2 << 23 + } + ret |= 5<<27 | (ldp&1)<<22 | uint32(vo&0x7f)<<15 | (rh&31)<<10 | (rbase&31)<<5 | (rl & 31) + return ret +} + +func (c *ctxt7) maskOpvldvst(p *obj.Prog, o1 uint32) uint32 { + if p.As == AVLD1 || p.As == AVST1 { + return o1 + } + + o1 &^= 0xf000 // mask out "opcode" field (bit 12-15) + switch p.As { + case AVLD1R, AVLD2R: + o1 |= 0xC << 12 + case AVLD3R, AVLD4R: + o1 |= 0xE << 12 + case AVLD2, AVST2: + o1 |= 8 << 12 + case AVLD3, AVST3: + o1 |= 4 
<< 12 + case AVLD4, AVST4: + default: + c.ctxt.Diag("unsupported instruction:%v\n", p.As) + } + return o1 +} + +/* + * size in log2(bytes) + */ +func movesize(a obj.As) int { + switch a { + case AFMOVQ: + return 4 + + case AMOVD, AFMOVD: + return 3 + + case AMOVW, AMOVWU, AFMOVS: + return 2 + + case AMOVH, AMOVHU: + return 1 + + case AMOVB, AMOVBU: + return 0 + + default: + return -1 + } +} + +// rm is the Rm register value, o is the extension, amount is the left shift value. +func roff(rm int16, o uint32, amount int16) uint32 { + return uint32(rm&31)<<16 | o<<13 | uint32(amount)<<10 +} + +// encRegShiftOrExt returns the encoding of shifted/extended register, Rx<> 5) & 7 + rm = r & 31 + switch { + case REG_UXTB <= r && r < REG_UXTH: + return roff(rm, 0, num) + case REG_UXTH <= r && r < REG_UXTW: + return roff(rm, 1, num) + case REG_UXTW <= r && r < REG_UXTX: + if a.Type == obj.TYPE_MEM { + if num == 0 { + return roff(rm, 2, 2) + } else { + return roff(rm, 2, 6) + } + } else { + return roff(rm, 2, num) + } + case REG_UXTX <= r && r < REG_SXTB: + return roff(rm, 3, num) + case REG_SXTB <= r && r < REG_SXTH: + return roff(rm, 4, num) + case REG_SXTH <= r && r < REG_SXTW: + return roff(rm, 5, num) + case REG_SXTW <= r && r < REG_SXTX: + if a.Type == obj.TYPE_MEM { + if num == 0 { + return roff(rm, 6, 2) + } else { + return roff(rm, 6, 6) + } + } else { + return roff(rm, 6, num) + } + case REG_SXTX <= r && r < REG_SPECIAL: + if a.Type == obj.TYPE_MEM { + if num == 0 { + return roff(rm, 7, 2) + } else { + return roff(rm, 7, 6) + } + } else { + return roff(rm, 7, num) + } + case REG_LSL <= r && r < REG_ARNG: + if a.Type == obj.TYPE_MEM { // (R1)(R2<<1) + return roff(rm, 3, 6) + } else if isADDWop(p.As) { + return roff(rm, 2, num) + } + return roff(rm, 3, num) + default: + c.ctxt.Diag("unsupported register extension type.") + } + + return 0 +} + +// pack returns the encoding of the "Q" field and two arrangement specifiers. 
+func pack(q uint32, arngA, arngB uint8) uint32 { + return uint32(q)<<16 | uint32(arngA)<<8 | uint32(arngB) +} diff --git a/src/cmd/internal/obj/arm64/asm_arm64_test.go b/src/cmd/internal/obj/arm64/asm_arm64_test.go new file mode 100644 index 0000000..c52717d --- /dev/null +++ b/src/cmd/internal/obj/arm64/asm_arm64_test.go @@ -0,0 +1,171 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package arm64 + +import ( + "bytes" + "fmt" + "internal/testenv" + "os" + "path/filepath" + "regexp" + "testing" +) + +// TestLarge generates a very large file to verify that large +// program builds successfully, in particular, too-far +// conditional branches are fixed, and also verify that the +// instruction's pc can be correctly aligned even when branches +// need to be fixed. +func TestLarge(t *testing.T) { + if testing.Short() { + t.Skip("Skip in short mode") + } + testenv.MustHaveGoBuild(t) + + dir, err := os.MkdirTemp("", "testlarge") + if err != nil { + t.Fatalf("could not create directory: %v", err) + } + defer os.RemoveAll(dir) + + // generate a very large function + buf := bytes.NewBuffer(make([]byte, 0, 7000000)) + gen(buf) + + tmpfile := filepath.Join(dir, "x.s") + err = os.WriteFile(tmpfile, buf.Bytes(), 0644) + if err != nil { + t.Fatalf("can't write output: %v\n", err) + } + + pattern := `0x0080\s00128\s\(.*\)\tMOVD\t\$3,\sR3` + + // assemble generated file + cmd := testenv.Command(t, testenv.GoToolPath(t), "tool", "asm", "-S", "-o", filepath.Join(dir, "test.o"), tmpfile) + cmd.Env = append(os.Environ(), "GOOS=linux") + out, err := cmd.CombinedOutput() + if err != nil { + t.Errorf("Assemble failed: %v, output: %s", err, out) + } + matched, err := regexp.MatchString(pattern, string(out)) + if err != nil { + t.Fatal(err) + } + if !matched { + t.Errorf("The alignment is not correct: %t, output:%s\n", matched, out) + } + + // build generated file + cmd 
= testenv.Command(t, testenv.GoToolPath(t), "tool", "asm", "-o", filepath.Join(dir, "x.o"), tmpfile) + cmd.Env = append(os.Environ(), "GOOS=linux") + out, err = cmd.CombinedOutput() + if err != nil { + t.Errorf("Build failed: %v, output: %s", err, out) + } +} + +// gen generates a very large program, with a very far conditional branch. +func gen(buf *bytes.Buffer) { + fmt.Fprintln(buf, "TEXT f(SB),0,$0-0") + fmt.Fprintln(buf, "TBZ $5, R0, label") + fmt.Fprintln(buf, "CBZ R0, label") + fmt.Fprintln(buf, "BEQ label") + fmt.Fprintln(buf, "PCALIGN $128") + fmt.Fprintln(buf, "MOVD $3, R3") + for i := 0; i < 1<<19; i++ { + fmt.Fprintln(buf, "MOVD R0, R1") + } + fmt.Fprintln(buf, "label:") + fmt.Fprintln(buf, "RET") +} + +// Issue 20348. +func TestNoRet(t *testing.T) { + dir, err := os.MkdirTemp("", "testnoret") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(dir) + tmpfile := filepath.Join(dir, "x.s") + if err := os.WriteFile(tmpfile, []byte("TEXT ·stub(SB),$0-0\nNOP\n"), 0644); err != nil { + t.Fatal(err) + } + cmd := testenv.Command(t, testenv.GoToolPath(t), "tool", "asm", "-o", filepath.Join(dir, "x.o"), tmpfile) + cmd.Env = append(os.Environ(), "GOOS=linux") + if out, err := cmd.CombinedOutput(); err != nil { + t.Errorf("%v\n%s", err, out) + } +} + +// TestPCALIGN verifies the correctness of the PCALIGN by checking if the +// code can be aligned to the alignment value. +func TestPCALIGN(t *testing.T) { + testenv.MustHaveGoBuild(t) + dir, err := os.MkdirTemp("", "testpcalign") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(dir) + tmpfile := filepath.Join(dir, "test.s") + tmpout := filepath.Join(dir, "test.o") + + code1 := []byte("TEXT ·foo(SB),$0-0\nMOVD $0, R0\nPCALIGN $8\nMOVD $1, R1\nRET\n") + code2 := []byte("TEXT ·foo(SB),$0-0\nMOVD $0, R0\nPCALIGN $16\nMOVD $2, R2\nRET\n") + // If the output contains this pattern, the pc-offsite of "MOVD $1, R1" is 8 bytes aligned. 
+ out1 := `0x0008\s00008\s\(.*\)\tMOVD\t\$1,\sR1` + // If the output contains this pattern, the pc-offsite of "MOVD $2, R2" is 16 bytes aligned. + out2 := `0x0010\s00016\s\(.*\)\tMOVD\t\$2,\sR2` + var testCases = []struct { + name string + code []byte + out string + }{ + {"8-byte alignment", code1, out1}, + {"16-byte alignment", code2, out2}, + } + + for _, test := range testCases { + if err := os.WriteFile(tmpfile, test.code, 0644); err != nil { + t.Fatal(err) + } + cmd := testenv.Command(t, testenv.GoToolPath(t), "tool", "asm", "-S", "-o", tmpout, tmpfile) + cmd.Env = append(os.Environ(), "GOOS=linux") + out, err := cmd.CombinedOutput() + if err != nil { + t.Errorf("The %s build failed: %v, output: %s", test.name, err, out) + continue + } + + matched, err := regexp.MatchString(test.out, string(out)) + if err != nil { + t.Fatal(err) + } + if !matched { + t.Errorf("The %s testing failed!\ninput: %s\noutput: %s\n", test.name, test.code, out) + } + } +} + +func testvmovq() (r1, r2 uint64) + +// TestVMOVQ checks if the arm64 VMOVQ instruction is working properly. +func TestVMOVQ(t *testing.T) { + a, b := testvmovq() + if a != 0x7040201008040201 || b != 0x3040201008040201 { + t.Errorf("TestVMOVQ got: a=0x%x, b=0x%x, want: a=0x7040201008040201, b=0x3040201008040201", a, b) + } +} + +func testmovk() uint64 + +// TestMOVK makes sure MOVK with a very large constant works. See issue 52261. +func TestMOVK(t *testing.T) { + x := testmovk() + want := uint64(40000 << 48) + if x != want { + t.Errorf("TestMOVK got %x want %x\n", x, want) + } +} diff --git a/src/cmd/internal/obj/arm64/asm_arm64_test.s b/src/cmd/internal/obj/arm64/asm_arm64_test.s new file mode 100644 index 0000000..f85433c --- /dev/null +++ b/src/cmd/internal/obj/arm64/asm_arm64_test.s @@ -0,0 +1,21 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "textflag.h" + +// testvmovq() (r1, r2 uint64) +TEXT ·testvmovq(SB), NOSPLIT, $0-16 + VMOVQ $0x7040201008040201, $0x3040201008040201, V1 + VMOV V1.D[0], R0 + VMOV V1.D[1], R1 + MOVD R0, r1+0(FP) + MOVD R1, r2+8(FP) + RET + +// testmovk() uint64 +TEXT ·testmovk(SB), NOSPLIT, $0-8 + MOVD $0, R0 + MOVK $(40000<<48), R0 + MOVD R0, ret+0(FP) + RET diff --git a/src/cmd/internal/obj/arm64/doc.go b/src/cmd/internal/obj/arm64/doc.go new file mode 100644 index 0000000..4606e98 --- /dev/null +++ b/src/cmd/internal/obj/arm64/doc.go @@ -0,0 +1,296 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package arm64 implements an ARM64 assembler. Go assembly syntax is different from GNU ARM64 +syntax, but we can still follow the general rules to map between them. + +# Instructions mnemonics mapping rules + +1. Most instructions use width suffixes of instruction names to indicate operand width rather than +using different register names. + +Examples: + + ADC R24, R14, R12 <=> adc x12, x14, x24 + ADDW R26->24, R21, R15 <=> add w15, w21, w26, asr #24 + FCMPS F2, F3 <=> fcmp s3, s2 + FCMPD F2, F3 <=> fcmp d3, d2 + FCVTDH F2, F3 <=> fcvt h3, d2 + +2. Go uses .P and .W suffixes to indicate post-increment and pre-increment. + +Examples: + + MOVD.P -8(R10), R8 <=> ldr x8, [x10],#-8 + MOVB.W 16(R16), R10 <=> ldrsb x10, [x16,#16]! + MOVBU.W 16(R16), R10 <=> ldrb x10, [x16,#16]! + +3. Go uses a series of MOV instructions as load and store. + +64-bit variant ldr, str, stur => MOVD; +32-bit variant str, stur, ldrsw => MOVW; +32-bit variant ldr => MOVWU; +ldrb => MOVBU; ldrh => MOVHU; +ldrsb, sturb, strb => MOVB; +ldrsh, sturh, strh => MOVH. + +4. Go moves conditions into opcode suffix, like BLT. + +5. Go adds a V prefix for most floating-point and SIMD instructions, except cryptographic extension +instructions and floating-point(scalar) instructions. 
+ +Examples: + + VADD V5.H8, V18.H8, V9.H8 <=> add v9.8h, v18.8h, v5.8h + VLD1.P (R6)(R11), [V31.D1] <=> ld1 {v31.1d}, [x6], x11 + VFMLA V29.S2, V20.S2, V14.S2 <=> fmla v14.2s, v20.2s, v29.2s + AESD V22.B16, V19.B16 <=> aesd v19.16b, v22.16b + SCVTFWS R3, F16 <=> scvtf s16, w3 + +6. Align directive + +Go asm supports the PCALIGN directive, which indicates that the next instruction should be aligned +to a specified boundary by padding with NOOP instructions. The alignment value supported on arm64 +must be a power of 2 and in the range of [8, 2048]. + +Examples: + + PCALIGN $16 + MOVD $2, R0 // This instruction is aligned with 16 bytes. + PCALIGN $1024 + MOVD $3, R1 // This instruction is aligned with 1024 bytes. + +PCALIGN also changes the function alignment. If a function has one or more PCALIGN directives, +its address will be aligned to the same or coarser boundary, which is the maximum of all the +alignment values. + +In the following example, the function Add is aligned with 128 bytes. + +Examples: + + TEXT ·Add(SB),$40-16 + MOVD $2, R0 + PCALIGN $32 + MOVD $4, R1 + PCALIGN $128 + MOVD $8, R2 + RET + +On arm64, functions in Go are aligned to 16 bytes by default; we can also use PCALIGN to set the +function alignment. The functions that need to be aligned are preferably using NOFRAME and NOSPLIT +to avoid the impact of the prologues inserted by the assembler, so that the function address will +have the same alignment as the first hand-written instruction. + +In the following example, PCALIGN at the entry of the function Add will align its address to 2048 bytes. + +Examples: + + TEXT ·Add(SB),NOSPLIT|NOFRAME,$0 + PCALIGN $2048 + MOVD $1, R0 + MOVD $1, R1 + RET + +7. Move large constants to vector registers. + +Go asm uses VMOVQ/VMOVD/VMOVS to move 128-bit, 64-bit and 32-bit constants into vector registers, respectively. +For a 128-bit integer, it takes two 64-bit operands, for the low and high parts separately. 
+ +Examples: + + VMOVS $0x11223344, V0 + VMOVD $0x1122334455667788, V1 + VMOVQ $0x1122334455667788, $0x99aabbccddeeff00, V2 // V2=0x99aabbccddeeff001122334455667788 + +8. Move an optionally-shifted 16-bit immediate value to a register. + +The instructions are MOVK(W), MOVZ(W) and MOVN(W), the assembly syntax is "op $(uimm16<<shift), <Rd>". The <uimm16> +is the 16-bit unsigned immediate, in the range 0 to 65535. For the 32-bit variant, the <shift> is 0 or 16; for the +64-bit variant, the <shift> is 0, 16, 32 or 48. + +The current Go assembler does not accept zero shifts, such as "op $0, Rd" and "op $(0<<(16|32|48)), Rd" instructions. + +Examples: + + MOVK $(10<<32), R20 <=> movk x20, #10, lsl #32 + MOVZW $(20<<16), R8 <=> movz w8, #20, lsl #16 + MOVK $(0<<16), R10 will be reported as an error by the assembler. + +Special Cases. + +(1) umov is written as VMOV. + +(2) br is renamed JMP, blr is renamed CALL. + +(3) No need to add "W" suffix: LDARB, LDARH, LDAXRB, LDAXRH, LDTRH, LDXRB, LDXRH. + +(4) In Go assembly syntax, NOP is a zero-width pseudo-instruction that serves a generic purpose and is not +related to any real ARM64 instruction. NOOP stands for the hardware nop instruction. NOOP is an alias of +HINT $0. + +Examples: + + VMOV V13.B[1], R20 <=> mov x20, v13.b[1] + VMOV V13.H[1], R20 <=> mov w20, v13.h[1] + JMP (R3) <=> br x3 + CALL (R17) <=> blr x17 + LDAXRB (R19), R16 <=> ldaxrb w16, [x19] + NOOP <=> nop + +# Register mapping rules + +1. All basic register names are written as Rn. + +2. Go uses ZR as the zero register and RSP as the stack pointer. + +3. Bn, Hn, Dn, Sn and Qn registers are written as Fn in floating-point instructions and as Vn +in SIMD instructions. + +# Argument mapping rules + +1. The operands appear in left-to-right assignment order. + +Go reverses the arguments of most instructions. + +Examples: + + ADD R11.SXTB<<1, RSP, R25 <=> add x25, sp, w11, sxtb #1 + VADD V16, V19, V14 <=> add d14, d19, d16 + +Special Cases. 
+ +(1) Argument order is the same as in the GNU ARM64 syntax: cbz, cbnz and some store instructions, +such as str, stur, strb, sturb, strh, sturh, stlr, stlrb, stlrh, st1. + +Examples: + + MOVD R29, 384(R19) <=> str x29, [x19,#384] + MOVB.P R30, 30(R4) <=> strb w30, [x4],#30 + STLRH R21, (R19) <=> stlrh w21, [x19] + +(2) MADD, MADDW, MSUB, MSUBW, SMADDL, SMSUBL, UMADDL, UMSUBL <Rm>, <Ra>, <Rn>, <Rd> + +Examples: + + MADD R2, R30, R22, R6 <=> madd x6, x22, x2, x30 + SMSUBL R10, R3, R17, R27 <=> smsubl x27, w17, w10, x3 + +(3) FMADDD, FMADDS, FMSUBD, FMSUBS, FNMADDD, FNMADDS, FNMSUBD, FNMSUBS <Fm>, <Fa>, <Fn>, <Fd> + +Examples: + + FMADDD F30, F20, F3, F29 <=> fmadd d29, d3, d30, d20 + FNMSUBS F7, F25, F7, F22 <=> fnmsub s22, s7, s7, s25 + +(4) BFI, BFXIL, SBFIZ, SBFX, UBFIZ, UBFX $<lsb>, <Rn>, $<width>, <Rd> + +Examples: + + BFIW $16, R20, $6, R0 <=> bfi w0, w20, #16, #6 + UBFIZ $34, R26, $5, R20 <=> ubfiz x20, x26, #34, #5 + +(5) FCCMPD, FCCMPS, FCCMPED, FCCMPES <cond>, <Fm>, <Fn>, $<nzcv> + +Examples: + + FCCMPD AL, F8, F26, $0 <=> fccmp d26, d8, #0x0, al + FCCMPS VS, F29, F4, $4 <=> fccmp s4, s29, #0x4, vs + FCCMPED LE, F20, F5, $13 <=> fccmpe d5, d20, #0xd, le + FCCMPES NE, F26, F10, $0 <=> fccmpe s10, s26, #0x0, ne + +(6) CCMN, CCMNW, CCMP, CCMPW <cond>, <Rn>, $<imm>, $<nzcv> + +Examples: + + CCMP MI, R22, $12, $13 <=> ccmp x22, #0xc, #0xd, mi + CCMNW AL, R1, $11, $8 <=> ccmn w1, #0xb, #0x8, al + +(7) CCMN, CCMNW, CCMP, CCMPW <cond>, <Rn>, <Rm>, $<nzcv> + +Examples: + + CCMN VS, R13, R22, $10 <=> ccmn x13, x22, #0xa, vs + CCMPW HS, R19, R14, $11 <=> ccmp w19, w14, #0xb, cs + +(9) CSEL, CSELW, CSNEG, CSNEGW, CSINC, CSINCW <cond>, <Rn>, <Rm>, <Rd> ; +FCSELD, FCSELS <cond>, <Fn>, <Fm>, <Fd> + +Examples: + + CSEL GT, R0, R19, R1 <=> csel x1, x0, x19, gt + CSNEGW GT, R7, R17, R8 <=> csneg w8, w7, w17, gt + FCSELD EQ, F15, F18, F16 <=> fcsel d16, d15, d18, eq + +(10) TBNZ, TBZ $<imm>, <Rt>, <label>